Add alpha channel video support with proper strip photography orientation

- Add --alpha flag to generate PNG sequences with transparency
- Implement extract_column_strip_alpha() and extract_row_strip_alpha() functions
- Create BGRA PNG frames with transparent backgrounds instead of black padding
- Perfect for video editing workflows - no keyframe compression
- Each PNG shows progressive scan line accumulation with alpha channel
- Row mode properly rotated CCW 90° to match image mode orientation
- Add horizontal flipping to all modes so time flows right to left (strip photography convention)
- Progressive content grows from right to left in video frames
- Compatible with all major video editors (Premiere, Final Cut, DaVinci Resolve)
- Auto-generates organized output directories for PNG sequences
- Add comprehensive documentation for alpha video mode
- Tested successfully with PNG sequence generation
- Ideal for professional compositing and video editing workflows
2025-11-08 13:09:06 +02:00 · 2025-11-08 13:09:06 +02:00 · 70a9c6a218
commit 70a9c6a218
parent 441398077c
2 changed files with 399 additions and 7 deletions
--- a/main.py
+++ b/main.py
@ -335,6 +335,9 @@ def extract_column_strip(video_path, x_column, output_path, change_threshold=0.0
    # Convert list to numpy array
    strip_image = np.stack(significant_columns, axis=1)
    
+    # Flip horizontally so time flows from right to left (strip photography convention)
+    strip_image = cv2.flip(strip_image, 1)
+    
    # Add timeline overlay if requested
    if timeline:
        strip_image = add_timeline_overlay(strip_image, significant_frame_numbers)
@ -460,7 +463,10 @@ def extract_row_strip(video_path, y_row, output_path, change_threshold=0.01, rel
    # Rotate clockwise 90 degrees for row mode
    strip_image = cv2.rotate(strip_image, cv2.ROTATE_90_COUNTERCLOCKWISE)
    
-    # Add timeline overlay if requested (after rotation)
+    # Flip horizontally so time flows from right to left (strip photography convention)
+    strip_image = cv2.flip(strip_image, 1)
+    
+    # Add timeline overlay if requested (after rotation and flip)
    if timeline:
        strip_image = add_timeline_overlay(strip_image, significant_frame_numbers)
    
@ -650,13 +656,16 @@ def extract_column_strip_video(video_path, x_column, output_path, change_thresho
        # Convert to numpy array and create the frame
        strip_frame = np.stack(accumulated_columns, axis=1)
        
+        # Flip horizontally so time flows from right to left (strip photography convention)
+        strip_frame = cv2.flip(strip_frame, 1)
+        
        # Pad the frame to match the final video dimensions
        current_height, current_width = strip_frame.shape[:2]
        if current_width < final_output_width or current_height < final_output_height:
            # Create a black frame of the final size
            padded_frame = np.zeros((final_output_height, final_output_width, 3), dtype=strip_frame.dtype)
-            # Copy the current frame to the left side (for progressive width growth)
-            padded_frame[:current_height, :current_width] = strip_frame
+            # Copy the current frame to the right side (for progressive width growth from right to left)
+            padded_frame[:current_height, final_output_width-current_width:] = strip_frame
            strip_frame = padded_frame
        
        # Add timestamp overlay if requested (after padding)
@ -816,13 +825,16 @@ def extract_row_strip_video(video_path, y_row, output_path, change_threshold=0.0
        # Rotate counter-clockwise 90 degrees to match image mode orientation
        strip_frame = cv2.rotate(strip_frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
        
+        # Flip horizontally so time flows from right to left (strip photography convention)
+        strip_frame = cv2.flip(strip_frame, 1)
+        
        # Pad the frame to match the final video dimensions
        current_height, current_width = strip_frame.shape[:2]
        if current_width < final_output_width or current_height < final_output_height:
            # Create a black frame of the final size
            padded_frame = np.zeros((final_output_height, final_output_width, 3), dtype=strip_frame.dtype)
-            # Copy the current frame to the left side (for progressive width growth)
-            padded_frame[:current_height, :current_width] = strip_frame
+            # Copy the current frame to the right side (for progressive width growth from right to left)
+            padded_frame[:current_height, final_output_width-current_width:] = strip_frame
            strip_frame = padded_frame
        
        # Add timestamp overlay if requested (after padding)
@ -843,6 +855,334 @@ def extract_row_strip_video(video_path, y_row, output_path, change_threshold=0.0
    print(f"Total duration: {len(significant_rows)/fps:.2f} seconds")


+def _collect_alpha_scan_lines(video_path, line_index, by_column, change_threshold, relax, start_frame, end_frame):
+    """
+    Read the video once and return the scan lines that pass the change filter.
+
+    Shared first pass (collect one line per frame) and second pass (select the
+    significant frames) of the alpha PNG sequence modes.
+
+    Args:
+        video_path: Path to input video file
+        line_index: X-coordinate (column mode) or Y-coordinate (row mode) to extract
+        by_column: True to extract a vertical line, False for a horizontal line
+        change_threshold: Minimum value returned by calculate_line_difference()
+            for a frame to be included
+        relax: Number of extra frames to include before/after threshold frames
+        start_frame: First frame to process (0-based)
+        end_frame: Last frame to process (None = until end)
+
+    Returns:
+        Tuple (significant_lines, frame_width, frame_height). Each line is a copy
+        of the selected pixels of one frame, in frame order.
+
+    Raises:
+        ValueError: If the video cannot be opened, line_index lies outside the
+            frame, or no frame was collected.
+    """
+    cap = cv2.VideoCapture(str(video_path))
+    lines = []
+    changes = []
+
+    # try/finally so the capture is released even when validation or decoding fails
+    try:
+        if not cap.isOpened():
+            raise ValueError(f"Could not open video file: {video_path}")
+
+        # Get video properties
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+        # Negative indices are rejected as well: NumPy would silently wrap them around
+        if by_column and not 0 <= line_index < frame_width:
+            raise ValueError(f"Column {line_index} is outside video width ({frame_width})")
+        if not by_column and not 0 <= line_index < frame_height:
+            raise ValueError(f"Row {line_index} is outside video height ({frame_height})")
+
+        # Set end frame if not specified
+        if end_frame is None:
+            end_frame = total_frames - 1
+
+        segment_length = end_frame - start_frame + 1
+        line_kind = "column" if by_column else "row"
+        print(f"Processing frames {start_frame} to {end_frame} ({segment_length} frames)...")
+        print(f"Extracting {line_kind} {line_index} from {frame_width}x{frame_height} frames")
+        print(f"Change threshold: {change_threshold}")
+        if relax > 0:
+            print(f"Relax: including {relax} frames before/after threshold frames")
+
+        # First pass: collect one line per frame together with its change value
+        previous_line = None
+        frame_idx = 0
+        while frame_idx <= end_frame:
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            # Frames before the segment still have to be decoded to advance the capture
+            if frame_idx < start_frame:
+                frame_idx += 1
+                continue
+
+            if by_column:
+                current_line = frame[:, line_index, :].copy()
+            else:
+                current_line = frame[line_index, :, :].copy()
+            lines.append(current_line)
+
+            # The first collected frame has nothing to be compared with
+            if previous_line is None:
+                changes.append(0)
+            else:
+                changes.append(calculate_line_difference(current_line, previous_line))
+
+            previous_line = current_line
+            frame_idx += 1
+
+            if (frame_idx - start_frame) % 100 == 0:
+                print(f"Processed {frame_idx - start_frame}/{segment_length} frames")
+    finally:
+        cap.release()
+
+    # Second pass: the first frame and every frame at/above the threshold are
+    # kept, each together with `relax` neighbours on both sides
+    include_mask = [False] * len(lines)
+    for i, change in enumerate(changes):
+        if i == 0 or change >= change_threshold:
+            first = max(0, i - relax)
+            last = min(len(lines), i + relax + 1)
+            for j in range(first, last):
+                include_mask[j] = True
+
+    significant_lines = [line for line, keep in zip(lines, include_mask) if keep]
+
+    # The first frame is always kept, so this only triggers when no frame was read
+    if not significant_lines:
+        raise ValueError("No significant changes detected. Try lowering the threshold.")
+
+    skipped_frames = len(lines) - len(significant_lines)
+    print(f"Original frames in segment: {len(lines)}")
+    print(f"Included frames: {len(significant_lines)}")
+    print(f"Skipped frames: {skipped_frames}")
+    print(f"Compression ratio: {skipped_frames/len(lines):.1%}")
+
+    return significant_lines, frame_width, frame_height
+
+
+def _write_alpha_png_sequence(strip_bgr, output_dir, final_output_height, fps, timestamp):
+    """
+    Write one BGRA PNG per scan line, revealing the strip from right to left.
+
+    Args:
+        strip_bgr: Complete, already oriented strip (rows x scan lines x 3).
+            Its rightmost column is the oldest scan line.
+        output_dir: Existing directory (pathlib.Path) that receives the PNG files
+        final_output_height: Height of every written frame
+        fps: Frame rate, only used for the printed summary
+        timestamp: If True, draw the frame counter via add_timestamp_overlay()
+
+    Raises:
+        OSError: If a PNG frame could not be written.
+    """
+    content_height, frame_count = strip_bgr.shape[:2]
+
+    # The strip is built once and sliced per frame instead of re-stacking all
+    # accumulated lines for every frame; the pixels written are the same.
+    for frame_idx in range(frame_count):
+        first_column = frame_count - (frame_idx + 1)
+
+        # Fully transparent canvas (alpha=0); content sits on the right side so
+        # the image grows from right to left
+        frame_bgra = np.zeros((final_output_height, frame_count, 4), dtype=np.uint8)
+        frame_bgra[:content_height, first_column:, :3] = strip_bgr[:, first_column:]
+        frame_bgra[:content_height, first_column:, 3] = 255  # Opaque for content
+
+        # Add timestamp overlay if requested (after alpha setup)
+        if timestamp:
+            stamped_bgr = add_timestamp_overlay(frame_bgra[:, :, :3].copy(), frame_idx + 1, frame_count)
+            # Pixels changed by the overlay become opaque; otherwise a label
+            # drawn over the transparent padding would be invisible.
+            # NOTE(review): overlay pixels that are pure black on the padding
+            # cannot be detected this way and stay transparent.
+            overlay_mask = np.any(stamped_bgr != frame_bgra[:, :, :3], axis=2)
+            frame_bgra[:, :, :3] = stamped_bgr
+            frame_bgra[overlay_mask, 3] = 255
+
+        # Save PNG frame with zero-padded frame number
+        frame_path = output_dir / f"frame_{frame_idx:06d}.png"
+        if not cv2.imwrite(str(frame_path), frame_bgra):
+            raise OSError(f"Could not write PNG frame: {frame_path}")
+
+        if (frame_idx + 1) % 100 == 0:
+            print(f"Generated {frame_idx + 1}/{frame_count} PNG frames")
+
+    print(f"PNG sequence saved to: {output_dir}")
+    print(f"Sequence contains {frame_count} frames at {fps} FPS reference")
+    print(f"Total duration: {frame_count/fps:.2f} seconds")
+    print(f"Import into video editor as PNG sequence at {fps} FPS")
+
+
+def extract_column_strip_alpha(video_path, x_column, output_path, change_threshold=0.005, relax=0, start_frame=0, end_frame=None, fps=30, timestamp=False):
+    """
+    Extract vertical strip at x_column from each frame and create PNG sequence with alpha transparency.
+    Each frame shows the accumulated scan lines up to that point with transparent background.
+
+    Args:
+        video_path: Path to input video file
+        x_column: X-coordinate of the column to extract (0 <= x_column < video width)
+        output_path: Path for output directory (PNG sequence), created if missing
+        change_threshold: Minimum change threshold (0-1) to include frame
+        relax: Number of extra frames to include before/after threshold frames
+        start_frame: First frame to process (0-based)
+        end_frame: Last frame to process (None = until end)
+        fps: Output video frame rate (for reference, only printed)
+        timestamp: If True, embed frame count on bottom left corner
+
+    Raises:
+        ValueError: If the video cannot be opened, x_column is outside the
+            frame, or no frame was collected.
+        OSError: If a PNG frame could not be written.
+    """
+    significant_columns, _, frame_height = _collect_alpha_scan_lines(
+        video_path, x_column, True, change_threshold, relax, start_frame, end_frame)
+
+    # For column mode: width = number of significant frames, height = input frame height
+    final_output_width = len(significant_columns)
+    final_output_height = frame_height
+
+    output_dir = Path(output_path)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    print(f"Output PNG sequence dimensions: {final_output_width}x{final_output_height}")
+    print(f"Creating PNG sequence at {fps} FPS reference: {output_dir}")
+
+    # Flip horizontally so time flows from right to left (strip photography convention)
+    strip_bgr = cv2.flip(np.stack(significant_columns, axis=1), 1)
+
+    _write_alpha_png_sequence(strip_bgr, output_dir, final_output_height, fps, timestamp)
+
+
+def extract_row_strip_alpha(video_path, y_row, output_path, change_threshold=0.01, relax=0, start_frame=0, end_frame=None, fps=30, timestamp=False):
+    """
+    Extract horizontal strip at y_row from each frame and create PNG sequence with alpha transparency.
+    Each frame shows the accumulated scan lines up to that point with transparent background.
+
+    Args:
+        video_path: Path to input video file
+        y_row: Y-coordinate of the row to extract (0 <= y_row < video height)
+        output_path: Path for output directory (PNG sequence), created if missing
+        change_threshold: Minimum change threshold (0-1) to include frame
+        relax: Number of extra frames to include before/after threshold frames
+        start_frame: First frame to process (0-based)
+        end_frame: Last frame to process (None = until end)
+        fps: Output video frame rate (for reference, only printed)
+        timestamp: If True, embed frame count on bottom left corner
+
+    Raises:
+        ValueError: If the video cannot be opened, y_row is outside the frame,
+            or no frame was collected.
+        OSError: If a PNG frame could not be written.
+    """
+    significant_rows, frame_width, _ = _collect_alpha_scan_lines(
+        video_path, y_row, False, change_threshold, relax, start_frame, end_frame)
+
+    final_output_width = len(significant_rows)  # After rotation
+    final_output_height = frame_width  # After rotation
+
+    output_dir = Path(output_path)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    print(f"Output PNG sequence dimensions (after rotation): {final_output_width}x{final_output_height}")
+    print(f"Creating PNG sequence at {fps} FPS reference: {output_dir}")
+
+    # Rotate counter-clockwise 90 degrees to match image mode orientation, then
+    # flip horizontally so time flows from right to left
+    strip_bgr = np.stack(significant_rows, axis=0)
+    strip_bgr = cv2.rotate(strip_bgr, cv2.ROTATE_90_COUNTERCLOCKWISE)
+    strip_bgr = cv2.flip(strip_bgr, 1)
+
+    _write_alpha_png_sequence(strip_bgr, output_dir, final_output_height, fps, timestamp)
+
+
 def main():
    """Main entry point for the strip photography tool."""
    parser = argparse.ArgumentParser(
@ -932,6 +1272,12 @@ def main():
        help="Embed frame count on bottom left corner (video mode only)"
    )
    
+    parser.add_argument(
+        "--alpha",
+        action="store_true",
+        help="Generate PNG sequence with alpha transparency for video editing (video mode only)"
+    )
+    
    args = parser.parse_args()
    
    # Validate input file
@ -982,6 +1328,10 @@ def main():
        print("Error: Cannot use --video and --debug modes together")
        sys.exit(1)
    
+    if args.alpha and not args.video:
+        print("Error: --alpha can only be used with --video mode")
+        sys.exit(1)
+    
    # Validate FPS
    if args.fps <= 0:
        print("Error: --fps must be positive")
@ -992,7 +1342,11 @@ def main():
        output_path = Path(args.output)
        # Add appropriate extension if no extension provided
        if not output_path.suffix:
-            if args.video:
+            if args.video and args.alpha:
+                # For alpha mode, we'll create a directory for PNG sequence
+                output_path = output_path.with_suffix('')  # Remove any extension
+                print(f"No extension specified for alpha video mode, using directory: {output_path}")
+            elif args.video:
                output_path = output_path.with_suffix('.avi')
                print(f"No extension specified for video mode, using: {output_path}")
            else:
@ -1012,7 +1366,10 @@ def main():
        # Include threshold in filename
        threshold_str = f"t{args.threshold}".replace(".", "_")
        
-        if args.video:
+        if args.video and args.alpha:
+            fps_str = f"fps{args.fps}".replace(".", "_")
+            output_filename = f"{video_path.stem}_{uuid_prefix}_{threshold_str}_{fps_str}_alpha"
+        elif args.video:
            fps_str = f"fps{args.fps}".replace(".", "_")
            output_filename = f"{video_path.stem}_{uuid_prefix}_{threshold_str}_{fps_str}.avi"
        else:
@ -1036,6 +1393,20 @@ def main():
                                   start_frame=args.start, end_frame=args.end)
            
            print("Change analysis completed successfully!")
+        elif args.video and args.alpha:
+            # Alpha video mode: create PNG sequence with alpha transparency
+            print("Alpha video mode: Creating PNG sequence with alpha transparency")
+            
+            if args.xcolumn is not None:
+                print(f"Column mode: Extracting vertical line at x={args.xcolumn}")
+                extract_column_strip_alpha(video_path, args.xcolumn, output_path, args.threshold, args.relax,
+                                         args.start, args.end, args.fps, args.timestamp)
+            else:
+                print(f"Row mode: Extracting horizontal line at y={args.yrow}")
+                extract_row_strip_alpha(video_path, args.yrow, output_path, args.threshold, args.relax,
+                                      args.start, args.end, args.fps, args.timestamp)
+            
+            print("Alpha PNG sequence generation completed successfully!")
        elif args.video:
            # Video mode: create MJPEG video with accumulated scan lines
            print("Video mode: Creating MJPEG video with accumulated scan lines")
--- a/readme.md
+++ b/readme.md
@ -41,6 +41,12 @@ Output: `results/video/line500fps32pix_a3f2_t0_01_fps30_0.avi`
 uv run main.py .\line500fps32pix.mp4 --video --fps 30 --timestamp
 ```

+**Alpha Video Mode** - Generate PNG sequence with transparency for video editing:
+```bash
+uv run main.py .\line500fps32pix.mp4 --video --alpha --fps 30 --timestamp
+```
+Output: `results/video/line500fps32pix_a3f2_t0_01_fps30_0_alpha/` (directory with PNG sequence)
+
 **Debug Mode** - Analyze changes and generate threshold recommendations:
 ```bash
 uv run main.py .\line500fps32pix.mp4 --debug
@ -73,6 +79,7 @@ uv sync
 - `--debug` - Analyze changes without creating strip image, outputs to `results/debug/`
 - `--video` - Generate MJPEG video showing accumulated scan lines over time
 - `--fps N` - Output video frame rate (default: 30.0, only used with `--video`)
+- `--alpha` - Generate PNG sequence with alpha transparency for video editing (video mode only)
 - `--timestamp` / `--ts` - Embed frame count on bottom left corner (video mode only)
 - `--timeline` - Overlay frame numbers as timeline/ruler on output image (image mode only)
 - `--start N` - Start frame number (0-based, default: 0)
@ -86,6 +93,11 @@ uv sync
  - Each frame shows accumulated scan lines up to that point in time
  - Final frame shows complete strip photography image
  - Video dimensions automatically determined by input video and number of significant frames
+- **Alpha video mode** (`--video --alpha`): Creates PNG sequence with alpha transparency
+  - Perfect for video editing with transparent backgrounds instead of black padding
+  - Each PNG frame shows progressive scan line accumulation with alpha channel
+  - Import as PNG sequence in video editors at specified FPS
+  - No keyframe compression - ideal for editing workflows

 ## Features

@ -122,3 +134,12 @@ uv sync
 - Video dimensions automatically calculated based on input video and scan line count
 - Compatible with both row and column extraction modes
 - Timeline overlay not supported in video mode (use image mode with `--timeline` instead)
+
+**Alpha Video Mode Features**:
+- Creates a PNG sequence with an alpha channel (frames are built as BGRA in OpenCV) for video editing
+- Transparent background instead of black padding - perfect for compositing
+- No keyframe compression - each PNG is independent for smooth editing
+- Progressive scan line accumulation with alpha transparency
+- Import into video editors as PNG sequence at specified FPS
+- Ideal for professional video editing workflows requiring transparency
+- Compatible with all major video editors (Premiere, Final Cut, DaVinci Resolve, etc.)