gst-plugin-linescan/analyze_sma.py
yair c783de425a feat: Add CSV logging and analysis tools for rollingsum plugin
- Add csv-file property to log frame statistics
- Create analyze_sma.py for automated CSV analysis with visualizations
- Add comprehensive ROLLINGSUM_GUIDE.md documentation
- Include debugging guide and threshold recommendations
- Uses uv for Python dependency management
2025-11-14 14:21:40 +02:00

184 lines
6.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# /// script
# dependencies = [
# "pandas>=2.0.0",
# "matplotlib>=3.7.0",
# "numpy>=1.24.0",
# ]
# ///
"""
Rolling Sum Analysis Tool
Analyzes CSV output from the GStreamer rollingsum plugin
Usage: uv run analyze_sma.py [csv_file]
"""
import sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
def analyze_csv(csv_file: str = "output.csv"):
    """Analyze the rolling sum CSV data and generate insights.

    Prints an overview of the dataset, statistics for the column mean,
    deviation and normalized deviation, and candidate thresholds based on
    percentiles and on mean + n standard deviations of the normalized
    deviation. Finally calls ``create_plots`` and reports the plot file name.

    Args:
        csv_file: Path of the CSV file to analyze.

    The process exits with status 1, after printing an error message, when
    the file is missing, is empty, lacks a required column, or has no data
    rows.
    """
    # 'frame' and 'rolling_mean' are only read by create_plots; checking them
    # here as well reports a bad file before any statistics are printed.
    required_columns = (
        "frame",
        "column_mean",
        "rolling_mean",
        "deviation",
        "normalized_deviation",
        "dropped",
    )

    # Read the CSV
    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        print(f"Error: CSV file '{csv_file}' not found.")
        sys.exit(1)
    except pd.errors.EmptyDataError:
        # Raised by pandas for a file with no header and no data at all.
        print(f"Error: CSV file '{csv_file}' is empty.")
        sys.exit(1)

    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        print(f"Error: CSV file '{csv_file}' is missing columns: {', '.join(missing)}")
        sys.exit(1)
    if df.empty:
        # Header-only file: every statistic would be NaN and the drop rate 0/0.
        print(f"Error: CSV file '{csv_file}' contains no data rows.")
        sys.exit(1)

    total_frames = len(df)
    dropped = df['dropped']
    column_mean = df['column_mean']
    deviation = df['deviation']
    norm_dev = df['normalized_deviation']

    def report_threshold(label: str, threshold: float) -> None:
        """Print a candidate threshold and how many frames exceed it."""
        frames_dropped = (norm_dev > threshold).sum()
        drop_rate = (frames_dropped / total_frames) * 100
        print(f" {label}: threshold={threshold:.8f}")
        print(f" → Would drop {frames_dropped} frames ({drop_rate:.1f}%)")

    print("=" * 80)
    print(f"ROLLING SUM ANALYSIS - {csv_file}")
    print("=" * 80)
    print()

    # Basic statistics
    print("DATASET OVERVIEW:")
    print(f" Total frames: {total_frames}")
    print(f" Frames dropped: {dropped.sum()}")
    print(f" Frames kept: {(dropped == 0).sum()}")
    print(f" Drop rate: {dropped.mean() * 100:.2f}%")
    print()

    # Column mean statistics
    col_min = column_mean.min()
    col_max = column_mean.max()
    print("COLUMN MEAN STATISTICS:")
    print(f" Min: {col_min:.6f}")
    print(f" Max: {col_max:.6f}")
    print(f" Range: {col_max - col_min:.6f}")
    print(f" Mean: {column_mean.mean():.6f}")
    print(f" Std Dev: {column_mean.std():.6f}")
    print()

    # Deviation statistics
    print("DEVIATION STATISTICS:")
    print(f" Min deviation: {deviation.min():.6f}")
    print(f" Max deviation: {deviation.max():.6f}")
    print(f" Mean deviation: {deviation.mean():.6f}")
    print(f" Std dev of deviations: {deviation.std():.6f}")
    print()

    # Normalized deviation statistics
    mean_norm_dev = norm_dev.mean()
    print("NORMALIZED DEVIATION STATISTICS:")
    print(f" Min: {norm_dev.min():.8f}")
    print(f" Max: {norm_dev.max():.8f}")
    print(f" Mean: {mean_norm_dev:.8f}")
    print(f" Median: {norm_dev.median():.8f}")
    print(f" 95th percentile: {norm_dev.quantile(0.95):.8f}")
    print(f" 99th percentile: {norm_dev.quantile(0.99):.8f}")
    print()

    # Threshold recommendations
    print("THRESHOLD RECOMMENDATIONS:")
    print(" (Based on normalized deviation percentiles)")
    print()
    for p in (50, 75, 90, 95, 99):
        report_threshold(f"{p}th percentile", norm_dev.quantile(p / 100))
    print()

    # Suggest thresholds based on standard deviations above the mean
    std_norm_dev = norm_dev.std()
    print("STANDARD DEVIATION-BASED THRESHOLDS:")
    for n in (1, 2, 3):
        report_threshold(f"Mean + {n}σ", mean_norm_dev + (n * std_norm_dev))
    print()

    # Create visualizations
    create_plots(df, csv_file)

    # Derive the plot name from the file stem rather than str.replace(), which
    # rewrites every '.csv' occurrence in the path and leaves names without
    # '.csv' unchanged.
    csv_path = Path(csv_file)
    plot_path = csv_path.with_name(f"{csv_path.stem}_analysis.png")
    print("=" * 80)
    print("PLOTS SAVED:")
    print(f" - {plot_path}")
    print("=" * 80)
def _plot_column_mean(ax, df: pd.DataFrame) -> None:
    """Draw the per-frame column mean together with the rolling mean."""
    ax.plot(df['frame'], df['column_mean'], label='Column Mean', linewidth=1)
    ax.plot(df['frame'], df['rolling_mean'], label='Rolling Mean', linewidth=1, alpha=0.7)
    ax.set_xlabel('Frame')
    ax.set_ylabel('Pixel Value')
    ax.set_title('Column Mean vs Rolling Mean')
    ax.legend()
    ax.grid(True, alpha=0.3)


def _plot_deviation(ax, df: pd.DataFrame) -> None:
    """Draw the deviation per frame with its mean and 95th percentile lines."""
    deviation = df['deviation']
    dev_mean = deviation.mean()
    dev_p95 = deviation.quantile(0.95)
    ax.plot(df['frame'], deviation, linewidth=1, color='orange')
    ax.axhline(y=dev_mean, color='r', linestyle='--', label=f'Mean: {dev_mean:.4f}')
    ax.axhline(y=dev_p95, color='g', linestyle='--', label=f'95th: {dev_p95:.4f}')
    ax.set_xlabel('Frame')
    ax.set_ylabel('Absolute Deviation')
    ax.set_title('Deviation from Rolling Mean')
    ax.legend()
    ax.grid(True, alpha=0.3)


def _plot_norm_dev_histogram(ax, norm_dev: pd.Series) -> None:
    """Draw a 50-bin histogram of normalized deviation with mean/median lines."""
    nd_mean = norm_dev.mean()
    nd_median = norm_dev.median()
    ax.hist(norm_dev, bins=50, edgecolor='black', alpha=0.7)
    ax.axvline(x=nd_mean, color='r', linestyle='--', label=f'Mean: {nd_mean:.6f}')
    ax.axvline(x=nd_median, color='g', linestyle='--', label=f'Median: {nd_median:.6f}')
    ax.set_xlabel('Normalized Deviation')
    ax.set_ylabel('Frequency')
    ax.set_title('Normalized Deviation Distribution')
    ax.legend()
    ax.grid(True, alpha=0.3, axis='y')


def _plot_norm_dev_cdf(ax, norm_dev: pd.Series) -> None:
    """Draw the empirical cumulative distribution with percentile markers."""
    sorted_norm_dev = np.sort(norm_dev)
    sample_count = len(sorted_norm_dev)
    # Rank of each sorted sample expressed as a percentage of all samples.
    cumulative = np.arange(1, sample_count + 1) / sample_count * 100
    ax.plot(sorted_norm_dev, cumulative, linewidth=2)
    # Mark percentiles
    for p in (50, 75, 90, 95, 99):
        threshold = norm_dev.quantile(p / 100)
        ax.axvline(x=threshold, color='red', linestyle=':', alpha=0.5)
        ax.text(threshold, p, f'{p}th', rotation=90, va='bottom', ha='right', fontsize=8)
    ax.set_xlabel('Normalized Deviation')
    ax.set_ylabel('Cumulative Percentage (%)')
    ax.set_title('Cumulative Distribution Function')
    ax.grid(True, alpha=0.3)


def create_plots(df: pd.DataFrame, csv_file: str):
    """Create analysis plots.

    Builds a 2x2 figure (column mean vs rolling mean, deviation over frames,
    normalized deviation histogram, cumulative distribution) and saves it as
    a PNG at 150 dpi.

    Args:
        df: Frame statistics; the ``frame``, ``column_mean``,
            ``rolling_mean``, ``deviation`` and ``normalized_deviation``
            columns are read.
        csv_file: Path of the source CSV. Shown in the figure title and used
            to derive the output file ``<stem>_analysis.png`` in the same
            directory.
    """
    # Derive the output name from the file stem rather than str.replace(),
    # which rewrites every '.csv' occurrence in the path and, for a name
    # without '.csv', would hand the input path itself to savefig.
    csv_path = Path(csv_file)
    output_file = csv_path.with_name(f"{csv_path.stem}_analysis.png")

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    try:
        fig.suptitle(f'Rolling Sum Analysis - {csv_file}', fontsize=16, fontweight='bold')

        norm_dev = df['normalized_deviation']
        _plot_column_mean(axes[0, 0], df)
        _plot_deviation(axes[0, 1], df)
        _plot_norm_dev_histogram(axes[1, 0], norm_dev)
        _plot_norm_dev_cdf(axes[1, 1], norm_dev)

        fig.tight_layout()

        # Save the plot
        fig.savefig(output_file, dpi=150, bbox_inches='tight')
        print(f"\n✓ Saved analysis plot to: {output_file}\n")
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
if __name__ == "__main__":
    # The first command-line argument, when given, names the CSV to analyze;
    # otherwise fall back to "output.csv".
    cli_args = sys.argv[1:]
    csv_file = cli_args[0] if cli_args else "output.csv"

    if Path(csv_file).exists():
        analyze_csv(csv_file)
    else:
        # Report the missing file together with a usage hint, then fail.
        print(f"Error: File '{csv_file}' not found.")
        print("Usage: uv run analyze_sma.py [csv_file]")
        sys.exit(1)