feat: Add CSV logging and analysis tools for rollingsum plugin
- Add csv-file property to log frame statistics
- Create analyze_sma.py for automated CSV analysis with visualizations
- Add comprehensive ROLLINGSUM_GUIDE.md documentation
- Include debugging guide and threshold recommendations
- Uses uv for Python dependency management
This commit is contained in:
184
analyze_sma.py
Normal file
184
analyze_sma.py
Normal file
@@ -0,0 +1,184 @@
|
||||
#!/usr/bin/env python3
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "pandas>=2.0.0",
|
||||
# "matplotlib>=3.7.0",
|
||||
# "numpy>=1.24.0",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
"""
|
||||
Rolling Sum Analysis Tool
|
||||
Analyzes CSV output from the GStreamer rollingsum plugin
|
||||
Usage: uv run analyze_sma.py [csv_file]
|
||||
"""
|
||||
|
||||
import sys
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def analyze_csv(csv_file: str = "output.csv") -> None:
    """Print summary statistics and threshold suggestions for a rollingsum CSV.

    Loads ``csv_file`` with pandas, prints dataset, column-mean, deviation and
    normalized-deviation statistics, lists candidate drop thresholds
    (percentile based and mean + n*sigma based), then calls ``create_plots``
    to render the figure and prints where the plot was saved.

    Args:
        csv_file: Path of the CSV log to analyze.

    Raises:
        SystemExit: With status 1, after printing an error, when the file is
            missing, has nothing to parse, lacks a column this report reads,
            or contains no data rows.
    """
    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        print(f"Error: CSV file '{csv_file}' not found.")
        sys.exit(1)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header line for pandas to parse.
        print(f"Error: CSV file '{csv_file}' is empty.")
        sys.exit(1)

    # Check the columns this report reads up front, so a malformed file is
    # rejected with one clear message instead of a KeyError mid-report.
    required = ("dropped", "column_mean", "deviation", "normalized_deviation")
    missing = [name for name in required if name not in df.columns]
    if missing:
        print(
            f"Error: CSV file '{csv_file}' is missing column(s): "
            f"{', '.join(missing)}"
        )
        sys.exit(1)

    # With zero rows every statistic is NaN and the drop rates divide by zero.
    if df.empty:
        print(f"Error: CSV file '{csv_file}' contains no data rows.")
        sys.exit(1)

    total = len(df)
    dropped = df['dropped']
    col_mean = df['column_mean']
    deviation = df['deviation']
    norm_dev = df['normalized_deviation']

    def print_drop_estimate(label: str, threshold: float) -> None:
        """Print one candidate threshold and how many frames exceed it."""
        would_drop = (norm_dev > threshold).sum()
        rate = (would_drop / total) * 100
        print(f" {label}: threshold={threshold:.8f}")
        print(f" → Would drop {would_drop} frames ({rate:.1f}%)")
        print()

    print("=" * 80)
    print(f"ROLLING SUM ANALYSIS - {csv_file}")
    print("=" * 80)
    print()

    # Basic statistics
    print("DATASET OVERVIEW:")
    print(f" Total frames: {total}")
    print(f" Frames dropped: {dropped.sum()}")
    print(f" Frames kept: {(dropped == 0).sum()}")
    print(f" Drop rate: {dropped.mean() * 100:.2f}%")
    print()

    # Column mean statistics
    print("COLUMN MEAN STATISTICS:")
    print(f" Min: {col_mean.min():.6f}")
    print(f" Max: {col_mean.max():.6f}")
    print(f" Range: {col_mean.max() - col_mean.min():.6f}")
    print(f" Mean: {col_mean.mean():.6f}")
    print(f" Std Dev: {col_mean.std():.6f}")
    print()

    # Deviation statistics
    print("DEVIATION STATISTICS:")
    print(f" Min deviation: {deviation.min():.6f}")
    print(f" Max deviation: {deviation.max():.6f}")
    print(f" Mean deviation: {deviation.mean():.6f}")
    print(f" Std dev of deviations: {deviation.std():.6f}")
    print()

    # Normalized deviation statistics
    print("NORMALIZED DEVIATION STATISTICS:")
    print(f" Min: {norm_dev.min():.8f}")
    print(f" Max: {norm_dev.max():.8f}")
    print(f" Mean: {norm_dev.mean():.8f}")
    print(f" Median: {norm_dev.median():.8f}")
    print(f" 95th percentile: {norm_dev.quantile(0.95):.8f}")
    print(f" 99th percentile: {norm_dev.quantile(0.99):.8f}")
    print()

    # Threshold recommendations
    print("THRESHOLD RECOMMENDATIONS:")
    print(" (Based on normalized deviation percentiles)")
    print()
    for p in (50, 75, 90, 95, 99):
        print_drop_estimate(f"{p}th percentile", norm_dev.quantile(p / 100))

    # Suggest thresholds at mean + n standard deviations
    mean_norm_dev = norm_dev.mean()
    std_norm_dev = norm_dev.std()

    print("STANDARD DEVIATION-BASED THRESHOLDS:")
    for n in (1, 2, 3):
        print_drop_estimate(f"Mean + {n}σ", mean_norm_dev + (n * std_norm_dev))

    # Create visualizations. create_plots is not required to return anything;
    # when it reports no path, fall back to the '<csv>_analysis.png' naming
    # this summary has always printed.
    plot_path = create_plots(df, csv_file)
    if not plot_path:
        plot_path = csv_file.replace('.csv', '_analysis.png')

    print("=" * 80)
    print("PLOTS SAVED:")
    print(f" - {plot_path}")
    print("=" * 80)
|
||||
|
||||
|
||||
def create_plots(df: pd.DataFrame, csv_file: str) -> str:
    """Render the 2x2 analysis figure for a rollingsum CSV and save it as PNG.

    Panels: column mean vs rolling mean per frame, deviation per frame,
    histogram of normalized deviation, and its cumulative distribution with
    the 50/75/90/95/99th percentiles marked.

    Args:
        df: Data with the columns ``frame``, ``column_mean``,
            ``rolling_mean``, ``deviation`` and ``normalized_deviation``.
        csv_file: Path of the source CSV; used for the figure title and to
            derive the output file name.

    Returns:
        Path of the written image: ``csv_file`` with its extension (if any)
        replaced by ``_analysis.png``.
    """
    # Function-scope import: only the output-name derivation needs it.
    import os

    frames = df['frame']
    deviation = df['deviation']
    norm_dev = df['normalized_deviation']

    # Compute each reference value once; it is used for both line and label.
    dev_mean = deviation.mean()
    dev_p95 = deviation.quantile(0.95)
    norm_mean = norm_dev.mean()
    norm_median = norm_dev.median()

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    try:
        fig.suptitle(f'Rolling Sum Analysis - {csv_file}', fontsize=16, fontweight='bold')

        # Plot 1: Column mean over time
        ax1 = axes[0, 0]
        ax1.plot(frames, df['column_mean'], label='Column Mean', linewidth=1)
        ax1.plot(frames, df['rolling_mean'], label='Rolling Mean', linewidth=1, alpha=0.7)
        ax1.set_xlabel('Frame')
        ax1.set_ylabel('Pixel Value')
        ax1.set_title('Column Mean vs Rolling Mean')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Plot 2: Deviation over time
        ax2 = axes[0, 1]
        ax2.plot(frames, deviation, linewidth=1, color='orange')
        ax2.axhline(y=dev_mean, color='r', linestyle='--',
                    label=f'Mean: {dev_mean:.4f}')
        ax2.axhline(y=dev_p95, color='g', linestyle='--',
                    label=f'95th: {dev_p95:.4f}')
        ax2.set_xlabel('Frame')
        ax2.set_ylabel('Absolute Deviation')
        ax2.set_title('Deviation from Rolling Mean')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        # Plot 3: Normalized deviation distribution
        ax3 = axes[1, 0]
        ax3.hist(norm_dev, bins=50, edgecolor='black', alpha=0.7)
        ax3.axvline(x=norm_mean, color='r', linestyle='--',
                    label=f'Mean: {norm_mean:.6f}')
        ax3.axvline(x=norm_median, color='g', linestyle='--',
                    label=f'Median: {norm_median:.6f}')
        ax3.set_xlabel('Normalized Deviation')
        ax3.set_ylabel('Frequency')
        ax3.set_title('Normalized Deviation Distribution')
        ax3.legend()
        ax3.grid(True, alpha=0.3, axis='y')

        # Plot 4: Cumulative distribution
        ax4 = axes[1, 1]
        sorted_norm_dev = np.sort(norm_dev)
        cumulative = np.arange(1, len(sorted_norm_dev) + 1) / len(sorted_norm_dev) * 100
        ax4.plot(sorted_norm_dev, cumulative, linewidth=2)

        # Mark percentiles; each label sits at its own percentage on the y axis.
        for p in (50, 75, 90, 95, 99):
            threshold = norm_dev.quantile(p / 100)
            ax4.axvline(x=threshold, color='red', linestyle=':', alpha=0.5)
            ax4.text(threshold, p, f'{p}th', rotation=90, va='bottom', ha='right', fontsize=8)

        ax4.set_xlabel('Normalized Deviation')
        ax4.set_ylabel('Cumulative Percentage (%)')
        ax4.set_title('Cumulative Distribution Function')
        ax4.grid(True, alpha=0.3)

        fig.tight_layout()

        # Strip only the real extension: str.replace('.csv', ...) would also
        # rewrite '.csv' inside directory names, and would return the input
        # path unchanged when the file does not end in '.csv'.
        root, _ext = os.path.splitext(csv_file)
        output_file = f"{root}_analysis.png"
        fig.savefig(output_file, dpi=150, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    print(f"\n✓ Saved analysis plot to: {output_file}\n")
    return output_file
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The optional first command-line argument overrides the default CSV path.
    cli_args = sys.argv[1:]
    csv_file = cli_args[0] if cli_args else "output.csv"

    # Run the analysis only when the input exists; otherwise print a usage
    # hint and exit with a non-zero status.
    if Path(csv_file).exists():
        analyze_csv(csv_file)
    else:
        print(f"Error: File '{csv_file}' not found.")
        print("Usage: uv run analyze_sma.py [csv_file]")
        sys.exit(1)
|
||||
Reference in New Issue
Block a user