gst-plugin-linescan/scripts/analyze_sma.py
yair 94f7c04dc6 Fix recv_raw_column.py height mismatch and update script paths in docs
- Fixed HEIGHT from 480 to 640 to match actual videotestsrc output
- Added DEBUG flag to control debug output visibility
- Added cv2.namedWindow() for proper window initialization
- Updated all Python script references in markdown files to scripts/ folder
- Updated network_guide.md with correct frame dimensions and Python receiver option
2025-11-14 15:33:17 +02:00

200 lines
7.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# /// script
# dependencies = [
# "pandas>=2.0.0",
# "matplotlib>=3.7.0",
# "numpy>=1.24.0",
# ]
# ///
"""
Rolling Sum Analysis Tool
Analyzes CSV output from the GStreamer rollingsum plugin
Usage: uv run analyze_sma.py [csv_file]
"""
import sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from datetime import datetime
import shutil
# Columns read by the statistics below and by the plots generated afterwards.
_REQUIRED_COLUMNS = (
    "frame",
    "column_mean",
    "rolling_mean",
    "deviation",
    "normalized_deviation",
    "dropped",
)


def _print_threshold_impact(norm_dev: pd.Series, label: str, threshold: float) -> None:
    """Print one candidate threshold and how many frames would exceed it."""
    frames_dropped = (norm_dev > threshold).sum()
    drop_rate = (frames_dropped / len(norm_dev)) * 100
    print(f" {label}: threshold={threshold:.8f}")
    print(f" → Would drop {frames_dropped} frames ({drop_rate:.1f}%)")


def analyze_csv(csv_file: str = "output.csv"):
    """Analyze the rolling sum CSV data and generate insights.

    Prints dataset, column-mean, deviation and normalized-deviation
    statistics plus candidate drop thresholds, archives a timestamped copy
    of the CSV under ``results/debug`` and renders the analysis plot there.

    Args:
        csv_file: Path of the CSV file to analyze.

    Raises:
        SystemExit: With exit code 1 when the file is missing, is empty,
            lacks a required column, or contains no data rows. The input is
            validated before anything is written to disk.
    """
    # Read the CSV
    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        print(f"Error: CSV file '{csv_file}' not found.")
        sys.exit(1)
    except pd.errors.EmptyDataError:
        # Raised by pandas for a file with no header line at all.
        print(f"Error: CSV file '{csv_file}' is empty.")
        sys.exit(1)

    # Validate up front so a malformed file fails with a clear message
    # instead of a KeyError after the archive copy was already written.
    missing = [name for name in _REQUIRED_COLUMNS if name not in df.columns]
    if missing:
        print(f"Error: CSV file '{csv_file}' is missing required column(s): {', '.join(missing)}")
        sys.exit(1)
    if df.empty:
        # Every rate below divides by the frame count.
        print(f"Error: CSV file '{csv_file}' contains no data rows.")
        sys.exit(1)

    # Create output directory
    output_dir = Path("results/debug")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Copy input CSV to results directory with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_name = Path(csv_file).stem
    archived_csv = output_dir / f"{csv_name}_{timestamp}.csv"
    shutil.copy(csv_file, archived_csv)

    dropped = df['dropped']
    column_mean = df['column_mean']
    deviation = df['deviation']
    norm_dev = df['normalized_deviation']

    print("=" * 80)
    print(f"ROLLING SUM ANALYSIS - {csv_file}")
    print("=" * 80)
    print()

    # Basic statistics
    print("DATASET OVERVIEW:")
    print(f" Total frames: {len(df)}")
    print(f" Frames dropped: {dropped.sum()}")
    print(f" Frames kept: {(dropped == 0).sum()}")
    print(f" Drop rate: {dropped.mean() * 100:.2f}%")
    print()

    # Column mean statistics
    print("COLUMN MEAN STATISTICS:")
    print(f" Min: {column_mean.min():.6f}")
    print(f" Max: {column_mean.max():.6f}")
    print(f" Range: {column_mean.max() - column_mean.min():.6f}")
    print(f" Mean: {column_mean.mean():.6f}")
    print(f" Std Dev: {column_mean.std():.6f}")
    print()

    # Deviation statistics
    print("DEVIATION STATISTICS:")
    print(f" Min deviation: {deviation.min():.6f}")
    print(f" Max deviation: {deviation.max():.6f}")
    print(f" Mean deviation: {deviation.mean():.6f}")
    print(f" Std dev of deviations: {deviation.std():.6f}")
    print()

    # Normalized deviation statistics
    print("NORMALIZED DEVIATION STATISTICS:")
    print(f" Min: {norm_dev.min():.8f}")
    print(f" Max: {norm_dev.max():.8f}")
    print(f" Mean: {norm_dev.mean():.8f}")
    print(f" Median: {norm_dev.median():.8f}")
    print(f" 95th percentile: {norm_dev.quantile(0.95):.8f}")
    print(f" 99th percentile: {norm_dev.quantile(0.99):.8f}")
    print()

    # Threshold recommendations
    print("THRESHOLD RECOMMENDATIONS:")
    print(" (Based on normalized deviation percentiles)")
    print()
    for p in (50, 75, 90, 95, 99):
        _print_threshold_impact(norm_dev, f"{p}th percentile", norm_dev.quantile(p / 100))
    print()

    # Suggest optimal thresholds based on standard deviations
    mean_norm_dev = norm_dev.mean()
    std_norm_dev = norm_dev.std()
    print("STANDARD DEVIATION-BASED THRESHOLDS:")
    for n in (1, 2, 3):
        _print_threshold_impact(norm_dev, f"Mean + {n}σ", mean_norm_dev + (n * std_norm_dev))
    print()

    # Create visualizations
    plot_file = create_plots(df, csv_file, output_dir, timestamp)

    print("=" * 80)
    print("OUTPUT FILES:")
    print(f" CSV Archive: {archived_csv}")
    print(f" Analysis Plot: {plot_file}")
    print("=" * 80)
def create_plots(df: pd.DataFrame, csv_file: str, output_dir: Path, timestamp: str) -> Path:
    """Create analysis plots and return the output file path.

    Renders a 2x2 figure (column mean vs rolling mean, deviation over time,
    normalized-deviation histogram, cumulative distribution) and saves it as
    a PNG inside ``output_dir``.

    Args:
        df: Frame data; the ``frame``, ``column_mean``, ``rolling_mean``,
            ``deviation`` and ``normalized_deviation`` columns are read.
        csv_file: Source CSV path, used for the figure title and as the
            stem of the output file name.
        output_dir: Directory the PNG is written to.
        timestamp: Suffix appended to the output file name.

    Returns:
        Path of the saved PNG.
    """
    frames = df['frame']
    deviation = df['deviation']
    norm_dev = df['normalized_deviation']

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    try:
        fig.suptitle(f'Rolling Sum Analysis - {csv_file}', fontsize=16, fontweight='bold')

        # Plot 1: Column mean over time
        ax1 = axes[0, 0]
        ax1.plot(frames, df['column_mean'], label='Column Mean', linewidth=1)
        ax1.plot(frames, df['rolling_mean'], label='Rolling Mean', linewidth=1, alpha=0.7)
        ax1.set_xlabel('Frame')
        ax1.set_ylabel('Pixel Value')
        ax1.set_title('Column Mean vs Rolling Mean')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Plot 2: Deviation over time
        ax2 = axes[0, 1]
        dev_mean = deviation.mean()
        dev_p95 = deviation.quantile(0.95)
        ax2.plot(frames, deviation, linewidth=1, color='orange')
        ax2.axhline(y=dev_mean, color='r', linestyle='--',
                    label=f'Mean: {dev_mean:.4f}')
        ax2.axhline(y=dev_p95, color='g', linestyle='--',
                    label=f'95th: {dev_p95:.4f}')
        ax2.set_xlabel('Frame')
        ax2.set_ylabel('Absolute Deviation')
        ax2.set_title('Deviation from Rolling Mean')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        # Plot 3: Normalized deviation distribution
        ax3 = axes[1, 0]
        norm_mean = norm_dev.mean()
        norm_median = norm_dev.median()
        ax3.hist(norm_dev, bins=50, edgecolor='black', alpha=0.7)
        ax3.axvline(x=norm_mean, color='r', linestyle='--',
                    label=f'Mean: {norm_mean:.6f}')
        ax3.axvline(x=norm_median, color='g', linestyle='--',
                    label=f'Median: {norm_median:.6f}')
        ax3.set_xlabel('Normalized Deviation')
        ax3.set_ylabel('Frequency')
        ax3.set_title('Normalized Deviation Distribution')
        ax3.legend()
        ax3.grid(True, alpha=0.3, axis='y')

        # Plot 4: Cumulative distribution
        ax4 = axes[1, 1]
        sorted_norm_dev = np.sort(norm_dev)
        cumulative = np.arange(1, len(sorted_norm_dev) + 1) / len(sorted_norm_dev) * 100
        ax4.plot(sorted_norm_dev, cumulative, linewidth=2)
        # Mark percentiles
        for p in [50, 75, 90, 95, 99]:
            threshold = norm_dev.quantile(p / 100)
            ax4.axvline(x=threshold, color='red', linestyle=':', alpha=0.5)
            ax4.text(threshold, p, f'{p}th', rotation=90, va='bottom', ha='right', fontsize=8)
        ax4.set_xlabel('Normalized Deviation')
        ax4.set_ylabel('Cumulative Percentage (%)')
        ax4.set_title('Cumulative Distribution Function')
        ax4.grid(True, alpha=0.3)

        fig.tight_layout()

        # Save the plot to results/debug
        csv_name = Path(csv_file).stem
        output_file = output_dir / f"{csv_name}_analysis_{timestamp}.png"
        # Save this figure explicitly rather than pyplot's "current" figure.
        fig.savefig(output_file, dpi=150, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate open figures.
        plt.close(fig)

    print(f"\n✓ Saved analysis plot to: {output_file}\n")
    return output_file
if __name__ == "__main__":
    # Script entry point: the optional first CLI argument names the CSV to
    # analyze; without it the default "output.csv" is used.
    csv_file = sys.argv[1] if sys.argv[1:] else "output.csv"
    if Path(csv_file).exists():
        analyze_csv(csv_file)
    else:
        # Report the missing input together with the usage hint, then fail.
        print(f"Error: File '{csv_file}' not found.")
        print("Usage: uv run analyze_sma.py [csv_file]")
        sys.exit(1)