import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import argparse
from pathlib import Path


def parse_arguments():
    parser = argparse.ArgumentParser(description='Cross-experiment analysis of chain performance.')
    parser.add_argument('--experiments-dir', '-e', required=True,
                        help='Path to directory containing experiment subdirectories')
    parser.add_argument('--supplementary', '-s', required=True,
                        help='Path to supplementary.csv file with input delays')
    parser.add_argument('--output', '-o', default='cross_experiment_analysis',
                        help='Output filename prefix for the plots (will add experiment type and .png)')
    parser.add_argument('--experiment-duration', '-d', type=int, default=20,
                        help='Duration of each experiment in seconds (default: 20)')
    return parser.parse_args()


def load_supplementary_data(supplementary_path):
    """Load the supplementary data with input delays and theoretical perfect times for each chain."""
    supp_df = pd.read_csv(supplementary_path)

    # Create dictionaries for quick lookup
    delay_dict = dict(zip(supp_df['chain'], supp_df['input_delay']))

    # Load theoretical perfect e2e time (assuming the third column exists)
    if len(supp_df.columns) >= 3:
        perfect_time_dict = dict(zip(supp_df['chain'], supp_df.iloc[:, 2]))  # Third column
        return delay_dict, perfect_time_dict
    else:
        print("Warning: No third column found for theoretical perfect times. Using input_delay as fallback.")
        perfect_time_dict = delay_dict.copy()  # Fallback to input_delay
        return delay_dict, perfect_time_dict
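
# Illustrative layout assumed for supplementary.csv (the values are made up).
# load_supplementary_data() relies on the 'chain' and 'input_delay' columns by
# name, and treats the third column, whatever its header, as the theoretical
# perfect end-to-end time in milliseconds:
#
#   chain,input_delay,perfect_time
#   /chain_a/output,100,35.0
#   /chain_b/output,250,80.0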

def calculate_theoretical_max_runs(chain, input_delay_ms, experiment_duration_s):
    """Calculate the theoretical maximum number of runs for a chain."""
    runs_per_second = 1000 / input_delay_ms  # Convert ms between inputs to runs per second
    max_runs = runs_per_second * experiment_duration_s
    return int(max_runs)


def load_experiment_data(experiments_dir, delay_dict, perfect_time_dict, experiment_duration):
    """Load all experiment data and calculate performance metrics."""
    all_data = []

    # Find all subdirectories containing results.csv
    experiment_dirs = [d for d in Path(experiments_dir).iterdir()
                       if d.is_dir() and (d / 'results.csv').exists()]
    print(f"Found {len(experiment_dirs)} experiment directories")

    for exp_dir in experiment_dirs:
        results_path = exp_dir / 'results.csv'
        try:
            df = pd.read_csv(results_path)

            # Extract experiment name (drop the timestamp suffix if present)
            if 'experiment_name' in df.columns:
                exp_name = df['experiment_name'].iloc[0].split('-')[0]
            else:
                exp_name = exp_dir.name

            # Group by chain and calculate metrics
            for chain, chain_data in df.groupby('chain'):
                if chain in delay_dict and chain in perfect_time_dict:
                    # Calculate theoretical maximum runs
                    input_delay = delay_dict[chain]
                    perfect_time = perfect_time_dict[chain]
                    theoretical_max = calculate_theoretical_max_runs(chain, input_delay, experiment_duration)

                    # Calculate actual performance metrics
                    actual_runs = chain_data['count'].mean()
                    mean_latency = chain_data['mean'].mean()
                    std_latency = chain_data['std'].mean()

                    # Normalize latency by theoretical perfect time
                    normalized_latency = mean_latency / perfect_time

                    # Calculate percentage of theoretical maximum
                    completion_percentage = (actual_runs / theoretical_max) * 100
                    if completion_percentage > 100:
                        print(f"Warning: Completion percentage for {chain} in {exp_name} "
                              f"exceeds 100%: {completion_percentage:.2f}%")
                        # Cap at 105% purely for visualization, so outliers do not
                        # stretch the x-axis. Runs exceeding the theoretical maximum
                        # usually indicate an issue with the data or with the
                        # input-delay assumption and should be investigated.
                        completion_percentage = min(completion_percentage, 105)

                    all_data.append({
                        'experiment_type': exp_name,
                        'experiment_dir': exp_dir.name,
                        'chain': chain,
                        'mean_latency_ms': mean_latency,
                        'normalized_latency': normalized_latency,
                        'std_latency_ms': std_latency,
                        'actual_runs': actual_runs,
                        'theoretical_max_runs': theoretical_max,
                        'completion_percentage': completion_percentage,
                        'input_delay_ms': input_delay,
                        'perfect_time_ms': perfect_time
                    })
                else:
                    missing_info = []
                    if chain not in delay_dict:
                        missing_info.append("input delay")
                    if chain not in perfect_time_dict:
                        missing_info.append("perfect time")
                    print(f"Warning: Chain '{chain}' missing {', '.join(missing_info)} in supplementary data")
        except Exception as e:
            print(f"Error processing {results_path}: {e}")

    return pd.DataFrame(all_data)
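
# Illustrative layout assumed for each <experiment_dir>/results.csv (the values
# are made up). load_experiment_data() only relies on the 'chain', 'count',
# 'mean' and 'std' columns, plus 'experiment_name' when it is present:
#
#   experiment_name,chain,count,mean,std
#   baseline-20240101,/chain_a/output,180,42.3,5.1
#   baseline-20240101,/chain_b/output,75,91.0,12.4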

def create_visualizations(data_df, output_prefix):
    """Create separate visualization plots for each experiment type."""
    plt.style.use('seaborn-v0_8-darkgrid')

    # Get unique experiment types
    experiment_types = sorted(data_df['experiment_type'].unique())
    print(f"Creating {len(experiment_types)} separate plots for experiment types: {experiment_types}")

    created_files = []
    for exp_type in experiment_types:
        # Filter data for this experiment type
        exp_data = data_df[data_df['experiment_type'] == exp_type]

        # Get unique chains for this experiment
        chains = sorted(exp_data['chain'].unique())

        # Create color palette for chains
        chain_colors = sns.color_palette("husl", len(chains))
        chain_color_map = dict(zip(chains, chain_colors))

        # Set up the figure
        fig, ax = plt.subplots(figsize=(14, 10))

        # Plot data points for each chain
        for chain in chains:
            chain_data = exp_data[exp_data['chain'] == chain]
            ax.scatter(chain_data['completion_percentage'], chain_data['normalized_latency'],
                       color=chain_color_map[chain], label=chain, s=120, alpha=0.8,
                       edgecolors='black', linewidth=0.8)

        # Set labels and title
        ax.set_xlabel('Completion Rate (% of Theoretical Maximum)', fontsize=14, fontweight='bold')
        ax.set_ylabel('Normalized Latency (Actual / Theoretical Perfect)', fontsize=14, fontweight='bold')
        ax.set_title(f'Performance Analysis: {exp_type}\nNormalized Latency vs Chain Completion Rate',
                     fontsize=16, fontweight='bold', pad=20)

        # Add grid for better readability
        ax.grid(True, alpha=0.3)

        # Set axis limits; a normalized latency of 1.0 means the chain matched its
        # theoretical perfect time, so the y-axis starts there
        ax.set_xlim(0, 107)
        ax.set_ylim(bottom=1)

        # Create legend for chains, anchored just outside the axes
        ax.legend(title='Chain Output', loc='upper left', fontsize=10, title_fontsize=12,
                  framealpha=0.9, fancybox=True, shadow=True, bbox_to_anchor=(1.05, 1))

        # Adjust layout to accommodate legend
        plt.tight_layout()

        # Save the plot
        safe_exp_name = exp_type.replace('/', '_').replace(' ', '_')
        output_path = f"{output_prefix}_{safe_exp_name}.png"
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        created_files.append(output_path)

        # Show the plot
        plt.show()

        # Close the figure to free memory
        plt.close()

    return created_files


def create_combined_summary_plot(data_df, output_prefix):
    """Create a combined summary plot showing all experiment types in subplots."""
    plt.style.use('seaborn-v0_8-darkgrid')

    experiment_types = sorted(data_df['experiment_type'].unique())
    n_experiments = len(experiment_types)

    # Calculate subplot grid dimensions
    n_cols = min(3, n_experiments)  # Max 3 columns
    n_rows = (n_experiments + n_cols - 1) // n_cols  # Ceiling division

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 5 * n_rows))

    # Ensure axes is always a 2D array
    if n_rows == 1 and n_cols == 1:
        axes = np.array([[axes]])
    elif n_rows == 1:
        axes = axes.reshape(1, -1)
    elif n_cols == 1:
        axes = axes.reshape(-1, 1)

    for i, exp_type in enumerate(experiment_types):
        row = i // n_cols
        col = i % n_cols
        ax = axes[row, col]

        # Filter data for this experiment type
        exp_data = data_df[data_df['experiment_type'] == exp_type]
        chains = sorted(exp_data['chain'].unique())

        # Create color palette for chains
        chain_colors = sns.color_palette("husl", len(chains))
        chain_color_map = dict(zip(chains, chain_colors))

        # Plot data points
        for chain in chains:
            chain_data = exp_data[exp_data['chain'] == chain]
            ax.scatter(chain_data['completion_percentage'], chain_data['normalized_latency'],
                       color=chain_color_map[chain], s=60, alpha=0.7,
                       edgecolors='black', linewidth=0.5)

        ax.set_title(exp_type, fontsize=12, fontweight='bold')
        ax.set_xlabel('Completion Rate (%)', fontsize=10)
        ax.set_ylabel('Normalized Latency', fontsize=10)
        ax.grid(True, alpha=0.3)

        # Set axis limits for consistency
        ax.set_xlim(0, 107)
        ax.set_ylim(bottom=1)

    # Hide unused subplots
    for i in range(n_experiments, n_rows * n_cols):
        row = i // n_cols
        col = i % n_cols
        axes[row, col].set_visible(False)

    plt.suptitle('Performance Analysis Summary - All Experiment Types\n(Normalized Latency vs Completion Rate)',
                 fontsize=16, fontweight='bold', y=0.98)
    plt.tight_layout()

    summary_output = f"{output_prefix}_summary.png"
    plt.savefig(summary_output, dpi=300, bbox_inches='tight')
    plt.show()
    plt.close()

    return summary_output

def print_summary_statistics(data_df):
    """Print summary statistics for the analysis."""
    print("\n" + "=" * 80)
    print("CROSS-EXPERIMENT ANALYSIS SUMMARY")
    print("=" * 80)

    print(f"\nTotal experiments analyzed: {data_df['experiment_type'].nunique()}")
    print(f"Total chains analyzed: {data_df['chain'].nunique()}")
    print(f"Total data points: {len(data_df)}")

    print("\nPer Experiment Type Summary:")
    exp_summary = data_df.groupby('experiment_type').agg({
        'completion_percentage': ['mean', 'std', 'min', 'max'],
        'normalized_latency': ['mean', 'std', 'min', 'max'],
        'mean_latency_ms': ['mean', 'std', 'min', 'max'],
        'chain': 'count'
    }).round(2)
    print(exp_summary)

    print("\nPer Chain Summary:")
    chain_summary = data_df.groupby('chain').agg({
        'completion_percentage': ['mean', 'std'],
        'normalized_latency': ['mean', 'std'],
        'mean_latency_ms': ['mean', 'std'],
        'experiment_type': 'count'
    }).round(2)
    print(chain_summary)

    # Find best and worst performing combinations
    print("\nBest Performance (highest completion rate):")
    best_completion = data_df.loc[data_df['completion_percentage'].idxmax()]
    print(f" {best_completion['experiment_type']} - {best_completion['chain']}")
    print(f" Completion: {best_completion['completion_percentage']:.1f}%, "
          f"Normalized Latency: {best_completion['normalized_latency']:.2f}x, "
          f"Raw Latency: {best_completion['mean_latency_ms']:.1f}ms")

    print("\nWorst Performance (lowest completion rate):")
    worst_completion = data_df.loc[data_df['completion_percentage'].idxmin()]
    print(f" {worst_completion['experiment_type']} - {worst_completion['chain']}")
    print(f" Completion: {worst_completion['completion_percentage']:.1f}%, "
          f"Normalized Latency: {worst_completion['normalized_latency']:.2f}x, "
          f"Raw Latency: {worst_completion['mean_latency_ms']:.1f}ms")

    print("\nBest Normalized Latency (closest to theoretical perfect):")
    best_latency = data_df.loc[data_df['normalized_latency'].idxmin()]
    print(f" {best_latency['experiment_type']} - {best_latency['chain']}")
    print(f" Normalized Latency: {best_latency['normalized_latency']:.2f}x, "
          f"Completion: {best_latency['completion_percentage']:.1f}%, "
          f"Raw Latency: {best_latency['mean_latency_ms']:.1f}ms")

    print("\nWorst Normalized Latency (furthest from theoretical perfect):")
    worst_latency = data_df.loc[data_df['normalized_latency'].idxmax()]
    print(f" {worst_latency['experiment_type']} - {worst_latency['chain']}")
    print(f" Normalized Latency: {worst_latency['normalized_latency']:.2f}x, "
          f"Completion: {worst_latency['completion_percentage']:.1f}%, "
          f"Raw Latency: {worst_latency['mean_latency_ms']:.1f}ms")


def main():
    args = parse_arguments()
    print("Starting cross-experiment analysis...")

    # Load supplementary data
    print(f"Loading supplementary data from: {args.supplementary}")
    delay_dict, perfect_time_dict = load_supplementary_data(args.supplementary)
    print(f"Found delay information for {len(delay_dict)} chains")
    print(f"Found perfect time information for {len(perfect_time_dict)} chains")

    # Load all experiment data
    print(f"Loading experiment data from: {args.experiments_dir}")
    data_df = load_experiment_data(args.experiments_dir, delay_dict, perfect_time_dict,
                                   args.experiment_duration)
    if data_df.empty:
        print("No data found! Please check your paths and file formats.")
        return
    print(f"Loaded data for {len(data_df)} experiment-chain combinations")

    # Create individual visualizations for each experiment type
    print("Creating individual visualizations...")
    created_files = create_visualizations(data_df, args.output)

    # Create combined summary plot
    print("Creating combined summary plot...")
    summary_file = create_combined_summary_plot(data_df, args.output)
    created_files.append(summary_file)

    # Print summary statistics
    print_summary_statistics(data_df)

    # Save detailed data to CSV for further analysis
    csv_output = f"{args.output}_detailed_data.csv"
    data_df.to_csv(csv_output, index=False)
    print(f"\nDetailed data saved to: {csv_output}")

    print("\nCreated visualization files:")
    for file in created_files:
        print(f" - {file}")


if __name__ == "__main__":
    main()
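
# Example invocation (the script filename and paths below are illustrative, not
# taken from the repository):
#
#   python cross_experiment_analysis.py \
#       --experiments-dir ./experiments \
#       --supplementary ./supplementary.csv \
#       --output cross_experiment_analysis \
#       --experiment-duration 20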