import pandas as pd import argparse import seaborn as sns import matplotlib.pyplot as plt def parse_arguments(): parser = argparse.ArgumentParser(description='Analyze chain data from CSV file.') parser.add_argument('--input', '-i', required=True, help='Path to the input CSV file') return parser.parse_args() def main(): args = parse_arguments() # Load the CSV file from the input argument df = pd.read_csv(args.input) # Extract the experiment_name which should be the same across all rows if 'experiment_name' not in df.columns: raise ValueError("Input CSV must contain 'experiment_name' column.") experiment_name = df['experiment_name'].iloc[0] # Strip timestamp from experiment_name if it exists experiment_name = experiment_name.split('-')[0] if '-' in experiment_name else experiment_name # Group data by chain chain_groups = df.groupby('chain') # For each chain, create a figure with five subplots for boxplots (mean, std, min, max, count) for chain_name, chain_data in chain_groups: fig, axs = plt.subplots(1, 5, figsize=(18, 6), constrained_layout=True) # Normalize chain name for filename chain_name_fs = str(chain_name).replace('--> /', '-').replace('/', '_').replace(' ', '') # Create a DataFrame with the columns we want to plot plot_data = chain_data[['mean', 'std', 'min', 'max', 'count']].copy() plot_data.columns = ['Mean', 'Std', 'Min', 'Max', 'Count'] # Make all plots have the same color palette palette = sns.color_palette("husl", 4) # Add a distinct color for the 'Count' plot, as it is a different metric colors = palette + ['lightcoral'] for idx, (col, color) in enumerate(zip(['Mean', 'Std', 'Min', 'Max', 'Count'], colors)): ax = axs[idx] # Create boxplots sns.boxplot(data=plot_data[col], ax=ax, color=color, showfliers=True, width=0.4) # Add individual data points sns.swarmplot(data=plot_data[col], ax=ax, color='black', size=3, alpha=0.6) # Set labels and title ax.set_title(f'{col} Distribution', fontsize=14, fontweight='bold') ax.set_xticks([]) # Remove x-ticks for clarity ax.set_xlabel('') # No x-label needed ax.set_ylabel('Latency (ms)' if col != 'Count' else 'Count', fontsize=12) # Calculate statistics of the statistics data_values = plot_data[col] stats_text = ( f"Mean: {data_values.mean():.2f}\n" f"Std: {data_values.std():.2f}\n" f"Min: {data_values.min():.2f}\n" f"Max: {data_values.max():.2f}" ) # --- Place legend in the top right using axes fraction coordinates --- ax.text( 0.95, 0.98, # axes fraction: 95% right, 98% up stats_text, transform=ax.transAxes, verticalalignment='top', horizontalalignment='right', fontsize=10, bbox=dict(facecolor='white', alpha=0.9, boxstyle='round,pad=0.3', edgecolor='gray') ) # Add grid for better readability ax.grid(axis='y', linestyle='--', alpha=0.4) # Set the overall title for the figure plt.suptitle( f'Statistics for Chain: {chain_name}\nAcross {len(chain_data)} Experiment Runs - {experiment_name}', fontsize=18, fontweight='bold' ) # Save the figure with a filename that includes the chain name plt.tight_layout() output_file = args.input.replace('.csv', f'_chain_{chain_name_fs}_analysis.png') plt.savefig(output_file, dpi=300) plt.close() # Print summary statistics for the chain summary = chain_data.describe() print(f"\nSummary for chain: {chain_name}") print(summary[['mean', 'std', 'min', 'max', 'count']]) print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}") if __name__ == "__main__": main()