From 9bf91d654d1da64fe2cd88181fbfad62c7396e9b Mon Sep 17 00:00:00 2001 From: Niklas Halle Date: Mon, 16 Jun 2025 11:00:18 +0000 Subject: [PATCH] added count, beautified boxplot output --- batch_analysis_analysis.py | 104 +++++++++++++++++++++++-------------- batch_analyze.py | 2 +- trace-analysis.ipynb | 8 +-- 3 files changed, 71 insertions(+), 43 deletions(-) diff --git a/batch_analysis_analysis.py b/batch_analysis_analysis.py index 7968a08..7bc42a6 100644 --- a/batch_analysis_analysis.py +++ b/batch_analysis_analysis.py @@ -1,19 +1,16 @@ import pandas as pd -import numpy as np import argparse import seaborn as sns import matplotlib.pyplot as plt - def parse_arguments(): parser = argparse.ArgumentParser(description='Analyze chain data from CSV file.') parser.add_argument('--input', '-i', required=True, help='Path to the input CSV file') return parser.parse_args() - def main(): args = parse_arguments() - + # Load the CSV file from the input argument df = pd.read_csv(args.input) @@ -21,56 +18,85 @@ def main(): if 'experiment_name' not in df.columns: raise ValueError("Input CSV must contain 'experiment_name' column.") experiment_name = df['experiment_name'].iloc[0] - + # Strip timestamp from experiment_name if it exists experiment_name = experiment_name.split('-')[0] if '-' in experiment_name else experiment_name - + # Group data by chain chain_groups = df.groupby('chain') - - # For each chain, create a plot with four boxplots (mean, std, min, max) + + # For each chain, create a figure with five subplots for boxplots (mean, std, min, max, count) for chain_name, chain_data in chain_groups: - # Create a figure for this chain - plt.figure(figsize=(12, 8)) - + fig, axs = plt.subplots(1, 5, figsize=(18, 6), constrained_layout=True) + # Normalize chain name for filename chain_name_fs = str(chain_name).replace('--> /', '-').replace('/', '_').replace(' ', '') - + # Create a DataFrame with the columns we want to plot - plot_data = pd.DataFrame({ - 'Mean': chain_data['mean'], - 'Std': chain_data['std'], - 'Min': chain_data['min'], - 'Max': chain_data['max'] - }) - - # Create boxplots - ax = sns.boxplot(data=plot_data, palette='Set3') - - # Add individual data points - sns.stripplot(data=plot_data, color='black', alpha=0.5, size=4, jitter=True) - - # Set labels and title - plt.title(f'Statistics for Chain: {chain_name}\nAcross {len(chain_data)} Experiment Runs\n{experiment_name}', fontsize=14) - plt.ylabel('Latency (ms)', fontsize=12) - plt.xlabel('Statistic Type', fontsize=12) - - # Add grid for better readability - plt.grid(axis='y', linestyle='--', alpha=0.7) - - # Tighten layout and save the figure + plot_data = chain_data[['mean', 'std', 'min', 'max', 'count']].copy() + plot_data.columns = ['Mean', 'Std', 'Min', 'Max', 'Count'] + + # Make all plots have the same color palette + palette = sns.color_palette("husl", 4) + # Add a distinct color for the 'Count' plot, as it is a different metric + colors = palette + ['lightcoral'] + + for idx, (col, color) in enumerate(zip(['Mean', 'Std', 'Min', 'Max', 'Count'], colors)): + ax = axs[idx] + + # Create boxplots + sns.boxplot(data=plot_data[col], ax=ax, color=color, showfliers=True, width=0.4) + + # Add individual data points + sns.swarmplot(data=plot_data[col], ax=ax, color='black', size=3, alpha=0.6) + + # Set labels and title + ax.set_title(f'{col} Distribution', fontsize=14, fontweight='bold') + ax.set_xticks([]) # Remove x-ticks for clarity + ax.set_xlabel('') # No x-label needed + ax.set_ylabel('Latency (ms)' if col != 'Count' else 'Count', fontsize=12) + + # Calculate statistics of the statistics + data_values = plot_data[col] + stats_text = ( + f"Mean: {data_values.mean():.2f}\n" + f"Std: {data_values.std():.2f}\n" + f"Min: {data_values.min():.2f}\n" + f"Max: {data_values.max():.2f}" + ) + + # --- Place legend in the top right using axes fraction coordinates --- + ax.text( + 0.95, 0.98, # axes fraction: 95% right, 98% up + stats_text, + transform=ax.transAxes, + verticalalignment='top', + horizontalalignment='right', + fontsize=10, + bbox=dict(facecolor='white', alpha=0.9, boxstyle='round,pad=0.3', edgecolor='gray') + ) + + # Add grid for better readability + ax.grid(axis='y', linestyle='--', alpha=0.4) + + # Set the overall title for the figure + plt.suptitle( + f'Statistics for Chain: {chain_name}\nAcross {len(chain_data)} Experiment Runs - {experiment_name}', + fontsize=18, fontweight='bold' + ) + + # Save the figure with a filename that includes the chain name plt.tight_layout() output_file = args.input.replace('.csv', f'_chain_{chain_name_fs}_analysis.png') plt.savefig(output_file, dpi=300) plt.close() - - # Also calculate and print summary statistics for this chain + + # Print summary statistics for the chain summary = chain_data.describe() print(f"\nSummary for chain: {chain_name}") - print(summary[['mean', 'std', 'min', 'max']]) - - print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}") + print(summary[['mean', 'std', 'min', 'max', 'count']]) + print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}") if __name__ == "__main__": main() \ No newline at end of file diff --git a/batch_analyze.py b/batch_analyze.py index 4d77a76..f92e6a5 100755 --- a/batch_analyze.py +++ b/batch_analyze.py @@ -51,7 +51,7 @@ def main(base_dir, name_filter): pm.execute_notebook( "./trace-analysis.ipynb", os.path.join(current_artifact, "output", "trace-analysis.ipynb"), - log_output=True + log_output=False ) except Exception as e: LOGGER.exception(e) diff --git a/trace-analysis.ipynb b/trace-analysis.ipynb index 20b2319..6746ac5 100644 --- a/trace-analysis.ipynb +++ b/trace-analysis.ipynb @@ -683,6 +683,7 @@ " std_latency = np.std(e2e_latencies)\n", " min_latency = np.min(e2e_latencies)\n", " max_latency = np.max(e2e_latencies)\n", + " count_latencies = len(e2e_latencies)\n", " ax.axvline(mean_latency, c=\"red\", linewidth=2)\n", " _, max_ylim = ax.get_ylim()\n", " # Create a multi-line string with all stats\n", @@ -690,7 +691,8 @@ " f\"Mean: {mean_latency:.2f} ms\\n\"\n", " f\"Std: {std_latency:.2f} ms\\n\"\n", " f\"Min: {min_latency:.2f} ms\\n\"\n", - " f\"Max: {max_latency:.2f} ms\"\n", + " f\"Max: {max_latency:.2f} ms\\n\"\n", + " f\"Count: {count_latencies}\"\n", " )\n", " # Place text near top right of plot\n", " ax.text(\n", @@ -703,10 +705,10 @@ " bbox=dict(facecolor='white', alpha=0.7, boxstyle='round,pad=0.3')\n", " )\n", " plt.savefig(os.path.join(OUT_PATH, f\"plot_e2es_{name}.png\"))\n", - " result_strings.append(f\"Chain {topics[0]} --> {topics[-1]} E2E stats: Mean: {mean_latency:.2f} ms, Std: {std_latency:.2f} ms, Min: {min_latency:.2f} ms, Max: {max_latency:.2f} ms\")\n", + " result_strings.append(f\"Chain {topics[0]} --> {topics[-1]} E2E stats: Mean: {mean_latency:.2f} ms, Std: {std_latency:.2f} ms, Min: {min_latency:.2f} ms, Max: {max_latency:.2f} ms, Count: {count_latencies}\")\n", " # also do it as csv of order: exepriment_name, chain, mean, std, min, max\n", " result_strings_csv.append(\n", - " f\"{EXPERIMENT_NAME},{topics[0]} --> {topics[-1]},{mean_latency:.2f},{std_latency:.2f},{min_latency:.2f},{max_latency:.2f}\"\n", + " f\"{EXPERIMENT_NAME},{topics[0]} --> {topics[-1]},{mean_latency:.2f},{std_latency:.2f},{min_latency:.2f},{max_latency:.2f},{count_latencies}\"\n", " )\n", "\n", " ##################################################\n",