added count, beautified boxplot output
This commit is contained in:
		
							parent
							
								
									b5b0f2f84b
								
							
						
					
					
						commit
						9bf91d654d
					
				
					 3 changed files with 71 additions and 43 deletions
				
			
		|  | @ -1,19 +1,16 @@ | |||
| import pandas as pd | ||||
| import numpy as np | ||||
| import argparse | ||||
| import seaborn as sns | ||||
| import matplotlib.pyplot as plt | ||||
| 
 | ||||
| 
 | ||||
| def parse_arguments(): | ||||
|     parser = argparse.ArgumentParser(description='Analyze chain data from CSV file.') | ||||
|     parser.add_argument('--input', '-i', required=True, help='Path to the input CSV file') | ||||
|     return parser.parse_args() | ||||
| 
 | ||||
| 
 | ||||
| def main(): | ||||
|     args = parse_arguments() | ||||
|      | ||||
| 
 | ||||
|     # Load the CSV file from the input argument | ||||
|     df = pd.read_csv(args.input) | ||||
| 
 | ||||
|  | @ -21,56 +18,85 @@ def main(): | |||
|     if 'experiment_name' not in df.columns: | ||||
|         raise ValueError("Input CSV must contain 'experiment_name' column.") | ||||
|     experiment_name = df['experiment_name'].iloc[0] | ||||
|      | ||||
| 
 | ||||
|     # Strip timestamp from experiment_name if it exists | ||||
|     experiment_name = experiment_name.split('-')[0] if '-' in experiment_name else experiment_name | ||||
|      | ||||
| 
 | ||||
|     # Group data by chain | ||||
|     chain_groups = df.groupby('chain') | ||||
|      | ||||
|     # For each chain, create a plot with four boxplots (mean, std, min, max) | ||||
| 
 | ||||
|     # For each chain, create a figure with five subplots for boxplots (mean, std, min, max, count) | ||||
|     for chain_name, chain_data in chain_groups: | ||||
|         # Create a figure for this chain | ||||
|         plt.figure(figsize=(12, 8)) | ||||
|          | ||||
|         fig, axs = plt.subplots(1, 5, figsize=(18, 6), constrained_layout=True) | ||||
| 
 | ||||
|         # Normalize chain name for filename | ||||
|         chain_name_fs = str(chain_name).replace('--> /', '-').replace('/', '_').replace(' ', '') | ||||
|          | ||||
| 
 | ||||
|         # Create a DataFrame with the columns we want to plot | ||||
|         plot_data = pd.DataFrame({ | ||||
|             'Mean': chain_data['mean'], | ||||
|             'Std': chain_data['std'], | ||||
|             'Min': chain_data['min'], | ||||
|             'Max': chain_data['max'] | ||||
|         }) | ||||
|          | ||||
|         # Create boxplots | ||||
|         ax = sns.boxplot(data=plot_data, palette='Set3') | ||||
|          | ||||
|         # Add individual data points | ||||
|         sns.stripplot(data=plot_data, color='black', alpha=0.5, size=4, jitter=True) | ||||
|          | ||||
|         # Set labels and title | ||||
|         plt.title(f'Statistics for Chain: {chain_name}\nAcross {len(chain_data)} Experiment Runs\n{experiment_name}', fontsize=14) | ||||
|         plt.ylabel('Latency (ms)', fontsize=12) | ||||
|         plt.xlabel('Statistic Type', fontsize=12) | ||||
|          | ||||
|         # Add grid for better readability | ||||
|         plt.grid(axis='y', linestyle='--', alpha=0.7) | ||||
|          | ||||
|         # Tighten layout and save the figure | ||||
|         plot_data = chain_data[['mean', 'std', 'min', 'max', 'count']].copy() | ||||
|         plot_data.columns = ['Mean', 'Std', 'Min', 'Max', 'Count'] | ||||
| 
 | ||||
|         # Make all plots have the same color palette | ||||
|         palette = sns.color_palette("husl", 4) | ||||
|         # Add a distinct color for the 'Count' plot, as it is a different metric | ||||
|         colors = palette + ['lightcoral'] | ||||
| 
 | ||||
|         for idx, (col, color) in enumerate(zip(['Mean', 'Std', 'Min', 'Max', 'Count'], colors)): | ||||
|             ax = axs[idx] | ||||
| 
 | ||||
|             # Create boxplots | ||||
|             sns.boxplot(data=plot_data[col], ax=ax, color=color, showfliers=True, width=0.4) | ||||
| 
 | ||||
|             # Add individual data points | ||||
|             sns.swarmplot(data=plot_data[col], ax=ax, color='black', size=3, alpha=0.6) | ||||
| 
 | ||||
|             # Set labels and title | ||||
|             ax.set_title(f'{col} Distribution', fontsize=14, fontweight='bold') | ||||
|             ax.set_xticks([]) # Remove x-ticks for clarity | ||||
|             ax.set_xlabel('') # No x-label needed | ||||
|             ax.set_ylabel('Latency (ms)' if col != 'Count' else 'Count', fontsize=12) | ||||
| 
 | ||||
|             # Calculate statistics of the statistics | ||||
|             data_values = plot_data[col] | ||||
|             stats_text = ( | ||||
|                 f"Mean: {data_values.mean():.2f}\n" | ||||
|                 f"Std: {data_values.std():.2f}\n" | ||||
|                 f"Min: {data_values.min():.2f}\n" | ||||
|                 f"Max: {data_values.max():.2f}" | ||||
|             ) | ||||
| 
 | ||||
|             # --- Place the statistics text box in the top right using axes fraction coordinates --- | ||||
|             ax.text( | ||||
|                 0.95, 0.98,  # axes fraction: 95% right, 98% up | ||||
|                 stats_text, | ||||
|                 transform=ax.transAxes, | ||||
|                 verticalalignment='top', | ||||
|                 horizontalalignment='right', | ||||
|                 fontsize=10, | ||||
|                 bbox=dict(facecolor='white', alpha=0.9, boxstyle='round,pad=0.3', edgecolor='gray') | ||||
|             ) | ||||
| 
 | ||||
|             # Add grid for better readability | ||||
|             ax.grid(axis='y', linestyle='--', alpha=0.4) | ||||
| 
 | ||||
|         # Set the overall title for the figure | ||||
|         plt.suptitle( | ||||
|             f'Statistics for Chain: {chain_name}\nAcross {len(chain_data)} Experiment Runs - {experiment_name}', | ||||
|             fontsize=18, fontweight='bold' | ||||
|         ) | ||||
| 
 | ||||
|         # Save the figure with a filename that includes the chain name | ||||
|         plt.tight_layout() | ||||
|         output_file = args.input.replace('.csv', f'_chain_{chain_name_fs}_analysis.png') | ||||
|         plt.savefig(output_file, dpi=300) | ||||
|         plt.close() | ||||
|          | ||||
|         # Also calculate and print summary statistics for this chain | ||||
| 
 | ||||
|         # Print summary statistics for the chain | ||||
|         summary = chain_data.describe() | ||||
|         print(f"\nSummary for chain: {chain_name}") | ||||
|         print(summary[['mean', 'std', 'min', 'max']]) | ||||
|          | ||||
|     print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}") | ||||
|         print(summary[['mean', 'std', 'min', 'max', 'count']]) | ||||
| 
 | ||||
|     print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}") | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
|  | @ -51,7 +51,7 @@ def main(base_dir, name_filter): | |||
|             pm.execute_notebook( | ||||
|                 "./trace-analysis.ipynb", | ||||
|                 os.path.join(current_artifact, "output", "trace-analysis.ipynb"), | ||||
|                 log_output=True | ||||
|                 log_output=False | ||||
|             ) | ||||
|         except Exception as e: | ||||
|             LOGGER.exception(e) | ||||
|  |  | |||
|  | @ -683,6 +683,7 @@ | |||
|     "    std_latency = np.std(e2e_latencies)\n", | ||||
|     "    min_latency = np.min(e2e_latencies)\n", | ||||
|     "    max_latency = np.max(e2e_latencies)\n", | ||||
|     "    count_latencies = len(e2e_latencies)\n", | ||||
|     "    ax.axvline(mean_latency, c=\"red\", linewidth=2)\n", | ||||
|     "    _, max_ylim = ax.get_ylim()\n", | ||||
|     "    # Create a multi-line string with all stats\n", | ||||
|  | @ -690,7 +691,8 @@ | |||
|     "        f\"Mean: {mean_latency:.2f} ms\\n\"\n", | ||||
|     "        f\"Std:  {std_latency:.2f} ms\\n\"\n", | ||||
|     "        f\"Min:  {min_latency:.2f} ms\\n\"\n", | ||||
|     "        f\"Max:  {max_latency:.2f} ms\"\n", | ||||
|     "        f\"Max:  {max_latency:.2f} ms\\n\"\n", | ||||
|     "        f\"Count: {count_latencies}\"\n", | ||||
|     "    )\n", | ||||
|     "    # Place text near top right of plot\n", | ||||
|     "    ax.text(\n", | ||||
|  | @ -703,10 +705,10 @@ | |||
|     "        bbox=dict(facecolor='white', alpha=0.7, boxstyle='round,pad=0.3')\n", | ||||
|     "    )\n", | ||||
|     "    plt.savefig(os.path.join(OUT_PATH, f\"plot_e2es_{name}.png\"))\n", | ||||
|     "    result_strings.append(f\"Chain {topics[0]} --> {topics[-1]} E2E stats: Mean: {mean_latency:.2f} ms, Std: {std_latency:.2f} ms, Min: {min_latency:.2f} ms, Max: {max_latency:.2f} ms\")\n", | ||||
|     "    result_strings.append(f\"Chain {topics[0]} --> {topics[-1]} E2E stats: Mean: {mean_latency:.2f} ms, Std: {std_latency:.2f} ms, Min: {min_latency:.2f} ms, Max: {max_latency:.2f} ms, Count: {count_latencies}\")\n", | ||||
|     "    # also do it as csv of order: experiment_name, chain, mean, std, min, max, count\n", | ||||
|     "    result_strings_csv.append(\n", | ||||
|     "        f\"{EXPERIMENT_NAME},{topics[0]} --> {topics[-1]},{mean_latency:.2f},{std_latency:.2f},{min_latency:.2f},{max_latency:.2f}\"\n", | ||||
|     "        f\"{EXPERIMENT_NAME},{topics[0]} --> {topics[-1]},{mean_latency:.2f},{std_latency:.2f},{min_latency:.2f},{max_latency:.2f},{count_latencies}\"\n", | ||||
|     "    )\n", | ||||
|     "\n", | ||||
|     "    ##################################################\n", | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue