added count, beautified boxplot output

This commit is contained in:
Niklas Halle 2025-06-16 11:00:18 +00:00
parent b5b0f2f84b
commit 9bf91d654d
3 changed files with 71 additions and 43 deletions

View file

@ -1,16 +1,13 @@
import pandas as pd import pandas as pd
import numpy as np
import argparse import argparse
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
def parse_arguments(): def parse_arguments():
parser = argparse.ArgumentParser(description='Analyze chain data from CSV file.') parser = argparse.ArgumentParser(description='Analyze chain data from CSV file.')
parser.add_argument('--input', '-i', required=True, help='Path to the input CSV file') parser.add_argument('--input', '-i', required=True, help='Path to the input CSV file')
return parser.parse_args() return parser.parse_args()
def main(): def main():
args = parse_arguments() args = parse_arguments()
@ -28,49 +25,78 @@ def main():
# Group data by chain # Group data by chain
chain_groups = df.groupby('chain') chain_groups = df.groupby('chain')
# For each chain, create a plot with four boxplots (mean, std, min, max) # For each chain, create a figure with five subplots for boxplots (mean, std, min, max, count)
for chain_name, chain_data in chain_groups: for chain_name, chain_data in chain_groups:
# Create a figure for this chain fig, axs = plt.subplots(1, 5, figsize=(18, 6), constrained_layout=True)
plt.figure(figsize=(12, 8))
# Normalize chain name for filename # Normalize chain name for filename
chain_name_fs = str(chain_name).replace('--> /', '-').replace('/', '_').replace(' ', '') chain_name_fs = str(chain_name).replace('--> /', '-').replace('/', '_').replace(' ', '')
# Create a DataFrame with the columns we want to plot # Create a DataFrame with the columns we want to plot
plot_data = pd.DataFrame({ plot_data = chain_data[['mean', 'std', 'min', 'max', 'count']].copy()
'Mean': chain_data['mean'], plot_data.columns = ['Mean', 'Std', 'Min', 'Max', 'Count']
'Std': chain_data['std'],
'Min': chain_data['min'], # Make all plots have the same color palette
'Max': chain_data['max'] palette = sns.color_palette("husl", 4)
}) # Add a distinct color for the 'Count' plot, as it is a different metric
colors = palette + ['lightcoral']
for idx, (col, color) in enumerate(zip(['Mean', 'Std', 'Min', 'Max', 'Count'], colors)):
ax = axs[idx]
# Create boxplots # Create boxplots
ax = sns.boxplot(data=plot_data, palette='Set3') sns.boxplot(data=plot_data[col], ax=ax, color=color, showfliers=True, width=0.4)
# Add individual data points # Add individual data points
sns.stripplot(data=plot_data, color='black', alpha=0.5, size=4, jitter=True) sns.swarmplot(data=plot_data[col], ax=ax, color='black', size=3, alpha=0.6)
# Set labels and title # Set labels and title
plt.title(f'Statistics for Chain: {chain_name}\nAcross {len(chain_data)} Experiment Runs\n{experiment_name}', fontsize=14) ax.set_title(f'{col} Distribution', fontsize=14, fontweight='bold')
plt.ylabel('Latency (ms)', fontsize=12) ax.set_xticks([]) # Remove x-ticks for clarity
plt.xlabel('Statistic Type', fontsize=12) ax.set_xlabel('') # No x-label needed
ax.set_ylabel('Latency (ms)' if col != 'Count' else 'Count', fontsize=12)
# Calculate statistics of the statistics
data_values = plot_data[col]
stats_text = (
f"Mean: {data_values.mean():.2f}\n"
f"Std: {data_values.std():.2f}\n"
f"Min: {data_values.min():.2f}\n"
f"Max: {data_values.max():.2f}"
)
# --- Place the statistics text box in the top right using axes fraction coordinates ---
ax.text(
0.95, 0.98, # axes fraction: 95% right, 98% up
stats_text,
transform=ax.transAxes,
verticalalignment='top',
horizontalalignment='right',
fontsize=10,
bbox=dict(facecolor='white', alpha=0.9, boxstyle='round,pad=0.3', edgecolor='gray')
)
# Add grid for better readability # Add grid for better readability
plt.grid(axis='y', linestyle='--', alpha=0.7) ax.grid(axis='y', linestyle='--', alpha=0.4)
# Tighten layout and save the figure # Set the overall title for the figure
plt.suptitle(
f'Statistics for Chain: {chain_name}\nAcross {len(chain_data)} Experiment Runs - {experiment_name}',
fontsize=18, fontweight='bold'
)
# Save the figure with a filename that includes the chain name
plt.tight_layout() plt.tight_layout()
output_file = args.input.replace('.csv', f'_chain_{chain_name_fs}_analysis.png') output_file = args.input.replace('.csv', f'_chain_{chain_name_fs}_analysis.png')
plt.savefig(output_file, dpi=300) plt.savefig(output_file, dpi=300)
plt.close() plt.close()
# Also calculate and print summary statistics for this chain # Print summary statistics for the chain
summary = chain_data.describe() summary = chain_data.describe()
print(f"\nSummary for chain: {chain_name}") print(f"\nSummary for chain: {chain_name}")
print(summary[['mean', 'std', 'min', 'max']]) print(summary[['mean', 'std', 'min', 'max', 'count']])
print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}") print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View file

@ -51,7 +51,7 @@ def main(base_dir, name_filter):
pm.execute_notebook( pm.execute_notebook(
"./trace-analysis.ipynb", "./trace-analysis.ipynb",
os.path.join(current_artifact, "output", "trace-analysis.ipynb"), os.path.join(current_artifact, "output", "trace-analysis.ipynb"),
log_output=True log_output=False
) )
except Exception as e: except Exception as e:
LOGGER.exception(e) LOGGER.exception(e)

View file

@ -683,6 +683,7 @@
" std_latency = np.std(e2e_latencies)\n", " std_latency = np.std(e2e_latencies)\n",
" min_latency = np.min(e2e_latencies)\n", " min_latency = np.min(e2e_latencies)\n",
" max_latency = np.max(e2e_latencies)\n", " max_latency = np.max(e2e_latencies)\n",
" count_latencies = len(e2e_latencies)\n",
" ax.axvline(mean_latency, c=\"red\", linewidth=2)\n", " ax.axvline(mean_latency, c=\"red\", linewidth=2)\n",
" _, max_ylim = ax.get_ylim()\n", " _, max_ylim = ax.get_ylim()\n",
" # Create a multi-line string with all stats\n", " # Create a multi-line string with all stats\n",
@ -690,7 +691,8 @@
" f\"Mean: {mean_latency:.2f} ms\\n\"\n", " f\"Mean: {mean_latency:.2f} ms\\n\"\n",
" f\"Std: {std_latency:.2f} ms\\n\"\n", " f\"Std: {std_latency:.2f} ms\\n\"\n",
" f\"Min: {min_latency:.2f} ms\\n\"\n", " f\"Min: {min_latency:.2f} ms\\n\"\n",
" f\"Max: {max_latency:.2f} ms\"\n", " f\"Max: {max_latency:.2f} ms\\n\"\n",
" f\"Count: {count_latencies}\"\n",
" )\n", " )\n",
" # Place text near top right of plot\n", " # Place text near top right of plot\n",
" ax.text(\n", " ax.text(\n",
@ -703,10 +705,10 @@
" bbox=dict(facecolor='white', alpha=0.7, boxstyle='round,pad=0.3')\n", " bbox=dict(facecolor='white', alpha=0.7, boxstyle='round,pad=0.3')\n",
" )\n", " )\n",
" plt.savefig(os.path.join(OUT_PATH, f\"plot_e2es_{name}.png\"))\n", " plt.savefig(os.path.join(OUT_PATH, f\"plot_e2es_{name}.png\"))\n",
" result_strings.append(f\"Chain {topics[0]} --> {topics[-1]} E2E stats: Mean: {mean_latency:.2f} ms, Std: {std_latency:.2f} ms, Min: {min_latency:.2f} ms, Max: {max_latency:.2f} ms\")\n", " result_strings.append(f\"Chain {topics[0]} --> {topics[-1]} E2E stats: Mean: {mean_latency:.2f} ms, Std: {std_latency:.2f} ms, Min: {min_latency:.2f} ms, Max: {max_latency:.2f} ms, Count: {count_latencies}\")\n",
" # also do it as csv of order: experiment_name, chain, mean, std, min, max\n", " # also do it as csv of order: experiment_name, chain, mean, std, min, max\n",
" result_strings_csv.append(\n", " result_strings_csv.append(\n",
" f\"{EXPERIMENT_NAME},{topics[0]} --> {topics[-1]},{mean_latency:.2f},{std_latency:.2f},{min_latency:.2f},{max_latency:.2f}\"\n", " f\"{EXPERIMENT_NAME},{topics[0]} --> {topics[-1]},{mean_latency:.2f},{std_latency:.2f},{min_latency:.2f},{max_latency:.2f},{count_latencies}\"\n",
" )\n", " )\n",
"\n", "\n",
" ##################################################\n", " ##################################################\n",