added count, beautified boxplot output
This commit is contained in:
parent
b5b0f2f84b
commit
9bf91d654d
3 changed files with 71 additions and 43 deletions
|
@ -1,19 +1,16 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
|
||||||
import argparse
|
import argparse
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
def parse_arguments():
|
def parse_arguments():
|
||||||
parser = argparse.ArgumentParser(description='Analyze chain data from CSV file.')
|
parser = argparse.ArgumentParser(description='Analyze chain data from CSV file.')
|
||||||
parser.add_argument('--input', '-i', required=True, help='Path to the input CSV file')
|
parser.add_argument('--input', '-i', required=True, help='Path to the input CSV file')
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
args = parse_arguments()
|
args = parse_arguments()
|
||||||
|
|
||||||
# Load the CSV file from the input argument
|
# Load the CSV file from the input argument
|
||||||
df = pd.read_csv(args.input)
|
df = pd.read_csv(args.input)
|
||||||
|
|
||||||
|
@ -21,56 +18,85 @@ def main():
|
||||||
if 'experiment_name' not in df.columns:
|
if 'experiment_name' not in df.columns:
|
||||||
raise ValueError("Input CSV must contain 'experiment_name' column.")
|
raise ValueError("Input CSV must contain 'experiment_name' column.")
|
||||||
experiment_name = df['experiment_name'].iloc[0]
|
experiment_name = df['experiment_name'].iloc[0]
|
||||||
|
|
||||||
# Strip timestamp from experiment_name if it exists
|
# Strip timestamp from experiment_name if it exists
|
||||||
experiment_name = experiment_name.split('-')[0] if '-' in experiment_name else experiment_name
|
experiment_name = experiment_name.split('-')[0] if '-' in experiment_name else experiment_name
|
||||||
|
|
||||||
# Group data by chain
|
# Group data by chain
|
||||||
chain_groups = df.groupby('chain')
|
chain_groups = df.groupby('chain')
|
||||||
|
|
||||||
# For each chain, create a plot with four boxplots (mean, std, min, max)
|
# For each chain, create a figure with five subplots for boxplots (mean, std, min, max, count)
|
||||||
for chain_name, chain_data in chain_groups:
|
for chain_name, chain_data in chain_groups:
|
||||||
# Create a figure for this chain
|
fig, axs = plt.subplots(1, 5, figsize=(18, 6), constrained_layout=True)
|
||||||
plt.figure(figsize=(12, 8))
|
|
||||||
|
|
||||||
# Normalize chain name for filename
|
# Normalize chain name for filename
|
||||||
chain_name_fs = str(chain_name).replace('--> /', '-').replace('/', '_').replace(' ', '')
|
chain_name_fs = str(chain_name).replace('--> /', '-').replace('/', '_').replace(' ', '')
|
||||||
|
|
||||||
# Create a DataFrame with the columns we want to plot
|
# Create a DataFrame with the columns we want to plot
|
||||||
plot_data = pd.DataFrame({
|
plot_data = chain_data[['mean', 'std', 'min', 'max', 'count']].copy()
|
||||||
'Mean': chain_data['mean'],
|
plot_data.columns = ['Mean', 'Std', 'Min', 'Max', 'Count']
|
||||||
'Std': chain_data['std'],
|
|
||||||
'Min': chain_data['min'],
|
# Make all plots have the same color palette
|
||||||
'Max': chain_data['max']
|
palette = sns.color_palette("husl", 4)
|
||||||
})
|
# Add a distinct color for the 'Count' plot, as it is a different metric
|
||||||
|
colors = palette + ['lightcoral']
|
||||||
# Create boxplots
|
|
||||||
ax = sns.boxplot(data=plot_data, palette='Set3')
|
for idx, (col, color) in enumerate(zip(['Mean', 'Std', 'Min', 'Max', 'Count'], colors)):
|
||||||
|
ax = axs[idx]
|
||||||
# Add individual data points
|
|
||||||
sns.stripplot(data=plot_data, color='black', alpha=0.5, size=4, jitter=True)
|
# Create boxplots
|
||||||
|
sns.boxplot(data=plot_data[col], ax=ax, color=color, showfliers=True, width=0.4)
|
||||||
# Set labels and title
|
|
||||||
plt.title(f'Statistics for Chain: {chain_name}\nAcross {len(chain_data)} Experiment Runs\n{experiment_name}', fontsize=14)
|
# Add individual data points
|
||||||
plt.ylabel('Latency (ms)', fontsize=12)
|
sns.swarmplot(data=plot_data[col], ax=ax, color='black', size=3, alpha=0.6)
|
||||||
plt.xlabel('Statistic Type', fontsize=12)
|
|
||||||
|
# Set labels and title
|
||||||
# Add grid for better readability
|
ax.set_title(f'{col} Distribution', fontsize=14, fontweight='bold')
|
||||||
plt.grid(axis='y', linestyle='--', alpha=0.7)
|
ax.set_xticks([]) # Remove x-ticks for clarity
|
||||||
|
ax.set_xlabel('') # No x-label needed
|
||||||
# Tighten layout and save the figure
|
ax.set_ylabel('Latency (ms)' if col != 'Count' else 'Count', fontsize=12)
|
||||||
|
|
||||||
|
# Calculate statistics of the statistics
|
||||||
|
data_values = plot_data[col]
|
||||||
|
stats_text = (
|
||||||
|
f"Mean: {data_values.mean():.2f}\n"
|
||||||
|
f"Std: {data_values.std():.2f}\n"
|
||||||
|
f"Min: {data_values.min():.2f}\n"
|
||||||
|
f"Max: {data_values.max():.2f}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# --- Place legend in the top right using axes fraction coordinates ---
|
||||||
|
ax.text(
|
||||||
|
0.95, 0.98, # axes fraction: 95% right, 98% up
|
||||||
|
stats_text,
|
||||||
|
transform=ax.transAxes,
|
||||||
|
verticalalignment='top',
|
||||||
|
horizontalalignment='right',
|
||||||
|
fontsize=10,
|
||||||
|
bbox=dict(facecolor='white', alpha=0.9, boxstyle='round,pad=0.3', edgecolor='gray')
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add grid for better readability
|
||||||
|
ax.grid(axis='y', linestyle='--', alpha=0.4)
|
||||||
|
|
||||||
|
# Set the overall title for the figure
|
||||||
|
plt.suptitle(
|
||||||
|
f'Statistics for Chain: {chain_name}\nAcross {len(chain_data)} Experiment Runs - {experiment_name}',
|
||||||
|
fontsize=18, fontweight='bold'
|
||||||
|
)
|
||||||
|
|
||||||
|
# Save the figure with a filename that includes the chain name
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
output_file = args.input.replace('.csv', f'_chain_{chain_name_fs}_analysis.png')
|
output_file = args.input.replace('.csv', f'_chain_{chain_name_fs}_analysis.png')
|
||||||
plt.savefig(output_file, dpi=300)
|
plt.savefig(output_file, dpi=300)
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
# Also calculate and print summary statistics for this chain
|
# Print summary statistics for the chain
|
||||||
summary = chain_data.describe()
|
summary = chain_data.describe()
|
||||||
print(f"\nSummary for chain: {chain_name}")
|
print(f"\nSummary for chain: {chain_name}")
|
||||||
print(summary[['mean', 'std', 'min', 'max']])
|
print(summary[['mean', 'std', 'min', 'max', 'count']])
|
||||||
|
|
||||||
print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}")
|
|
||||||
|
|
||||||
|
print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
|
@ -51,7 +51,7 @@ def main(base_dir, name_filter):
|
||||||
pm.execute_notebook(
|
pm.execute_notebook(
|
||||||
"./trace-analysis.ipynb",
|
"./trace-analysis.ipynb",
|
||||||
os.path.join(current_artifact, "output", "trace-analysis.ipynb"),
|
os.path.join(current_artifact, "output", "trace-analysis.ipynb"),
|
||||||
log_output=True
|
log_output=False
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOGGER.exception(e)
|
LOGGER.exception(e)
|
||||||
|
|
|
@ -683,6 +683,7 @@
|
||||||
" std_latency = np.std(e2e_latencies)\n",
|
" std_latency = np.std(e2e_latencies)\n",
|
||||||
" min_latency = np.min(e2e_latencies)\n",
|
" min_latency = np.min(e2e_latencies)\n",
|
||||||
" max_latency = np.max(e2e_latencies)\n",
|
" max_latency = np.max(e2e_latencies)\n",
|
||||||
|
" count_latencies = len(e2e_latencies)\n",
|
||||||
" ax.axvline(mean_latency, c=\"red\", linewidth=2)\n",
|
" ax.axvline(mean_latency, c=\"red\", linewidth=2)\n",
|
||||||
" _, max_ylim = ax.get_ylim()\n",
|
" _, max_ylim = ax.get_ylim()\n",
|
||||||
" # Create a multi-line string with all stats\n",
|
" # Create a multi-line string with all stats\n",
|
||||||
|
@ -690,7 +691,8 @@
|
||||||
" f\"Mean: {mean_latency:.2f} ms\\n\"\n",
|
" f\"Mean: {mean_latency:.2f} ms\\n\"\n",
|
||||||
" f\"Std: {std_latency:.2f} ms\\n\"\n",
|
" f\"Std: {std_latency:.2f} ms\\n\"\n",
|
||||||
" f\"Min: {min_latency:.2f} ms\\n\"\n",
|
" f\"Min: {min_latency:.2f} ms\\n\"\n",
|
||||||
" f\"Max: {max_latency:.2f} ms\"\n",
|
" f\"Max: {max_latency:.2f} ms\\n\"\n",
|
||||||
|
" f\"Count: {count_latencies}\"\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" # Place text near top right of plot\n",
|
" # Place text near top right of plot\n",
|
||||||
" ax.text(\n",
|
" ax.text(\n",
|
||||||
|
@ -703,10 +705,10 @@
|
||||||
" bbox=dict(facecolor='white', alpha=0.7, boxstyle='round,pad=0.3')\n",
|
" bbox=dict(facecolor='white', alpha=0.7, boxstyle='round,pad=0.3')\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" plt.savefig(os.path.join(OUT_PATH, f\"plot_e2es_{name}.png\"))\n",
|
" plt.savefig(os.path.join(OUT_PATH, f\"plot_e2es_{name}.png\"))\n",
|
||||||
" result_strings.append(f\"Chain {topics[0]} --> {topics[-1]} E2E stats: Mean: {mean_latency:.2f} ms, Std: {std_latency:.2f} ms, Min: {min_latency:.2f} ms, Max: {max_latency:.2f} ms\")\n",
|
" result_strings.append(f\"Chain {topics[0]} --> {topics[-1]} E2E stats: Mean: {mean_latency:.2f} ms, Std: {std_latency:.2f} ms, Min: {min_latency:.2f} ms, Max: {max_latency:.2f} ms, Count: {count_latencies}\")\n",
|
||||||
" # also do it as csv of order: exepriment_name, chain, mean, std, min, max\n",
|
" # also do it as csv of order: exepriment_name, chain, mean, std, min, max\n",
|
||||||
" result_strings_csv.append(\n",
|
" result_strings_csv.append(\n",
|
||||||
" f\"{EXPERIMENT_NAME},{topics[0]} --> {topics[-1]},{mean_latency:.2f},{std_latency:.2f},{min_latency:.2f},{max_latency:.2f}\"\n",
|
" f\"{EXPERIMENT_NAME},{topics[0]} --> {topics[-1]},{mean_latency:.2f},{std_latency:.2f},{min_latency:.2f},{max_latency:.2f},{count_latencies}\"\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
" ##################################################\n",
|
" ##################################################\n",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue