batch analysis analysis
This commit is contained in:
		
							parent
							
								
									6259e856e5
								
							
						
					
					
						commit
						b5b0f2f84b
					
				
					 4 changed files with 123 additions and 1864 deletions
				
			
		
							
								
								
									
										76
									
								
								batch_analysis_analysis.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								batch_analysis_analysis.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,76 @@ | |||
| import pandas as pd | ||||
| import numpy as np | ||||
| import argparse | ||||
| import seaborn as sns | ||||
| import matplotlib.pyplot as plt | ||||
| 
 | ||||
| 
 | ||||
def parse_arguments():
    """Parse the command-line options of the chain analysis script.

    Returns:
        argparse.Namespace: The parsed options. ``input`` holds the value of
        the required ``--input`` / ``-i`` option (path to the input CSV).
    """
    cli = argparse.ArgumentParser(
        description='Analyze chain data from CSV file.',
    )
    cli.add_argument(
        '--input',
        '-i',
        required=True,
        help='Path to the input CSV file',
    )
    options = cli.parse_args()
    return options
| 
 | ||||
| 
 | ||||
def _chain_output_path(input_path, chain_name):
    """Return the PNG path for one chain's plot, derived from the input path.

    Only a trailing ``.csv`` extension (any letter case) is stripped. A plain
    ``str.replace('.csv', ...)`` would return the input path unchanged when it
    has no ``.csv`` in it (so the plot would overwrite the input file) and
    would also rewrite ``.csv`` occurrences inside directory names.
    """
    import os  # local import: this block does not rely on a file-level `os`

    # Normalize chain name for filename
    chain_name_fs = str(chain_name).replace('--> /', '-').replace('/', '_').replace(' ', '')

    base, ext = os.path.splitext(input_path)
    if ext.lower() != '.csv':
        # Not a CSV extension: keep the full name so nothing is lost and the
        # result can never equal the input path.
        base = input_path
    return f'{base}_chain_{chain_name_fs}_analysis.png'


def main():
    """Create one box plot per chain from the CSV given on the command line.

    Reads the CSV named by ``--input``, groups its rows by the ``chain``
    column and, for every chain, saves a PNG with box plots (plus the
    individual points) of the ``mean``, ``std``, ``min`` and ``max`` columns.
    A ``describe()`` summary of those columns is printed per chain.

    Raises:
        ValueError: If a required column is missing or the CSV has no rows.
    """
    args = parse_arguments()

    # Load the CSV file from the input argument
    df = pd.read_csv(args.input)

    # Maps the CSV column names to the labels shown on the plot's x axis.
    stat_labels = {'mean': 'Mean', 'std': 'Std', 'min': 'Min', 'max': 'Max'}

    if 'experiment_name' not in df.columns:
        raise ValueError("Input CSV must contain 'experiment_name' column.")
    missing = [col for col in ('chain', *stat_labels) if col not in df.columns]
    if missing:
        raise ValueError(
            f"Input CSV is missing required column(s): {', '.join(missing)}"
        )
    if df.empty:
        raise ValueError("Input CSV contains no data rows.")

    # The experiment name is taken from the first row (it should be the same
    # across all rows). Only the part before the first '-' is kept, which
    # strips a timestamp suffix if one exists. str() guards against pandas
    # having parsed the column as a non-string dtype.
    experiment_name = str(df['experiment_name'].iloc[0]).split('-', 1)[0]

    # For each chain, create a plot with four boxplots (mean, std, min, max)
    for chain_name, chain_data in df.groupby('chain'):
        plot_data = chain_data[list(stat_labels)].rename(columns=stat_labels)

        fig, ax = plt.subplots(figsize=(12, 8))
        try:
            # Box plots with the individual data points drawn on top
            sns.boxplot(data=plot_data, palette='Set3', ax=ax)
            sns.stripplot(data=plot_data, color='black', alpha=0.5, size=4,
                          jitter=True, ax=ax)

            ax.set_title(
                f'Statistics for Chain: {chain_name}\nAcross {len(chain_data)} Experiment Runs\n{experiment_name}',
                fontsize=14,
            )
            ax.set_ylabel('Latency (ms)', fontsize=12)
            ax.set_xlabel('Statistic Type', fontsize=12)

            # Add grid for better readability
            ax.grid(axis='y', linestyle='--', alpha=0.7)

            # Tighten layout and save the figure
            fig.tight_layout()
            fig.savefig(_chain_output_path(args.input, chain_name), dpi=300)
        finally:
            # Release the figure even if plotting or saving failed.
            plt.close(fig)

        # Also calculate and print summary statistics for this chain
        summary = chain_data.describe()
        print(f"\nSummary for chain: {chain_name}")
        print(summary[list(stat_labels)])

    print(f"\nAnalysis complete. Plots saved with base name: {_chain_output_path(args.input, '*')}")
| 
 | ||||
| 
 | ||||
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|  | @ -35,14 +35,17 @@ def main(base_dir, name_filter): | |||
|         print(f"Found {len(unprocessed)} unprocessed and {len(unprocessable)} unprocessable artifacts.") | ||||
| 
 | ||||
|         current_artifact = unprocessed.pop() | ||||
|         print(f"Now working on {current_artifact}.") | ||||
|         experiment_name = os.path.basename(current_artifact) | ||||
|         print(f"Now working on {current_artifact} --> {experiment_name}.") | ||||
| 
 | ||||
|         out_dir = os.path.join(current_artifact, 'output') | ||||
| 
 | ||||
|         shutil.rmtree(out_dir, ignore_errors=True) | ||||
|         os.makedirs(out_dir, exist_ok=False) | ||||
| 
 | ||||
|         os.environ["ANA_NB_OUT_PATH"] = f"'{out_dir}'" | ||||
|         os.environ["ANA_NB_TR_PATH"] = f"'{os.path.join(current_artifact, 'tracing/max-ma-trace/ust')}'" | ||||
|         os.environ["ANA_NB_EXPERIMENT_NAME"] = f"'{experiment_name}'" | ||||
|         os.environ["ANA_NB_TR_PATH"] = f"'/home/niklas/dataflow-analysis/{current_artifact}/ust'" | ||||
| 
 | ||||
|         try: | ||||
|             pm.execute_notebook( | ||||
|  |  | |||
|  | @ -11,3 +11,5 @@ pyvis | |||
| ruamel.yaml | ||||
| termcolor | ||||
| tqdm | ||||
| seaborn | ||||
| papermill | ||||
							
								
								
									
										1902
									
								
								trace-analysis.ipynb
									
										
									
									
									
								
							
							
						
						
									
										1902
									
								
								trace-analysis.ipynb
									
										
									
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue