added a bunch of helper scripts
This commit is contained in:
		
							parent
							
								
									f72408cd88
								
							
						
					
					
						commit
						a24aeeffe7
					
				
					 6 changed files with 347 additions and 0 deletions
				
			
		
							
								
								
									
										30
									
								
								add_csv_header.sh
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										30
									
								
								add_csv_header.sh
									
										
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,30 @@ | |||
#!/usr/bin/env bash

# Usage: ./add_csv_header.sh /path/to/trace_root
#
# Prepends the expected column header to every results.csv located exactly
# one directory level below the trace root, unless the file's first line is
# already that header.

set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 /path/to/trace_root"
  exit 1
fi

TRACE_ROOT="$1"
HEADER="experiment_name,chain,mean,std,min,max,count"

if [[ ! -d "$TRACE_ROOT" ]]; then
  echo "Error: '$TRACE_ROOT' is not a directory."
  exit 1
fi

# Find all results.csv files one level below the trace root
find "$TRACE_ROOT" -mindepth 2 -maxdepth 2 -type f -name results.csv | while IFS= read -r csvfile; do
  # Insert header only if not already present
  first_line=$(head -n 1 "$csvfile")
  if [[ "$first_line" != "$HEADER" ]]; then
    echo "Adding header to $csvfile"
    # Build the new content in a temporary file instead of `sed -i "1i..."`:
    # sed's insert-before-line-1 never fires on an empty file (there is no
    # line 1), and the one-line `1i` form is a GNU extension.
    tmpfile=$(mktemp)
    { printf '%s\n' "$HEADER"; cat "$csvfile"; } > "$tmpfile"
    # Write back through redirection so the original file keeps its
    # permissions and ownership (mktemp files are created private).
    cat "$tmpfile" > "$csvfile"
    rm -f "$tmpfile"
  else
    echo "Header already present in $csvfile, skipping."
  fi
done
|  | @ -27,6 +27,9 @@ def main(): | |||
|     # Group data by chain | ||||
|     chain_groups = df.groupby('chain') | ||||
| 
 | ||||
|     # Prepare list to collect summary data for CSV export | ||||
|     summary_data = [] | ||||
| 
 | ||||
|     # For each chain, create a figure with five subplots for boxplots (mean, std, min, max, count) | ||||
|     for chain_name, chain_data in chain_groups: | ||||
|         fig, axs = plt.subplots(1, 5, figsize=(18, 6), constrained_layout=True) | ||||
|  | @ -38,6 +41,17 @@ def main(): | |||
|         plot_data = chain_data[['mean', 'std', 'min', 'max', 'count']].copy() | ||||
|         plot_data.columns = ['Mean', 'Std', 'Min', 'Max', 'Count'] | ||||
| 
 | ||||
|         # Calculate summary statistics for CSV export | ||||
|         chain_summary = { | ||||
|             'chain': chain_name, | ||||
|             'mean_count': plot_data['Count'].mean(), | ||||
|             'mean_mean': plot_data['Mean'].mean(), | ||||
|             'mean_std': plot_data['Std'].mean(), | ||||
|             'mean_min': plot_data['Min'].mean(), | ||||
|             'mean_max': plot_data['Max'].mean() | ||||
|         } | ||||
|         summary_data.append(chain_summary) | ||||
| 
 | ||||
|         # Make all plots have the same color palette | ||||
|         palette = sns.color_palette("husl", 4) | ||||
|         # Add a distinct color for the 'Count' plot, as it is a different metric | ||||
|  | @ -114,7 +128,13 @@ def main(): | |||
|         print(f"\nSummary for chain: {chain_name}") | ||||
|         print(summary[['mean', 'std', 'min', 'max', 'count']]) | ||||
| 
 | ||||
|     # Create and save the summary CSV | ||||
|     summary_df = pd.DataFrame(summary_data) | ||||
|     summary_csv_file = args.input.replace('.csv', '_summary.csv') | ||||
|     summary_df.to_csv(summary_csv_file, index=False) | ||||
| 
 | ||||
|     print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}") | ||||
|     print(f"Summary CSV saved as: {summary_csv_file}") | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
							
								
								
									
										214
									
								
								csv2table.py
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										214
									
								
								csv2table.py
									
										
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,214 @@ | |||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| import pandas as pd | ||||
| import numpy as np | ||||
| 
 | ||||
# LaTeX text-mode special characters and their escaped replacements.
# Applied in a single pass via str.translate, so replacements are never
# escaped a second time.
_LATEX_ESCAPES = str.maketrans({
    '\\': r'\textbackslash{}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
})

# Arrow emitted between the parts of a chain. A math-mode arrow is used
# because a raw "→" needs extra Unicode setup that the generated preamble
# does not provide.
_LATEX_ARROW = r"$\rightarrow$"


def _escape_latex(value):
    """Return ``str(value)`` with LaTeX special characters escaped."""
    return str(value).translate(_LATEX_ESCAPES)


def _load_chain_table(csv_file_path, decimal_places):
    """Read the CSV, make the 'chain' column readable and round numeric columns."""
    df = pd.read_csv(csv_file_path)

    # '/input/a/output/b' -> 'a → b'; remaining slashes become spaces.
    # regex=False: these are literal substrings, not patterns.
    chain = df['chain'].str.replace('/input/', '', regex=False)
    chain = chain.str.replace('/output/', ' → ', regex=False)
    df['chain'] = chain.str.replace('/', ' ', regex=False)

    numeric_columns = df.select_dtypes(include=[np.number]).columns
    df[numeric_columns] = df[numeric_columns].round(decimal_places)
    return df


def _header_cells(columns):
    """Return display names: 'Chain' for the chain column, title-cased names otherwise."""
    return [
        "Chain" if col == 'chain' else _escape_latex(col.replace('_', ' ').title())
        for col in columns
    ]


def _format_chain(value):
    """Wrap each arrow-separated part of a chain in \\texttt, joined by a LaTeX arrow."""
    parts = [part.strip() for part in str(value).split('→')]
    return f" {_LATEX_ARROW} ".join(
        f"\\texttt{{{_escape_latex(part)}}}" for part in parts
    )


def _data_rows(df):
    """Return one LaTeX row line per DataFrame row.

    The first column is formatted as a chain (monospace); every other cell
    is converted with ``str`` and escaped.
    """
    lines = []
    for _, row in df.iterrows():
        cells = [
            _format_chain(value) if position == 0 else _escape_latex(value)
            for position, value in enumerate(row)
        ]
        lines.append(" & ".join(cells) + " \\\\")
    return lines


def csv_to_latex_table(csv_file_path, output_file_path=None, decimal_places=2):
    """
    Convert a CSV file to a complete LaTeX document containing a longtable.

    The CSV must have a 'chain' column; its values are rendered in monospace
    with LaTeX special characters (e.g. underscores) escaped so the output
    compiles.

    Args:
        csv_file_path (str): Path to the input CSV file
        output_file_path (str, optional): Path to save the LaTeX output
        decimal_places (int): Number of decimal places for numeric values

    Returns:
        str: LaTeX document code
    """
    df = _load_chain_table(csv_file_path, decimal_places)

    num_cols = len(df.columns)
    col_spec = "l" + "r" * (num_cols - 1)  # Left align first column, right align others
    header_row = " & ".join(_header_cells(df.columns)) + " \\\\"

    latex_code = [
        # Document setup (optional - can be removed if embedding in existing document)
        "\\documentclass{article}",
        "\\usepackage{booktabs}",
        "\\usepackage{array}",
        "\\usepackage{longtable}",
        "\\begin{document}",
        "",
        # Table setup and header for the first page
        "\\begin{longtable}{" + col_spec + "}",
        "\\toprule",
        header_row,
        "\\midrule",
        "\\endfirsthead",
        "",
        # Header for continuation pages
        "\\multicolumn{" + str(num_cols) + "}{c}",
        "{\\tablename\\ \\thetable{} -- continued from previous page} \\\\",
        "\\toprule",
        header_row,
        "\\midrule",
        "\\endhead",
        "",
        # Footer for non-final pages
        "\\midrule",
        "\\multicolumn{" + str(num_cols) + "}{r}{Continued on next page} \\\\",
        "\\endfoot",
        "",
        # Final footer
        "\\bottomrule",
        "\\endlastfoot",
        "",
    ]
    latex_code.extend(_data_rows(df))
    latex_code.extend(["\\end{longtable}", "", "\\end{document}"])

    latex_output = "\n".join(latex_code)

    # Save to file if path is provided
    if output_file_path:
        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write(latex_output)
        print(f"LaTeX table saved to {output_file_path}")

    return latex_output


def csv_to_latex_table_simple(csv_file_path, decimal_places=2):
    """
    Convert CSV to a LaTeX tabular without document wrapper (for embedding).

    Uses the same chain formatting and escaping as csv_to_latex_table.

    Args:
        csv_file_path (str): Path to the input CSV file
        decimal_places (int): Number of decimal places for numeric values

    Returns:
        str: LaTeX table code only
    """
    df = _load_chain_table(csv_file_path, decimal_places)

    col_spec = "l" + "r" * (len(df.columns) - 1)

    latex_code = [
        "\\begin{tabular}{" + col_spec + "}",
        "    \\toprule",
        " & ".join(_header_cells(df.columns)) + " \\\\",
        "\\midrule",
    ]
    latex_code.extend(_data_rows(df))
    latex_code.extend(["\\bottomrule", "\\end{tabular}"])

    return "\n".join(latex_code)
| 
 | ||||
if __name__ == "__main__":
    import argparse
    import sys

    # Command-line interface: one positional CSV path plus optional
    # output path, rounding precision and a "table only" switch.
    cli = argparse.ArgumentParser(description='Convert CSV file to LaTeX table')
    cli.add_argument('csv_file', help='Path to the input CSV file')
    cli.add_argument('-o', '--output', help='Output LaTeX file path (optional)')
    cli.add_argument(
        '-d', '--decimals', type=int, default=2,
        help='Number of decimal places for numeric values (default: 2)',
    )
    cli.add_argument(
        '-s', '--simple', action='store_true',
        help='Generate simple table only (no document wrapper)',
    )

    args = cli.parse_args()

    try:
        if not args.simple:
            # Full document: the converter writes the file itself when an
            # output path is given; otherwise the result goes to stdout.
            document = csv_to_latex_table(args.csv_file, args.output, args.decimals)
            if not args.output:
                print(document)
        else:
            # Table only: always echoed to stdout, optionally also saved.
            table = csv_to_latex_table_simple(args.csv_file, args.decimals)
            print(table)

            if args.output:
                with open(args.output, 'w', encoding='utf-8') as out_file:
                    out_file.write(table)
                print(f"\nSimple LaTeX table saved to {args.output}", file=sys.stderr)

    except FileNotFoundError:
        print(f"Error: CSV file '{args.csv_file}' not found.", file=sys.stderr)
        sys.exit(1)
    except Exception as exc:
        # Top-level boundary: report any other failure and exit non-zero.
        print(f"Error: {str(exc)}", file=sys.stderr)
        sys.exit(1)
							
								
								
									
										29
									
								
								csvfix.sh
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										29
									
								
								csvfix.sh
									
										
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,29 @@ | |||
#!/bin/bash

# Summarise the first comma-separated column ("prefix") of a CSV file:
# prints the number of distinct prefixes, then each prefix with the number
# of lines it occurs on, most frequent first.
#
# Note: fields are split on every comma; quoted CSV fields containing commas
# are not handled, and a header row (if any) is counted like a data row.

# Check if filename parameter is provided
if [ $# -eq 0 ]; then
    echo "Usage: $0 <csv_filename>"
    echo "Example: $0 data.csv"
    exit 1
fi

CSV_FILE="$1"

# Check if file exists
if [ ! -f "$CSV_FILE" ]; then
    echo "Error: File '$CSV_FILE' not found!"
    exit 1
fi

echo "Processing file: $CSV_FILE"
echo "========================================"

# Count prefixes from first column
echo -e "\n=== Total number of unique prefixes ==="
cut -d',' -f1 "$CSV_FILE" | sort | uniq | wc -l

# Readable output format
echo "=== Formatted output ==="
echo "Prefix -> Count"
echo "---------------"
# `uniq -c` emits "<padding><count> <prefix>". Save the count, then strip it
# from the line so the whole prefix is printed; using $2 would cut a prefix
# containing whitespace down to its first word.
cut -d',' -f1 "$CSV_FILE" | sort | uniq -c | sort -nr |
    awk '{ count = $1; sub(/^[[:space:]]*[0-9]+ /, ""); printf "%-20s -> %d\n", $0, count }'
							
								
								
									
										23
									
								
								run_batch_analysis_analysis.sh
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										23
									
								
								run_batch_analysis_analysis.sh
									
										
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,23 @@ | |||
#!/usr/bin/env bash

# Usage: ./run_batch_analysis_analysis.sh /path/to/target_dir
#
# Invokes ./batch_analysis_analysis.py (resolved relative to the current
# working directory) once for every results.csv that sits exactly one
# directory level below target_dir.

set -euo pipefail

# Exactly one argument is required.
if (( $# != 1 )); then
  echo "Usage: $0 /path/to/target_dir"
  exit 1
fi

target_dir=$1

if ! [[ -d $target_dir ]]; then
  echo "Error: '$target_dir' is not a directory."
  exit 1
fi

# Depth 2 relative to the target == files directly inside its subdirectories.
find "$target_dir" -mindepth 2 -maxdepth 2 -type f -name results.csv |
  while IFS= read -r results_csv; do
    printf 'Analyzing %s\n' "$results_csv"
    ./batch_analysis_analysis.py -i "$results_csv"
  done
							
								
								
									
										31
									
								
								run_batch_analyze.sh
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										31
									
								
								run_batch_analyze.sh
									
										
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,31 @@ | |||
#!/usr/bin/env bash

# Usage: ./run_batch_analyze.sh /path/to/trace_root
#
# For every directory directly under the trace root, derives a "type" from
# the directory name (the leading run of lowercase letters that is followed
# by an underscore) and runs ./batch_analyze.py on that directory with the
# filter "<type>*". Directories whose name yields no type are skipped with a
# warning.

set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 /path/to/trace_root"
  exit 1
fi

TRACE_ROOT="$1"

if [[ ! -d "$TRACE_ROOT" ]]; then
  echo "Error: '$TRACE_ROOT' is not a directory."
  exit 1
fi

for dir in "$TRACE_ROOT"/*; do
  if [[ -d "$dir" ]]; then
    dirname=$(basename "$dir")
    # Extract the lowercase letters before the first underscore as type.
    # `|| true` is required: grep exits 1 when nothing matches, and with
    # `set -e` + `pipefail` that would abort the whole script right here,
    # before the warning below could ever be printed.
    type=$(echo "$dirname" | grep -oP '^[a-z]+(?=_)' || true)
    if [[ -z "$type" ]]; then
      echo "Warning: Could not extract type from '$dirname', skipping."
      continue
    fi
    echo "Running batch_analyze.py on $dir with filter ${type}*"
    ./batch_analyze.py -d "$dir" -f "${type}*"
  fi
done
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue