added a bunch of helper scripts
This commit is contained in:
parent
f72408cd88
commit
a24aeeffe7
6 changed files with 347 additions and 0 deletions
30
add_csv_header.sh
Executable file
30
add_csv_header.sh
Executable file
|
@ -0,0 +1,30 @@
|
|||
#!/usr/bin/env bash
#
# Prepend a fixed CSV header to every results.csv found exactly one
# directory level below a trace root, unless the header is already there.
#
# Usage: ./add_csv_header.sh /path/to/trace_root

set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 /path/to/trace_root" >&2
  exit 1
fi

TRACE_ROOT="$1"
readonly HEADER="experiment_name,chain,mean,std,min,max,count"

if [[ ! -d "$TRACE_ROOT" ]]; then
  echo "Error: '$TRACE_ROOT' is not a directory." >&2
  exit 1
fi

# Find all results.csv files one level below the trace root.
# -print0 / read -d '' keeps filenames with spaces or newlines intact.
find "$TRACE_ROOT" -mindepth 2 -maxdepth 2 -type f -name results.csv -print0 |
  while IFS= read -r -d '' csvfile; do
    # Insert header only if not already present.
    first_line=$(head -n 1 "$csvfile")
    if [[ "$first_line" != "$HEADER" ]]; then
      echo "Adding header to $csvfile"
      # Prepend via a temp file instead of 'sed -i "1i..."': that sed form
      # is GNU-only (BSD sed needs '-i ""' and '1i\') and silently skips
      # empty files. Overwriting in place preserves the file's permissions.
      tmp=$(mktemp)
      { printf '%s\n' "$HEADER"; cat -- "$csvfile"; } > "$tmp"
      cat -- "$tmp" > "$csvfile"
      rm -f -- "$tmp"
    else
      echo "Header already present in $csvfile, skipping."
    fi
  done
|
|
@ -27,6 +27,9 @@ def main():
|
|||
# Group data by chain
|
||||
chain_groups = df.groupby('chain')
|
||||
|
||||
# Prepare list to collect summary data for CSV export
|
||||
summary_data = []
|
||||
|
||||
# For each chain, create a figure with five subplots for boxplots (mean, std, min, max, count)
|
||||
for chain_name, chain_data in chain_groups:
|
||||
fig, axs = plt.subplots(1, 5, figsize=(18, 6), constrained_layout=True)
|
||||
|
@ -38,6 +41,17 @@ def main():
|
|||
plot_data = chain_data[['mean', 'std', 'min', 'max', 'count']].copy()
|
||||
plot_data.columns = ['Mean', 'Std', 'Min', 'Max', 'Count']
|
||||
|
||||
# Calculate summary statistics for CSV export
|
||||
chain_summary = {
|
||||
'chain': chain_name,
|
||||
'mean_count': plot_data['Count'].mean(),
|
||||
'mean_mean': plot_data['Mean'].mean(),
|
||||
'mean_std': plot_data['Std'].mean(),
|
||||
'mean_min': plot_data['Min'].mean(),
|
||||
'mean_max': plot_data['Max'].mean()
|
||||
}
|
||||
summary_data.append(chain_summary)
|
||||
|
||||
# Make all plots have the same color palette
|
||||
palette = sns.color_palette("husl", 4)
|
||||
# Add a distinct color for the 'Count' plot, as it is a different metric
|
||||
|
@ -114,7 +128,13 @@ def main():
|
|||
print(f"\nSummary for chain: {chain_name}")
|
||||
print(summary[['mean', 'std', 'min', 'max', 'count']])
|
||||
|
||||
# Create and save the summary CSV
|
||||
summary_df = pd.DataFrame(summary_data)
|
||||
summary_csv_file = args.input.replace('.csv', '_summary.csv')
|
||||
summary_df.to_csv(summary_csv_file, index=False)
|
||||
|
||||
print(f"\nAnalysis complete. Plots saved with base name: {args.input.replace('.csv', '_chain_*_analysis.png')}")
|
||||
print(f"Summary CSV saved as: {summary_csv_file}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
214
csv2table.py
Executable file
214
csv2table.py
Executable file
|
@ -0,0 +1,214 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
def csv_to_latex_table(csv_file_path, output_file_path=None, decimal_places=2):
    """
    Convert a CSV file to a complete LaTeX document containing a longtable.

    The first column is expected to be named 'chain'; its '/input/' and
    '/output/' path markers are rewritten into an arrow-separated pair and
    each side is typeset with \\texttt.

    Args:
        csv_file_path (str): Path to the input CSV file
        output_file_path (str, optional): Path to save the LaTeX output
        decimal_places (int): Number of decimal places for numeric values

    Returns:
        str: LaTeX document source (preamble + longtable)
    """

    def _latex_escape(text):
        # Escape LaTeX special characters that commonly occur in path-like
        # values; without this, chain names containing '_' fail to compile.
        out = str(text)
        for ch in '&%$#_':
            out = out.replace(ch, '\\' + ch)
        return out

    # Read the CSV file
    df = pd.read_csv(csv_file_path)

    # Create a more readable version of the chain column. Use ' → ' (with
    # surrounding spaces) for consistency with csv_to_latex_table_simple.
    df['chain'] = df['chain'].str.replace('/input/', '').str.replace('/output/', ' → ')
    df['chain'] = df['chain'].str.replace('/', ' ')

    # Round numeric columns to specified decimal places
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    df[numeric_columns] = df[numeric_columns].round(decimal_places)

    # Start building the LaTeX table
    latex_code = []

    # Document setup (optional - can be removed if embedding in existing document)
    latex_code.append("\\documentclass{article}")
    latex_code.append("\\usepackage{booktabs}")
    latex_code.append("\\usepackage{array}")
    latex_code.append("\\usepackage{longtable}")
    latex_code.append("\\begin{document}")
    latex_code.append("")

    # Table setup: left-align the chain column, right-align numeric columns.
    num_cols = len(df.columns)
    col_spec = "l" + "r" * (num_cols - 1)

    latex_code.append("\\begin{longtable}{" + col_spec + "}")
    latex_code.append("\\toprule")

    # Create header: 'chain' gets a fixed label, others are title-cased.
    headers = []
    for col in df.columns:
        if col == 'chain':
            headers.append("Chain")
        else:
            readable_name = col.replace('_', ' ').title()
            headers.append(readable_name)

    latex_code.append(" & ".join(headers) + " \\\\")
    latex_code.append("\\midrule")
    latex_code.append("\\endfirsthead")
    latex_code.append("")

    # Header repeated on continuation pages
    latex_code.append("\\multicolumn{" + str(num_cols) + "}{c}")
    latex_code.append("{\\tablename\\ \\thetable{} -- continued from previous page} \\\\")
    latex_code.append("\\toprule")
    latex_code.append(" & ".join(headers) + " \\\\")
    latex_code.append("\\midrule")
    latex_code.append("\\endhead")
    latex_code.append("")

    # Footer for non-final pages
    latex_code.append("\\midrule")
    latex_code.append("\\multicolumn{" + str(num_cols) + "}{r}{Continued on next page} \\\\")
    latex_code.append("\\endfoot")
    latex_code.append("")

    # Final footer
    latex_code.append("\\bottomrule")
    latex_code.append("\\endlastfoot")
    latex_code.append("")

    # Add data rows
    for _, row in df.iterrows():
        row_data = []
        for i, value in enumerate(row):
            if i == 0:  # Chain column - use texttt for monospace
                # Split arrow-joined chains so each side gets its own \texttt.
                chain_parts = str(value).split('→')
                if len(chain_parts) == 2:
                    formatted_chain = (f"\\texttt{{{_latex_escape(chain_parts[0].strip())}}}"
                                       f" → \\texttt{{{_latex_escape(chain_parts[1].strip())}}}")
                else:
                    formatted_chain = f"\\texttt{{{_latex_escape(value)}}}"
                row_data.append(formatted_chain)
            else:
                row_data.append(str(value))

        latex_code.append(" & ".join(row_data) + " \\\\")

    latex_code.append("\\end{longtable}")
    latex_code.append("")
    latex_code.append("\\end{document}")

    # Join all lines
    latex_output = "\n".join(latex_code)

    # Save to file if path is provided
    if output_file_path:
        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write(latex_output)
        print(f"LaTeX table saved to {output_file_path}")

    return latex_output
|
||||
|
||||
def csv_to_latex_table_simple(csv_file_path, decimal_places=2):
    """
    Render a CSV file as a bare LaTeX ``tabular`` (no document preamble),
    suitable for embedding into an existing document.

    Args:
        csv_file_path (str): Path to the input CSV file
        decimal_places (int): Rounding applied to numeric columns (default 2)

    Returns:
        str: LaTeX ``tabular`` source only
    """
    frame = pd.read_csv(csv_file_path)

    # Rewrite the 'chain' column into "<src> → <dst>" form.
    frame['chain'] = (
        frame['chain']
        .str.replace('/input/', '')
        .str.replace('/output/', ' → ')
        .str.replace('/', ' ')
    )

    # Round every numeric column to the requested precision.
    num_cols = frame.select_dtypes(include=[np.number]).columns
    frame[num_cols] = frame[num_cols].round(decimal_places)

    def header_label(col):
        # 'chain' gets a fixed label; other columns are title-cased.
        return "Chain" if col == 'chain' else col.replace('_', ' ').title()

    def chain_cell(value):
        # Wrap each side of the arrow in \texttt; fall back to a single one.
        pieces = str(value).split(' → ')
        if len(pieces) == 2:
            return f"\\texttt{{{pieces[0].strip()}}} → \\texttt{{{pieces[1].strip()}}}"
        return f"\\texttt{{{str(value)}}}"

    alignment = "l" + "r" * (len(frame.columns) - 1)
    head = " & ".join(header_label(c) for c in frame.columns) + " \\\\"

    body = []
    for _, record in frame.iterrows():
        cells = [chain_cell(record.iloc[0])]
        cells.extend(str(v) for v in record.iloc[1:])
        body.append(" & ".join(cells) + " \\\\")

    return "\n".join(
        ["\\begin{tabular}{" + alignment + "}", " \\toprule", head, "\\midrule"]
        + body
        + ["\\bottomrule", "\\end{tabular}"]
    )
|
||||
|
||||
if __name__ == "__main__":
    import argparse
    import sys

    def _cli():
        """Parse command-line arguments and dispatch to the table builders."""
        ap = argparse.ArgumentParser(description='Convert CSV file to LaTeX table')
        ap.add_argument('csv_file', help='Path to the input CSV file')
        ap.add_argument('-o', '--output', help='Output LaTeX file path (optional)')
        ap.add_argument('-d', '--decimals', type=int, default=2,
                        help='Number of decimal places for numeric values (default: 2)')
        ap.add_argument('-s', '--simple', action='store_true',
                        help='Generate simple table only (no document wrapper)')
        opts = ap.parse_args()

        try:
            if opts.simple:
                # Bare tabular for embedding; always echoed to stdout.
                table = csv_to_latex_table_simple(opts.csv_file, opts.decimals)
                print(table)
                if opts.output:
                    with open(opts.output, 'w', encoding='utf-8') as fh:
                        fh.write(table)
                    print(f"\nSimple LaTeX table saved to {opts.output}", file=sys.stderr)
            else:
                # Full document; the builder itself handles optional file output.
                table = csv_to_latex_table(opts.csv_file, opts.output, opts.decimals)
                if not opts.output:
                    print(table)
        except FileNotFoundError:
            print(f"Error: CSV file '{opts.csv_file}' not found.", file=sys.stderr)
            sys.exit(1)
        except Exception as e:
            print(f"Error: {str(e)}", file=sys.stderr)
            sys.exit(1)

    _cli()
|
29
csvfix.sh
Executable file
29
csvfix.sh
Executable file
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash
#
# Summarize the first (prefix) column of a CSV file: total number of
# distinct prefixes, then a per-prefix occurrence count, most common first.
#
# Usage: ./csvfix.sh <csv_filename>

# Check if filename parameter is provided
if [ $# -eq 0 ]; then
  echo "Usage: $0 <csv_filename>"
  echo "Example: $0 data.csv"
  exit 1
fi

CSV_FILE="$1"

# Check if file exists
if [ ! -f "$CSV_FILE" ]; then
  echo "Error: File '$CSV_FILE' not found!"
  exit 1
fi

echo "Processing file: $CSV_FILE"
echo "========================================"

# Count distinct prefixes in the first column ('sort -u' replaces the
# redundant 'sort | uniq' pipeline).
# NOTE(review): a header line, if present, is counted as a prefix too.
printf '\n=== Total number of unique prefixes ===\n'
cut -d',' -f1 "$CSV_FILE" | sort -u | wc -l

# Readable output format
echo "=== Formatted output ==="
echo "Prefix -> Count"
echo "---------------"
# 'read count prefix' keeps prefixes containing spaces intact, which the
# previous awk '{printf ..., $2, $1}' would have truncated at the first space.
cut -d',' -f1 "$CSV_FILE" | sort | uniq -c | sort -nr |
  while read -r count prefix; do
    printf '%-20s -> %d\n' "$prefix" "$count"
  done
|
23
run_batch_analysis_analysis.sh
Executable file
23
run_batch_analysis_analysis.sh
Executable file
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env bash
#
# Run batch_analysis_analysis.py against every results.csv found directly
# under the subdirectories of a target directory.
#
# Usage: ./run_batch_analysis_analysis.sh /path/to/target_dir

set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 /path/to/target_dir" >&2
  exit 1
fi

TARGET_DIR="$1"

if [[ ! -d "$TARGET_DIR" ]]; then
  echo "Error: '$TARGET_DIR' is not a directory." >&2
  exit 1
fi

# Find all results.csv files directly under subdirectories.
# -print0 / read -d '' keeps unusual filenames (spaces, newlines) intact.
find "$TARGET_DIR" -mindepth 2 -maxdepth 2 -type f -name results.csv -print0 |
  while IFS= read -r -d '' csvfile; do
    echo "Analyzing $csvfile"
    # NOTE(review): relies on being invoked from the directory that contains
    # batch_analysis_analysis.py — confirm, or resolve via "$(dirname "$0")".
    ./batch_analysis_analysis.py -i "$csvfile"
  done
|
31
run_batch_analyze.sh
Executable file
31
run_batch_analyze.sh
Executable file
|
@ -0,0 +1,31 @@
|
|||
#!/usr/bin/env bash
#
# Run batch_analyze.py on each subdirectory of a trace root, using the
# lowercase prefix of the directory name (text before the first '_') as
# the filename filter passed to -f.
#
# Usage: ./run_batch_analyze.sh /path/to/trace_root

set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 /path/to/trace_root" >&2
  exit 1
fi

TRACE_ROOT="$1"

if [[ ! -d "$TRACE_ROOT" ]]; then
  echo "Error: '$TRACE_ROOT' is not a directory." >&2
  exit 1
fi

for dir in "$TRACE_ROOT"/*; do
  if [[ -d "$dir" ]]; then
    dirname=$(basename "$dir")
    # Extract the leading lowercase run before the first underscore.
    # BUGFIX: the previous 'type=$(echo ... | grep -oP ...)' propagated
    # grep's non-zero exit status into the assignment, so under 'set -e'
    # the whole script aborted on any non-matching name and the skip
    # branch below was unreachable. grep -P is also GNU-only; bash's
    # [[ =~ ]] has neither problem.
    type=""
    if [[ "$dirname" =~ ^([a-z]+)_ ]]; then
      type="${BASH_REMATCH[1]}"
    fi
    if [[ -z "$type" ]]; then
      echo "Warning: Could not extract type from '$dirname', skipping."
      continue
    fi
    echo "Running batch_analyze.py on $dir with filter ${type}*"
    ./batch_analyze.py -d "$dir" -f "${type}*"
  fi
done
|
Loading…
Add table
Add a link
Reference in a new issue