dataflow-analysis/csv2table.py

214 lines
No EOL
7.4 KiB
Python
Executable file

#!/usr/bin/env python3
import pandas as pd
import numpy as np
# Name of the CSV column that holds the dataflow chain description.
_CHAIN_COLUMN = "chain"

# Single-pass translation table for characters that are special in LaTeX text
# mode. str.translate() visits each input character exactly once, so the
# braces introduced by a replacement are never escaped a second time.
_LATEX_ESCAPES = str.maketrans({
    "\\": "\\textbackslash{}",
    "&": "\\&",
    "%": "\\%",
    "$": "\\$",
    "#": "\\#",
    "_": "\\_",
    "{": "\\{",
    "}": "\\}",
    "~": "\\textasciitilde{}",
    "^": "\\textasciicircum{}",
})


def _escape_latex(text):
    """Return *text* with LaTeX special characters escaped."""
    return text.translate(_LATEX_ESCAPES)


def _load_chain_table(csv_file_path, decimal_places):
    """Read the CSV, tidy the chain column and round the numeric columns."""
    df = pd.read_csv(csv_file_path)

    # Drop the '/input/' and '/output/' markers, then turn the remaining
    # slashes into spaces. regex=False makes the literal matching explicit
    # regardless of the pandas version's default.
    chain = df[_CHAIN_COLUMN].str.replace("/input/", "", regex=False)
    chain = chain.str.replace("/output/", "", regex=False)
    df[_CHAIN_COLUMN] = chain.str.replace("/", " ", regex=False)

    numeric_columns = df.select_dtypes(include=[np.number]).columns
    if len(numeric_columns) > 0:
        df[numeric_columns] = df[numeric_columns].round(decimal_places)
    return df


def _format_chain_cell(value):
    """Render one chain value as monospace segments joined by arrows."""
    # NOTE(review): the original code called str.split('') here, which always
    # raises ValueError (empty separator). The separator is assumed to have
    # been an arrow between chain endpoints; both the Unicode arrow and the
    # ASCII '->' are accepted -- confirm against the tool producing the CSV.
    text = str(value).replace("->", "\u2192")
    segments = [segment.strip() for segment in text.split("\u2192")]
    segments = [segment for segment in segments if segment]
    if not segments:
        return "\\texttt{}"
    return " $\\rightarrow$ ".join(
        f"\\texttt{{{_escape_latex(segment)}}}" for segment in segments
    )


def _render_rows(df):
    """Return ``(header_row, body_rows)`` as LaTeX table lines for *df*."""
    columns = list(df.columns)
    # The chain column is located by name so that the formatting agrees with
    # the clean-up step, which also addresses it by name.
    chain_index = columns.index(_CHAIN_COLUMN)

    headers = [
        "Chain"
        if col == _CHAIN_COLUMN
        else _escape_latex(str(col).replace("_", " ").title())
        for col in columns
    ]
    header_row = " & ".join(headers) + " \\\\"

    body_rows = []
    for record in df.itertuples(index=False, name=None):
        cells = [
            _format_chain_cell(value)
            if index == chain_index
            else _escape_latex(str(value))
            for index, value in enumerate(record)
        ]
        body_rows.append(" & ".join(cells) + " \\\\")
    return header_row, body_rows


def csv_to_latex_table(csv_file_path, output_file_path=None, decimal_places=2):
    """
    Convert a CSV file to a standalone LaTeX document containing a longtable.

    The CSV must contain a ``chain`` column; its values are cleaned up and
    typeset in monospace. Numeric columns are rounded to ``decimal_places``.
    All cell text is escaped so that characters such as ``_`` or ``&`` do not
    break the generated LaTeX.

    Args:
        csv_file_path (str): Path to the input CSV file
        output_file_path (str, optional): Path to save the LaTeX output
        decimal_places (int): Number of decimal places for numeric values

    Returns:
        str: LaTeX document code

    Raises:
        FileNotFoundError: If the CSV file (or the output directory) is missing.
        KeyError: If the CSV has no ``chain`` column.
    """
    df = _load_chain_table(csv_file_path, decimal_places)
    header_row, body_rows = _render_rows(df)

    num_cols = len(df.columns)
    col_spec = "l" + "r" * (num_cols - 1)  # Left align first column, right align others

    latex_code = [
        # Document setup
        "\\documentclass{article}",
        "\\usepackage{booktabs}",
        "\\usepackage{array}",
        "\\usepackage{longtable}",
        "\\begin{document}",
        "",
        # Header on the first page
        "\\begin{longtable}{" + col_spec + "}",
        "\\toprule",
        header_row,
        "\\midrule",
        "\\endfirsthead",
        "",
        # Header for continuation pages
        "\\multicolumn{" + str(num_cols) + "}{c}",
        "{\\tablename\\ \\thetable{} -- continued from previous page} \\\\",
        "\\toprule",
        header_row,
        "\\midrule",
        "\\endhead",
        "",
        # Footer for non-final pages
        "\\midrule",
        "\\multicolumn{" + str(num_cols) + "}{r}{Continued on next page} \\\\",
        "\\endfoot",
        "",
        # Final footer
        "\\bottomrule",
        "\\endlastfoot",
        "",
        *body_rows,
        "\\end{longtable}",
        "",
        "\\end{document}",
    ]
    latex_output = "\n".join(latex_code)

    # Save to file if path is provided
    if output_file_path:
        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write(latex_output)
        print(f"LaTeX table saved to {output_file_path}")
    return latex_output


def csv_to_latex_table_simple(csv_file_path, decimal_places=2):
    """
    Convert CSV to a LaTeX tabular without document wrapper (for embedding).

    Applies the same chain clean-up, rounding and escaping as
    ``csv_to_latex_table`` but emits only a ``tabular`` environment.

    Args:
        csv_file_path (str): Path to the input CSV file
        decimal_places (int): Number of decimal places for numeric values

    Returns:
        str: LaTeX table code only

    Raises:
        FileNotFoundError: If the CSV file is missing.
        KeyError: If the CSV has no ``chain`` column.
    """
    df = _load_chain_table(csv_file_path, decimal_places)
    header_row, body_rows = _render_rows(df)

    col_spec = "l" + "r" * (len(df.columns) - 1)
    latex_code = [
        "\\begin{tabular}{" + col_spec + "}",
        " \\toprule",
        header_row,
        "\\midrule",
        *body_rows,
        "\\bottomrule",
        "\\end{tabular}",
    ]
    return "\n".join(latex_code)
def main(argv=None):
    """
    Command-line entry point: convert a CSV file to a LaTeX table.

    Args:
        argv (list[str], optional): Argument list to parse; defaults to
            ``sys.argv[1:]`` when omitted.

    Returns:
        int: Process exit status (0 on success, 1 on error).
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(description='Convert CSV file to LaTeX table')
    parser.add_argument('csv_file', help='Path to the input CSV file')
    parser.add_argument('-o', '--output', help='Output LaTeX file path (optional)')
    parser.add_argument('-d', '--decimals', type=int, default=2,
                        help='Number of decimal places for numeric values (default: 2)')
    parser.add_argument('-s', '--simple', action='store_true',
                        help='Generate simple table only (no document wrapper)')
    args = parser.parse_args(argv)

    try:
        if args.simple:
            # Generate simple table for embedding; it is always echoed to
            # stdout and additionally saved when an output path is given.
            latex_output = csv_to_latex_table_simple(args.csv_file, args.decimals)
            print(latex_output)
            if args.output:
                with open(args.output, 'w', encoding='utf-8') as f:
                    f.write(latex_output)
                print(f"\nSimple LaTeX table saved to {args.output}", file=sys.stderr)
        else:
            # Generate complete LaTeX document; the callee writes the file
            # itself, so only echo to stdout when no output path was given.
            latex_output = csv_to_latex_table(args.csv_file, args.output, args.decimals)
            if not args.output:
                print(latex_output)
    except FileNotFoundError as exc:
        # The error may come from reading the CSV or from opening the output
        # file in a directory that does not exist; report the path that was
        # actually missing instead of always blaming the CSV.
        missing = exc.filename
        if missing is None or str(missing) == args.csv_file:
            print(f"Error: CSV file '{args.csv_file}' not found.", file=sys.stderr)
        else:
            print(f"Error: cannot open '{missing}': no such file or directory.",
                  file=sys.stderr)
        return 1
    except Exception as e:
        # Top-level boundary: report any other failure and exit non-zero.
        print(f"Error: {str(e)}", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    import sys

    sys.exit(main())