Document tracing_interop/utils.py
This commit is contained in:
parent
7f3b6c5aa1
commit
5764e45567
1 changed files with 28 additions and 4 deletions
|
@ -2,27 +2,51 @@ import pandas as pd
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
def row_to_type(type, type_args, type_kwargs=None):
    """
    Instantiate an object of type `type`, passing the entries of `type_args` and `type_kwargs` as keyword arguments.

    Both mappings are unpacked with `**`, so their keys must be strings and the two key sets must not overlap
    (a key present in both raises a `TypeError`).

    :param type: The type (or any other callable) to instantiate (e.g. TrNode)
    :param type_args: The per-instance arguments as a mapping (str -> Any), e.g. one DataFrame row as a dict
    :param type_kwargs: Additional keyword arguments as a mapping (str -> Any). `None` is treated as empty.
    :return: The instantiated object
    """
    if type_kwargs is None:
        type_kwargs = {}
    return type(**type_args, **type_kwargs)
|
||||||
|
|
||||||
|
|
||||||
def df_to_type_list(df, type, column_value_mappers=None, column_to_field_mappings=None, **type_kwargs):
    """
    Convert the Pandas DataFrame `df` to a list of instances of type `type`.

    Map column values using `column_value_mappers` first and rename columns using `column_to_field_mappings` thereafter.
    If one/both of these arguments are `None`, skip the respective action(s).
    All transformations are applied to a copy, so the caller's DataFrame is left unchanged and the function can be
    called repeatedly on the same DataFrame.

    If `df` has an index other than the default `pd.RangeIndex`, each row's index value is passed to the constructor
    as the `id` keyword argument (replacing a column of that name, if there is one).

    Note: rows are read via `DataFrame.itertuples`, which (per the pandas documentation) replaces column names that
    are not valid Python identifiers or that start with an underscore by positional names. Such columns should be
    renamed through `column_to_field_mappings` only if the resulting field name is a valid identifier.

    :param df: The DataFrame to convert. Each row yields one instance; each (mapped/renamed) column becomes a keyword argument.
    :param type: The type to instantiate (e.g. TrNode)
    :param column_value_mappers: A dict of `<column_name> -> func(x)` to transform column values. `None` causes this step to be skipped.
    :param column_to_field_mappings: A dict of `<column_name> -> <field_name>` to rename columns to fit the constructor of `type`. `None` causes this step to be skipped.
    :param type_kwargs: Additional keyword arguments given to the `type` constructor for all instantiations.
    :return: The list of instances of type `type`.
    """

    # Only copy if columns are actually going to be rewritten; the copy keeps the caller's DataFrame intact
    if column_value_mappers or column_to_field_mappings:
        df = df.copy()

    # Map column values and overwrite the original column
    if column_value_mappers is not None:
        for col, mapper in column_value_mappers.items():
            df[col] = df[col].map(mapper)

    # Then rename columns one after another (in mapping order).
    # Popping before assigning removes the old name and keeps the column intact even if `col == field`.
    if column_to_field_mappings is not None:
        for col, field in column_to_field_mappings.items():
            df[field] = df.pop(col)

    # If there is an index in the DataFrame, move its values to the `"id"` column later
    has_idx = not isinstance(df.index, pd.RangeIndex)
    ret_list = []

    # Instantiate one `type` object per row, showing a progress bar while doing so
    for row in tqdm(df.itertuples(index=has_idx), desc=f" ├─ Processing {type.__name__}s", total=len(df)):
        row_dict = row._asdict()
        if has_idx:  # Move index to `"id"` column if present
            row_dict["id"] = row_dict.pop("Index")
        # Instantiate and append `type` object
        ret_list.append(row_to_type(type, row_dict, type_kwargs))
    return ret_list
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue