Add util function to compute difference between two columns

This commit is contained in:
Christophe Bedard 2019-08-09 14:20:03 +02:00
parent 498b9f4d15
commit 94941538d8
2 changed files with 62 additions and 0 deletions

View file

@ -70,6 +70,45 @@ class TestDataModelUtil(unittest.TestCase):
)
assert_frame_equal(result_df, expected_df, check_dtype=False)
def test_compute_column_difference(self) -> None:
    """Check that the difference column is added in place as ``left - right``."""
    frame = DataFrame(
        data={
            'a': [10, 1],
            'b': [13, 3],
            'c': [1, 69],
        },
    )
    expected = DataFrame(
        data={
            'a': [10, 1],
            'b': [13, 3],
            'c': [1, 69],
            'diff': [3, 2],
        },
    )
    # 'diff' = 'b' - 'a'; the call mutates `frame` and returns nothing
    DataModelUtil.compute_column_difference(frame, 'b', 'a', 'diff')
    assert_frame_equal(frame, expected)
# Run the tests in this file when it is executed directly as a script.
if __name__ == '__main__':
unittest.main()

View file

@ -79,6 +79,23 @@ class DataModelUtil():
)
return df
@staticmethod
def compute_column_difference(
    df: DataFrame,
    left_column: str,
    right_column: str,
    diff_column: str,
) -> None:
    """
    Create new column with difference between two columns.

    The difference is computed as ``left_column - right_column`` for every row
    and stored in ``diff_column``, which is added to (or overwritten in) ``df``.

    :param df: the dataframe (modified inplace)
    :param left_column: the name of the left column (minuend)
    :param right_column: the name of the right column (subtrahend)
    :param diff_column: the name of the new column with differences
    :raises KeyError: if ``left_column`` or ``right_column`` is not a column of ``df``
    """
    # Subtract the two columns directly rather than with a row-wise
    # `df.apply(..., axis=1)`: the vectorized form avoids a Python-level loop
    # over rows, keeps the dtype of the two operands (row-wise apply upcasts
    # each row to a common dtype first), and still produces a single, empty
    # column when the dataframe has no rows.
    df[diff_column] = df[left_column] - df[right_column]
class ProfileDataModelUtil(DataModelUtil):
"""Profiling data model utility class."""
@ -127,6 +144,12 @@ class ProfileDataModelUtil(DataModelUtil):
(tid_df['depth'] == depth) &
(tid_df['function_name'] == name)
][['start_timestamp', 'duration', 'actual_duration']]
self.compute_column_difference(
data,
'duration',
'actual_duration',
'duration_difference',
)
functions_data.append({
'depth': depth,
'function_name': name,