From 90cbfbfe40f825b24392ffe3c35405e9b3a147e8 Mon Sep 17 00:00:00 2001 From: Christophe Bedard Date: Sun, 29 Dec 2019 11:25:50 -0500 Subject: [PATCH] Move max memory usage computation to data util --- .../scripts/memory_usage.py | 40 ++++-------- .../tracetools_analysis/utils/memory_usage.py | 63 ++++++++++++++++--- 2 files changed, 65 insertions(+), 38 deletions(-) diff --git a/tracetools_analysis/tracetools_analysis/scripts/memory_usage.py b/tracetools_analysis/tracetools_analysis/scripts/memory_usage.py index d18b041..f5ba982 100644 --- a/tracetools_analysis/tracetools_analysis/scripts/memory_usage.py +++ b/tracetools_analysis/tracetools_analysis/scripts/memory_usage.py @@ -18,50 +18,32 @@ import pandas as pd from tracetools_analysis.loading import load_file from tracetools_analysis.processor import Processor +from tracetools_analysis.processor.memory_usage import KernelMemoryUsageHandler from tracetools_analysis.processor.memory_usage import UserspaceMemoryUsageHandler from tracetools_analysis.processor.ros2 import Ros2Handler from tracetools_analysis.utils.memory_usage import MemoryUsageDataModelUtil from tracetools_analysis.utils.ros2 import Ros2DataModelUtil -# From: https://stackoverflow.com/a/32009595/6476709 -def format_memory_size(size: int, precision: int = 2): - suffixes = ['B', 'KB', 'MB', 'GB', 'TB'] - suffixIndex = 0 - while size > 1024 and suffixIndex < 4: - # Increment the index of the suffix - suffixIndex += 1 - # Apply the division - size = size / 1024.0 - return f'{size:.{precision}f} {suffixes[suffixIndex]}' - - def main(): if len(sys.argv) < 2: print('Syntax: ') - sys.exit(-1) + sys.exit(1) file_path = sys.argv[1] events = load_file(file_path) ust_memory_handler = UserspaceMemoryUsageHandler() + kernel_memory_handler = KernelMemoryUsageHandler() ros2_handler = Ros2Handler() - Processor(ust_memory_handler, ros2_handler).process(events) + Processor(ust_memory_handler, kernel_memory_handler, ros2_handler).process(events) - ust_memory_data_util = MemoryUsageDataModelUtil(ust_memory_handler.data) + memory_data_util = MemoryUsageDataModelUtil( + userspace=ust_memory_handler.data, + kernel=kernel_memory_handler.data, + ) ros2_data_util = Ros2DataModelUtil(ros2_handler.data) - ust_memory_usage_dfs = ust_memory_data_util.get_absolute_userspace_memory_usage_by_tid() + summary_df = memory_data_util.get_max_memory_usage_per_tid() tids = ros2_data_util.get_tids() - - data = [ - [ - tid, - ros2_data_util.get_node_names_from_tid(tid), - format_memory_size(memory_usage['memory_usage'].max(), precision=1), - ] - for tid, memory_usage in ust_memory_usage_dfs.items() - if tid in tids - ] - - summary_df = pd.DataFrame(data, columns=['tid', 'node_names', 'max_memory_usage']) - print('\n' + summary_df.to_string(index=False)) + filtered_df = summary_df.loc[summary_df['tid'].isin(tids)] + print('\n' + filtered_df.to_string(index=False)) diff --git a/tracetools_analysis/tracetools_analysis/utils/memory_usage.py b/tracetools_analysis/tracetools_analysis/utils/memory_usage.py index aae343d..5f49b34 100644 --- a/tracetools_analysis/tracetools_analysis/utils/memory_usage.py +++ b/tracetools_analysis/tracetools_analysis/utils/memory_usage.py @@ -29,25 +29,70 @@ class MemoryUsageDataModelUtil(DataModelUtil): def __init__( self, *, - data_model_userspace: MemoryUsageDataModel = None, - data_model_kernel: MemoryUsageDataModel = None, + userspace: MemoryUsageDataModel = None, + kernel: MemoryUsageDataModel = None, ) -> None: """ Create a MemoryUsageDataModelUtil. At least one non-`None` `MemoryUsageDataModel` must be given - :param data_model_userspace: the userspace data model object to use - :param data_model_kernel: the kernel data model object to use + :param userspace: the userspace data model object to use + :param kernel: the kernel data model object to use """ # Not giving any model to the base class; we'll own them ourselves super().__init__(None) - if data_model_userspace is None and data_model_kernel is None: + if userspace is None and kernel is None: raise RuntimeError('must provide at least one (userspace or kernel) data model!') - self.data_ust = data_model_userspace - self.data_kernel = data_model_kernel + self.data_ust = userspace + self.data_kernel = kernel + + @staticmethod + def format_size(size: int, precision: int = 2): + """ + Format a memory size to a string with a units suffix. + + From: https://stackoverflow.com/a/32009595/6476709 + + :param size: the memory size, in bytes + :param precision: the number of digits to display after the period + """ + suffixes = ['B', 'KB', 'MB', 'GB', 'TB'] + suffixIndex = 0 + while size > 1024 and suffixIndex < 4: + # Increment the index of the suffix + suffixIndex += 1 + # Apply the division + size = size / 1024.0 + return f'{size:.{precision}f} {suffixes[suffixIndex]}' + + def get_max_memory_usage_per_tid(self) -> DataFrame: + """ + Get the maximum memory usage per tid. + + :return dataframe with maximum memory usage (userspace & kernel) per tid + """ + if self.data_ust is not None: + ust_memory_usage_dfs = self.get_absolute_userspace_memory_usage_by_tid() + tids_ust = set(ust_memory_usage_dfs.keys()) + if self.data_kernel is not None: + kernel_memory_usage_dfs = self.get_absolute_kernel_memory_usage_by_tid() + tids_kernel = set(kernel_memory_usage_dfs.keys()) + # Use only the userspace tid values if they're available, otherwise use the kernel tid valus + tids = tids_ust if self.data_ust is not None else tids_kernel + data = [ + [ + tid, + self.format_size(ust_memory_usage_dfs[tid]['memory_usage'].max(), precision=1) + if self.data_ust is not None else None, + self.format_size(kernel_memory_usage_dfs[tid]['memory_usage'].max(), precision=1) + if self.data_kernel is not None and ust_memory_usage_dfs.get(tid) is not None else None, + ] + for tid in tids + ] + return DataFrame(data, columns=['tid', 'max_memory_usage_ust', 'max_memory_usage_kernel']) def get_absolute_userspace_memory_usage_by_tid(self) -> Dict[int, DataFrame]: """ @@ -67,11 +112,11 @@ class MemoryUsageDataModelUtil(DataModelUtil): def _get_absolute_memory_usage_by_tid( self, - data: MemoryUsageDataModel, + data_model: MemoryUsageDataModel, ) -> Dict[int, DataFrame]: previous = defaultdict(int) data = defaultdict(list) - for index, row in self.data.memory_diff.iterrows(): + for index, row in data_model.memory_diff.iterrows(): timestamp = row['timestamp'] tid = int(row['tid']) diff = row['memory_diff']