Merge branch 'kernel-memory-analysis' into 'master'

Add kernel memory usage analysis

See merge request micro-ROS/ros_tracing/tracetools_analysis!37
This commit is contained in:
Christophe Bedard 2019-12-29 16:38:13 +00:00
commit 694a0d852b
5 changed files with 209 additions and 64 deletions

View file

@ -13,7 +13,7 @@
"# (wait a few seconds, then kill with Ctrl+C)\n", "# (wait a few seconds, then kill with Ctrl+C)\n",
"# AND\n", "# AND\n",
"# Convert trace data:\n", "# Convert trace data:\n",
"# $ ros2 trace-analysis convert ~/.ros/tracing/memory-usage/ust" "# $ ros2 trace-analysis convert ~/.ros/tracing/memory-usage"
] ]
}, },
{ {
@ -22,7 +22,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"converted_file_path = '~/.ros/tracing/memory-usage/ust/converted'" "converted_file_path = '~/.ros/tracing/memory-usage/converted'"
] ]
}, },
{ {
@ -43,7 +43,6 @@
"from bokeh.plotting import figure\n", "from bokeh.plotting import figure\n",
"from bokeh.plotting import output_notebook\n", "from bokeh.plotting import output_notebook\n",
"from bokeh.io import show\n", "from bokeh.io import show\n",
"from bokeh.layouts import row\n",
"from bokeh.models import ColumnDataSource\n", "from bokeh.models import ColumnDataSource\n",
"from bokeh.models import DatetimeTickFormatter\n", "from bokeh.models import DatetimeTickFormatter\n",
"from bokeh.models import NumeralTickFormatter\n", "from bokeh.models import NumeralTickFormatter\n",
@ -52,7 +51,8 @@
"\n", "\n",
"from tracetools_analysis.loading import load_file\n", "from tracetools_analysis.loading import load_file\n",
"from tracetools_analysis.processor import Processor\n", "from tracetools_analysis.processor import Processor\n",
"from tracetools_analysis.processor.memory_usage import MemoryUsageHandler\n", "from tracetools_analysis.processor.memory_usage import KernelMemoryUsageHandler\n",
"from tracetools_analysis.processor.memory_usage import UserspaceMemoryUsageHandler\n",
"from tracetools_analysis.processor.ros2 import Ros2Handler\n", "from tracetools_analysis.processor.ros2 import Ros2Handler\n",
"from tracetools_analysis.utils.memory_usage import MemoryUsageDataModelUtil\n", "from tracetools_analysis.utils.memory_usage import MemoryUsageDataModelUtil\n",
"from tracetools_analysis.utils.ros2 import Ros2DataModelUtil" "from tracetools_analysis.utils.ros2 import Ros2DataModelUtil"
@ -66,9 +66,10 @@
"source": [ "source": [
"# Process\n", "# Process\n",
"events = load_file(converted_file_path)\n", "events = load_file(converted_file_path)\n",
"memory_handler = MemoryUsageHandler()\n", "ust_memory_handler = UserspaceMemoryUsageHandler()\n",
"kernel_memory_handler = KernelMemoryUsageHandler()\n",
"ros2_handler = Ros2Handler()\n", "ros2_handler = Ros2Handler()\n",
"Processor(memory_handler, ros2_handler).process(events)" "Processor(ust_memory_handler, kernel_memory_handler, ros2_handler).process(events)"
] ]
}, },
{ {
@ -77,7 +78,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"memory_data_util = MemoryUsageDataModelUtil(memory_handler.data)\n", "memory_data_util = MemoryUsageDataModelUtil(\n",
" userspace=ust_memory_handler.data,\n",
" kernel=kernel_memory_handler.data,\n",
")\n",
"ros2_data_util = Ros2DataModelUtil(ros2_handler.data)\n", "ros2_data_util = Ros2DataModelUtil(ros2_handler.data)\n",
"\n", "\n",
"output_notebook()\n", "output_notebook()\n",
@ -91,31 +95,42 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Plot memory usage\n", "# Plot memory usage\n",
"memory_usage_dfs = memory_data_util.get_absolute_memory_usage_by_tid()\n", "ust_memory_usage_dfs = memory_data_util.get_absolute_userspace_memory_usage_by_tid()\n",
"kernel_memory_usage_dfs = memory_data_util.get_absolute_kernel_memory_usage_by_tid()\n",
"tids = ros2_data_util.get_tids()\n", "tids = ros2_data_util.get_tids()\n",
"\n", "\n",
"colours = viridis(len(tids) + 1)\n", "colours = viridis(len(tids) + 1)\n",
"first_tid = min(tids)\n", "first_tid = min(tids)\n",
"starttime = memory_usage_dfs[first_tid].loc[:, 'timestamp'].iloc[0].strftime('%Y-%m-%d %H:%M')\n", "starttime = ust_memory_usage_dfs[first_tid].loc[:, 'timestamp'].iloc[0].strftime('%Y-%m-%d %H:%M')\n",
"memory = figure(\n", "memory = figure(\n",
" title='Userspace memory usage per thread/node',\n", " title='Memory usage per thread/node',\n",
" x_axis_label=f'time ({starttime})',\n", " x_axis_label=f'time ({starttime})',\n",
" y_axis_label='memory usage',\n", " y_axis_label='memory usage',\n",
" plot_width=psize, plot_height=psize,\n", " plot_width=psize, plot_height=psize,\n",
")\n", ")\n",
"\n", "\n",
"i_colour = 0\n", "i_colour = 0\n",
"for tid, memory_usage_df in memory_usage_dfs.items():\n", "for tid in tids:\n",
" if tid not in tids:\n", " legend = str(tid) + ' ' + str(ros2_data_util.get_node_names_from_tid(tid))\n",
" continue\n", " # Userspace\n",
" memory.line(\n", " memory.line(\n",
" x='timestamp',\n", " x='timestamp',\n",
" y='memory_usage',\n", " y='memory_usage',\n",
" legend=str(tid) + ' ' + str(ros2_data_util.get_node_names_from_tid(tid)),\n", " legend=legend + ' (ust)',\n",
" line_width=2,\n", " line_width=2,\n",
" source=ColumnDataSource(memory_usage_df),\n", " source=ColumnDataSource(ust_memory_usage_dfs[tid]),\n",
" line_color=colours[i_colour],\n", " line_color=colours[i_colour],\n",
" )\n", " )\n",
" # Kernel\n",
" memory.line(\n",
" x='timestamp',\n",
" y='memory_usage',\n",
" legend=legend + ' (kernel)',\n",
" line_width=2,\n",
" source=ColumnDataSource(kernel_memory_usage_dfs[tid]),\n",
" line_color=colours[i_colour],\n",
" line_dash='dotted',\n",
" )\n",
" i_colour += 1\n", " i_colour += 1\n",
"\n", "\n",
"memory.title.align = 'center'\n", "memory.title.align = 'center'\n",

View file

@ -32,6 +32,10 @@ def generate_launch_description():
'lttng_ust_libc:memalign', 'lttng_ust_libc:memalign',
'lttng_ust_libc:posix_memalign', 'lttng_ust_libc:posix_memalign',
] + DEFAULT_EVENTS_ROS, ] + DEFAULT_EVENTS_ROS,
events_kernel=[
'kmem_mm_page_alloc',
'kmem_mm_page_free',
],
), ),
Node( Node(
package='tracetools_test', package='tracetools_test',

View file

@ -24,8 +24,33 @@ from ..data_model.memory_usage import MemoryUsageDataModel
class MemoryUsageHandler(EventHandler): class MemoryUsageHandler(EventHandler):
"""Generic handler for memory usage."""
def __init__(
self,
**kwargs,
) -> None:
super().__init__(**kwargs)
self._data_model = MemoryUsageDataModel()
@property
def data(self) -> MemoryUsageDataModel:
return self._data_model
def _update(
self,
timestamp: int,
tid: int,
memory_difference: int,
) -> None:
# Add to data model
self.data.add_memory_difference(timestamp, tid, memory_difference)
class UserspaceMemoryUsageHandler(MemoryUsageHandler):
""" """
Handler that extracts data for memory usage. Handler that extracts userspace memory usage data.
It uses the following events: It uses the following events:
* lttng_ust_libc:malloc * lttng_ust_libc:malloc
@ -66,17 +91,11 @@ class MemoryUsageHandler(EventHandler):
**kwargs, **kwargs,
) )
self._data_model = MemoryUsageDataModel()
# Temporary buffers # Temporary buffers
# pointer -> current memory size # pointer -> current memory size
# (used to know keep track of the memory size allocated at a given pointer) # (used to know keep track of the memory size allocated at a given pointer)
self._memory: Dict[int, int] = {} self._memory: Dict[int, int] = {}
@property
def data(self) -> MemoryUsageDataModel:
return self._data_model
def _handle_malloc( def _handle_malloc(
self, event: Dict, metadata: EventMetadata self, event: Dict, metadata: EventMetadata
) -> None: ) -> None:
@ -148,5 +167,59 @@ class MemoryUsageHandler(EventHandler):
if allocated_memory is not None: if allocated_memory is not None:
memory_difference = -allocated_memory memory_difference = -allocated_memory
# Add to data model self._update(timestamp, tid, memory_difference)
self.data.add_memory_difference(timestamp, tid, memory_difference)
class KernelMemoryUsageHandler(MemoryUsageHandler):
"""
Handler that extracts userspace memory usage data.
It uses the following events:
* kmem_mm_page_alloc
* kmem_mm_page_free
Implementation inspired by Trace Compass' implementation:
https://git.eclipse.org/c/tracecompass/org.eclipse.tracecompass.git/tree/analysis/org.eclipse.tracecompass.analysis.os.linux.core/src/org/eclipse/tracecompass/analysis/os/linux/core/kernelmemoryusage/KernelMemoryStateProvider.java#n84
"""
PAGE_SIZE = 4096
def __init__(
self,
**kwargs,
) -> None:
# Link event to handling method
handler_map = {
'kmem_mm_page_alloc':
self._handle_malloc,
'kmem_mm_page_free':
self._handle_free,
}
super().__init__(
handler_map=handler_map,
**kwargs,
)
def _handle_malloc(
self, event: Dict, metadata: EventMetadata
) -> None:
self._handle(event, metadata, self.PAGE_SIZE)
def _handle_free(
self, event: Dict, metadata: EventMetadata
) -> None:
self._handle(event, metadata, -self.PAGE_SIZE)
def _handle(
self,
event: Dict,
metadata: EventMetadata,
inc: int,
) -> None:
order = get_field(event, 'order')
inc <<= order
timestamp = metadata.timestamp
tid = metadata.tid
self._update(timestamp, tid, inc)

View file

@ -14,54 +14,34 @@
import sys import sys
import pandas as pd
from tracetools_analysis.loading import load_file from tracetools_analysis.loading import load_file
from tracetools_analysis.processor import Processor from tracetools_analysis.processor import Processor
from tracetools_analysis.processor.memory_usage import MemoryUsageHandler from tracetools_analysis.processor.memory_usage import KernelMemoryUsageHandler
from tracetools_analysis.processor.memory_usage import UserspaceMemoryUsageHandler
from tracetools_analysis.processor.ros2 import Ros2Handler from tracetools_analysis.processor.ros2 import Ros2Handler
from tracetools_analysis.utils.memory_usage import MemoryUsageDataModelUtil from tracetools_analysis.utils.memory_usage import MemoryUsageDataModelUtil
from tracetools_analysis.utils.ros2 import Ros2DataModelUtil from tracetools_analysis.utils.ros2 import Ros2DataModelUtil
# From: https://stackoverflow.com/a/32009595/6476709
def format_memory_size(size: int, precision: int = 2):
suffixes = ['B', 'KB', 'MB', 'GB', 'TB']
suffixIndex = 0
while size > 1024 and suffixIndex < 4:
# Increment the index of the suffix
suffixIndex += 1
# Apply the division
size = size / 1024.0
return f'{size:.{precision}f} {suffixes[suffixIndex]}'
def main(): def main():
if len(sys.argv) < 2: if len(sys.argv) < 2:
print('Syntax: <converted tracefile>') print('Syntax: <converted tracefile>')
sys.exit(-1) sys.exit(1)
file_path = sys.argv[1] file_path = sys.argv[1]
events = load_file(file_path) events = load_file(file_path)
memory_handler = MemoryUsageHandler() ust_memory_handler = UserspaceMemoryUsageHandler()
kernel_memory_handler = KernelMemoryUsageHandler()
ros2_handler = Ros2Handler() ros2_handler = Ros2Handler()
Processor(memory_handler, ros2_handler).process(events) Processor(ust_memory_handler, kernel_memory_handler, ros2_handler).process(events)
memory_data_util = MemoryUsageDataModelUtil(memory_handler.data) memory_data_util = MemoryUsageDataModelUtil(
userspace=ust_memory_handler.data,
kernel=kernel_memory_handler.data,
)
ros2_data_util = Ros2DataModelUtil(ros2_handler.data) ros2_data_util = Ros2DataModelUtil(ros2_handler.data)
memory_usage_dfs = memory_data_util.get_absolute_memory_usage_by_tid() summary_df = memory_data_util.get_max_memory_usage_per_tid()
tids = ros2_data_util.get_tids() tids = ros2_data_util.get_tids()
filtered_df = summary_df.loc[summary_df['tid'].isin(tids)]
data = [ print('\n' + filtered_df.to_string(index=False))
[
tid,
ros2_data_util.get_node_names_from_tid(tid),
format_memory_size(memory_usage['memory_usage'].max(), precision=1),
]
for tid, memory_usage in memory_usage_dfs.items()
if tid in tids
]
summary_df = pd.DataFrame(data, columns=['tid', 'node_names', 'max_memory_usage'])
print('\n' + summary_df.to_string(index=False))

View file

@ -28,24 +28,97 @@ class MemoryUsageDataModelUtil(DataModelUtil):
def __init__( def __init__(
self, self,
data_model: MemoryUsageDataModel, *,
userspace: MemoryUsageDataModel = None,
kernel: MemoryUsageDataModel = None,
) -> None: ) -> None:
""" """
Create a MemoryUsageDataModelUtil. Create a MemoryUsageDataModelUtil.
:param data_model: the data model object to use At least one non-`None` `MemoryUsageDataModel` must be given.
"""
super().__init__(data_model)
def get_absolute_memory_usage_by_tid(self) -> Dict[int, DataFrame]: :param userspace: the userspace data model object to use
:param kernel: the kernel data model object to use
""" """
Get absolute memory usage over time per tid. # Not giving any model to the base class; we'll own them ourselves
super().__init__(None)
if userspace is None and kernel is None:
raise RuntimeError('must provide at least one (userspace or kernel) data model!')
self.data_ust = userspace
self.data_kernel = kernel
@staticmethod
def format_size(size: int, precision: int = 2):
"""
Format a memory size to a string with a units suffix.
From: https://stackoverflow.com/a/32009595/6476709
:param size: the memory size, in bytes
:param precision: the number of digits to display after the period
"""
suffixes = ['B', 'KB', 'MB', 'GB', 'TB']
suffixIndex = 0
while size > 1024 and suffixIndex < 4:
# Increment the index of the suffix
suffixIndex += 1
# Apply the division
size = size / 1024.0
return f'{size:.{precision}f} {suffixes[suffixIndex]}'
def get_max_memory_usage_per_tid(self) -> DataFrame:
"""
Get the maximum memory usage per tid.
:return dataframe with maximum memory usage (userspace & kernel) per tid
"""
if self.data_ust is not None:
ust_memory_usage_dfs = self.get_absolute_userspace_memory_usage_by_tid()
tids_ust = set(ust_memory_usage_dfs.keys())
if self.data_kernel is not None:
kernel_memory_usage_dfs = self.get_absolute_kernel_memory_usage_by_tid()
tids_kernel = set(kernel_memory_usage_dfs.keys())
# Use only the userspace tid values if available, otherwise use the kernel tid values
tids = tids_ust if self.data_ust is not None else tids_kernel
data = [
[
tid,
self.format_size(ust_memory_usage_dfs[tid]['memory_usage'].max(), precision=1)
if self.data_ust is not None
else None,
self.format_size(kernel_memory_usage_dfs[tid]['memory_usage'].max(), precision=1)
if self.data_kernel is not None and ust_memory_usage_dfs.get(tid) is not None
else None,
]
for tid in tids
]
return DataFrame(data, columns=['tid', 'max_memory_usage_ust', 'max_memory_usage_kernel'])
def get_absolute_userspace_memory_usage_by_tid(self) -> Dict[int, DataFrame]:
"""
Get absolute userspace memory usage over time per tid.
:return (tid -> DataFrame of absolute memory usage over time) :return (tid -> DataFrame of absolute memory usage over time)
""" """
return self._get_absolute_memory_usage_by_tid(self.data_ust)
def get_absolute_kernel_memory_usage_by_tid(self) -> Dict[int, DataFrame]:
"""
Get absolute kernel memory usage over time per tid.
:return (tid -> DataFrame of absolute memory usage over time)
"""
return self._get_absolute_memory_usage_by_tid(self.data_kernel)
def _get_absolute_memory_usage_by_tid(
self,
data_model: MemoryUsageDataModel,
) -> Dict[int, DataFrame]:
previous = defaultdict(int) previous = defaultdict(int)
data = defaultdict(list) data = defaultdict(list)
for index, row in self.data.memory_diff.iterrows(): for index, row in data_model.memory_diff.iterrows():
timestamp = row['timestamp'] timestamp = row['timestamp']
tid = int(row['tid']) tid = int(row['tid'])
diff = row['memory_diff'] diff = row['memory_diff']