diff --git a/README.md b/README.md index 64dc2d9..9f275a0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # MA Autoware Trace Analysis -Automatically extract data dependencies and end-to-end (E2E) latencies from ROS2 trace data. +Automatically extract data dependencies and end-to-end (E2E) latencies from ROS2 trace data and source code. ## Prerequisites * Python 3.10 or newer (this is crucial!) @@ -9,6 +9,12 @@ Automatically extract data dependencies and end-to-end (E2E) latencies from ROS2 * [Tracetools Analysis](https://gitlab.com/ros-tracing/tracetools_analysis) * `python3-babeltrace` and `python3-lttng`, e.g. via `sudo apt install`, **for Python 3.10** (this requires either Ubuntu 22.04 or custom installation) +## Requirements for Optional Features +* `ros2_internal_dependency_analyzer` for finding intra-node dependencies via static analysis, which can then be + utilized by this tool +* `ros2 multitopic bw` for recording message sizes which can then be processed by this tool + + ## Installation ```shell @@ -36,7 +42,9 @@ E2E_ENABLED = True ```shell # In the shell of your choice (choose Bash!): # Each setting is named the same as in the notebook but prefixed by "ANA_NB_". -ANA_NB_TR_PATH="path/to/trace/dir" +# For string values, the quotes must be part of the variable value itself, i.e. nested quoting +# such as "'...'" is required in most cases +ANA_NB_TR_PATH="'path/to/trace/dir'" ANA_NB_E2E_ENABLED="True" ``` @@ -46,7 +54,7 @@ from the command line: jupyter nbconvert --to notebook --execute trace-analysis.ipynb ``` -nbconvert can also be called from Python directly, -read more info on nbconvert [here](https://nbconvert.readthedocs.io/en/latest/execute_api.html). +The notebook also supports invocation via [Papermill](https://papermill.readthedocs.io/en/latest/). -The output files are found in the configured output dir (default: `out/`). Inputs are processed and cached in `cache/`. +The output files are found in the configured output dir (default: `out/`). +Inputs are processed and cached in `cache/`.
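The quoting requirement described above exists because the notebook parses every `ANA_NB_*` value with Python's `literal_eval()` before assigning it to the corresponding setting (see the `trace-analysis.ipynb` changes below). The following is a minimal sketch of that override mechanism, not the notebook's exact code: the example settings and the plain `globals()` assignment are illustrative stand-ins for the notebook's own parsing helper.

```python
import os
from ast import literal_eval

# Illustrative defaults, mirroring two of the notebook's user settings.
TR_PATH = "path/to/trace/dir"
E2E_ENABLED = False

for env_key, env_value in os.environ.items():
    if env_key.startswith("ANA_NB_"):
        key = env_key.removeprefix("ANA_NB_")
        # literal_eval() turns "'path/to/trace/dir'" into a str and "True" into a bool.
        # A bare, unquoted value such as "path/to/trace/dir" is not a valid Python
        # literal and raises ValueError here, hence the nested quotes in the shell example.
        globals()[key] = literal_eval(env_value)
```

With this, `ANA_NB_TR_PATH="'path/to/trace/dir'"` yields the string `path/to/trace/dir`, while `ANA_NB_E2E_ENABLED="True"` yields the boolean `True`.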
diff --git a/message_tree/message_tree_algorithms.py b/message_tree/message_tree_algorithms.py index 47c4111..9e7e75c 100644 --- a/message_tree/message_tree_algorithms.py +++ b/message_tree/message_tree_algorithms.py @@ -347,3 +347,27 @@ def aggregate_e2e_paths(paths: List[List[TrPublishInstance | TrCallbackInstance] path_cohorts[key].append(path) return path_cohorts + + +def label_latency_item(item: E2EBreakdownItem): + match item.type: + case "cpu": + return f"{_repr(item.location[0])}" + case "idle": + cb_inst: TrCallbackInstance = item.location[0] + owner = cb_inst.callback_obj.owner + match owner: + case TrTimer() as tmr: + tmr: TrTimer + node_name = tmr.node.path + case TrSubscriptionObject() as sub: + sub: TrSubscriptionObject + node_name = sub.subscription.node.path + case _: + raise TypeError() + return f"{node_name}" + case "dds": + msg_inst: TrPublishInstance = item.location[0] + return f"{msg_inst.publisher.topic_name}" + case _: + return ValueError() diff --git a/trace-analysis.ipynb b/trace-analysis.ipynb index b871757..88cbd70 100644 --- a/trace-analysis.ipynb +++ b/trace-analysis.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "outputs": [], "source": [ "import os\n", @@ -25,7 +25,7 @@ "\n", "from misc.utils import cached, parse_as\n", "\n", - "# TODO: This is useless for some reason (goal is to make graphs in notebook large and hi-res)\n", + "# TODO: This has no effect for some reason (goal is to make graphs in notebook large and hi-res)\n", "plt.rcParams['figure.dpi'] = 300\n", "\n", "%matplotlib inline" @@ -45,8 +45,38 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 2, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "User Settings:\n", + " TRACING_WS_BUILD_PATH................... := /home/max/Projects/autoware/build\n", + " TR_PATH................................. := /home/max/Projects/ma-autoware-trace-analysis/data/trace-awsim-x86/ust\n", + " OUT_PATH................................ := /home/max/Projects/ma-autoware-trace-analysis/out\n", + " CACHING_ENABLED......................... := False\n", + " BW_ENABLED.............................. := True\n", + " BW_PATH................................. := /home/max/Projects/ma-hw-perf-tools/data/messages-x86.h5\n", + " CL_ENABLED.............................. := False\n", + " CL_PATH................................. := /home/max/Projects/llvm-project/clang-tools-extra/ros2-internal-dependency-checker/output\n", + " DFG_ENABLED............................. := True\n", + " DFG_PLOT................................ := True\n", + " DFG_MAX_HIER_LEVEL...................... := 100\n", + " DFG_INPUT_NODE_PATTERNS................. := ['^/sensing']\n", + " DFG_OUTPUT_NODE_PATTERNS................ := ['^/awapi', '^/control/external_cmd_converter', 'emergency']\n", + " DFG_EXCL_NODE_PATTERNS.................. := ['^/rviz2', 'transform_listener_impl']\n", + " E2E_ENABLED............................. := True\n", + " E2E_PLOT................................ := True\n", + " E2E_PLOT_TIMESTAMP...................... := 1000\n", + " E2E_TIME_LIMIT_S........................ := 2\n", + " E2E_OUTPUT_TOPIC_PATTERNS............... := ['^/control/command/control_cmd$']\n", + " E2E_INPUT_TOPIC_PATTERNS................ := ['^/sensing/.*?pointcloud']\n", + " E2E_EXCLUDED_PATH_PATTERNS.............. := ['NDTScanMatcher', '^/parameter_events']\n", + " DEBUG................................... 
:= False\n" + ] + } + ], "source": [ "##################################################\n", "# User Settings\n", @@ -135,12 +165,25 @@ "\n", "# E2E paths are uniquely identified by a string like \"/topic/1 -> void(Node1)(args1) -> /topic/2 -> void(Node2)(args2) -> void(Node2)(args3) -> ...\".\n", "# Certain patterns only occur in initial setup or in scenario switching and can be excluded via RegEx patterns here.\n", - "E2E_EXCLUDED_PATH_PATTERNS = [r\"NDTScanMatcher\", r\"^/parameter_events\"]\n", + "E2E_EXCL_PATH_PATTERNS = [r\"NDTScanMatcher\", r\"^/parameter_events\", \"hazard\", \"turn_indicator\", \"gear_cmd\", \"emergency_cmd\",\n", + " \"external_cmd\", \"/control/operation_mode\", \"/planning/scenario_planning/scenario$\"]\n", "\n", + "# To specify paths of interest, topic/callback name patterns that HAVE TO OCCUR in each E2E path can be specified as RegEx here.\n", + "E2E_INCL_PATH_PATTERNS = [\"BehaviorPathPlanner\", \"BehaviorVelocityPlanner\", \"pointcloud_preprocessor::Filter\", r\"^/sensing/.*?pointcloud\"]\n", "\n", + "# For development purposes only. Leave this at False.\n", "DEBUG = False\n", "\n", - "# This code overrides the above constants with environment variables, do not edit.\n", + "# For development purposes only. Leave this at False.\n", + "MANUAL_CACHE = False\n", + "\n", + "##################################################\n", + "# End of User Settings\n", + "##################################################\n", + "\n", + "# This code overrides the above constants with environment variables, with values from the environment.\n", + "# Values will be parsed through Python's literal_eval() function. Thus, strings have to be specified\n", + "# including \" or '.\n", "for env_key, env_value in os.environ.items():\n", " if env_key.startswith(\"ANA_NB_\"):\n", " key = env_key.removeprefix(\"ANA_NB_\")\n", @@ -163,6 +206,7 @@ "\n", "os.makedirs(OUT_PATH, exist_ok=True)\n", "\n", + "# Print parsed user settings\n", "print(\"User Settings:\")\n", "for k, v in globals().copy().items():\n", " if not k.isupper():\n", @@ -175,9 +219,10 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "outputs": [], "source": [ + "# The last few imports can only be resolved using the user-specified paths above\n", "sys.path.append(os.path.join(TRACING_WS_BUILD_PATH, \"tracetools_read/\"))\n", "sys.path.append(os.path.join(TRACING_WS_BUILD_PATH, \"tracetools_analysis/\"))\n", "# noinspection PyUnresolvedReferences\n", @@ -187,9 +232,7 @@ "# noinspection PyUnresolvedReferences\n", "from tracetools_analysis.processor.ros2 import Ros2Handler\n", "\n", - "from tracing_interop.tr_types import *\n", - "from message_tree.message_tree_structure import DepTree\n", - "from matching.subscriptions import sanitize" + "from tracing_interop.tr_types import *" ], "metadata": { "collapsed": false @@ -198,7 +241,9 @@ { "cell_type": "markdown", "source": [ - "# Load Trace Data" + "# Load Trace Data\n", + "\n", + "Load (and, if necessary, convert) tracing data obtained through ros2_tracing. Build indices for fast analysis." 
], "metadata": { "collapsed": false @@ -219,7 +264,7 @@ "_tr_globals = [\"nodes\", \"publishers\", \"subscriptions\", \"timers\", \"timer_node_links\", \"subscription_objects\",\n", " \"callback_objects\", \"callback_symbols\", \"publish_instances\", \"callback_instances\", \"topics\"]\n", "\n", - "# Help the IDE recognize those identifiers\n", + "# Help the IDE recognize those identifiers by assigning a dummy value to their name.\n", "nodes = publishers = subscriptions = timers = timer_node_links = subscription_objects = callback_objects = callback_symbols = publish_instances = callback_instances = topics = None\n", "\n", "for name in _tr_globals:\n", @@ -236,17 +281,21 @@ "execution_count": null, "outputs": [], "source": [ + "##################################################\n", + "# Print (All/Input/Output) Topic Message Counts\n", + "##################################################\n", + "\n", "for topic in sorted(topics, key=lambda t: t.name):\n", " topic: TrTopic\n", " print(f\"{topic.name:.<120s} | {sum(map(lambda p: len(p.instances), topic.publishers))}\")\n", "\n", - "print(\"\\n[ENTKÄFERN] INPUT TOPICS\")\n", + "print(\"\\n[DEBUG] INPUT TOPICS\")\n", "for t in sorted(topics, key=lambda t: t.name):\n", " for f in E2E_INPUT_TOPIC_PATTERNS:\n", " if re.search(f, t.name):\n", - " print(f\"--[ENTKÄFERN] {f:<30s}:{t.name:.<89s} | {sum(map(lambda p: len(p.instances), t.publishers))}\")\n", + " print(f\"--[DEBUG] {f:<30s}:{t.name:.<89s} | {sum(map(lambda p: len(p.instances), t.publishers))}\")\n", "\n", - "print(\"\\n[ENTKÄFERN] OUTPUT TOPICS\")\n", + "print(\"\\n[DEBUG] OUTPUT TOPICS\")\n", "for t in sorted(topics, key=lambda t: t.name):\n", " for f in E2E_OUTPUT_TOPIC_PATTERNS:\n", " if re.search(f, t.name):\n", @@ -259,7 +308,8 @@ { "cell_type": "markdown", "source": [ - "# Analyze ROS Graph" + "# Analyze ROS Graph\n", + "Reconstruct namespace hierarchy, data flow graph between callbacks." ], "metadata": { "collapsed": false @@ -286,7 +336,8 @@ { "cell_type": "markdown", "source": [ - "## Plot ROS Graph (full)" + "## Plot ROS Graph (full)\n", + "Plot the DFG hierarchically by node namespace. Plot each internal and external dependency between callbacks as one arrow." ], "metadata": { "collapsed": false @@ -315,7 +366,8 @@ { "cell_type": "markdown", "source": [ - "## Plot Latency Graph (overview)" + "## Plot Latency Graph (overview)\n", + "Plot the DFG down to a certain hierarchy level in a flattened manner. Aggregate dependencies from multiple callbacks into corresponding node-node dependencies." ], "metadata": { "collapsed": false @@ -348,7 +400,8 @@ { "cell_type": "markdown", "source": [ - "# Analyze Message Flow" + "# Analyze Message Flow\n", + "Build dependency trees ending in the specified output topics." 
], "metadata": { "collapsed": false @@ -366,7 +419,7 @@ "end_topics = [t for t in _tracing_context.topics if any(re.search(f, t.name) for f in E2E_OUTPUT_TOPIC_PATTERNS)]\n", "\n", "def _build_dep_trees():\n", - " return build_dep_trees(end_topics, lat_graph, _tracing_context, E2E_EXCLUDED_PATH_PATTERNS, E2E_TIME_LIMIT_S)\n", + " return build_dep_trees(end_topics, lat_graph, _tracing_context, E2E_EXCL_PATH_PATTERNS, E2E_TIME_LIMIT_S)\n", "\n", "try:\n", " trees = cached(\"trees\", _build_dep_trees, [TR_PATH], not CACHING_ENABLED)\n", @@ -387,6 +440,11 @@ "%%skip_if_false E2E_ENABLED\n", "%%skip_if_false BW_ENABLED\n", "\n", + "##################################################\n", + "# Get message bandwidths from `ros2 multitopic bw`\n", + "# output.\n", + "##################################################\n", + "\n", "from bw_interop.process_bw_output import get_topic_messages\n", "msgs = get_topic_messages(BW_PATH)\n", "\n", @@ -397,56 +455,6 @@ "collapsed": false } }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "%%skip_if_false E2E_ENABLED\n", - "%%skip_if_false E2E_PLOT\n", - "%%skip_if_false DEBUG\n", - "\n", - "\n", - "fig, ax = plt.subplots(num=\"e2e_plot\")\n", - "DS = 1\n", - "times = [tree.head.timestamp - trees[0].head.timestamp for tree in trees]\n", - "\n", - "ax2 = ax.twinx()\n", - "ax2.plot(times, list(map(lambda paths: sum(map(len, paths)) / len(paths), e2e_pathss)), color=\"orange\")\n", - "ax2.fill_between(times,\n", - " list(map(lambda paths: min(map(len, paths)), e2e_pathss)),\n", - " list(map(lambda paths: max(map(len, paths)), e2e_pathss)),\n", - " alpha=.3, color=\"orange\")\n", - "\n", - "ax.plot(times, [np.mean(e2es) for e2es in e2ess])\n", - "ax.fill_between(times, [np.min(e2es) for e2es in e2ess], [np.max(e2es) for e2es in e2ess], alpha=.3)\n", - "\n", - "\n", - "def scatter_topic(topic_name, y=0, **scatter_kwargs):\n", - " for pub in topics.by_name[topic_name].publishers:\n", - " if not pub:\n", - " continue\n", - "\n", - " inst_timestamps = [inst.timestamp - trees[0].head.timestamp for inst in pub.instances if\n", - " inst.timestamp >= trees[0].head.timestamp]\n", - " scatter_kwargs_default = {\"marker\": \"x\", \"color\": \"indianred\"}\n", - " scatter_kwargs_default.update(scatter_kwargs)\n", - " ax.scatter(inst_timestamps, np.full(len(inst_timestamps), fill_value=y), **scatter_kwargs_default)\n", - "\n", - "\n", - "scatter_topic(\"/autoware/engage\")\n", - "scatter_topic(\"/planning/scenario_planning/parking/trajectory\", y=-.04, color=\"cadetblue\")\n", - "scatter_topic(\"/planning/scenario_planning/lane_driving/trajectory\", y=-.08, color=\"darkgreen\")\n", - "\n", - "ax.set_xlabel(\"Simulation time [s]\")\n", - "ax.set_ylabel(\"End-to-End latency [s]\")\n", - "ax2.set_ylabel(\"End-to-End path length\")\n", - "None" - ], - "metadata": { - "collapsed": false - } - }, { "cell_type": "code", "execution_count": null, @@ -470,17 +478,17 @@ "%%skip_if_false E2E_ENABLED\n", "\n", "from message_tree.message_tree_algorithms import e2e_paths_sorted_desc\n", - "from message_tree.message_tree_plots import e2e_breakdown_type_hist\n", "from message_tree.message_tree_algorithms import owner\n", "\n", + "##################################################\n", + "# Find and filter relevant E2E paths in trees\n", + "##################################################\n", "\n", "trees_paths = [e2e_paths_sorted_desc(tree, E2E_INPUT_TOPIC_PATTERNS) for tree in tqdm(trees, mininterval=10.0,\n", " desc=\"Extracting E2E paths\")]\n", 
"all_paths = [p for paths in trees_paths for p in paths]\n", - "# all_e2e_items = [i for p in all_paths for i in p]\n", - "# print(trees[0])\n", - "\n", - "lidar_paths = [p for p in all_paths if any(map(lambda inst: re.search(\"^/sensing/.*?pointcloud\", owner(inst)), p))]\n" + "relevant_paths = [p for p in all_paths\n", + " if any(map(lambda inst: re.search(\"^/sensing/.*?pointcloud\", owner(inst)), p))]\n" ], "metadata": { "collapsed": false @@ -495,7 +503,11 @@ "\n", "from message_tree.message_tree_algorithms import aggregate_e2e_paths\n", "\n", - "cohorts = aggregate_e2e_paths(lidar_paths) #all_paths)\n", + "##################################################\n", + "# Group dataflows by DFG path\n", + "##################################################\n", + "\n", + "cohorts = aggregate_e2e_paths(relevant_paths) #all_paths)\n", "cohort_pairs = [(k, v) for k, v in cohorts.items()]\n", "cohort_pairs.sort(key=lambda kv: len(kv[1]), reverse=True)\n", "\n", @@ -519,14 +531,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "outputs": [], "source": [ + "%%skip_if_false MANUAL_CACHE\n", + "\n", "import pickle\n", - "#with open(\"state.pkl\", \"wb\") as f:\n", - "# pickle.dump((trees_paths, all_paths, lidar_paths, cohorts), f)\n", + "# with open(\"state.pkl\", \"wb\") as f:\n", + "# pickle.dump((trees_paths, all_paths, lidar_paths, cohorts), f)\n", "with open(\"state.pkl\", \"rb\") as f:\n", - " (trees_paths, all_paths, lidar_paths, cohorts) = pickle.load(f)" + " (trees_paths, all_paths, relevant_paths, cohorts) = pickle.load(f)" ], "metadata": { "collapsed": false @@ -534,15 +548,52 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 12, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1183 1\n", + "\n", + "\n", + " (11342)\n", + " void(pointcloud_preprocessor::Filter)(sensor_msgs::msg::PointCloud2,pcl_msgs::msg::PointIndices)\n", + " -> /sensing/lidar/top/outlier_filtered/pointcloud\n", + " -> void(pointcloud_preprocessor::PointCloudConcatenateDataSynchronizerComponent)(sensor_msgs::msg::PointCloud2,std::__cxx11::basic_string,>)\n", + " -> void(pointcloud_preprocessor::PointCloudConcatenateDataSynchronizerComponent)()\n", + " -> /sensing/lidar/concatenated/pointcloud\n", + " -> void(message_filters::Subscribersubscribe(rclcpp::Node,std::__cxx11::basic_string,>,rmw_qos_profile_t))(sensor_msgs::msg::PointCloud2)\n", + " -> /perception/occupancy_grid_map/map\n", + " -> void(behavior_path_planner::BehaviorPathPlannerNode)(nav_msgs::msg::OccupancyGrid)\n", + " -> void(behavior_path_planner::BehaviorPathPlannerNode)()\n", + " -> /planning/scenario_planning/lane_driving/behavior_planning/path_with_lane_id\n", + " -> void(behavior_velocity_planner::BehaviorVelocityPlannerNode)(autoware_auto_planning_msgs::msg::PathWithLaneId)\n", + " -> /planning/scenario_planning/lane_driving/behavior_planning/path\n", + " -> void(ObstacleAvoidancePlanner)(autoware_auto_planning_msgs::msg::Path)\n", + " -> /planning/scenario_planning/lane_driving/motion_planning/obstacle_avoidance_planner/trajectory\n", + " -> void(motion_planning::ObstacleStopPlannerNode)(autoware_auto_planning_msgs::msg::Trajectory)\n", + " -> /planning/scenario_planning/lane_driving/trajectory\n", + " -> void(ScenarioSelectorNode)(autoware_auto_planning_msgs::msg::Trajectory)\n", + " -> /planning/scenario_planning/scenario_selector/trajectory\n", + " -> 
void(motion_velocity_smoother::MotionVelocitySmootherNode)(autoware_auto_planning_msgs::msg::Trajectory)\n", + " -> /planning/scenario_planning/trajectory\n", + " -> void(autoware::motion::control::trajectory_follower_nodes::Controller)(autoware_auto_planning_msgs::msg::Trajectory)\n", + " -> void(autoware::motion::control::trajectory_follower_nodes::Controller)()\n", + " -> /control/trajectory_follower/control_cmd\n", + " -> void(vehicle_cmd_gate::VehicleCmdGate)(autoware_auto_control_msgs::msg::AckermannControlCommand)\n", + " -> /control/command/control_cmd\n" + ] + } + ], "source": [ - "COHORT_EXCL_PATTERNS = [\"hazard\", \"turn_indicator\", \"gear_cmd\", \"emergency_cmd\", \"external_cmd\", \"/control/operation_mode\",\n", - " \"/planning/scenario_planning/scenario$\"]\n", - "COHORT_INCL_PATTERNS = [\"BehaviorPathPlanner\", \"BehaviorVelocityPlanner\", \"pointcloud_preprocessor::Filter\"]\n", + "##################################################\n", + "# Filter DFG paths through must-be-included and\n", + "# cannot-be-included patterns\n", + "##################################################\n", "\n", "cohorts_filt = {k: v for k, v in cohorts.items()\n", - " if not any(re.search(f, k) for f in COHORT_EXCL_PATTERNS) and all(re.search(f, k) for f in COHORT_INCL_PATTERNS)}\n", + " if not any(re.search(f, k) for f in E2E_EXCL_PATH_PATTERNS) and all(re.search(f, k) for f in E2E_INCL_PATH_PATTERNS)}\n", "\n", "\n", "print(len(cohorts), len(cohorts_filt))\n", @@ -550,7 +601,10 @@ " print(f\"\\n\\n ({len(v)})\\n \", end=\"\")\n", " print(\"\\n -> \".join(k.split(\" -> \")))\n", "\n", - "lidar_chain, lidar_cohort = next(iter(cohorts_filt.items()))" + "if len(cohorts_filt) != 1:\n", + " print(f\"[WARN] Expected exactly one cohort to remain, got {len(cohorts_filt)}. Only the first one will be used.\")\n", + "\n", + "relevant_path, relevant_dataflows = next(iter(cohorts_filt.items()))" ], "metadata": { "collapsed": false @@ -558,60 +612,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "outputs": [], "source": [ - "def e2e_latency_breakdown__(path: list):\n", - " \"\"\"\n", - " Separates E2E latency into a sequence of dds, idle, and cpu times.\n", - " This method expects a publish instance at the last position in `path`.\n", + "from message_tree.message_tree_algorithms import e2e_latency_breakdown\n", "\n", - " The return format is a list of the form [(\"\",