dataflow-analysis/trace-analysis.ipynb


{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import re\n",
"from tqdm import tqdm\n",
"from typing import Iterable\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from matplotlib import pyplot as plt\n",
"\n",
"from misc.utils import cached, parse_as\n",
"\n",
"# TODO: This has no effect for some reason (goal is to make graphs in notebook large and hi-res)\n",
"plt.rcParams['figure.dpi'] = 300\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# User Settings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"##################################################\n",
"# User Settings\n",
"##################################################\n",
"# Change these to influence the execution of the\n",
"# notebook.\n",
"# You can override these from the command line\n",
"# by defining environment variables with the\n",
"# name of the constants below, prefixed with\n",
"# \"ANA_NB_\".\n",
"# For example, the environment variable\n",
"# \"ANA_NB_TR_PATH\" will override the \"TR_PATH\"\n",
"# setting below.\n",
"##################################################\n",
"\n",
"# The path to the build folder of a ROS2 workspace that contains the\n",
"# tracetools_read and tracetools_analysis folders.\n",
"TRACING_WS_BUILD_PATH = \"/path/to/tracing_ws/build\"\n",
"\n",
"# Path to trace directory (e.g. ~/.ros/my-trace/ust) or to a converted trace file.\n",
"# Using the path \"/ust\" at the end is optional but greatly reduces processing time\n",
"# if kernel traces are also present.\n",
"TR_PATH = \"/path/to/tracing/session-name/ust\"\n",
"\n",
"# Path to the folder all artifacts from this notebook are saved to.\n",
"# This entails plots as well as data tables.\n",
"OUT_PATH = \"out/\"\n",
"\n",
"# Whether to cache the results of long computations per set of inputs\n",
"CACHING_ENABLED = False\n",
"\n",
"# Whether to annotate topics/publications with bandwidth/message size\n",
"BW_ENABLED = False\n",
"# Path to a HDF5 file as output by ma-hw-perf-tools/messages/record.bash\n",
"# Used to annotate message sizes in E2E latency calculations\n",
"BW_PATH = \"path/to/messages.h5\"\n",
"\n",
"# Whether to use dependencies extracted by the Clang-tools to supplement\n",
"# automatic node-internal data flow annotations.\n",
"# If in doubt, set to False.\n",
"CL_ENABLED = False\n",
"# Path to the output directory of the ROS2 dependency checker.\n",
"# Will only be used if CL_ENABLED is True.\n",
"CL_PATH = \"/path/to/code_analysis/output\"\n",
"\n",
"# Whether to compute data flow graphs.\n",
"# If you are only interested in E2E latencies, set this to False\n",
"DFG_ENABLED = False\n",
"# Whether to plot data flow graphs (ignored if DFG_ENABLED is False)\n",
"DFG_PLOT = False\n",
"\n",
"# The maximum node namespace hierarchy level to be plotted.\n",
"# Top-level (1): e.g. /sensing, /control, etc.\n",
"# Level 3: e.g. /sensing/lidar/pointcloud_processor\n",
"DFG_MAX_HIER_LEVEL = 100\n",
"\n",
"# RegEx pattern for nodes that shall be marked as system inputs\n",
"# These will be plotted with a start arrow as known from automata diagrams\n",
"DFG_INPUT_NODE_PATTERNS = [r\"^/sensing\"]\n",
"# RegEx pattern for nodes that shall be marked as system outputs\n",
"# These will be plotted with a double border\n",
"DFG_OUTPUT_NODE_PATTERNS = [r\"^/control/external_cmd_converter\"]\n",
"# RegEx for nodes which shall not be plotted in the DFG\n",
"DFG_EXCL_NODE_PATTERNS = [r\"^/rviz2\"]\n",
"\n",
"# Whether to compute E2E latencies.\n",
"E2E_ENABLED = True\n",
"# Whether to plot end-to-end latency information (ignored if E2E_ENABLED is False)\n",
"E2E_PLOT = True\n",
"# The index of the output message that shall be used in plots that visualize a specific\n",
"# message dependency tree. This index has to be 0 <= n < #output messages\n",
"E2E_PLOT_TIMESTAMP = 1000\n",
"# E2E latency threshold. Every E2E latency higher than this is discarded.\n",
"# Set this as low as comfortably possible to speed up calculations.\n",
"# WARNING: If you set this too low (i.e. to E2E latencies that plausibly can happen)\n",
"# your results will be wrong)\n",
"E2E_TIME_LIMIT_S = 2\n",
"\n",
"# All topics containing any of these RegEx patterns are considered output topics in E2E latency calculations\n",
"# E.g. r\"^/control/\" will cover all control topics\n",
"E2E_OUTPUT_TOPIC_PATTERNS = [r\"^/control/command/control_cmd$\"]\n",
"# All topics containing any of these RegEx patterns are considered input topics in E2E latency calculations\n",
"# E.g. r\"^/sensing/\" will cover all sensing topics\n",
"E2E_INPUT_TOPIC_PATTERNS = [r\"pointcloud\"]\n",
"\n",
"# E2E paths are uniquely identified by a string like \"/topic/1 -> void(Node1)(args1) -> /topic/2 -> void(Node2)(args2) -> void(Node2)(args3) -> ...\".\n",
"# Certain patterns only occur in initial setup or in scenario switching and can be excluded via RegEx patterns here.\n",
"E2E_EXCL_PATH_PATTERNS = [r\"^/parameter_events\", \"hazard\", \"turn_indicator\", \"gear_cmd\", \"emergency_cmd\", \"emergency_state\",\n",
" \"external_cmd\", \"/control/operation_mode\"]\n",
"\n",
"# To specify paths of interest, topic/callback name patterns that HAVE TO OCCUR in each E2E path can be specified as RegEx here.\n",
"#E2E_INCL_PATH_PATTERNS = [\"BehaviorPathPlanner\", \"BehaviorVelocityPlanner\", \"pointcloud_preprocessor::Filter\", r\"^/sensing/.*?pointcloud\"]\n",
"E2E_INCL_PATH_PATTERNS = []\n",
"\n",
"# If an exact path through the system is known, this variabe can be set to a list (order matters!) of all elements on the path.\n",
"# The first item ist the one at the \"input end\" of the system, the last one the \"output end\".\n",
"E2E_EXACT_PATH = [\n",
" \"void(pointcloud_preprocessor::Filter)(sensor_msgs::msg::PointCloud2,pcl_msgs::msg::PointIndices)\",\n",
" \"/localization/util/downsample/pointcloud\",\n",
" \"void(NDTScanMatcher)(sensor_msgs::msg::PointCloud2)\",\n",
" \"/localization/pose_estimator/pose_with_covariance\",\n",
" \"void(EKFLocalizer)(geometry_msgs::msg::PoseWithCovarianceStamped)\",\n",
" \"void(EKFLocalizer)()\",\n",
" \"/localization/pose_twist_fusion_filter/kinematic_state\",\n",
" \"void(StopFilter)(nav_msgs::msg::Odometry)\",\n",
" \"/localization/kinematic_state\",\n",
" \"void(behavior_path_planner::BehaviorPathPlannerNode)(nav_msgs::msg::Odometry)\",\n",
" \"void(behavior_path_planner::BehaviorPathPlannerNode)()\",\n",
" \"/planning/scenario_planning/lane_driving/behavior_planning/path_with_lane_id\",\n",
" \"void(behavior_velocity_planner::BehaviorVelocityPlannerNode)(autoware_auto_planning_msgs::msg::PathWithLaneId)\",\n",
" \"/planning/scenario_planning/lane_driving/behavior_planning/path\",\n",
" \"void(ObstacleAvoidancePlanner)(autoware_auto_planning_msgs::msg::Path)\",\n",
" \"/planning/scenario_planning/lane_driving/motion_planning/obstacle_avoidance_planner/trajectory\",\n",
" \"void(motion_planning::ObstacleStopPlannerNode)(autoware_auto_planning_msgs::msg::Trajectory)\",\n",
" \"/planning/scenario_planning/lane_driving/trajectory\",\n",
" \"void(ScenarioSelectorNode)(autoware_auto_planning_msgs::msg::Trajectory)\",\n",
" \"/planning/scenario_planning/scenario_selector/trajectory\",\n",
" \"void(motion_velocity_smoother::MotionVelocitySmootherNode)(autoware_auto_planning_msgs::msg::Trajectory)\",\n",
" \"/planning/scenario_planning/trajectory\",\n",
" \"void(autoware::motion::control::trajectory_follower_nodes::Controller)(autoware_auto_planning_msgs::msg::Trajectory)\",\n",
" \"void(autoware::motion::control::trajectory_follower_nodes::Controller)()\",\n",
" \"/control/trajectory_follower/control_cmd\",\n",
" \"void(vehicle_cmd_gate::VehicleCmdGate)(autoware_auto_control_msgs::msg::AckermannControlCommand)\",\n",
" \"/control/command/control_cmd\",\n",
"]\n",
"\n",
"# For development purposes only. Leave this at False.\n",
"DEBUG = False\n",
"\n",
"# For development purposes only. Leave this at False.\n",
"MANUAL_CACHE = False\n",
"\n",
"##################################################\n",
"# End of User Settings\n",
"##################################################\n",
"\n",
"# This code overrides the above constants with environment variables, with values from the environment.\n",
"# Values will be parsed through Python's literal_eval() function. Thus, strings have to be specified\n",
"# including \" or '.\n",
"for env_key, env_value in os.environ.items():\n",
" if env_key.startswith(\"ANA_NB_\"):\n",
" key = env_key.removeprefix(\"ANA_NB_\")\n",
" if key not in globals().keys():\n",
" continue\n",
" value = parse_as(type(globals()[key]), env_value)\n",
" globals()[key] = value\n",
"\n",
"\n",
"# Convert input paths to absolute paths\n",
"def _expand_path(path):\n",
" return os.path.realpath(os.path.expandvars(os.path.expanduser(path)))\n",
"\n",
"\n",
"TRACING_WS_BUILD_PATH = _expand_path(TRACING_WS_BUILD_PATH)\n",
"TR_PATH = _expand_path(TR_PATH)\n",
"OUT_PATH = _expand_path(OUT_PATH)\n",
"BW_PATH = _expand_path(BW_PATH)\n",
"CL_PATH = _expand_path(CL_PATH)\n",
"\n",
"os.makedirs(OUT_PATH, exist_ok=True)\n",
"\n",
"# Print parsed user settings\n",
"print(\"User Settings:\")\n",
"for k, v in globals().copy().items():\n",
" if not k.isupper():\n",
" continue\n",
" if isinstance(v, Iterable) and not isinstance(v, str):\n",
" v = (\"\\n \" + (\" \" * 44)).join(list(map(str, v)))\n",
" print(f\" {k:.<40s} := {v}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# The last few imports can only be resolved using the user-specified paths above\n",
"sys.path.append(os.path.join(TRACING_WS_BUILD_PATH, \"tracetools_read/\"))\n",
"sys.path.append(os.path.join(TRACING_WS_BUILD_PATH, \"tracetools_analysis/\"))\n",
"# noinspection PyUnresolvedReferences\n",
"from tracetools_read.trace import *\n",
"# noinspection PyUnresolvedReferences\n",
"from tracetools_analysis.loading import load_file\n",
"# noinspection PyUnresolvedReferences\n",
"from tracetools_analysis.processor.ros2 import Ros2Handler\n",
"\n",
"from tracing_interop.tr_types import *"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# Load Trace Data\n",
"\n",
"Load (and, if necessary, convert) tracing data obtained through ros2_tracing. Build indices for fast analysis."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def _load_traces():\n",
" file = load_file(TR_PATH)\n",
" handler = Ros2Handler.process(file)\n",
" return TrContext(handler)\n",
"\n",
"\n",
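"# Note: cached(name, fn, deps, disable) presumably memoizes fn()'s result on\n",
"# disk under `name` and invalidates it when `deps` change (signature inferred\n",
"# from its usage here; see misc.utils). With disable=True it always recomputes.\n",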
"_tracing_context = cached(\"tr_objects\", _load_traces, [TR_PATH], not CACHING_ENABLED)\n",
"_tr_globals = [\"nodes\", \"publishers\", \"subscriptions\", \"timers\", \"timer_node_links\", \"subscription_objects\",\n",
" \"callback_objects\", \"callback_symbols\", \"publish_instances\", \"callback_instances\", \"topics\"]\n",
"\n",
"# Help the IDE recognize those identifiers by assigning a dummy value to their name.\n",
"nodes: Index = Index([])\n",
"publishers: Index = Index([])\n",
"subscriptions: Index = Index([])\n",
"timers: Index = Index([])\n",
"timer_node_links: Index = Index([])\n",
"subscription_objects: Index = Index([])\n",
"callback_objects: Index = Index([])\n",
"callback_symbols: Index = Index([])\n",
"publish_instances: Index = Index([])\n",
"callback_instances: Index = Index([])\n",
"topics: Index = Index([])\n",
"\n",
"for name in _tr_globals:\n",
" globals()[name] = getattr(_tracing_context, name)\n",
"\n",
"print(\"Done.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"##################################################\n",
"# Split /tf into Artificial Topics\n",
"##################################################\n",
"# /tf is one ROS2 topic but carries multiple\n",
"# different streams of information:\n",
"# One for each publisher of the /tf topic.\n",
"# We can separate the messages on /tf according\n",
"# to those publishers.\n",
"# Still, all subscribers to /tf are subscribed\n",
"# to ALL of those split topics, so the\n",
"# found dependencies are still a vast over-\n",
"# approximation of the actual dataflow.\n",
"##################################################\n",
"\n",
"tf_topic: TrTopic = topics.by_name[\"/tf\"]\n",
"\n",
"tf_split_topics = []\n",
"for pub in tf_topic.publishers:\n",
" pub: TrPublisher\n",
" topic = TrTopic(f\"/tf[{pub.node.path}_{pub.id}]\", _tracing_context)\n",
" pub.topic_name = topic.name\n",
" for sub in tf_topic.subscriptions:\n",
" sub.topic_names.append(topic.name)\n",
"\n",
" tf_split_topics.append(topic)\n",
"\n",
"print(\"Rebuilding subscription index\")\n",
"subscriptions.rebuild()\n",
"print(\"Rebuilding publisher index\")\n",
"publishers.rebuild()\n",
"print(\"Adding new topics and rebuilding topic index\")\n",
"topics.append(tf_split_topics)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(\"Artificial TF topics (/tf[...]):\")\n",
"for t in tf_split_topics:\n",
" print(f\"{t.name:.<110s} | {len(t.publishers)} pubs, {sum(map(lambda p: len(p.instances), t.publishers))}\")\n",
" for sub in t.subscriptions:\n",
" print(f\" {sub.node.path:.<106s} | {len(sub.subscription_object.callback_object.callback_instances)} {len(sub.node.publishers)} {sum(map(lambda p: len(p.instances), sub.node.publishers))}\")\n",
" #prefix = os.path.split(sub.node.path)[0]\n",
" #for node in nodes:\n",
" # if node.path.startswith(prefix) and node.path.removeprefix(prefix).count(\"/\") <= 1 and \"transform_listener_impl\" not in node.path:\n",
" # print(f\" -> {' ' * (len(prefix) - 3)}{node.path.removeprefix(prefix)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"##################################################\n",
"# Print (All/Input/Output) Topic Message Counts\n",
"##################################################\n",
"\n",
"for topic in sorted(topics, key=lambda t: t.name):\n",
" topic: TrTopic\n",
" print(f\"{topic.name:.<130s} | {sum(map(lambda p: len(p.instances), topic.publishers)):>5d} msgs\")\n",
"\n",
"print(\"\\n[DEBUG] INPUT TOPICS\")\n",
"for t in sorted(topics, key=lambda t: t.name):\n",
" for f in E2E_INPUT_TOPIC_PATTERNS:\n",
" if re.search(f, t.name):\n",
" print(f\"--[DEBUG] {f:<30s}:{t.name:.<89s} | {sum(map(lambda p: len(p.instances), t.publishers)):>5d} msgs\")\n",
"\n",
"print(\"\\n[DEBUG] OUTPUT TOPICS\")\n",
"for t in sorted(topics, key=lambda t: t.name):\n",
" for f in E2E_OUTPUT_TOPIC_PATTERNS:\n",
" if re.search(f, t.name):\n",
" print(f\"--[DEBUG] {f:<30s}:{t.name:.<89s} | {sum(map(lambda p: len(p.instances), t.publishers)):>5d} msgs\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# Analyze ROS Graph\n",
"Reconstruct namespace hierarchy, data flow graph between callbacks."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import latency_graph.latency_graph_structure as lg\n",
"\n",
"\n",
"def _make_latency_graph():\n",
" return lg.LatencyGraph(_tracing_context)\n",
"\n",
"\n",
"lat_graph = cached(\"lat_graph\", _make_latency_graph, [TR_PATH], not CACHING_ENABLED)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"## Plot ROS Graph (full)\n",
"Plot the DFG hierarchically by node namespace. Plot each internal and external dependency between callbacks as one arrow."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%skip_if_false DFG_ENABLED\n",
"%%skip_if_false DFG_PLOT\n",
"\n",
"#################################################\n",
"# Plot full DFG, down to the callback level\n",
"#################################################\n",
"\n",
"from latency_graph.latency_graph_plots import plot_latency_graph_full\n",
"\n",
"plot_latency_graph_full(lat_graph, _tracing_context, os.path.join(OUT_PATH, \"latency_graph_full\"))"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"## Plot Latency Graph (overview)\n",
"Plot the DFG down to a certain hierarchy level in a flattened manner. Aggregate dependencies from multiple callbacks into corresponding node-node dependencies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%%%skip_if_false DFG_ENABLED\n"
}
},
"outputs": [],
"source": [
"%%skip_if_false DFG_ENABLED\n",
"%%skip_if_false DFG_PLOT\n",
"\n",
"#################################################\n",
"# Plot overview of DFG, down to a certain\n",
"# hierarchy level\n",
"#################################################\n",
"\n",
"from latency_graph.latency_graph_plots import plot_latency_graph_overview\n",
"\n",
"plot_latency_graph_overview(lat_graph, DFG_EXCL_NODE_PATTERNS, DFG_INPUT_NODE_PATTERNS, DFG_OUTPUT_NODE_PATTERNS, DFG_MAX_HIER_LEVEL, os.path.join(OUT_PATH, \"latency_graph_overview\"))"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# Analyze Message Flow\n",
"Build dependency trees ending in the specified output topics."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%skip_if_false E2E_ENABLED\n",
"\n",
"from message_tree.message_tree_algorithms import build_dep_trees\n",
"\n",
"end_topics = [t for t in _tracing_context.topics if any(re.search(f, t.name) for f in E2E_OUTPUT_TOPIC_PATTERNS)]\n",
"\n",
"def _build_dep_trees():\n",
" return build_dep_trees(end_topics, lat_graph, _tracing_context, E2E_EXCL_PATH_PATTERNS, E2E_TIME_LIMIT_S, exact_path=E2E_EXACT_PATH)\n",
"\n",
"try:\n",
" trees = cached(\"trees\", _build_dep_trees, [TR_PATH], not CACHING_ENABLED)\n",
"except Exception as e:\n",
" import traceback\n",
" print(e)\n",
" traceback.print_exc()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%skip_if_false E2E_ENABLED\n",
"%%skip_if_false BW_ENABLED\n",
"\n",
"##################################################\n",
"# Get message bandwidths from `ros2 multitopic bw`\n",
"# output.\n",
"##################################################\n",
"\n",
"from bw_interop.process_bw_output import get_topic_messages\n",
"msgs = get_topic_messages(BW_PATH)\n",
"\n",
"from bw_interop.bw_plots import dds_lat_msg_size_scatter\n",
"plot_topic = \"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%skip_if_false E2E_ENABLED\n",
"\n",
"from message_tree.message_tree_algorithms import e2e_paths_sorted_desc\n",
"from message_tree.message_tree_algorithms import owner\n",
"\n",
"##################################################\n",
"# Find and filter relevant E2E paths in trees\n",
"##################################################\n",
"\n",
"trees_paths = [e2e_paths_sorted_desc(tree, E2E_INPUT_TOPIC_PATTERNS) for tree in tqdm(trees, mininterval=10.0,\n",
" desc=\"Extracting E2E paths\")]\n",
"all_paths = [p for paths in trees_paths for p in paths]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%skip_if_false E2E_ENABLED\n",
"\n",
"from message_tree.message_tree_algorithms import aggregate_e2e_paths\n",
"\n",
"##################################################\n",
"# Group dataflows by DFG path\n",
"##################################################\n",
"\n",
"cohorts = aggregate_e2e_paths(all_paths) #all_paths)\n",
"cohort_pairs = [(k, v) for k, v in cohorts.items()]\n",
"cohort_pairs.sort(key=lambda kv: len(kv[1]), reverse=True)\n",
"\n",
"path_records = [{\"path\": path_key,\n",
" \"timestamp\": path[-1].timestamp,\n",
" \"e2e_latency\": path[-1].timestamp - path[0].timestamp} \\\n",
" for path_key, paths in cohort_pairs for path in paths if path]\n",
"\n",
"out_df = pd.DataFrame.from_records(path_records)\n",
"out_df.to_csv(os.path.join(OUT_PATH, \"e2e.csv\"), sep=\"\\t\", index=False)\n",
"\n",
"df_print = out_df[['path', 'e2e_latency']].groupby(\"path\").agg(['count', 'mean', 'min', 'max']).reset_index()\n",
"#df_print['path'] = df_print['path'].apply(lambda path: \" -> \".join(filter(lambda part: part.startswith(\"/\"), path.split(\" -> \"))))\n",
"df_print = df_print.sort_values((\"e2e_latency\", \"count\"), ascending=False)\n",
"df_print.to_csv(os.path.join(OUT_PATH, \"e2e_overview.csv\"), sep=\"\\t\", index=False)\n",
"df_print"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pickle\n",
"# with open(\"state.pkl\", \"wb\") as f:\n",
"# pickle.dump((trees_paths, all_paths, cohorts), f)\n",
"with open(os.path.join(OUT_PATH, \"state.pkl\"), \"wb\") as f:\n",
" pickle.dump((trees_paths, all_paths, cohorts), f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"##################################################\n",
"# Filter DFG paths through must-be-included and\n",
"# cannot-be-included patterns\n",
"##################################################\n",
"\n",
"#for k in cohorts.keys():\n",
"# print(\" \" + \"\\n-> \".join(k.split(\" -> \")))\n",
"# print()\n",
"# print()\n",
"\n",
"cohorts_filt = {k: v for k, v in cohorts.items()\n",
" if all(re.search(f.removeprefix(\"^\").removesuffix(\"$\"), k) for f in E2E_INCL_PATH_PATTERNS)}\n",
"\n",
"\n",
"print(len(cohorts), len(cohorts_filt))\n",
"for k, v in cohorts_filt.items():\n",
" print(f\"\\n\\n ({len(v)})\\n \", end=\"\")\n",
" print(\"\\n -> \".join(k.split(\" -> \")))\n",
"\n",
"if len(cohorts_filt) != 1:\n",
" print(f\"[WARN] Expected exactly one cohort to remain, got {len(cohorts_filt)}. Only the first one will be used.\")\n",
"\n",
"relevant_path, relevant_dataflows = next(iter(cohorts_filt.items()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from message_tree.message_tree_algorithms import e2e_latency_breakdown\n",
"\n",
"##################################################\n",
"# Break down the E2E latency into its constituent\n",
"# latencies.\n",
"##################################################\n",
"\n",
"e2e_breakdowns = list(map(e2e_latency_breakdown, relevant_dataflows))\n",
"#filt = [(path, bdown) for (path, bdown) in zip(relevant_dataflows, e2e_breakdowns)\n",
"# if not any(True for item in bdown\n",
"# if item.type == \"idle\" and item.duration > item.location[1].callback_obj.owner.period * 1e-9)]\n",
"\n",
"# Backup for debug purposes.\n",
"#relevant_dataflows_orig = relevant_dataflows\n",
"#e2e_breakdowns_orig = e2e_breakdowns\n",
"\n",
"#relevant_dataflows, e2e_breakdowns = zip(*filt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%skip_if_false E2E_ENABLED\n",
"%%skip_if_false E2E_PLOT\n",
"\n",
"##################################################\n",
"# Output the chosen DFG path to a file\n",
"##################################################\n",
"\n",
"from message_tree.message_tree_algorithms import e2e_latency_breakdown\n",
"\n",
"conv_items = [i for p in e2e_breakdowns for i in p]\n",
"with open(os.path.join(OUT_PATH, \"plot_e2es_path.txt\"), \"w\") as f:\n",
" f.write(f\"Number of path instances: {len(relevant_dataflows)}\\n\")\n",
" f.write( \" \" + \"\\n -> \".join(relevant_path.split(\" -> \")))\n",
" f.write(\"\\n\")\n",
"\n",
"conv_items_unique = set(conv_items)\n",
"\n",
"def e2e_breakdown_type_hist__(items):\n",
" \"\"\"\n",
" Given a list of e2e breakdown instances of the form `(\"<type>\", <duration>)`, plots a histogram for each encountered\n",
" type.\n",
" \"\"\"\n",
" plot_types = (\"dds\", \"idle\", \"cpu\")\n",
" #assert all(item.type in plot_types for item in items)\n",
"\n",
" plt.close(\"E2E type breakdown histograms\")\n",
" fig, axes = plt.subplots(1, 3, num=\"E2E type breakdown histograms\", dpi=300, figsize=(16, 9))\n",
" fig.suptitle(\"E2E Latency Breakdown by Resource Type\")\n",
"\n",
" for type, ax in zip(plot_types, axes):\n",
" durations = [item.duration for item in items if item.type == type]\n",
"\n",
" df = pd.Series(durations)\n",
" df.to_csv(os.path.join(OUT_PATH, f\"plot_e2es_{type}_portion.csv\"), header=[f\"e2e_latency_{type}_portion_s\"], index=False)\n",
"\n",
" ax.set_title(type)\n",
" ax.hist(durations, bins=50)\n",
" #ax.set_yscale(\"log\")\n",
" ax.set_xlabel(\"Duration [s]\")\n",
" ax.set_ylabel(\"Occurrences\")\n",
"\n",
" return fig\n",
"\n",
"##################################################\n",
"# Plot DDS/idle/computation time histograms\n",
"# Histograms show individual latency components\n",
"# over all dataflows (de-duplicated)\n",
"##################################################\n",
"\n",
"fig = e2e_breakdown_type_hist__(conv_items_unique)\n",
"plt.savefig(os.path.join(OUT_PATH, \"plot_e2e_portions.png\"))\n",
"\n",
"None\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%skip_if_false E2E_ENABLED\n",
"%%skip_if_false E2E_PLOT\n",
"\n",
"##################################################\n",
"# Plot histogram of all E2E latencies observed\n",
"##################################################\n",
"\n",
"e2es = [path[-1].timestamp - path[0].timestamp for path in relevant_dataflows]\n",
"\n",
"df = pd.Series(e2es)\n",
"df.to_csv(os.path.join(OUT_PATH, \"plot_e2es.csv\"), index=False, header=[\"e2e_latency_s\"])\n",
"\n",
"plt.close(\"E2E histogram\")\n",
"fig, ax = plt.subplots(num=\"E2E histogram\", dpi=300, figsize=(16, 9))\n",
"fig.suptitle(\"E2E Latency Histogram\")\n",
"ax: plt.Axes\n",
"ax.hist(e2es, bins=30)\n",
"ax.set_xlabel(\"E2E Latency [s]\")\n",
"ax.set_ylabel(\"Occurrences\")\n",
"ax.axvline(np.mean(e2es), c=\"red\", linewidth=2)\n",
"_, max_ylim = ax.get_ylim()\n",
"ax.text(np.mean(e2es) * 1.02, max_ylim * 0.98, 'Mean: {:.3f}s'.format(np.mean(e2es)))\n",
"plt.savefig(os.path.join(OUT_PATH, \"plot_e2es.png\"))\n",
"None"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%skip_if_false E2E_ENABLED\n",
"%%skip_if_false E2E_PLOT\n",
"\n",
"from message_tree.message_tree_algorithms import label_latency_item\n",
"\n",
"##################################################\n",
"# Plot violin plot of latencies on each station\n",
"# of the DFG path (aggregated over all dataflows)\n",
"##################################################\n",
"\n",
"plt.close(\"E2E path breakdown\")\n",
"fig, ax = plt.subplots(num=\"E2E path breakdown\", dpi=300, figsize=(16, 5))\n",
"fig.suptitle(\"E2E Latency Path Breakdown\")\n",
"ax: plt.Axes\n",
"\n",
"component_durations = list(zip(*[e2e_latency_breakdown(p) for p in tqdm(relevant_dataflows, desc=\"Calculating breakdowns\")]))\n",
"component_latency_items = component_durations\n",
"labels = [label_latency_item(item) for item in e2e_latency_breakdown(relevant_dataflows[0])]\n",
"types = [item.type for item in e2e_latency_breakdown(relevant_dataflows[0])]\n",
"component_durations = [list(map(lambda item: item.duration, d)) for d in component_durations]\n",
"print(len(component_durations), len(labels))\n",
"\n",
"import matplotlib.patches as mpatches\n",
"\n",
"legend_entries = []\n",
"def add_label(violin, label):\n",
" color = violin[\"bodies\"][0].get_facecolor().flatten()\n",
" legend_entries.append((mpatches.Patch(color=color), label))\n",
"\n",
"for type in (\"idle\", \"dds\", \"cpu\"):\n",
" indices = [i for i, t in enumerate(types) if t == type]\n",
" xs = [component_durations[i] for i in indices]\n",
" vln = ax.violinplot(xs, indices)\n",
" add_label(vln, type)\n",
" for i, x in zip(indices, xs):\n",
" df_out = pd.Series(x)\n",
" df_out.to_csv(os.path.join(OUT_PATH, f\"plot_e2es_violin_{i:02d}.csv\"), index=False, header=[\"duration_s\"])\n",
"ax.set_ylabel(\"Latency contribution [s]\")\n",
"ax.set_xticks(range(len(labels)), labels, rotation=90)\n",
"ax.legend(*zip(*legend_entries))\n",
"plt.savefig(os.path.join(OUT_PATH, \"plot_e2es_violin.png\"))\n",
"\n",
"df_labels = pd.Series(labels)\n",
"df_labels.to_csv(os.path.join(OUT_PATH, \"plot_e2es_violin_labels.csv\"), index=False, header=[\"label\"])\n",
"\n",
"df_types = pd.Series(types)\n",
"df_types.to_csv(os.path.join(OUT_PATH, \"plot_e2es_violin_types.csv\"), index=False, header=[\"type\"])\n",
"\n",
"None"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"for concat_pc_items in component_latency_items:\n",
" if not isinstance(concat_pc_items[0].location[0], TrPublishInstance):\n",
" continue\n",
" dur_ts_records = [(item.location[0].timestamp, item.duration) for item in concat_pc_items]\n",
" df_dur_ts = pd.DataFrame(dur_ts_records, columns=(\"timestamp\", \"duration\"))\n",
" df_dur_ts.to_csv(os.path.join(OUT_PATH, f\"dur_ts_{concat_pc_items[0].location[0].publisher.topic_name.replace('/', '__')}.csv\"), index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"indices = [i for i, t in enumerate(types) if t == \"cpu\"]\n",
"xs = [np.array(list(map(lambda item: item.location[0].duration, component_latency_items[i]))) for i in indices]\n",
"lbls = [labels[i] for i in indices]\n",
"\n",
"records = [(lbl, x.mean(), x.std(), x.min(), x.max()) for x, lbl in zip(xs, lbls)]\n",
"df_cpu = pd.DataFrame(records, columns=[\"callback\", \"mean\", \"std\", \"min\", \"max\"])\n",
"df_cpu.to_csv(os.path.join(OUT_PATH, \"calc_times.csv\"), index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%skip_if_false E2E_ENABLED\n",
"%%skip_if_false E2E_PLOT\n",
"\n",
"from message_tree.message_tree_plots import e2e_breakdown_stack\n",
"\n",
"fig = e2e_breakdown_stack(*e2e_breakdowns)\n",
"fig.set_size_inches(16, 9)\n",
"fig.set_dpi(300)\n",
"None"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(\"Done.\")"
]
}
],
"metadata": {
"interpreter": {
"hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}