dataflow-analysis/trace-analysis.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import re\n",
    "import math\n",
    "from tqdm import tqdm\n",
    "from bisect import bisect_right\n",
    "from termcolor import colored\n",
    "\n",
    "import matplotlib.patches as mpatch\n",
    "from scipy import stats\n",
    "from cycler import cycler\n",
    "\n",
    "import glob\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "from misc.utils import cached, parse_as\n",
    "\n",
    "plt.rcParams['figure.dpi'] = 300\n",
    "\n",
    "%matplotlib inline"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# User Settings"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "##################################################\n",
    "# User Settings\n",
    "##################################################\n",
    "# Change these to influence the execution of the\n",
    "# notebook.\n",
    "# You can override these from the command line\n",
    "# by defining environment variables with the\n",
    "# name of the constants below, prefixed with\n",
    "# \"ANA_NB_\".\n",
    "# For example, the environment variable\n",
    "# \"ANA_NB_TR_PATH\" will override the \"TR_PATH\"\n",
    "# setting below.\n",
    "##################################################\n",
    "\n",
    "# The path to the build folder of a ROS2 workspace that contains the\n",
    "# tracetools_read and tracetools_analysis folders.\n",
    "TRACING_WS_BUILD_PATH = \"~/Projects/autoware/build\"\n",
    "\n",
    "# Path to trace directory (e.g. ~/.ros/my-trace/ust) or to a converted trace file.\n",
    "# Using the path \"/ust\" at the end is optional but greatly reduces processing time\n",
    "# if kernel traces are also present.\n",
    "# TR_PATH = \"/home/max/Downloads/iteration2_worker1/aw_replay/tracing/scenario-trace/ust\"\n",
    "TR_PATH = \"data/trace-awsim-x86/ust\"\n",
    "\n",
    "# Path to the folder all artifacts from this notebook are saved to.\n",
    "# This entails plots as well as data tables.\n",
    "OUT_PATH = \"out/\"\n",
    "\n",
    "# Whether to cache the results of long computations per set of inputs\n",
    "CACHING_ENABLED = False\n",
    "\n",
    "# Whether to annotate topics/publications with bandwidth/message size\n",
    "BW_ENABLED = False\n",
    "# Path to a results folder as output by ma-hw-perf-tools/messages/record.bash\n",
    "# Used to annotate message sizes in E2E latency calculations\n",
    "BW_PATH = \"../ma-hw-perf-tools/data/results\"\n",
    "\n",
    "# Whether to use dependencies extracted by the Clang-tools to supplement\n",
    "# automatic node-internal data flow annotations.\n",
    "# If in doubt, set to False.\n",
    "CL_ENABLED = False\n",
    "# Path to the output directory of the ROS2 dependency checker.\n",
    "# Will only be used if CL_ENABLED is True.\n",
    "CL_PATH = \"~/Projects/llvm-project/clang-tools-extra/ros2-internal-dependency-checker/output\"\n",
    "\n",
    "# Whether to compute data flow graphs.\n",
    "# If you are only interested in E2E latencies, set this to False\n",
    "DFG_ENABLED = True\n",
    "# Whether to plot data flow graphs (ignored if DFG_ENABLED is False)\n",
    "DFG_PLOT = True\n",
    "\n",
    "# The maximum node namespace hierarchy level to be plotted.\n",
    "# Top-level (1): e.g. /sensing, /control, etc.\n",
    "# Level 3: e.g. /sensing/lidar/pointcloud_processor\n",
    "DFG_MAX_HIER_LEVEL = 100\n",
    "\n",
    "# RegEx pattern for nodes that shall be marked as system inputs\n",
    "# These will be plotted with a start arrow as known from automata diagrams\n",
    "DFG_INPUT_NODE_PATTERNS = [r\"^/sensing\"]\n",
    "# RegEx pattern for nodes that shall be marked as system outputs\n",
    "# These will be plotted with a double border\n",
    "DFG_OUTPUT_NODE_PATTERNS = [r\"^/awapi\", r\"^/control/external_cmd_converter\", \"emergency\"]\n",
    "# RegEx for nodes which shall not be plotted in the DFG\n",
    "DFG_EXCL_NODE_PATTERNS = [r\"^/rviz2\", r\"transform_listener_impl\"]\n",
    "\n",
    "# Whether to compute E2E latencies.\n",
    "E2E_ENABLED = True\n",
    "# Whether to plot end-to-end latency information (ignored if E2E_ENABLED is False)\n",
    "E2E_PLOT = True\n",
    "# The index of the output message that shall be used in plots that visualize a specific\n",
    "# message dependency tree. This index has to be 0 <= n < #output messages\n",
    "E2E_PLOT_TIMESTAMP = 1000\n",
    "# E2E latency threshold. Every E2E latency higher than this is discarded.\n",
    "# Set this as low as comfortably possible to speed up calculations.\n",
    "# WARNING: If you set this too low (i.e. to E2E latencies that plausibly can happen)\n",
    "# your results will be wrong)\n",
    "E2E_TIME_LIMIT_S = 2\n",
    "\n",
    "# All topics containing any of these RegEx patterns are considered output topics in E2E latency calculations\n",
    "# E.g. r\"^/control/\" will cover all control topics\n",
    "E2E_OUTPUT_TOPIC_PATTERNS = [r\"emergency/control_cmd\"]\n",
    "# All topics containing any of these RegEx patterns are considered input topics in E2E latency calculations\n",
    "# E.g. r\"^/sensing/\" will cover all sensing topics\n",
    "E2E_INPUT_TOPIC_PATTERNS = [r\"^/vehicle/status/\", r\"^/sensing/(lidar/[^c]|[^l])\"]\n",
    "\n",
    "# E2E paths are uniquely identified by a string like \"/topic/1 -> void(Node1)(args1) -> /topic/2 -> void(Node2)(args2) -> void(Node2)(args3) -> ...\".\n",
    "# Certain patterns only occur in initial setup or in scenario switching and can be excluded via RegEx patterns here.\n",
    "E2E_EXCLUDED_PATH_PATTERNS = [r\"NDTScanMatcher\"]\n",
    "\n",
    "\n",
    "DDD = False\n",
    "\n",
    "# This code overrides the above constants with environment variables, do not edit.\n",
    "for env_key, env_value in os.environ.items():\n",
    "    if env_key.startswith(\"ANA_NB_\"):\n",
    "        key = env_key.removeprefix(\"ANA_NB_\")\n",
    "        if key not in globals().keys():\n",
    "            continue\n",
    "        value = parse_as(type(globals()[key]), env_value)\n",
    "        globals()[key] = value\n",
    "\n",
    "\n",
    "# Convert input paths to absolute paths\n",
    "def _expand_path(path):\n",
    "    return os.path.realpath(os.path.expandvars(os.path.expanduser(path)))\n",
    "\n",
    "\n",
    "TRACING_WS_BUILD_PATH = _expand_path(TRACING_WS_BUILD_PATH)\n",
    "TR_PATH = _expand_path(TR_PATH)\n",
    "OUT_PATH = _expand_path(OUT_PATH)\n",
    "BW_PATH = _expand_path(BW_PATH)\n",
    "CL_PATH = _expand_path(CL_PATH)\n",
    "\n",
    "os.makedirs(OUT_PATH, exist_ok=True)\n",
    "\n",
    "print(\"User Settings:\")\n",
    "for k, v in globals().copy().items():\n",
    "    if not k.isupper():\n",
    "        continue\n",
    "    print(f\"  {k:.<40s} := {v}\")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "sys.path.append(os.path.join(TRACING_WS_BUILD_PATH, \"tracetools_read/\"))\n",
    "sys.path.append(os.path.join(TRACING_WS_BUILD_PATH, \"tracetools_analysis/\"))\n",
    "# noinspection PyUnresolvedReferences\n",
    "from tracetools_read.trace import *\n",
    "# noinspection PyUnresolvedReferences\n",
    "from tracetools_analysis.loading import load_file\n",
    "# noinspection PyUnresolvedReferences\n",
    "from tracetools_analysis.processor.ros2 import Ros2Handler\n",
    "\n",
    "from tracing_interop.tr_types import *\n",
    "from message_tree.message_tree_structure import DepTree\n",
    "from matching.subscriptions import sanitize"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Load Trace Data"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "def _load_traces():\n",
    "    file = load_file(TR_PATH)\n",
    "    handler = Ros2Handler.process(file)\n",
    "    return TrContext(handler)\n",
    "\n",
    "\n",
    "_tracing_context = cached(\"tr_objects\", _load_traces, [TR_PATH], not CACHING_ENABLED)\n",
    "_tr_globals = [\"nodes\", \"publishers\", \"subscriptions\", \"timers\", \"timer_node_links\", \"subscription_objects\",\n",
    "               \"callback_objects\", \"callback_symbols\", \"publish_instances\", \"callback_instances\", \"topics\"]\n",
    "\n",
    "# Help the IDE recognize those identifiers\n",
    "nodes = publishers = subscriptions = timers = timer_node_links = subscription_objects = callback_objects = callback_symbols = publish_instances = callback_instances = topics = None\n",
    "\n",
    "for name in _tr_globals:\n",
    "    globals()[name] = getattr(_tracing_context, name)\n",
    "\n",
    "print(\"Done.\")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "\n",
    "for topic in sorted(topics, key=lambda t: t.name):\n",
    "    topic: TrTopic\n",
    "    print(f\"{topic.name:.<120s} | {sum(map(lambda p: len(p.instances), topic.publishers))}\")\n",
    "\n",
    "print(\"\\n[ENTKÄFERN] INPUT TOPICS\")\n",
    "for t in sorted(topics, key=lambda t: t.name):\n",
    "    for f in E2E_INPUT_TOPIC_PATTERNS:\n",
    "        if re.search(f, t.name):\n",
    "            print(f\"--[ENTKÄFERN] {f:<30s}:{t.name:.<89s} | {sum(map(lambda p: len(p.instances), t.publishers))}\")\n",
    "\n",
    "print(\"\\n[ENTKÄFERN] OUTPUT TOPICS\")\n",
    "for t in sorted(topics, key=lambda t: t.name):\n",
    "    for f in E2E_OUTPUT_TOPIC_PATTERNS:\n",
    "        if re.search(f, t.name):\n",
    "            print(f\"--[ENTKÄFERN] {f:<30s}:{t.name:.<89s} | {sum(map(lambda p: len(p.instances), t.publishers))}\")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Analyze ROS Graph"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import latency_graph.latency_graph_structure as lg\n",
    "\n",
    "\n",
    "def _make_latency_graph():\n",
    "    return lg.LatencyGraph(_tracing_context)\n",
    "\n",
    "\n",
    "lat_graph = cached(\"lat_graph\", _make_latency_graph, [TR_PATH], not CACHING_ENABLED)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Plot ROS Graph (full)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "%%skip_if_false DFG_ENABLED\n",
    "%%skip_if_false DFG_PLOT\n",
    "\n",
    "#################################################\n",
    "# Plot full DFG, down to the callback level\n",
    "#################################################\n",
    "\n",
    "from latency_graph.latency_graph_plots import plot_latency_graph_full\n",
    "\n",
    "plot_latency_graph_full(lat_graph, _tracing_context, \"latency_graph_full\")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Plot Latency Graph (overview)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "%%skip_if_false DFG_ENABLED\n",
    "%%skip_if_false DFG_PLOT\n",
    "\n",
    "#################################################\n",
    "# Plot overview of DFG, down to a certain\n",
    "# hierarchy level\n",
    "#################################################\n",
    "\n",
    "from latency_graph.latency_graph_plots import plot_latency_graph_overview\n",
    "\n",
    "plot_latency_graph_overview(lat_graph, DFG_EXCL_NODE_PATTERNS, DFG_INPUT_NODE_PATTERNS, DFG_OUTPUT_NODE_PATTERNS, DFG_MAX_HIER_LEVEL, \"latency_graph_overview\")"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%%%skip_if_false DFG_ENABLED\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Analyze Message Flow"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "%%skip_if_false E2E_ENABLED\n",
    "\n",
    "from message_tree.message_tree_algorithms import build_dep_trees\n",
    "\n",
    "end_topics = [t for t in _tracing_context.topics if any(re.search(f, t.name) for f in E2E_OUTPUT_TOPIC_PATTERNS)]\n",
    "\n",
    "def _build_dep_trees():\n",
    "    return build_dep_trees(end_topics, lat_graph, _tracing_context, E2E_EXCLUDED_PATH_PATTERNS, E2E_TIME_LIMIT_S)\n",
    "\n",
    "try:\n",
    "    trees = cached(\"trees\", _build_dep_trees, [TR_PATH], not CACHING_ENABLED)\n",
    "except Exception as e:\n",
    "    import traceback\n",
    "    print(e)\n",
    "    traceback.print_exc()"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "%%skip_if_false E2E_ENABLED\n",
    "%%skip_if_false BW_ENABLED\n",
    "\n",
    "\n",
    "def parse_bytes(string):\n",
    "    match string[-1]:\n",
    "        case 'K':\n",
    "            exponent = 1e3\n",
    "        case 'M':\n",
    "            exponent = 1e6\n",
    "        case _:\n",
    "            exponent = 1\n",
    "\n",
    "    num = float(string.split(\" \")[0])\n",
    "    return num * exponent\n",
    "\n",
    "\n",
    "def bytes_str(bytes):\n",
    "    if bytes >= 1024 ** 2:\n",
    "        return f\"{bytes / (1024 ** 2):.2f} MiB\"\n",
    "    if bytes >= 1024:\n",
    "        return f\"{bytes / 1024:.2f} KiB\"\n",
    "    return f\"{bytes:.0f} B\"\n",
    "\n",
    "\n",
    "bw_files = glob.glob(os.path.join(BW_PATH, \"*.log\"))\n",
    "msg_sizes = {}\n",
    "for bw_file in bw_files:\n",
    "    with open(bw_file) as f:\n",
    "        lines = f.readlines()\n",
    "        topic = os.path.splitext(os.path.split(bw_file)[1])[0].replace(\"__\", \"/\")\n",
    "\n",
    "        if not lines or re.match(f\"^\\s*$\", lines[-1]):\n",
    "            #print(f\"No data for {topic}\")\n",
    "            continue\n",
    "\n",
    "        line_pattern = re.compile(\n",
    "            r\"(?P<bw>[0-9.]+ [KM]?)B/s from (?P<n_msgs>[0-9.]+) messages --- Message size mean: (?P<mean>[0-9.]+ [KM]?)B min: (?P<min>[0-9.]+ [KM]?)B max: (?P<max>[0-9.]+ [KM]?)B\\n\")\n",
    "        m = re.fullmatch(line_pattern, lines[-1])\n",
    "        if m is None:\n",
    "            print(f\"Line could not be parsed in {topic}: '{lines[-1]}'\")\n",
    "            continue\n",
    "\n",
    "        msg_sizes[topic] = {'bw': parse_bytes(m.group(\"bw\")),\n",
    "                            'min': parse_bytes(m.group(\"min\")),\n",
    "                            'mean': parse_bytes(m.group(\"mean\")),\n",
    "                            'max': parse_bytes(m.group(\"max\"))}"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "%%skip_if_false E2E_ENABLED\n",
    "%%skip_if_false E2E_PLOT\n",
    "%%skip_if_false DDD\n",
    "\n",
    "\n",
    "fig, ax = plt.subplots(num=\"e2e_plot\")\n",
    "DS = 1\n",
    "times = [tree.head.timestamp - trees[0].head.timestamp for tree in trees]\n",
    "\n",
    "ax2 = ax.twinx()\n",
    "ax2.plot(times, list(map(lambda paths: sum(map(len, paths)) / len(paths), e2e_pathss)), color=\"orange\")\n",
    "ax2.fill_between(times,\n",
    "                 list(map(lambda paths: min(map(len, paths)), e2e_pathss)),\n",
    "                 list(map(lambda paths: max(map(len, paths)), e2e_pathss)),\n",
    "                 alpha=.3, color=\"orange\")\n",
    "\n",
    "ax.plot(times, [np.mean(e2es) for e2es in e2ess])\n",
    "ax.fill_between(times, [np.min(e2es) for e2es in e2ess], [np.max(e2es) for e2es in e2ess], alpha=.3)\n",
    "\n",
    "\n",
    "def scatter_topic(topic_name, y=0, **scatter_kwargs):\n",
    "    for pub in topics.by_name[topic_name].publishers:\n",
    "        if not pub:\n",
    "            continue\n",
    "\n",
    "        inst_timestamps = [inst.timestamp - trees[0].head.timestamp for inst in pub.instances if\n",
    "                           inst.timestamp >= trees[0].head.timestamp]\n",
    "        scatter_kwargs_default = {\"marker\": \"x\", \"color\": \"indianred\"}\n",
    "        scatter_kwargs_default.update(scatter_kwargs)\n",
    "        ax.scatter(inst_timestamps, np.full(len(inst_timestamps), fill_value=y), **scatter_kwargs_default)\n",
    "\n",
    "\n",
    "scatter_topic(\"/autoware/engage\")\n",
    "scatter_topic(\"/planning/scenario_planning/parking/trajectory\", y=-.04, color=\"cadetblue\")\n",
    "scatter_topic(\"/planning/scenario_planning/lane_driving/trajectory\", y=-.08, color=\"darkgreen\")\n",
    "\n",
    "ax.set_xlabel(\"Simulation time [s]\")\n",
    "ax.set_ylabel(\"End-to-End latency [s]\")\n",
    "ax2.set_ylabel(\"End-to-End path length\")\n",
    "None"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "%%skip_if_false E2E_ENABLED\n",
    "%%skip_if_false E2E_PLOT\n",
    "\n",
    "%load_ext line_profiler\n",
    "\n",
    "\n",
    "from message_tree.message_tree_algorithms import e2e_paths_sorted_desc\n",
    "from message_tree.message_tree_plots import e2e_breakdown_type_hist\n",
    "\n",
    "def _map_tree_to_e2es(tree):\n",
    "    return e2e_paths_sorted_desc(tree, E2E_INPUT_TOPIC_PATTERNS)\n",
    "\n",
    "def ppp():\n",
    "    trees_paths = [e2e_paths_sorted_desc(tree, E2E_INPUT_TOPIC_PATTERNS) for tree in tqdm(trees, desc=\"Extracting E2E paths\")]\n",
    "    all_paths = [p for paths in trees_paths for p in paths]\n",
    "    all_e2e_items = [i for p in all_paths for i in p]\n",
    "\n",
    "    return all_e2e_items\n",
    "\n",
    "%lprun -f e2e_paths_sorted_desc ppp()\n",
    "\n",
    "e2e_breakdown_type_hist(ppp())\n",
    "\n",
    "None\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "with open(\"trees.pkl\", \"wb\") as f:\n",
    "    pickle.dump(trees, f)\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}