From 33f8c61c4ad3ef75fd676efd4e70ea2ed1e069a8 Mon Sep 17 00:00:00 2001
From: Maximilian Schmeller
Date: Sun, 25 Dec 2022 20:37:10 +0900
Subject: [PATCH] Add a notebook that plots comparisons of multiple simulation runs [WIP]

---
 make-plots.ipynb | 249 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 249 insertions(+)
 create mode 100644 make-plots.ipynb

diff --git a/make-plots.ipynb b/make-plots.ipynb
new file mode 100644
index 0000000..b3f9ec8
--- /dev/null
+++ b/make-plots.ipynb
@@ -0,0 +1,249 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "import glob\n",
+    "import os.path\n",
+    "from collections import OrderedDict\n",
+    "from dataclasses import dataclass\n",
+    "import tikzplotlib as tkz\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "DATA_PATHS = glob.glob(\"/home/max/Projects/ma-measurements/artifacts_*\")\n"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from termcolor import colored\n",
+    "from typing import List\n",
+    "\n",
+    "\n",
+    "@dataclass\n",
+    "class Run:\n",
+    "    \"\"\"Measurements of one scenario (host + config), possibly merged from several runs.\"\"\"\n",
+    "\n",
+    "    path: str\n",
+    "    host_name: str\n",
+    "    config_name: str\n",
+    "    n_runs: int\n",
+    "\n",
+    "    element_types: List[str]\n",
+    "    element_labels: List[str]\n",
+    "    element_latencies: List\n",
+    "\n",
+    "    def __init__(self, path):\n",
+    "        \"\"\"Load one run from the artifacts directory `path`; raise ValueError if an expected file is missing.\"\"\"\n",
+    "        self.path = path\n",
+    "        self.n_runs = 1\n",
+    "\n",
+    "        yml_files_found = glob.glob(\"*.yml\", root_dir=path)\n",
+    "        if len(yml_files_found) != 1:\n",
+    "            raise ValueError(f\"Expected exactly one '.yml' file in {path}\")\n",
+    "\n",
+    "        # Config name = '.yml' file name without the 'aw_awsim' / '_' prefix; an empty remainder means baseline\n",
+    "        cfg_file = yml_files_found[0]\n",
+    "        self.config_name = cfg_file.removeprefix(\"aw_awsim\").removeprefix(\"_\").removesuffix(\".yml\")\n",
+    "        if self.config_name == \"\":\n",
+    "            self.config_name = \"baseline\"\n",
+    "\n",
+    "        hostname_path = os.path.join(path, \"collect_sysinfo/sysinfo/hostname.log\")\n",
+    "        if not os.path.isfile(hostname_path):\n",
+    "            raise ValueError(\"Did not find hostname log\")\n",
+    "\n",
+    "        with open(hostname_path) as f:\n",
+    "            self.host_name = f.readline().strip()\n",
+    "\n",
+    "        violin_labels_path = os.path.join(path, \"output/plot_e2es_violin_labels.csv\")\n",
+    "        violin_types_path = os.path.join(path, \"output/plot_e2es_violin_types.csv\")\n",
+    "\n",
+    "        if not all(os.path.isfile(f) for f in (violin_types_path, violin_labels_path)):\n",
+    "            raise ValueError(f\"{self.scenario_name}: Violin labels/types CSVs are not present\")\n",
+    "\n",
+    "        self.element_types = pd.read_csv(violin_types_path).squeeze(\"columns\").tolist()\n",
+    "        self.element_labels = pd.read_csv(violin_labels_path).squeeze(\"columns\").tolist()\n",
+    "\n",
+    "        # One latency CSV is expected per element label, numbered 00, 01, ...\n",
+    "        self.element_latencies = []\n",
+    "        for i in range(len(self.element_labels)):\n",
+    "            lats_path = os.path.join(path, f\"output/plot_e2es_violin_{i:02d}.csv\")\n",
+    "            if not os.path.isfile(lats_path):\n",
+    "                raise ValueError(f\"{self.scenario_name}: Expected {lats_path} to exist but it doesn't\")\n",
+    "            lats = pd.read_csv(lats_path, dtype=np.float64).squeeze(\"columns\")\n",
+    "            self.element_latencies.append(lats)\n",
+    "\n",
+    "    @property\n",
+    "    def scenario_name(self):\n",
+    "        \"\"\"Key identifying the scenario: '<host name> - <config name>'.\"\"\"\n",
+    "        return f\"{self.host_name} - {self.config_name}\"\n",
+    "\n",
+    "    def e2es(self, type=None):\n",
+    "        \"\"\"Return the per-timestep sums of the element latencies, restricted to elements of `type` if given.\"\"\"\n",
+    "        if not type:\n",
+    "            element_latencies = self.element_latencies\n",
+    "        else:\n",
+    "            element_latencies = [e_latency for e_type, e_latency in zip(self.element_types, self.element_latencies) if e_type == type]\n",
+    "        return np.array([sum(timestep) for timestep in zip(*element_latencies)])\n",
+    "\n",
+    "    def append(self, other: 'Run'):\n",
+    "        \"\"\"Merge the latencies of `other` into this run; raise ValueError if the scenarios differ.\"\"\"\n",
+    "        if self.scenario_name != other.scenario_name:\n",
+    "            raise ValueError(f\"Runs of different scenarios cannot be appended: '{self.scenario_name}' != '{other.scenario_name}'\")\n",
+    "\n",
+    "        self.n_runs += 1\n",
+    "        self.element_latencies = [pd.concat([mine, theirs]) for mine, theirs in zip(self.element_latencies, other.element_latencies)]\n",
+    "\n",
+    "# Group all runs found on disk by scenario, merging repeated runs of the same scenario\n",
+    "runs = {}\n",
+    "for p in DATA_PATHS:\n",
+    "    try:\n",
+    "        r = Run(p)\n",
+    "        if r_existing := runs.get(r.scenario_name):\n",
+    "            r_existing.append(r)\n",
+    "            print(f\"{r.scenario_name}: Valid, appended\")\n",
+    "        else:\n",
+    "            runs[r.scenario_name] = r\n",
+    "            print(f\"{r.scenario_name}: Valid, new\")\n",
+    "    except ValueError as e:\n",
+    "        print(colored(e, \"red\"))\n"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "from matplotlib import pyplot as plt\n",
+    "\n",
+    "# Scenarios with fewer than MIN_SAMPLES samples are marked as DNF instead of being plotted\n",
+    "MIN_SAMPLES = 20\n",
+    "N_BINS = 20\n",
+    "PERCENTILE = 99\n",
+    "TYPE = \"cpu\"\n",
+    "\n",
+    "ORDERING_H = OrderedDict([(\"edgar-hil-x86\", \"x86\"), (\"edgar-sim-dev\", \"ARM\")])\n",
+    "ORDERING_V = OrderedDict([\n",
+    "    (\"baseline\", \"Baseline\"),\n",
+    "    (\"taskset_00-15\", \"16-Core\"),\n",
+    "    (\"taskset_00-07\", \"8-Core\"),\n",
+    "    (\"taskset_00-03\", \"4-Core\")\n",
+    "])\n",
+    "\n",
+    "fig, axs = plt.subplots(len(ORDERING_V), len(ORDERING_H), figsize=(7.5 * len(ORDERING_H), 7.5 * len(ORDERING_V)), dpi=120)\n",
+    "\n",
+    "# One histogram per scenario: hosts as columns, configurations as rows\n",
+    "for i, (host_key, host_label) in enumerate(ORDERING_H.items()):\n",
+    "    for j, (run_key, run_label) in enumerate(ORDERING_V.items()):\n",
+    "        r: Run | None = runs.get(f\"{host_key} - {run_key}\")\n",
+    "        e2es = r.e2es(type=TYPE) if r is not None else None\n",
+    "\n",
+    "        ax: plt.Axes = axs[j, i]\n",
+    "        ax.set_xlabel(\"E2E Latency [s]\")\n",
+    "        ax.set_ylabel(\"Frequency\")\n",
+    "        title = f\"{host_label}\\n\" if j == 0 else \"\"\n",
+    "        title += run_label\n",
+    "        if r is None:\n",
+    "            title += \" (not recorded)\"\n",
+    "            ax.title.set_color(\"red\")\n",
+    "        else:\n",
+    "            title += f\" ({r.n_runs} run{'s' if r.n_runs > 1 else ''}, {len(e2es)} samples)\"\n",
+    "\n",
+    "            if len(e2es) < MIN_SAMPLES:\n",
+    "                title += \" (DNF)\"\n",
+    "                ax.title.set_color(\"red\")\n",
+    "            else:\n",
+    "                ax.hist(e2es, bins=N_BINS)\n",
+    "                l_med = ax.axvline(np.median(e2es), c=\"black\", linestyle=\"-\")\n",
+    "                l_perc = ax.axvline(np.percentile(e2es, PERCENTILE), c=\"black\", linestyle=\"-.\")\n",
+    "                l_max = ax.axvline(np.max(e2es), c=\"black\", linestyle=\":\")\n",
+    "                ax.legend([l_med, l_perc, l_max], [\"Median\", f\"{PERCENTILE}th Percentile\", \"Max\"], loc=\"upper right\")\n",
+    "\n",
+    "        ax.set_title(title)\n",
+    "\n",
+    "plt.savefig(\"e2e_grid.pdf\")\n",
+    "tkz.save(filepath=\"../ma-thesis/figures/04_e2e_grid.tex\")"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "# Box plots per latency type (None = all elements); only the first host of ORDERING_H is plotted\n",
+    "ORDERING_T = [\"dds\", \"idle\", \"cpu\", None]\n",
+    "\n",
+    "fig, axs = plt.subplots(len(ORDERING_T), 1, sharex='all', figsize=(15, 7.5 * len(ORDERING_T)), dpi=120)\n",
+    "\n",
+    "type_to_boxes = {t: [] for t in ORDERING_T}\n",
+    "labels = []\n",
+    "\n",
+    "for host_key, host_label in list(ORDERING_H.items())[:1]:\n",
+    "    for run_key, run_label in ORDERING_V.items():\n",
+    "        labels.append(f\"{host_label} - {run_label}\")\n",
+    "        r: Run | None = runs.get(f\"{host_key} - {run_key}\")\n",
+    "        if r is None:\n",
+    "            for v in type_to_boxes.values():\n",
+    "                v.append([])\n",
+    "            continue\n",
+    "\n",
+    "        # Loop variable is not called `type`: rebinding it here would hide the builtin in all later cells\n",
+    "        for e2e_type in ORDERING_T:\n",
+    "            type_to_boxes[e2e_type].append(r.e2es(e2e_type))\n",
+    "\n",
+    "for e2e_type, ax in zip(ORDERING_T, axs):\n",
+    "    ax: plt.Axes\n",
+    "    ax.boxplot(type_to_boxes[e2e_type], labels=labels)\n",
+    "    ax.set_title(e2e_type or \"E2E\")"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   }
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}