In [None]:
import glob
import os.path
from collections import OrderedDict
from dataclasses import dataclass
import tikzplotlib as tkz

import pandas as pd

# Directories that are each handed to `Run(...)` further down.
# glob returns matches in arbitrary (filesystem-dependent) order; sorting makes
# the order in which runs are loaded, merged and reported reproducible.
DATA_PATHS = sorted(glob.glob("/home/max/Projects/ma-measurements/artifacts_*"))


In [None]:
import numpy as np
from termcolor import colored
from typing import List


@dataclass
class Run:
    path: str
    host_name: str
    config_name: str
    n_runs: int

    element_types: List[str]
    element_labels: List[str]
    element_latencies: List

    def __init__(self, path):
        self.path = path
        self.n_runs = 1

        yml_files_found = glob.glob("*.yml", root_dir=path)
        if len(yml_files_found) != 1:
            raise ValueError(f"Expected exactly one '.yml' file in {path}")

        cfg_file = yml_files_found[0]
        self.config_name = cfg_file.removeprefix("aw_awsim").removeprefix("_").removesuffix(".yml")
        if self.config_name == "":
            self.config_name = "baseline"

        hostname_path = os.path.join(path, "collect_sysinfo/sysinfo/hostname.log")
        if not os.path.isfile(hostname_path):
            raise ValueError("Did not find hostname log")

        with open(hostname_path) as f:
            self.host_name = f.readline().strip()

        violin_labels_path = os.path.join(path, "output/plot_e2es_violin_labels.csv")
        violin_types_path = os.path.join(path, "output/plot_e2es_violin_types.csv")

        if not all(os.path.isfile(f) for f in (violin_types_path, violin_labels_path)):
            raise ValueError(f"{self.scenario_name}: Violin labels/types CSVs are not present")

        self.element_types = pd.read_csv(violin_types_path).squeeze("columns").tolist()
        self.element_labels = pd.read_csv(violin_labels_path).squeeze("columns").tolist()

        self.element_latencies = []
        for i in range(len(self.element_labels)):
            lats_path = os.path.join(path, f"output/plot_e2es_violin_{i:02d}.csv")
            if not os.path.isfile(lats_path):
                raise ValueError(f"{self.scenario_name}: Expected {lats_path} to exists but it doesn't")
            lats = pd.read_csv(lats_path, dtype=np.float64).squeeze("columns")
            self.element_latencies.append(lats)

    @property
    def scenario_name(self):
        return f"{self.host_name} - {self.config_name}"

    def e2es(self, type=None):
        if not type:
            element_latencies = self.element_latencies
        else:
            element_latencies = [e_latency for e_type, e_latency in zip(self.element_types, self.element_latencies) if e_type == type]
        return np.array(list(sum(timestep) for timestep in zip(*element_latencies)))

    def append(self, other: 'Run'):
        if self.scenario_name != other.scenario_name:
            raise ValueError(f"Runs of different scenarios cannot be appended: '{self.scenario_name}' != '{other.scenario_name}'")

        self.n_runs += 1
        self.element_latencies = [pd.concat([my, other]) for my, other in zip(self.element_latencies, other.element_latencies)]

# Load every artifacts directory and keep one Run per scenario; a further
# recording of an already known scenario is merged into the existing Run.
# Directories that fail to load (ValueError) are reported in red and skipped.
runs = {}
for artifact_dir in DATA_PATHS:
    try:
        r = Run(artifact_dir)
        scenario = r.scenario_name
        known_run = runs.get(scenario)
        if known_run:
            known_run.append(r)
            print(f"{scenario}: Valid, appended")
        else:
            runs[scenario] = r
            print(f"{scenario}: Valid, new")
    except ValueError as e:
        print(colored(e, "red"))


In [None]:
from matplotlib import pyplot as plt

# --- Histogram grid: one subplot per (configuration, host) combination ---

MIN_SAMPLES = 20   # scenarios with fewer samples are marked "(DNF)" and not plotted
N_BINS = 20        # number of histogram bins
PERCENTILE = 99    # percentile drawn as a vertical line next to median and max
TYPE = "cpu"       # element type passed to Run.e2es()

# Columns of the grid: host name (as used in the scenario name) -> column label.
ORDERING_H = OrderedDict([("edgar-hil-x86", "x86"), ("edgar-sim-dev", "ARM")])
# Rows of the grid: configuration name (as used in the scenario name) -> row label.
ORDERING_V = OrderedDict([
    ("baseline", "Baseline"),
    ("taskset_00-15", "16-Core"),
    ("taskset_00-07", "8-Core"),
    ("taskset_00-03", "4-Core")
])

# squeeze=False keeps `axs` two-dimensional even if ORDERING_H or ORDERING_V
# is reduced to a single entry, so `axs[j, i]` below always works.
fig, axs = plt.subplots(
    len(ORDERING_V),
    len(ORDERING_H),
    figsize=(7.5 * len(ORDERING_H), 7.5 * len(ORDERING_V)),
    dpi=120,
    squeeze=False,
)

for i, (host_key, host_label) in enumerate(ORDERING_H.items()):
    for j, (run_key, run_label) in enumerate(ORDERING_V.items()):
        # Same key format as Run.scenario_name.
        r: Run | None = runs.get(f"{host_key} - {run_key}")
        e2es = r.e2es(type=TYPE) if r else None

        ax: plt.Axes = axs[j, i]
        ax.set_xlabel("E2E Latency [s]")
        ax.set_ylabel("Frequency")
        # Only the top row carries the host label.
        title = f"{host_label}\n" if j == 0 else ""
        title += run_label
        if not r:
            title += " (not recorded)"
            ax.title.set_color("red")
        else:
            title += f" ({r.n_runs} run{'s' if r.n_runs > 1 else ''}, {len(e2es)} samples)"

            if len(e2es) < MIN_SAMPLES:
                # Too few samples for a histogram; flag the subplot instead.
                title += " (DNF)"
                ax.title.set_color("red")
            else:
                ax.hist(e2es, bins=N_BINS)
                l_med = ax.axvline(np.median(e2es), c="black", linestyle="-")
                l_perc = ax.axvline(np.percentile(e2es, PERCENTILE), c="black", linestyle="-.")
                l_max = ax.axvline(np.max(e2es), c="black", linestyle=":")
                ax.legend([l_med, l_perc, l_max], ["Median", f"{PERCENTILE}th Percentile", "Max"], loc="upper right")

        ax.set_title(title)

# Save through the explicit figure handle rather than pyplot's current figure.
fig.savefig("e2e_grid.pdf")
tkz.save(filepath="../ma-thesis/figures/04_e2e_grid.tex")

In [None]:
# --- Box plots: one subplot per element type, one box per configuration ---

# Element types to plot, top to bottom; None selects all elements (titled "E2E").
ORDERING_T = ["dds", "idle", "cpu", None]

# squeeze=False keeps `axs` two-dimensional even for a single entry in
# ORDERING_T; the single column is selected explicitly further down.
fig, axs = plt.subplots(
    len(ORDERING_T),
    1,
    sharex='all',
    figsize=(15, 7.5 * len(ORDERING_T)),
    dpi=120,
    squeeze=False,
)

type_to_boxes = {t: [] for t in ORDERING_T}
labels = []

# Only the first host in ORDERING_H is plotted.
for host_key, host_label in list(ORDERING_H.items())[:1]:
    for run_key, run_label in ORDERING_V.items():
        labels.append(f"{host_label} - {run_label}")
        r: Run | None = runs.get(f"{host_key} - {run_key}")
        if r is None:
            # Keep the box positions aligned with `labels` for missing scenarios.
            for v in type_to_boxes.values():
                v.append([])
            continue

        # Loop variable deliberately not named `type`: at notebook top level that
        # would rebind the builtin `type` (to None after the last iteration).
        for e_type in ORDERING_T:
            type_to_boxes[e_type].append(r.e2es(e_type))

for e_type, ax in zip(ORDERING_T, axs[:, 0]):
    # NOTE(review): newer Matplotlib releases rename `labels` to `tick_labels`;
    # confirm against the installed version before switching.
    ax.boxplot(type_to_boxes[e_type], labels=labels)
    ax.set_title(e_type or "E2E")