Fixes in message tree calculations, plots for the paper, started work on BW plots for the paper

This commit is contained in:
Maximilian Schmeller 2022-10-28 22:37:48 +09:00
parent 65c21fb6ce
commit a1369890bf
5 changed files with 342 additions and 73 deletions

View file

@ -0,0 +1,45 @@
import pandas as pd
import tables as tb
import numpy as np
def bytes_str(bytes):
    """Format a byte count as a short human-readable string.

    Counts of at least 1024**2 are rendered in MiB and counts of at least
    1024 in KiB, both with two decimals. Anything smaller is rendered as a
    whole number followed by "B".
    """
    # Largest unit first, so the first threshold that is reached picks the unit.
    for threshold, unit in ((1024**2, "MiB"), (1024, "KiB")):
        if bytes >= threshold:
            return f"{bytes/threshold:.2f} {unit}"
    return f"{bytes:.0f} B"
def get_topic_messages(h5_filename: str):
    """Load every node under "/messages" of an HDF5 file into DataFrames.

    The file is opened with PyTables using "/messages" as the root, so the
    nodes listed at "/" are the direct children of that group. Each node is
    read completely (``node[:]``) and converted with
    ``pd.DataFrame.from_records``.

    NOTE(review): presumably each node is a table of per-message records
    with at least "size" and "timestamp" fields, which is what
    ``get_topic_stats`` reads -- confirm against the writer of the file.

    :param h5_filename: Path of the HDF5 file to read.
    :return: Dict mapping each node's ``title`` (used as the topic name) to
        the DataFrame built from that node's records. If two nodes share a
        title, the one listed later wins.
    """
    with tb.open_file(h5_filename, root_uep="/messages") as h5_file:
        # Materialise every DataFrame while the file is still open.
        return {
            topic_node.title: pd.DataFrame.from_records(topic_node[:])
            for topic_node in h5_file.list_nodes("/")
        }
def get_topic_stats(topics_dict: dict):
    """Compute per-topic message count, data volume and size statistics.

    :param topics_dict: Mapping from topic name to a DataFrame of that
        topic's messages, as returned by ``get_topic_messages``. Each
        DataFrame must provide "size" and "timestamp" columns. An iterable
        of ``(topic, messages)`` pairs is accepted as well.
    :return: DataFrame with one row per topic and the columns "topic",
        "count", "total_data", "bandwidth", "min_size", "avg_size" and
        "max_size". "bandwidth" is the summed size divided by the span
        between the smallest and largest timestamp; it is NaN when that
        span is not positive (e.g. a single message), because no rate can
        be derived from a zero-length interval.

    NOTE(review): "size" is presumably in bytes, and "bandwidth" is then in
    bytes per timestamp unit -- the timestamp unit is not visible here,
    confirm before labelling plots.
    """
    # Iterating a dict directly yields only its keys, so unpacking
    # ``topic, messages`` from it fails; iterate the (key, value) pairs.
    # Anything without ``.items()`` is assumed to already be such pairs.
    pairs = topics_dict.items() if hasattr(topics_dict, "items") else topics_dict

    records = []
    for topic, messages in pairs:
        sizes = messages["size"]
        timestamps = messages["timestamp"]

        total_data = np.sum(sizes)
        duration = np.max(timestamps) - np.min(timestamps)
        # Guard the division: a zero (or NaN, for empty input) duration
        # gives an undefined rate rather than inf plus a runtime warning.
        bandwidth = total_data / duration if duration > 0 else float("nan")

        records.append({
            "topic": topic,
            "count": len(messages),
            "total_data": total_data,
            "bandwidth": bandwidth,
            "min_size": np.min(sizes),
            "avg_size": np.mean(sizes),
            "max_size": np.max(sizes),
        })
    return pd.DataFrame.from_records(records)