Filter extreme outliers before plotting the boxplots
This commit is contained in:
parent
9bf91d654d
commit
50d5dac71c
1 changed file with 24 additions and 8 deletions
|
@ -44,11 +44,18 @@ def main():
|
||||||
for idx, (col, color) in enumerate(zip(['Mean', 'Std', 'Min', 'Max', 'Count'], colors)):
|
for idx, (col, color) in enumerate(zip(['Mean', 'Std', 'Min', 'Max', 'Count'], colors)):
|
||||||
ax = axs[idx]
|
ax = axs[idx]
|
||||||
|
|
||||||
|
# Prepare the data for the current column
|
||||||
|
current_plot_data = plot_data[col].dropna()
|
||||||
|
# Remove outliers for better visualization
|
||||||
|
filtered_plot_data = current_plot_data[current_plot_data.between(current_plot_data.quantile(.03), current_plot_data.quantile(0.97))]
|
||||||
|
|
||||||
|
filtered_count = current_plot_data.count() - filtered_plot_data.count()
|
||||||
|
|
||||||
# Create boxplots
|
# Create boxplots
|
||||||
sns.boxplot(data=plot_data[col], ax=ax, color=color, showfliers=True, width=0.4)
|
sns.boxplot(data=filtered_plot_data, ax=ax, color=color, showfliers=False, width=0.4) # type: ignore
|
||||||
|
|
||||||
# Add individual data points
|
# Add individual data points
|
||||||
sns.swarmplot(data=plot_data[col], ax=ax, color='black', size=3, alpha=0.6)
|
sns.swarmplot(data=filtered_plot_data, ax=ax, color='black', size=3, alpha=0.6) # type: ignore
|
||||||
|
|
||||||
# Set labels and title
|
# Set labels and title
|
||||||
ax.set_title(f'{col} Distribution', fontsize=14, fontweight='bold')
|
ax.set_title(f'{col} Distribution', fontsize=14, fontweight='bold')
|
||||||
|
@ -56,13 +63,22 @@ def main():
|
||||||
ax.set_xlabel('') # No x-label needed
|
ax.set_xlabel('') # No x-label needed
|
||||||
ax.set_ylabel('Latency (ms)' if col != 'Count' else 'Count', fontsize=12)
|
ax.set_ylabel('Latency (ms)' if col != 'Count' else 'Count', fontsize=12)
|
||||||
|
|
||||||
# Calculate statistics of the statistics
|
# Calculate statistics of the statistics - here based on the original data with outliers!
|
||||||
data_values = plot_data[col]
|
data_values = plot_data[col]
|
||||||
|
first_line_length = len(f"Mean: {data_values.mean():.2f}")
|
||||||
|
second_line_length = len(f"Std: {data_values.std():.2f}")
|
||||||
|
third_line_length = len(f"Min: {data_values.min():.2f}")
|
||||||
|
fourth_line_length = len(f"Max: {data_values.max():.2f}")
|
||||||
|
fivth_line_length = len(f"Filtered: {filtered_count}")
|
||||||
|
max_length = max(first_line_length, second_line_length, third_line_length, fourth_line_length, fivth_line_length) + 1
|
||||||
|
# Prepare the text for the legend
|
||||||
|
|
||||||
stats_text = (
|
stats_text = (
|
||||||
f"Mean: {data_values.mean():.2f}\n"
|
f"Mean:{' ' * (max_length - first_line_length)}{data_values.mean():.2f}\n"
|
||||||
f"Std: {data_values.std():.2f}\n"
|
f"Std:{' ' * (max_length - second_line_length)}{data_values.std():.2f}\n"
|
||||||
f"Min: {data_values.min():.2f}\n"
|
f"Min:{' ' * (max_length - third_line_length)}{data_values.min():.2f}\n"
|
||||||
f"Max: {data_values.max():.2f}"
|
f"Max:{' ' * (max_length - fourth_line_length)}{data_values.max():.2f}\n"
|
||||||
|
f"Filtered:{' ' * (max_length - fivth_line_length)}{filtered_count}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- Place legend in the top right using axes fraction coordinates ---
|
# --- Place legend in the top right using axes fraction coordinates ---
|
||||||
|
@ -73,6 +89,7 @@ def main():
|
||||||
verticalalignment='top',
|
verticalalignment='top',
|
||||||
horizontalalignment='right',
|
horizontalalignment='right',
|
||||||
fontsize=10,
|
fontsize=10,
|
||||||
|
fontfamily='monospace',
|
||||||
bbox=dict(facecolor='white', alpha=0.9, boxstyle='round,pad=0.3', edgecolor='gray')
|
bbox=dict(facecolor='white', alpha=0.9, boxstyle='round,pad=0.3', edgecolor='gray')
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -86,7 +103,6 @@ def main():
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save the figure with a filename that includes the chain name
|
# Save the figure with a filename that includes the chain name
|
||||||
plt.tight_layout()
|
|
||||||
output_file = args.input.replace('.csv', f'_chain_{chain_name_fs}_analysis.png')
|
output_file = args.input.replace('.csv', f'_chain_{chain_name_fs}_analysis.png')
|
||||||
plt.savefig(output_file, dpi=300)
|
plt.savefig(output_file, dpi=300)
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue