-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdistribution.py
More file actions
93 lines (78 loc) · 2.12 KB
/
distribution.py
File metadata and controls
93 lines (78 loc) · 2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import argparse
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from parser import get_prismo_entries
def plot_offset_distribution(
    df: pd.DataFrame,
    bins: int,
    output_file: str = 'png/offset_dist.png'
) -> None:
    """Plot a density histogram (with KDE overlay) of ``df['offset']``.

    The plot is drawn on a fresh figure, written to ``output_file`` as a
    300-dpi image, and the figure is closed afterwards.

    Args:
        df: Data frame that must contain an ``offset`` column.
        bins: Number of histogram bins, forwarded to ``sns.histplot``.
        output_file: Destination image path. Missing parent directories
            are created.

    Raises:
        KeyError: If ``df`` has no ``offset`` column.
    """
    offsets = df['offset']

    # savefig does not create missing directories, so make sure the
    # target folder exists before any plotting work is done.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    # Use a dedicated figure so the histogram never lands on axes left
    # open by an earlier, unrelated plot.
    fig = plt.figure()
    try:
        sns.histplot(
            offsets,
            bins=bins,
            stat="density",
            kde=True,
            kde_kws={"bw_adjust": 1.5},
        )

        plt.xlabel("Offset")
        plt.ylabel("Density")
        plt.title("Offsets Distribution")
        plt.grid(alpha=0.3)

        plt.tight_layout()
        plt.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)
def plot_column_distribution(
    df: pd.DataFrame,
    column: str,
    output_file: str = 'png/column_dist.png'
) -> None:
    """Plot how often each distinct value of ``df[column]`` occurs.

    Values are counted with ``Series.value_counts`` (which, by pandas'
    default, leaves out missing values), ordered by value, and drawn as
    a bar chart that is saved to ``output_file`` at 300 dpi.

    Args:
        df: Data frame holding the column to analyse.
        column: Name of the column whose value frequencies are plotted.
        output_file: Destination image path. Missing parent directories
            are created.

    Raises:
        KeyError: If ``column`` is not present in ``df``.
    """
    # Count first so that an unknown column fails before any directory
    # or figure is created.
    counts = df[column].value_counts().sort_index()

    # savefig does not create missing directories.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    fig = plt.figure(figsize=(10, 5))
    try:
        # Labels are stringified so the values are treated as categories.
        sns.barplot(x=counts.index.astype(str), y=counts.values)

        plt.xlabel(f'{column.upper()} Code')
        plt.ylabel('Frequency')
        plt.title(f'Frequency of {column.upper()} Codes')
        plt.grid(axis='y', alpha=0.3)

        plt.tight_layout()
        plt.savefig(output_file, dpi=300)
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)
def main() -> None:
    """Parse CLI arguments and write all distribution plots under ``png/``.

    Produces one density histogram of the ``offset`` column plus one
    value-frequency bar chart per column listed in ``columns``.
    """
    parser = argparse.ArgumentParser(
        prog='distribution',
        description='Distribution analysis',
    )

    parser.add_argument(
        '-i',
        '--input',
        type=str,
        required=True,
        help='prismo log file path'
    )

    parser.add_argument(
        '-b',
        '--bins',
        type=int,
        default=10,
        required=False,
        help='number of bins in offset distribution'
    )

    args = parser.parse_args()
    if args.bins < 1:
        # Fail with a usage message instead of an error deep inside plotting.
        parser.error('--bins must be a positive integer')

    df = get_prismo_entries(args.input)

    columns: list[str] = [
        'pid',
        'tid',
        'req',
        'proc',
        'offset',
        'ret',
        'errno',
        'type'
    ]

    # Every plot below is written into this folder.
    Path('png').mkdir(parents=True, exist_ok=True)

    # 'offset' is also in `columns`, so the loop below writes
    # png/offset_dist.png. The histogram gets its own file name so it is
    # not overwritten by the offset frequency chart.
    output_file = 'png/offset_hist.png'
    plot_offset_distribution(df, args.bins, output_file=output_file)
    print(f'Saved offset distribution plot to {output_file}')

    for column in columns:
        output_file = f'png/{column}_dist.png'
        plot_column_distribution(df, column, output_file=output_file)
        print(f'Saved {column} frequency plot to {output_file}')


if __name__ == '__main__':
    main()