-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmsg_ck.py
More file actions
92 lines (78 loc) · 3.3 KB
/
msg_ck.py
File metadata and controls
92 lines (78 loc) · 3.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import MeCab
import numpy as np
from Logger import Logger
# Module-wide instance of the project-local Logger class; the code in this
# module uses it only for debug output via logger.debug(...).
logger = Logger()
class TextAnalysis:
    """Score how strongly a new text overlaps a list of reference texts.

    Every reference text is split into words with MeCab once, at
    construction time.  ``sentence_w`` then compares a target text against
    each reference text and turns the overlap statistics into a point value.
    """

    def __init__(self, text_list) -> None:
        """Tokenize every entry of *text_list* and cache per-text statistics.

        Args:
            text_list: The reference texts (strings) to compare against.
        """
        self.m = MeCab.Tagger("-Ochasen")
        # The reference texts themselves.
        self.text_list: list = text_list
        # Character length of each reference text.
        self.textlen_list: list = [len(text) for text in self.text_list]
        # Each reference text split into words.
        self.textc_list: list = [self._tokenize(text) for text in text_list]
        # Character length of every word, grouped per reference text.
        self.textclen_list: list = [
            [len(word) for word in words] for words in self.textc_list
        ]

    def _tokenize(self, text: str) -> list:
        """Return the first whitespace-separated field of each parser line."""
        words = [line.split()[0] for line in self.m.parse(text).splitlines()]
        # The final output line is dropped: in MeCab's ChaSen-style output it
        # is the "EOS" terminator rather than a word.  Slicing (instead of
        # ``del words[-1]``) also tolerates empty parser output.
        return words[:-1]

    @staticmethod
    def _matched_positions(words, target_words) -> list:
        """List target positions matched by *words*, in order of discovery.

        Each target position is reported at most once.  The outer loop walks
        the reference words, so the result is ordered by reference word
        first and by target position second.
        """
        matched = []
        seen = set()  # same content as ``matched``; O(1) membership test
        for word in words:
            for position, target_word in enumerate(target_words):
                if target_word == word and position not in seen:
                    matched.append(position)
                    seen.add(position)
        return matched

    @staticmethod
    def _run_score(positions) -> int:
        """Sum the squared lengths of runs of consecutive positions.

        A run continues while each position is exactly one greater than the
        previous one, e.g. ``[0, 1, 2, 5, 6]`` -> ``3**2 + 2**2 == 13``.
        An empty list scores 0.
        """
        if not positions:
            return 0
        score = 0
        run_length = 1
        for previous, current in zip(positions, positions[1:]):
            if current - previous == 1:
                run_length += 1
            else:
                score += run_length ** 2
                run_length = 1
        return score + run_length ** 2

    @staticmethod
    def _debug_percentiles(count: int) -> list:
        """Return 0, 10, 20, ... with one entry per text, capped at 100.

        ``np.percentile`` rejects values above 100, so without the cap twelve
        or more texts would raise ``ValueError``.
        """
        return list(range(0, min(count * 10, 101), 10))

    def sentence_w(self, target_text: str) -> int:
        """Return a point value for how much *target_text* repeats the texts.

        For every reference text, the words shared with *target_text* are
        located and scored by ``_run_score``.  Points are then awarded from
        the quartiles of those per-text scores.

        Args:
            target_text: The text to check against the reference texts.

        Returns:
            The accumulated points; 0 when there are no reference texts or
            when the threshold conditions are not met.
        """
        if not self.text_list:
            return 0

        # Extract the words that overlap with each reference text.
        target_words = self._tokenize(target_text)
        result_point = [
            self._run_score(self._matched_positions(words, target_words))
            for words in self.textc_list
        ]

        # Debug output only; none of these values influence the result.
        logger.debug(sorted(result_point))
        q_all = np.percentile(
            result_point, self._debug_percentiles(len(self.text_list)))
        logger.debug(self.textlen_list)
        logger.debug(list(q_all))

        # Convert the score distribution into points.
        q25, q50, q75 = np.percentile(result_point, [25, 50, 75])
        point = 0
        if len(self.text_list) >= 5 and q25 >= 1:
            # NOTE(review): q25 <= q50 <= q75 always holds, so these
            # differences are never positive.  Only the equality case can
            # match and the ``elif`` branches cannot fire.  The operands may
            # have been intended the other way round; behaviour is kept
            # unchanged here -- confirm before altering the scoring.
            if 0 <= q25 - q50 < 10:
                point += 60
            elif 0 <= q25 - q50 < 20:
                point += 40
            if 0 <= q25 - q75 < 10:
                point += 120
            elif 0 <= q25 - q75 < 20:
                point += 60
        return point