-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathevaluation.py
46 lines (41 loc) · 1.59 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# -*- coding: utf-8 -*-
from difflib import ndiff, unified_diff, context_diff
from os import listdir, mkdir
import shutil
from os.path import dirname, join, isfile
from underthesea.util.file_io import read, write
from models.chunking_1 import chunk
def load_input(input_file):
    """Read a test file and return its tokens joined into one string.

    The file content is stripped of surrounding whitespace and split into
    lines. If the first line starts with ``#`` it is dropped. The first
    tab-separated field of each remaining line is kept, and those fields
    are joined with single spaces.

    :param input_file: path of the file to read
    :return: the space-joined first fields; an empty string when the file
        is empty or contains only whitespace
    """
    lines = read(input_file).strip().split("\n")
    # startswith() is safe when the first line is empty (empty file);
    # indexing lines[0][0] would raise IndexError in that case.
    if lines[0].startswith("#"):
        lines = lines[1:]
    tokens = [line.split("\t")[0] for line in lines]
    return u" ".join(tokens)
def load_output(input_file):
    """Read a test file and return its rows without the leading ``#`` line.

    The file content is stripped of surrounding whitespace and split into
    lines. If the first line starts with ``#`` it is dropped; the remaining
    lines are joined back together with newlines, unchanged.

    :param input_file: path of the file to read
    :return: the remaining lines joined with ``"\\n"``; an empty string
        when the file is empty or contains only whitespace
    """
    lines = read(input_file).strip().split("\n")
    # startswith() is safe when the first line is empty (empty file);
    # indexing lines[0][0] would raise IndexError in that case.
    if lines[0].startswith("#"):
        lines = lines[1:]
    return "\n".join(lines)
def extract_sentence(content):
    """Build a ``# ``-prefixed line from the first field of every row.

    ``content`` is split on newlines; for each row the text before the
    first tab is kept (the whole row when it has no tab). The kept pieces
    are joined with single spaces and prefixed with ``"# "``.

    :param content: newline-separated rows of tab-separated fields
    :return: ``"# "`` followed by the space-joined first fields
    """
    words = []
    for row in content.split("\n"):
        # Everything before the first tab is the row's first field.
        word, _, _ = row.partition("\t")
        words.append(word)
    return "# " + " ".join(words)
if __name__ == '__main__':
    # Run the chunker on every file directly inside test_set/ and, for each
    # file whose result differs from the expected rows, write the actual
    # result and an ndiff report under test_set/<model_id>/.
    test_dir = join(dirname(__file__), "test_set")
    files = [f for f in listdir(test_dir) if isfile(join(test_dir, f))]
    model_id = "1"
    output_dir = join(test_dir, model_id)
    # Start from a clean result directory. ignore_errors tolerates a
    # directory that does not exist yet without also swallowing
    # KeyboardInterrupt/SystemExit the way a bare ``except`` would.
    shutil.rmtree(output_dir, ignore_errors=True)
    mkdir(output_dir)
    for f in files:
        test_file = join(test_dir, f)
        sentence = load_input(test_file)
        # Call the chunker once per file and reuse its result.
        actual = "\n".join(["\t".join(tokens) for tokens in chunk(sentence)])
        expected = load_output(test_file)
        if actual != expected:
            print("\n{}".format(f))
            diff = '\n'.join(ndiff(expected.splitlines(), actual.splitlines()))
            # Both report files start with the same "# sentence" line.
            header = extract_sentence(actual)
            write(join(output_dir, f), "\n".join([header, actual]))
            write(join(output_dir, f + ".diff"), "\n".join([header, diff]))