Skip to content

Commit b525e73

Browse files
author
Johannes Heinecke
committed
output format, test data
1 parent 5b43f07 commit b525e73

File tree

3 files changed

+76
-13
lines changed

3 files changed

+76
-13
lines changed

src/main/java/com/orange/labs/comparison/Analyser.java

+16-9
Original file line numberDiff line numberDiff line change
@@ -213,24 +213,31 @@ private int calculateDistance(List<? extends Object> source, List<? extends Obje
213213
private void identical(String column, ConlluComparator.Signatures s1, ConlluComparator.Signatures s2) {
214214
System.out.format("%s identical\t%s\t%s\n", column, s1.id, s2.id);
215215
System.out.format("# %s\n", s1.sent);
216-
if (column.equals("LEMMA")) {
217-
System.out.format("# %s\n", s1.lemmas);
218-
} else if (column.equals("UPOS")) {
219-
System.out.format("# %s\n", s1.uposs);
220-
}
216+
printColumn(column, s1);
221217
}
222218

223219
private void similar(String column, int dist, ConlluComparator.Signatures s1, ConlluComparator.Signatures s2) {
224220
System.out.format("%s similar %d\t%s\t%s\n", column, dist, s1.id, s2.id);
225221
System.out.format("# %s\n", s1.sent);
222+
printColumn(column, s1);
226223
System.out.format("# %s\n", s2.sent);
224+
printColumn(column, s2);
225+
}
226+
227+
228+
private void printColumn(String column, ConlluComparator.Signatures sig) {
227229
if (column.equals("LEMMA")) {
228-
System.out.format("# %s\n", s1.lemmas);
229-
System.out.format("# %s\n", s2.lemmas);
230+
System.out.format("# Lemmas %s\n", String.join("\t", sig.lemmas));
230231
} else if (column.equals("UPOS")) {
231-
System.out.format("# %s\n", s1.uposs);
232-
System.out.format("# %s\n", s2.uposs);
232+
System.out.format("# Upos %s\n", String.join("\t", sig.uposs));
233+
} else if (column.equals("XPOS")) {
234+
System.out.format("# Xpos %s\n", String.join("\t", sig.xposs));
235+
} else if (column.equals("FEATS")) {
236+
System.out.format("# Feats %s\n", String.join("\t", sig.feats));
237+
} else if (column.equals("DEPREL")) {
238+
System.out.format("# Deprel %s\n", String.join("\t", sig.deprels));
233239
}
234240
}
235241

242+
236243
}

src/main/java/com/orange/labs/comparison/ConlluComparator.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ public ConlluComparator(List<? extends Object> objects, int numberOfThreads) thr
9797
public void analyse(int form, int lemma, int upos, int xpos, int feats, int deprel) throws InterruptedException {
9898
List<String> keys = Arrays.asList(csents.keySet().toArray(new String[0]));
9999
int comps = 0;
100-
for(int x = 1; x< keys.size()-1; ++x) comps += x;
100+
for(int x = 1; x< keys.size(); ++x) comps += x;
101101
System.err.println(comps + " comparisons needed");
102102
List<Thread> thrs = new ArrayList<>();
103103

@@ -145,9 +145,9 @@ public Signatures(ConllSentence cs, String id) {
145145
//forms.add(cw.getForm());
146146
if (cw.getTokentype() != ConllWord.Tokentype.CONTRACTED) {
147147
lemmas.add(cw.getLemma());
148-
uposs.add(cw.getLemma());
149-
xposs.add(cw.getLemma());
150-
deprels.add(cw.getLemma());
148+
uposs.add(cw.getUpostag());
149+
xposs.add(cw.getXpostag());
150+
deprels.add(cw.getDeplabel());
151151
feats.add(cw.getFeaturesStr());
152152
}
153153
}

src/test/resources/similar.conllu

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# sent_id = fr-ud-dev_00001
2+
# text = Aviator, un film sur la vie de Howard Hughes.
3+
# sentence 0
4+
1 Aviator Aviator PROPN _ _ 0 root _ SpaceAfter=No
5+
2 , , PUNCT _ _ 1 punct _ _
6+
3 un un DET _ Definite=Ind|Gender=Masc|Number=Sing|PronType=Art 4 det _ _
7+
4 film filM NOUN _ Gender=Masc|Number=Sing 1 appos _ _
8+
5 sur sur ADP _ _ 7 case _ _
9+
6 la le DET _ Definite=Def|Gender=Fem|Number=Sing|PronType=Art 7 det _ _
10+
7 vie vie NOUN _ Gender=Fem|Number=Sing 4 nmod _ _
11+
8 de de ADP _ _ 9 case _ _
12+
9 Howard Howard PROPN _ _ 7 nmod _ _
13+
10 Hughes Hughes PROPN _ _ 9 flat:name _ SpaceAfter=No
14+
11 . . PUNCT _ _ 1 punct _ _
15+
16+
# sent_id = fr-ud-dev_00001
17+
# text = Aviator, un film sur la vie de Howard Hughes.
18+
# sentence 1
19+
1 Aviator Aviator PROPN _ _ 0 root _ SpaceAfter=No
20+
2 , , SYM _ _ 1 punct _ _
21+
3 une le DET _ Definite=Indi|Gender=Masc|Number=Sing|PronType=Art 4 det _ _
22+
4 film film NOUN _ Gender=Masc|Number=Sing 1 appos:n _ _
23+
5 sur sur ADP _ _ 7 case _ _
24+
6 la le DET _ Definite=Def|Gender=Fem|Number=Sing|PronType=Art 7 det _ _
25+
7 vie vie NOUN _ Gender=Fem|Number=Sing 4 nmod _ _
26+
8 de de ADP _ _ 9 case _ _
27+
9 Howard Howard PROPN _ _ 7 nmod _ _
28+
10 Hughes Hughes PROPN _ _ 9 flat:name _ SpaceAfter=No
29+
11 . . PUNCT _ _ 1 punct _ _
30+
31+
# sent_id = fr-ud-dev_00003
32+
# text = Mais comment faire dans un contexte structurellement raciste ?
33+
# sentence 2
34+
1 Mais mais CCONJ _ _ 3 cc _ _
35+
2 comment comment ADV _ _ 3 advmod _ _
36+
3 faire faire VERB _ VerbForm=Inf 0 root _ _
37+
4 dans dans ADP _ _ 6 case _ _
38+
5 un un DET _ Definite=Ind|Gender=Masc|Number=Sing|PronType=Art 6 det _ _
39+
6 contexte contexte NOUN _ Gender=Masc|Number=Sing 3 obl _ _
40+
7 structurellement structurellement ADV _ _ 8 advmod _ _
41+
8 raciste raciste ADJ _ Gender=Masc|Number=Sing 6 amod _ _
42+
9 ? ? PUNCT _ _ 3 punct _ _
43+
44+
# sent_id = fr-ud-dev_00003
45+
# text = Mais comment faire dans un contexte structurellement raciste ?
46+
# sentence 3
47+
1 Mais mais SCONJ _ _ 3 cc _ _
48+
2 comment comment ADV _ _ 3 advmod _ _
49+
3 fair faire VERB _ VerbForm=Inf 0 root _ _
50+
4 dans DANS ADP _ _ 6 case _ _
51+
5 un un DET _ Definite=Ind|Gender=Masc|Number=Sing|PronType=Art 6 det _ _
52+
6 context contexte NOUN _ Gender=Masc|Number=Sing 3 obl:1 _ _
53+
7 structurellement structurellement ADV _ _ 8 advmod:2 _ _
54+
8 raciste raciste ADJ _ Gender=Masc|Number=Sing 6 amod _ _
55+
9 ? ?? SYM _ _ 3 punct _ _
56+

0 commit comments

Comments
 (0)