Skip to content

Commit d73dc1b

Browse files
author
Johannes Heinecke
committed
output
1 parent 6f604a3 commit d73dc1b

File tree

2 files changed

+17
-19
lines changed

2 files changed

+17
-19
lines changed

src/main/java/com/orange/labs/comparison/Analyser.java

+13-17
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,11 @@ public void run() {
7676
ConlluComparator.Signatures cursent = csents.get(keys.get(i));
7777
for (int j = i + 1; j < len; ++j) {
7878
if (j % totalthreads != modulo) {
79-
continue; // do only one sentence out of module
79+
continue; // do only one sentence out of totalthreads in this thread
8080
}
8181
//System.err.println("comparing " + i + " " + j);
8282
ConlluComparator.Signatures othersent = csents.get(keys.get(j));
83-
// compare
83+
8484
if (form == 0) {
8585
boolean rtc = cursent.sent.equals(othersent.sent);
8686
if (rtc) {
@@ -102,7 +102,6 @@ public void run() {
102102
}
103103
} else if (lemma > 0) {
104104
int dist = calculateDistance(cursent.lemmas, othersent.lemmas);
105-
//System.err.println("ZZZZZ " + dist + "\n" + cursent.lemmas + "\n"+ othersent.lemmas);
106105
if (dist == 0) {
107106
identical("LEMMA", cursent, othersent);
108107
} else if (dist <= lemma) {
@@ -117,7 +116,6 @@ public void run() {
117116
}
118117
} else if (upos > 0) {
119118
int dist = calculateDistance(cursent.uposs, othersent.uposs);
120-
//System.err.println("ZZZZZ " + dist + "\n" + cursent.lemmas + "\n"+ othersent.lemmas);
121119
if (dist == 0) {
122120
identical("UPOS", cursent, othersent);
123121
} else if (dist <= upos) {
@@ -213,28 +211,26 @@ private int calculateDistance(List<? extends Object> source, List<? extends Obje
213211

214212

215213
private void identical(String column, ConlluComparator.Signatures s1, ConlluComparator.Signatures s2) {
216-
System.err.format("%s identical\t%s\t%s\n", column, s1.id, s2.id);
217-
System.err.format("# %s\n", s1.sent);
214+
System.out.format("%s identical\t%s\t%s\n", column, s1.id, s2.id);
215+
System.out.format("# %s\n", s1.sent);
218216
if (column.equals("LEMMA")) {
219-
System.err.format("# %s\n", s1.lemmas);
217+
System.out.format("# %s\n", s1.lemmas);
220218
} else if (column.equals("UPOS")) {
221-
System.err.format("# %s\n", s1.uposs);
219+
System.out.format("# %s\n", s1.uposs);
222220
}
223-
224221
}
225222

226223
private void similar(String column, int dist, ConlluComparator.Signatures s1, ConlluComparator.Signatures s2) {
227-
System.err.format("%s similar %d\t%s\t%s\n", column, dist, s1.id, s2.id);
228-
System.err.format("# %s\n", s1.sent);
229-
System.err.format("# %s\n", s2.sent);
224+
System.out.format("%s similar %d\t%s\t%s\n", column, dist, s1.id, s2.id);
225+
System.out.format("# %s\n", s1.sent);
226+
System.out.format("# %s\n", s2.sent);
230227
if (column.equals("LEMMA")) {
231-
System.err.format("# %s\n", s1.lemmas);
232-
System.err.format("# %s\n", s2.lemmas);
228+
System.out.format("# %s\n", s1.lemmas);
229+
System.out.format("# %s\n", s2.lemmas);
233230
} else if (column.equals("UPOS")) {
234-
System.err.format("# %s\n", s1.uposs);
235-
System.err.format("# %s\n", s2.uposs);
231+
System.out.format("# %s\n", s1.uposs);
232+
System.out.format("# %s\n", s2.uposs);
236233
}
237-
238234
}
239235

240236
}

src/main/java/com/orange/labs/comparison/ConlluComparator.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,16 @@ public ConlluComparator(List<? extends Object> objects, int numberOfThreads) thr
8989

9090
/**
9191
* finds identical/similar sentences. comparing all sentences with all
92-
* others; $\sum_{i=0}^{i=n} i$ comparisons needed
92+
* others; $\sum_{i=1}^{i=n-1} i$ comparisons needed
9393
*
9494
* @param form: 0: identical, >0 maximal Levenshtein-Damerau distance on entire sentence (character level)
9595
* @param lemma: 0: identical, >0 maximal Levenshtein-Damerau distance on lemmas (token level)
9696
*/
9797
public void analyse(int form, int lemma, int upos, int xpos, int feats, int deprel) throws InterruptedException {
9898
List<String> keys = Arrays.asList(csents.keySet().toArray(new String[0]));
99-
99+
int comps = 0;
100+
for(int x = 1; x< keys.size()-1; ++x) comps += x;
101+
System.err.println(comps + " comparisons needed");
100102
List<Thread> thrs = new ArrayList<>();
101103

102104
for (int th = 0; th < numberOfThreads; ++th) {

0 commit comments

Comments
 (0)