@@ -96,7 +96,7 @@ public ConlluComparator(List<? extends Object> objects, int numberOfThreads) thr
96
96
*/
97
97
public void analyse (int form , int lemma , int upos , int xpos , int feats , int deprel ) throws InterruptedException {
98
98
List <String > keys = Arrays .asList (csents .keySet ().toArray (new String [0 ]));
99
-
99
+
100
100
List <Thread > thrs = new ArrayList <>();
101
101
102
102
for (int th = 0 ; th < numberOfThreads ; ++th ) {
@@ -105,180 +105,12 @@ public void analyse(int form, int lemma, int upos, int xpos, int feats, int depr
105
105
thr .start ();
106
106
thrs .add (thr );
107
107
}
108
-
108
+
109
109
for (Thread thr : thrs ) {
110
110
thr .join ();
111
111
}
112
-
112
+
113
113
}
114
- // public void ooanalyse(int form, int lemma, int upos, int xpos, int feats, int deprel) {
115
- //
116
- // List<String> keys = Arrays.asList(csents.keySet().toArray(new String[0]));
117
- // int len = keys.size();
118
- // for (int i = 0; i < len; ++i) {
119
- // System.err.println("Checking " + i);
120
- // Signatures cursent = csents.get(keys.get(i));
121
- // for (int j = i + 1; j < len; ++j) {
122
- // //System.err.println("comparing " + i + " " + j);
123
- // Signatures othersent = csents.get(keys.get(j));
124
- // // compare
125
- // if (form == 0) {
126
- // boolean rtc = cursent.sent.equals(othersent.sent);
127
- // if (rtc) {
128
- // identical("FORM", cursent, othersent);
129
- // }
130
- // } else if (form > 0) {
131
- // int dist = calculateDistance(cursent.sentence, othersent.sentence);
132
- // if (dist == 0) {
133
- // identical("FORM", cursent, othersent);
134
- // } else if (dist <= form) {
135
- // similar("FORM", dist, cursent, othersent);
136
- // }
137
- // }
138
- //
139
- // if (lemma == 0) {
140
- // boolean rtc = cursent.lemmas.equals(othersent.lemmas);
141
- // if (rtc) {
142
- // identical("LEMMA", cursent, othersent);
143
- // }
144
- // } else if (lemma > 0) {
145
- // int dist = calculateDistance(cursent.lemmas, othersent.lemmas);
146
- // //System.err.println("ZZZZZ " + dist + "\n" + cursent.lemmas + "\n"+ othersent.lemmas);
147
- // if (dist == 0) {
148
- // identical("LEMMA", cursent, othersent);
149
- // } else if (dist <= lemma) {
150
- // similar("LEMMA", dist, cursent, othersent);
151
- // }
152
- // }
153
- //
154
- // if (upos == 0) {
155
- // boolean rtc = cursent.uposs.equals(othersent.uposs);
156
- // if (rtc) {
157
- // identical("UPOS", cursent, othersent);
158
- // }
159
- // } else if (upos > 0) {
160
- // int dist = calculateDistance(cursent.uposs, othersent.uposs);
161
- // //System.err.println("ZZZZZ " + dist + "\n" + cursent.lemmas + "\n"+ othersent.lemmas);
162
- // if (dist == 0) {
163
- // identical("UPOS", cursent, othersent);
164
- // } else if (dist <= upos) {
165
- // similar("UPOS", dist, cursent, othersent);
166
- // }
167
- // }
168
- //
169
- // if (xpos == 0) {
170
- // boolean rtc = cursent.xposs.equals(othersent.xposs);
171
- // if (rtc) {
172
- // identical("XPOS", cursent, othersent);
173
- // }
174
- // } else if (xpos > 0) {
175
- // int dist = calculateDistance(cursent.xposs, othersent.xposs);
176
- // if (dist == 0) {
177
- // identical("XPOS", cursent, othersent);
178
- // } else if (dist <= xpos) {
179
- // similar("XPOS", dist, cursent, othersent);
180
- // }
181
- // }
182
- //
183
- //
184
- // if (feats == 0) {
185
- // boolean rtc = cursent.feats.equals(othersent.feats);
186
- // if (rtc) {
187
- // identical("FEATS", cursent, othersent);
188
- // }
189
- // } else if (feats > 0) {
190
- // int dist = calculateDistance(cursent.feats, othersent.feats);
191
- // if (dist == 0) {
192
- // identical("FEATS", cursent, othersent);
193
- // } else if (dist <= feats) {
194
- // similar("FEATS", dist, cursent, othersent);
195
- // }
196
- // }
197
- //
198
- //
199
- // if (deprel == 0) {
200
- // boolean rtc = cursent.deprels.equals(othersent.deprels);
201
- // if (rtc) {
202
- // identical("DEPREL", cursent, othersent);
203
- // }
204
- // } else if (deprel > 0) {
205
- // int dist = calculateDistance(cursent.deprels, othersent.deprels);
206
- // if (dist == 0) {
207
- // identical("DEPREL", cursent, othersent);
208
- // } else if (dist <= deprel) {
209
- // similar("DEPREL", dist, cursent, othersent);
210
- // }
211
- // }
212
- // }
213
- // }
214
- // }
215
- //
216
- // private void identical(String column, Signatures s1, Signatures s2) {
217
- // System.err.format("%s identical\t%s\t%s\n", column, s1.id, s2.id);
218
- // System.err.format("# %s\n", s1.sent);
219
- // if (column.equals("LEMMA")) {
220
- // System.err.format("# %s\n", s1.lemmas);
221
- // }
222
- // else if (column.equals("UPOS")) {
223
- // System.err.format("# %s\n", s1.uposs);
224
- // }
225
- //
226
- // }
227
- //
228
- // private void similar(String column, int dist, Signatures s1, Signatures s2) {
229
- // System.err.format("%s similar %d\t%s\t%s\n", column, dist, s1.id, s2.id);
230
- // System.err.format("# %s\n", s1.sent);
231
- // System.err.format("# %s\n", s2.sent);
232
- // if (column.equals("LEMMA")) {
233
- // System.err.format("# %s\n", s1.lemmas);
234
- // System.err.format("# %s\n", s2.lemmas);
235
- // } else if (column.equals("UPOS")) {
236
- // System.err.format("# %s\n", s1.uposs);
237
- // System.err.format("# %s\n", s2.uposs);
238
- // }
239
- //
240
- // }
241
- //
242
- // // inspired by https://github.com/crwohlfeil/damerau-levenshtein
243
- // /**
244
- // * calculate the levenshtein-damerau distance between two lists of objects (characters or strings)
245
- // * @param source
246
- // * @param target
247
- // * @return
248
- // */
249
- // private int calculateDistance(List<? extends Object> source, List<? extends Object> target) {
250
- // //if (source == null || target == null) {
251
- // // throw new IllegalArgumentException("Parameter must not be null");
252
- // //}
253
- // int sourceLength = source.size();
254
- // int targetLength = target.size();
255
- // if (sourceLength == 0) {
256
- // return targetLength;
257
- // }
258
- // if (targetLength == 0) {
259
- // return sourceLength;
260
- // }
261
- // int[][] dist = new int[sourceLength + 1][targetLength + 1];
262
- // for (int i = 0; i < sourceLength + 1; i++) {
263
- // dist[i][0] = i;
264
- // }
265
- // for (int j = 0; j < targetLength + 1; j++) {
266
- // dist[0][j] = j;
267
- // }
268
- // for (int i = 1; i < sourceLength + 1; i++) {
269
- // for (int j = 1; j < targetLength + 1; j++) {
270
- // int cost = source.get(i - 1).equals(target.get(j - 1)) ? 0 : 1;
271
- // dist[i][j] = Math.min(Math.min(dist[i - 1][j] + 1, dist[i][j - 1] + 1), dist[i - 1][j - 1] + cost);
272
- // if (i > 1
273
- // && j > 1
274
- // && source.get(i - 1).equals(target.get(j - 2))
275
- // && source.get(i - 2).equals(target.get(j - 1))) {
276
- // dist[i][j] = Math.min(dist[i][j], dist[i - 2][j - 2] + cost);
277
- // }
278
- // }
279
- // }
280
- // return dist[sourceLength][targetLength];
281
- // }
282
114
283
115
class Signatures {
284
116
@@ -348,7 +180,7 @@ public static void main(String args[]) {
348
180
}
349
181
List <String >argl = new ArrayList <>(Arrays .asList (args ));
350
182
argl .remove (0 );
351
- argl .remove (0 );
183
+ argl .remove (0 );
352
184
ConlluComparator cc = new ConlluComparator (argl , numberOfThreads );
353
185
354
186
cc .analyse (forms , lemmas , upos , xpos , feats , deprels );
0 commit comments