Skip to content

Commit 6f604a3

Browse files
author
Johannes Heinecke
committed
clean up
1 parent eeee529 commit 6f604a3

File tree

4 files changed

+11
-177
lines changed

4 files changed

+11
-177
lines changed

Diff for: CHANGES.md

+2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# Changes
2+
## Version 2.15.0
3+
* added a script to find similar or identical sentences in one or more CoNLL-U files
24

35
## Version 2.14.3
46
* updated the dependency versions in `pom.xml` to the latest available versions

Diff for: pom.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@
3232
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3333
3434
author Johannes Heinecke
35-
version 2.14.3 as of 4th January 2022
35+
version 2.15.0 as of 6th February 2022
3636
-->
3737

3838
<modelVersion>4.0.0</modelVersion>
3939
<groupId>com.orange.labs</groupId>
4040
<artifactId>ConlluEditor</artifactId>
41-
<version>2.14.3</version>
41+
<version>2.15.0</version>
4242
<packaging>jar</packaging>
4343

4444
<properties>

Diff for: src/main/java/com/orange/labs/comparison/Analyser.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -169,13 +169,13 @@ public void run() {
169169
}
170170
}
171171
}
172-
172+
173173
// inspired by https://github.com/crwohlfeil/damerau-levenshtein
174174
/**
175175
* calculate the Damerau-Levenshtein distance between two lists of objects (characters or strings)
176-
* @param source
176+
* @param source
177177
* @param target
178-
* @return
178+
* @return
179179
*/
180180
private int calculateDistance(List<? extends Object> source, List<? extends Object> target) {
181181
//if (source == null || target == null) {

Diff for: src/main/java/com/orange/labs/comparison/ConlluComparator.java

+4-172
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public ConlluComparator(List<? extends Object> objects, int numberOfThreads) thr
9696
*/
9797
public void analyse(int form, int lemma, int upos, int xpos, int feats, int deprel) throws InterruptedException {
9898
List<String> keys = Arrays.asList(csents.keySet().toArray(new String[0]));
99-
99+
100100
List<Thread> thrs = new ArrayList<>();
101101

102102
for (int th = 0; th < numberOfThreads; ++th) {
@@ -105,180 +105,12 @@ public void analyse(int form, int lemma, int upos, int xpos, int feats, int depr
105105
thr.start();
106106
thrs.add(thr);
107107
}
108-
108+
109109
for(Thread thr : thrs) {
110110
thr.join();
111111
}
112-
112+
113113
}
114-
// public void ooanalyse(int form, int lemma, int upos, int xpos, int feats, int deprel) {
115-
//
116-
// List<String> keys = Arrays.asList(csents.keySet().toArray(new String[0]));
117-
// int len = keys.size();
118-
// for (int i = 0; i < len; ++i) {
119-
// System.err.println("Checking " + i);
120-
// Signatures cursent = csents.get(keys.get(i));
121-
// for (int j = i + 1; j < len; ++j) {
122-
// //System.err.println("comparing " + i + " " + j);
123-
// Signatures othersent = csents.get(keys.get(j));
124-
// // compare
125-
// if (form == 0) {
126-
// boolean rtc = cursent.sent.equals(othersent.sent);
127-
// if (rtc) {
128-
// identical("FORM", cursent, othersent);
129-
// }
130-
// } else if (form > 0) {
131-
// int dist = calculateDistance(cursent.sentence, othersent.sentence);
132-
// if (dist == 0) {
133-
// identical("FORM", cursent, othersent);
134-
// } else if (dist <= form) {
135-
// similar("FORM", dist, cursent, othersent);
136-
// }
137-
// }
138-
//
139-
// if (lemma == 0) {
140-
// boolean rtc = cursent.lemmas.equals(othersent.lemmas);
141-
// if (rtc) {
142-
// identical("LEMMA", cursent, othersent);
143-
// }
144-
// } else if (lemma > 0) {
145-
// int dist = calculateDistance(cursent.lemmas, othersent.lemmas);
146-
// //System.err.println("ZZZZZ " + dist + "\n" + cursent.lemmas + "\n"+ othersent.lemmas);
147-
// if (dist == 0) {
148-
// identical("LEMMA", cursent, othersent);
149-
// } else if (dist <= lemma) {
150-
// similar("LEMMA", dist, cursent, othersent);
151-
// }
152-
// }
153-
//
154-
// if (upos == 0) {
155-
// boolean rtc = cursent.uposs.equals(othersent.uposs);
156-
// if (rtc) {
157-
// identical("UPOS", cursent, othersent);
158-
// }
159-
// } else if (upos > 0) {
160-
// int dist = calculateDistance(cursent.uposs, othersent.uposs);
161-
// //System.err.println("ZZZZZ " + dist + "\n" + cursent.lemmas + "\n"+ othersent.lemmas);
162-
// if (dist == 0) {
163-
// identical("UPOS", cursent, othersent);
164-
// } else if (dist <= upos) {
165-
// similar("UPOS", dist, cursent, othersent);
166-
// }
167-
// }
168-
//
169-
// if (xpos == 0) {
170-
// boolean rtc = cursent.xposs.equals(othersent.xposs);
171-
// if (rtc) {
172-
// identical("XPOS", cursent, othersent);
173-
// }
174-
// } else if (xpos > 0) {
175-
// int dist = calculateDistance(cursent.xposs, othersent.xposs);
176-
// if (dist == 0) {
177-
// identical("XPOS", cursent, othersent);
178-
// } else if (dist <= xpos) {
179-
// similar("XPOS", dist, cursent, othersent);
180-
// }
181-
// }
182-
//
183-
//
184-
// if (feats == 0) {
185-
// boolean rtc = cursent.feats.equals(othersent.feats);
186-
// if (rtc) {
187-
// identical("FEATS", cursent, othersent);
188-
// }
189-
// } else if (feats > 0) {
190-
// int dist = calculateDistance(cursent.feats, othersent.feats);
191-
// if (dist == 0) {
192-
// identical("FEATS", cursent, othersent);
193-
// } else if (dist <= feats) {
194-
// similar("FEATS", dist, cursent, othersent);
195-
// }
196-
// }
197-
//
198-
//
199-
// if (deprel == 0) {
200-
// boolean rtc = cursent.deprels.equals(othersent.deprels);
201-
// if (rtc) {
202-
// identical("DEPREL", cursent, othersent);
203-
// }
204-
// } else if (deprel > 0) {
205-
// int dist = calculateDistance(cursent.deprels, othersent.deprels);
206-
// if (dist == 0) {
207-
// identical("DEPREL", cursent, othersent);
208-
// } else if (dist <= deprel) {
209-
// similar("DEPREL", dist, cursent, othersent);
210-
// }
211-
// }
212-
// }
213-
// }
214-
// }
215-
//
216-
// private void identical(String column, Signatures s1, Signatures s2) {
217-
// System.err.format("%s identical\t%s\t%s\n", column, s1.id, s2.id);
218-
// System.err.format("# %s\n", s1.sent);
219-
// if (column.equals("LEMMA")) {
220-
// System.err.format("# %s\n", s1.lemmas);
221-
// }
222-
// else if (column.equals("UPOS")) {
223-
// System.err.format("# %s\n", s1.uposs);
224-
// }
225-
//
226-
// }
227-
//
228-
// private void similar(String column, int dist, Signatures s1, Signatures s2) {
229-
// System.err.format("%s similar %d\t%s\t%s\n", column, dist, s1.id, s2.id);
230-
// System.err.format("# %s\n", s1.sent);
231-
// System.err.format("# %s\n", s2.sent);
232-
// if (column.equals("LEMMA")) {
233-
// System.err.format("# %s\n", s1.lemmas);
234-
// System.err.format("# %s\n", s2.lemmas);
235-
// } else if (column.equals("UPOS")) {
236-
// System.err.format("# %s\n", s1.uposs);
237-
// System.err.format("# %s\n", s2.uposs);
238-
// }
239-
//
240-
// }
241-
//
242-
// // inspired by https://github.com/crwohlfeil/damerau-levenshtein
243-
// /**
244-
// * calculate the levenshtein-damerau distance between two lists of objects (characters or strings)
245-
// * @param source
246-
// * @param target
247-
// * @return
248-
// */
249-
// private int calculateDistance(List<? extends Object> source, List<? extends Object> target) {
250-
// //if (source == null || target == null) {
251-
// // throw new IllegalArgumentException("Parameter must not be null");
252-
// //}
253-
// int sourceLength = source.size();
254-
// int targetLength = target.size();
255-
// if (sourceLength == 0) {
256-
// return targetLength;
257-
// }
258-
// if (targetLength == 0) {
259-
// return sourceLength;
260-
// }
261-
// int[][] dist = new int[sourceLength + 1][targetLength + 1];
262-
// for (int i = 0; i < sourceLength + 1; i++) {
263-
// dist[i][0] = i;
264-
// }
265-
// for (int j = 0; j < targetLength + 1; j++) {
266-
// dist[0][j] = j;
267-
// }
268-
// for (int i = 1; i < sourceLength + 1; i++) {
269-
// for (int j = 1; j < targetLength + 1; j++) {
270-
// int cost = source.get(i - 1).equals(target.get(j - 1)) ? 0 : 1;
271-
// dist[i][j] = Math.min(Math.min(dist[i - 1][j] + 1, dist[i][j - 1] + 1), dist[i - 1][j - 1] + cost);
272-
// if (i > 1
273-
// && j > 1
274-
// && source.get(i - 1).equals(target.get(j - 2))
275-
// && source.get(i - 2).equals(target.get(j - 1))) {
276-
// dist[i][j] = Math.min(dist[i][j], dist[i - 2][j - 2] + cost);
277-
// }
278-
// }
279-
// }
280-
// return dist[sourceLength][targetLength];
281-
// }
282114

283115
class Signatures {
284116

@@ -348,7 +180,7 @@ public static void main(String args[]) {
348180
}
349181
List<String>argl = new ArrayList<>(Arrays.asList(args));
350182
argl.remove(0);
351-
argl.remove(0);
183+
argl.remove(0);
352184
ConlluComparator cc = new ConlluComparator(argl, numberOfThreads);
353185

354186
cc.analyse(forms, lemmas, upos, xpos, feats, deprels);

0 commit comments

Comments
 (0)