32
32
*/
33
33
package com .orange .labs .comparison ;
34
34
35
+ import static java .lang .Math .abs ;
35
36
import java .util .ArrayList ;
36
37
import java .util .LinkedHashMap ;
37
38
import java .util .List ;
@@ -97,7 +98,7 @@ public void run() {
97
98
identical ("FORM" , cursent , othersent );
98
99
}
99
100
} else if (form > 0 ) {
100
- int dist = calculateDistance (cursent .sentence , othersent .sentence );
101
+ int dist = calculateDistance (cursent .sentence , othersent .sentence , form );
101
102
if (dist == 0 ) {
102
103
identical ("FORM" , cursent , othersent );
103
104
} else if (dist <= form ) {
@@ -111,7 +112,7 @@ public void run() {
111
112
identical ("LEMMA" , cursent , othersent );
112
113
}
113
114
} else if (lemma > 0 ) {
114
- int dist = calculateDistance (cursent .lemmas , othersent .lemmas );
115
+ int dist = calculateDistance (cursent .lemmas , othersent .lemmas , lemma );
115
116
if (dist == 0 ) {
116
117
identical ("LEMMA" , cursent , othersent );
117
118
} else if (dist <= lemma ) {
@@ -125,7 +126,7 @@ public void run() {
125
126
identical ("UPOS" , cursent , othersent );
126
127
}
127
128
} else if (upos > 0 ) {
128
- int dist = calculateDistance (cursent .uposs , othersent .uposs );
129
+ int dist = calculateDistance (cursent .uposs , othersent .uposs , upos );
129
130
if (dist == 0 ) {
130
131
identical ("UPOS" , cursent , othersent );
131
132
} else if (dist <= upos ) {
@@ -139,7 +140,7 @@ public void run() {
139
140
identical ("XPOS" , cursent , othersent );
140
141
}
141
142
} else if (xpos > 0 ) {
142
- int dist = calculateDistance (cursent .xposs , othersent .xposs );
143
+ int dist = calculateDistance (cursent .xposs , othersent .xposs , xpos );
143
144
if (dist == 0 ) {
144
145
identical ("XPOS" , cursent , othersent );
145
146
} else if (dist <= xpos ) {
@@ -153,7 +154,7 @@ public void run() {
153
154
identical ("FEATS" , cursent , othersent );
154
155
}
155
156
} else if (feats > 0 ) {
156
- int dist = calculateDistance (cursent .feats , othersent .feats );
157
+ int dist = calculateDistance (cursent .feats , othersent .feats , feats );
157
158
if (dist == 0 ) {
158
159
identical ("FEATS" , cursent , othersent );
159
160
} else if (dist <= feats ) {
@@ -167,7 +168,7 @@ public void run() {
167
168
identical ("DEPREL" , cursent , othersent );
168
169
}
169
170
} else if (deprel > 0 ) {
170
- int dist = calculateDistance (cursent .deprels , othersent .deprels );
171
+ int dist = calculateDistance (cursent .deprels , othersent .deprels , deprel );
171
172
if (dist == 0 ) {
172
173
identical ("DEPREL" , cursent , othersent );
173
174
} else if (dist <= deprel ) {
@@ -182,16 +183,20 @@ public void run() {
182
183
// inspired by https://github.com/crwohlfeil/damerau-levenshtein
183
184
/**
184
185
* Calculate the Damerau-Levenshtein distance between two lists of objects (characters or strings).
186
+ * Note: levenshtein_distance(a,b) >= |len(a) - len(b)|, so a length difference above maxdist allows an early exit.
185
187
* @param source
186
188
* @param target
187
189
* @return
188
190
*/
189
- private int calculateDistance (List <? extends Object > source , List <? extends Object > target ) {
191
+ private int calculateDistance (List <? extends Object > source , List <? extends Object > target , int maxdist ) {
190
192
//if (source == null || target == null) {
191
193
// throw new IllegalArgumentException("Parameter must not be null");
192
194
//}
193
195
int sourceLength = source .size ();
194
196
int targetLength = target .size ();
197
+ // if the lengths of the two sentences differ by more than maxdist, we stop here and return that difference (a lower bound on the distance)
198
+ if (abs (sourceLength - targetLength ) > maxdist ) return abs (sourceLength - targetLength );
199
+
195
200
if (sourceLength == 0 ) {
196
201
return targetLength ;
197
202
}
0 commit comments