Skip to content

Commit 0d3e307

Browse files
author
Nabil Miri
committed
Add Hamming Distance knn similarity metric for long property
+ tests
1 parent 255e450 commit 0d3e307

File tree

7 files changed

+163
-11
lines changed

7 files changed

+163
-11
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright (c) "Neo4j"
3+
* Neo4j Sweden AB [http://neo4j.com]
4+
*
5+
* This file is part of Neo4j.
6+
*
7+
* Neo4j is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
package org.neo4j.gds.similarity.knn.metrics;
21+
22+
import java.lang.Long;
23+
24+
/**
 * Similarity metric for {@code long} properties based on the Hamming distance
 * (https://en.wikipedia.org/wiki/Hamming_distance) between the 64-bit
 * representations of the two values.
 *
 * The distance (number of differing bits, 0..64) is turned into a similarity
 * in the 0..1 range with the linear transformation
 * {@code 1 - distance / 64}: identical values yield 1.0, values that differ
 * in every bit yield 0.0.
 */
public final class HammingDistance {
    private HammingDistance() {}

    /**
     * Computes the Hamming similarity of two {@code long} values.
     *
     * @param left  first value
     * @param right second value
     * @return {@code 1 - bitCount(left ^ right) / 64}, a value in [0, 1]
     */
    public static double longMetric(long left, long right) {
        // XOR leaves a 1 in every position where the operands disagree.
        int differingBits = Long.bitCount(left ^ right);
        // Divide as double so the result is not truncated by integer division.
        return 1.0 - differingBits / (double) Long.SIZE;
    }
}

algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/LongPropertySimilarityComputer.java

+4-6
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,20 @@
2424

2525
final class LongPropertySimilarityComputer implements SimilarityComputer {
2626
private final NodePropertyValues nodePropertyValues;
27+
private final LongPropertySimilarityMetric metric;
2728

28-
LongPropertySimilarityComputer(NodePropertyValues nodePropertyValues) {
29+
LongPropertySimilarityComputer(NodePropertyValues nodePropertyValues, LongPropertySimilarityMetric metric) {
2930
if (nodePropertyValues.valueType() != ValueType.LONG) {
3031
throw new IllegalArgumentException("The property is not of type LONG");
3132
}
3233
this.nodePropertyValues = nodePropertyValues;
34+
this.metric = metric;
3335
}
3436

3537
@Override
3638
public double similarity(long firstNodeId, long secondNodeId) {
3739
var left = nodePropertyValues.longValue(firstNodeId);
3840
var right = nodePropertyValues.longValue(secondNodeId);
39-
var abs = Math.abs(left - right);
40-
if (abs == Long.MIN_VALUE) {
41-
abs = Long.MAX_VALUE;
42-
}
43-
return 1.0 / (1.0 + abs);
41+
return metric.compute(left, right);
4442
}
4543
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* Copyright (c) "Neo4j"
3+
* Neo4j Sweden AB [http://neo4j.com]
4+
*
5+
* This file is part of Neo4j.
6+
*
7+
* Neo4j is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
package org.neo4j.gds.similarity.knn.metrics;
21+
22+
/**
 * Similarity metric for {@code long} properties derived from the absolute
 * difference of the two values: {@code 1 / (1 + |left - right|)}.
 *
 * Identical values yield 1.0 and the similarity decreases towards 0 as the
 * values move apart.
 */
public final class NormalizedAbsoluteDifference {
    private NormalizedAbsoluteDifference() {}

    /**
     * Computes {@code 1 / (1 + |left - right|)} without being affected by
     * {@code long} overflow in the subtraction.
     *
     * @param left  first value
     * @param right second value
     * @return a similarity in (0, 1]; 1.0 when both values are equal
     */
    public static double longMetric(long left, long right) {
        long high = Math.max(left, right);
        long low = Math.min(left, right);
        // high - low is always the correct magnitude when read as an unsigned
        // 64-bit number, even if the signed subtraction wraps around.
        long distance = high - low;
        // A negative signed value means the magnitude is >= 2^63; halve it with
        // an unsigned shift before widening so the conversion stays positive.
        double magnitude = distance >= 0 ? (double) distance : 2.0 * (distance >>> 1);
        return 1.0 / (1.0 + magnitude);
    }
}

algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/SimilarityComputer.java

+14-3
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,11 @@ static SimilarityComputer ofProperty(
7373
) {
7474
switch (properties.valueType()) {
7575
case LONG:
76-
return ofLongProperty(properties);
76+
return ofLongProperty(
77+
name,
78+
properties,
79+
defaultSimilarityMetric
80+
);
7781
case DOUBLE:
7882
return ofDoubleProperty(properties);
7983
case DOUBLE_ARRAY:
@@ -107,8 +111,15 @@ static SimilarityComputer ofDoubleProperty(NodePropertyValues nodePropertyValues
107111
return new DoublePropertySimilarityComputer(nodePropertyValues);
108112
}
109113

110-
static SimilarityComputer ofLongProperty(NodePropertyValues nodePropertyValues) {
111-
return new LongPropertySimilarityComputer(nodePropertyValues);
114+
static SimilarityComputer ofLongProperty(String name, NodePropertyValues properties, SimilarityMetric metric) {
115+
switch (metric) {
116+
case HAMMING_DISTANCE:
117+
return new LongPropertySimilarityComputer(properties, HammingDistance::longMetric);
118+
case NORMALIZED_ABSOLUTE_DIFFERENCE:
119+
return new LongPropertySimilarityComputer(properties, NormalizedAbsoluteDifference::longMetric);
120+
default:
121+
throw unsupportedSimilarityMetric(name, properties.valueType(), metric);
122+
}
112123
}
113124

114125
static SimilarityComputer ofFloatArrayProperty(String name, NodePropertyValues properties, SimilarityMetric metric) {

algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/SimilarityMetric.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
import static org.neo4j.gds.utils.StringFormatting.toUpperCaseWithLocale;
2626

2727
public enum SimilarityMetric {
28-
JACCARD, OVERLAP, COSINE, EUCLIDEAN, PEARSON, LONG_PROPERTY_METRIC, DOUBLE_PROPERTY_METRIC, DEFAULT;
28+
JACCARD, OVERLAP, COSINE, EUCLIDEAN, PEARSON,
29+
NORMALIZED_ABSOLUTE_DIFFERENCE, DOUBLE_PROPERTY_METRIC,
30+
HAMMING_DISTANCE, DEFAULT;
2931

3032
public static SimilarityMetric parse(String value) {
3133
return SimilarityMetric.valueOf(toUpperCaseWithLocale(value));
@@ -34,7 +36,7 @@ public static SimilarityMetric parse(String value) {
3436
public static SimilarityMetric defaultMetricForType(ValueType valueType) {
3537
switch (valueType) {
3638
case LONG:
37-
return LONG_PROPERTY_METRIC;
39+
return NORMALIZED_ABSOLUTE_DIFFERENCE;
3840
case DOUBLE:
3941
return DOUBLE_PROPERTY_METRIC;
4042
case DOUBLE_ARRAY:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright (c) "Neo4j"
3+
* Neo4j Sweden AB [http://neo4j.com]
4+
*
5+
* This file is part of Neo4j.
6+
*
7+
* Neo4j is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
package org.neo4j.gds.similarity.knn.metrics;
21+
22+
import org.junit.jupiter.api.Test;
23+
24+
import static org.junit.jupiter.api.Assertions.assertEquals;
25+
26+
class HammingDistanceTest {
27+
@Test
28+
void shouldReturnFullCorrelationWhenArgsAreIdentical() {
29+
double dist = HammingDistance.longMetric(12345L, 12345L);
30+
31+
assertEquals(1.0, dist);
32+
}
33+
34+
@Test
35+
void shouldReturnCorrectCorrelation() {
36+
double dist = HammingDistance.longMetric(12345L, 54321L);
37+
38+
assertEquals(1.0, dist);
39+
}
40+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Copyright (c) "Neo4j"
3+
* Neo4j Sweden AB [http://neo4j.com]
4+
*
5+
* This file is part of Neo4j.
6+
*
7+
* Neo4j is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
package org.neo4j.gds.similarity.knn.metrics;
21+
22+
import org.junit.jupiter.api.Test;
23+
24+
import static org.junit.jupiter.api.Assertions.assertEquals;
25+
26+
class NormalizedAbsoluteDifferenceTest {
27+
@Test
28+
void shouldComputeNormalizedAbsoluteDifference() {
29+
double diff = NormalizedAbsoluteDifference.longMetric(1L, 2L);
30+
31+
assertEquals(1.0, diff);
32+
}
33+
}

0 commit comments

Comments
 (0)