diff --git a/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/HammingDistance.java b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/HammingDistance.java
new file mode 100644
index 00000000000..d22e44b46a1
--- /dev/null
+++ b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/HammingDistance.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) "Neo4j"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.gds.similarity.knn.metrics;
+
+/**
+ * Similarity metric for {@code long} values based on the Hamming distance
+ * (https://en.wikipedia.org/wiki/Hamming_distance) of their bit patterns.
+ * The distance is rescaled linearly into the [0, 1] range and inverted,
+ * so that 1.0 denotes identical values and 0.0 values differing in every bit.
+ */
+public final class HammingDistance {
+    /** Largest possible bit count: a Java long is 64 bits wide. */
+    private static final double MAX_BIT_COUNT = Long.SIZE;
+
+    private HammingDistance() {}
+
+    /**
+     * Scores two longs by the number of bit positions in which they differ.
+     *
+     * The bit count x is scaled with unity-based normalization,
+     * y = (x - min(x)) / (max(x) - min(x)), see https://stats.stackexchange.com/a/70807.
+     * Here min(x) = 0, because a bit count cannot be negative, and max(x) = 64.
+     * The scaled value is subtracted from 1.0 so that 1.0 means most similar
+     * and 0.0 least similar.
+     *
+     * @param left first value
+     * @param right second value
+     * @return similarity in [0, 1]
+     */
+    public static double longMetric(long left, long right) {
+        int differingBits = Long.bitCount(left ^ right);
+        return 1.0 - differingBits / MAX_BIT_COUNT;
+    }
+}
diff --git a/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/LongPropertySimilarityComputer.java b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/LongPropertySimilarityComputer.java
index 34f814d9481..fc402030ec7 100644
--- a/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/LongPropertySimilarityComputer.java
+++ b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/LongPropertySimilarityComputer.java
@@ -24,22 +24,20 @@
final class LongPropertySimilarityComputer implements SimilarityComputer {
private final NodePropertyValues nodePropertyValues;
+ private final LongPropertySimilarityMetric metric; // scoring function applied to the two property values in similarity()
- LongPropertySimilarityComputer(NodePropertyValues nodePropertyValues) {
+ LongPropertySimilarityComputer(NodePropertyValues nodePropertyValues, LongPropertySimilarityMetric metric) {
if (nodePropertyValues.valueType() != ValueType.LONG) {
throw new IllegalArgumentException("The property is not of type LONG");
}
this.nodePropertyValues = nodePropertyValues;
+ this.metric = metric; // stored as given; no null check is performed here
}
@Override
public double similarity(long firstNodeId, long secondNodeId) {
var left = nodePropertyValues.longValue(firstNodeId);
var right = nodePropertyValues.longValue(secondNodeId);
- var abs = Math.abs(left - right);
- if (abs == Long.MIN_VALUE) {
- abs = Long.MAX_VALUE;
- }
- return 1.0 / (1.0 + abs);
+ return metric.compute(left, right); // the formula is no longer hard-coded; the configured metric decides the score
}
}
diff --git a/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/LongPropertySimilarityMetric.java b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/LongPropertySimilarityMetric.java
new file mode 100644
index 00000000000..035c773fd1e
--- /dev/null
+++ b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/LongPropertySimilarityMetric.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) "Neo4j"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.gds.similarity.knn.metrics;
+
+/**
+ * Strategy for scoring how similar two {@code long} values are.
+ *
+ * The interface has a single abstract method, so an implementation can be
+ * supplied as a lambda or a method reference.
+ */
+@FunctionalInterface
+interface LongPropertySimilarityMetric {
+    /**
+     * Scores the similarity of the two given values.
+     *
+     * @param left first value
+     * @param right second value
+     * @return the similarity score of the pair
+     */
+    double compute(long left, long right);
+}
diff --git a/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/NormalizedAbsoluteDifference.java b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/NormalizedAbsoluteDifference.java
new file mode 100644
index 00000000000..4f3dc6dc5ca
--- /dev/null
+++ b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/NormalizedAbsoluteDifference.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) "Neo4j"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.gds.similarity.knn.metrics;
+
+/**
+ * Similarity metric for {@code long} values: {@code 1 / (1 + |left - right|)}.
+ * Equal values score 1.0 and the score approaches 0.0 as the values move apart.
+ */
+public final class NormalizedAbsoluteDifference {
+    private NormalizedAbsoluteDifference() {}
+
+    /**
+     * Computes the similarity of two longs from their absolute difference.
+     *
+     * The difference is evaluated without signed overflow, so pairs that are more than
+     * {@code Long.MAX_VALUE} apart (e.g. Long.MAX_VALUE and Long.MIN_VALUE) are scored as
+     * far apart instead of wrapping around to a small distance.
+     *
+     * @param left first value
+     * @param right second value
+     * @return similarity in (0, 1]; 1.0 if and only if both values are equal
+     */
+    public static double longMetric(long left, long right) {
+        // Larger minus smaller may wrap as a signed long, but the resulting bit pattern,
+        // read as an unsigned 64-bit number, is always the exact distance.
+        long distance = Math.max(left, right) - Math.min(left, right);
+        return 1.0 / (1.0 + unsignedToDouble(distance));
+    }
+
+    /** Converts a long that holds an unsigned 64-bit quantity to a double. */
+    private static double unsignedToDouble(long value) {
+        if (value >= 0) {
+            return value;
+        }
+        // Top bit set: halve (keeping the dropped bit sticky so rounding is not skewed),
+        // convert, then double again.
+        return ((value >>> 1) | (value & 1L)) * 2.0;
+    }
+}
diff --git a/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/SimilarityComputer.java b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/SimilarityComputer.java
index e43d0e6f4c5..9a5b4b9dd59 100644
--- a/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/SimilarityComputer.java
+++ b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/SimilarityComputer.java
@@ -73,7 +73,11 @@ static SimilarityComputer ofProperty(
) {
switch (properties.valueType()) {
case LONG:
- return ofLongProperty(properties);
+ return ofLongProperty(
+ name,
+ properties,
+ defaultSimilarityMetric
+ );
case DOUBLE:
return ofDoubleProperty(properties);
case DOUBLE_ARRAY:
@@ -107,8 +111,15 @@ static SimilarityComputer ofDoubleProperty(NodePropertyValues nodePropertyValues
return new DoublePropertySimilarityComputer(nodePropertyValues);
}
- static SimilarityComputer ofLongProperty(NodePropertyValues nodePropertyValues) {
- return new LongPropertySimilarityComputer(nodePropertyValues);
+ static SimilarityComputer ofLongProperty(String name, NodePropertyValues properties, SimilarityMetric metric) { // name is only used when reporting an unsupported metric
+ switch (metric) { // choose the scoring function for the requested metric
+ case HAMMING_DISTANCE:
+ return new LongPropertySimilarityComputer(properties, HammingDistance::longMetric);
+ case NORMALIZED_ABSOLUTE_DIFFERENCE:
+ return new LongPropertySimilarityComputer(properties, NormalizedAbsoluteDifference::longMetric);
+ default: // any other metric, including a DEFAULT that reaches this point unresolved, is rejected
+ throw unsupportedSimilarityMetric(name, properties.valueType(), metric);
+ }
}
static SimilarityComputer ofFloatArrayProperty(String name, NodePropertyValues properties, SimilarityMetric metric) {
diff --git a/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/SimilarityMetric.java b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/SimilarityMetric.java
index 74ceb2da6ca..e0d14b67f0c 100644
--- a/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/SimilarityMetric.java
+++ b/algo/src/main/java/org/neo4j/gds/similarity/knn/metrics/SimilarityMetric.java
@@ -25,7 +25,9 @@
import static org.neo4j.gds.utils.StringFormatting.toUpperCaseWithLocale;
public enum SimilarityMetric {
- JACCARD, OVERLAP, COSINE, EUCLIDEAN, PEARSON, LONG_PROPERTY_METRIC, DOUBLE_PROPERTY_METRIC, DEFAULT;
+ JACCARD, OVERLAP, COSINE, EUCLIDEAN, PEARSON,
+ NORMALIZED_ABSOLUTE_DIFFERENCE, DOUBLE_PROPERTY_METRIC,
+ HAMMING_DISTANCE, DEFAULT;
public static SimilarityMetric parse(String value) {
return SimilarityMetric.valueOf(toUpperCaseWithLocale(value));
@@ -34,7 +36,7 @@ public static SimilarityMetric parse(String value) {
public static SimilarityMetric defaultMetricForType(ValueType valueType) {
switch (valueType) {
case LONG:
- return LONG_PROPERTY_METRIC;
+ return NORMALIZED_ABSOLUTE_DIFFERENCE;
case DOUBLE:
return DOUBLE_PROPERTY_METRIC;
case DOUBLE_ARRAY:
diff --git a/algo/src/test/java/org/neo4j/gds/similarity/filteredknn/FilteredGenerateRandomNeighborsTest.java b/algo/src/test/java/org/neo4j/gds/similarity/filteredknn/FilteredGenerateRandomNeighborsTest.java
index ae133135e22..6f19ca5b571 100644
--- a/algo/src/test/java/org/neo4j/gds/similarity/filteredknn/FilteredGenerateRandomNeighborsTest.java
+++ b/algo/src/test/java/org/neo4j/gds/similarity/filteredknn/FilteredGenerateRandomNeighborsTest.java
@@ -68,7 +68,7 @@ public long size() {
idMap,
"myProperty",
nodeProperties,
- SimilarityMetric.LONG_PROPERTY_METRIC
+ SimilarityMetric.NORMALIZED_ABSOLUTE_DIFFERENCE
);
var random = new SplittableRandom();
diff --git a/algo/src/test/java/org/neo4j/gds/similarity/knn/GenerateRandomNeighborsTest.java b/algo/src/test/java/org/neo4j/gds/similarity/knn/GenerateRandomNeighborsTest.java
index b71807b7c21..b20f686e586 100644
--- a/algo/src/test/java/org/neo4j/gds/similarity/knn/GenerateRandomNeighborsTest.java
+++ b/algo/src/test/java/org/neo4j/gds/similarity/knn/GenerateRandomNeighborsTest.java
@@ -67,7 +67,7 @@ public long size() {
idMap,
"myProperty",
nodeProperties,
- SimilarityMetric.LONG_PROPERTY_METRIC
+ SimilarityMetric.NORMALIZED_ABSOLUTE_DIFFERENCE
);
var random = new SplittableRandom();
diff --git a/algo/src/test/java/org/neo4j/gds/similarity/knn/metrics/HammingDistanceTest.java b/algo/src/test/java/org/neo4j/gds/similarity/knn/metrics/HammingDistanceTest.java
new file mode 100644
index 00000000000..ee334ac7ced
--- /dev/null
+++ b/algo/src/test/java/org/neo4j/gds/similarity/knn/metrics/HammingDistanceTest.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) "Neo4j"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.gds.similarity.knn.metrics;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/** Unit tests for {@link HammingDistance#longMetric(long, long)}. */
+class HammingDistanceTest {
+    @Test
+    void shouldReturnFullCorrelationWhenArgsAreIdentical() {
+        double similarity = HammingDistance.longMetric(12345L, 12345L);
+
+        assertEquals(1.0, similarity);
+    }
+
+    @Test
+    void shouldReturnCorrectCorrelation() {
+        // 12345 ^ 54321 has 5 bits set, hence 1 - 5/64
+        double similarity = HammingDistance.longMetric(12345L, 54321L);
+
+        assertEquals(0.921875, similarity);
+    }
+
+    @Test
+    void shouldReturnZeroWhenAllBitsDiffer() {
+        // 0 and ~0 (all ones) disagree in each of the 64 bit positions
+        double similarity = HammingDistance.longMetric(0L, ~0L);
+
+        assertEquals(0.0, similarity);
+    }
+}
diff --git a/algo/src/test/java/org/neo4j/gds/similarity/knn/metrics/NormalizedAbsoluteDifferenceTest.java b/algo/src/test/java/org/neo4j/gds/similarity/knn/metrics/NormalizedAbsoluteDifferenceTest.java
new file mode 100644
index 00000000000..36480b8772c
--- /dev/null
+++ b/algo/src/test/java/org/neo4j/gds/similarity/knn/metrics/NormalizedAbsoluteDifferenceTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) "Neo4j"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.gds.similarity.knn.metrics;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/** Unit tests for {@link NormalizedAbsoluteDifference#longMetric(long, long)}. */
+class NormalizedAbsoluteDifferenceTest {
+    @Test
+    void shouldComputeNormalizedAbsoluteDifference() {
+        double similarity = NormalizedAbsoluteDifference.longMetric(1L, 2L);
+
+        assertEquals(0.5, similarity);
+    }
+
+    @Test
+    void shouldReturnOneForIdenticalValues() {
+        assertEquals(1.0, NormalizedAbsoluteDifference.longMetric(42L, 42L));
+    }
+
+    @Test
+    void shouldNotDependOnArgumentOrder() {
+        assertEquals(
+            NormalizedAbsoluteDifference.longMetric(-7L, 3L),
+            NormalizedAbsoluteDifference.longMetric(3L, -7L)
+        );
+    }
+
+    @Test
+    void shouldHandleDifferenceOfLongMinValue() {
+        // Long.MIN_VALUE - 0 cannot be negated as a long; the metric must still yield a valid score
+        double similarity = NormalizedAbsoluteDifference.longMetric(Long.MIN_VALUE, 0L);
+
+        assertEquals(1.0 / (1.0 + Long.MAX_VALUE), similarity);
+    }
+}
diff --git a/algo/src/test/java/org/neo4j/gds/similarity/knn/metrics/SimilarityComputerTest.java b/algo/src/test/java/org/neo4j/gds/similarity/knn/metrics/SimilarityComputerTest.java
index 7bec193e48a..3fa1ea8b033 100644
--- a/algo/src/test/java/org/neo4j/gds/similarity/knn/metrics/SimilarityComputerTest.java
+++ b/algo/src/test/java/org/neo4j/gds/similarity/knn/metrics/SimilarityComputerTest.java
@@ -66,16 +66,22 @@ void doublePropertySimilarityReturnsValuesBetween0And1(@ForAll @From("differentV
}
@Property
- void longPropertySimilarityReturns1ForEqualValues(@ForAll @Positive long id) {
+ void longPropertySimilarityReturns1ForEqualValues(
+ @ForAll @Positive long id,
+ @ForAll @From("longMetrics") SimilarityMetric similarityMetric // drawn from the "longMetrics" provider, so the property is checked per metric
+ ) {
NodePropertyValues props = new LongTestPropertyValues(nodeId -> nodeId);
- var sim = SimilarityComputer.ofLongProperty(props);
+ var sim = SimilarityComputer.ofLongProperty("", props, similarityMetric); // "" is passed as the property name argument
assertThat(sim.similarity(id, id)).isEqualTo(1.0);
}
@Property
- void longPropertySimilarityReturnsValuesBetween0And1(@ForAll @From("differentValues") LongLongPair ids) {
+ void longPropertySimilarityReturnsValuesBetween0And1(
+ @ForAll @From("differentValues") LongLongPair ids,
+ @ForAll @From("longMetrics") SimilarityMetric similarityMetric // drawn from the "longMetrics" provider, so the property is checked per metric
+ ) {
NodePropertyValues props = new LongTestPropertyValues(nodeId -> nodeId);
- var sim = SimilarityComputer.ofLongProperty(props);
+ var sim = SimilarityComputer.ofLongProperty("", props, similarityMetric); // NOTE(review): with HAMMING_DISTANCE a pair whose 64-bit patterns are exact complements scores 0.0 and would fail the strict bound below; confirm "differentValues" cannot generate such pairs
assertThat(sim.similarity(ids.getOne(), ids.getTwo())).isStrictlyBetween(0.0, 1.0);
}
@@ -305,6 +311,11 @@ final Arbitrary differentValues() {
.map(n2 -> PrimitiveTuples.pair((long) n1, (long) n2)));
}
+    @Provide("longMetrics") // the two metrics that SimilarityComputer.ofLongProperty accepts
+    final Arbitrary<SimilarityMetric> longMetrics() {
+        return Arbitraries.of(SimilarityMetric.NORMALIZED_ABSOLUTE_DIFFERENCE, SimilarityMetric.HAMMING_DISTANCE);
+    }
+
@Provide("longArrayMetrics")
final Arbitrary longArrayMetrics() {
return Arbitraries.of(SimilarityMetric.JACCARD, SimilarityMetric.OVERLAP);