Merge pull request #1107 from lmcinnes/bit_hamming

lmcinnes · web-flow · commit 9e299ebb64bd · 2024-03-29T23:28:48.000Z
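Bit hamming: validate input as dense uint8 (instead of float32/CSR) in fit, transform and update when the metric is bit_hamming or bit_jaccard; add a bit_jaccard entry (value 1) next to jaccard in the metric table; bump the version from 0.5.5 to 0.5.6; replace the Python 3.8 CI jobs with Python 3.12 jobs and update the classifiers to list 3.9 through 3.12.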
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -27,15 +27,6 @@ stages:
       - job: run_platform_tests
         strategy:
           matrix:
-            mac_py38:
-              imageName: 'macOS-latest'
-              python.version: '3.8'
-            linux_py38:
-              imageName: 'ubuntu-latest'
-              python.version: '3.8'
-            windows_py38:
-              imageName: 'windows-latest'
-              python.version: '3.8'
             mac_py39:
               imageName: 'macOS-latest'
               python.version: '3.9'
@@ -63,7 +54,16 @@ stages:
             windows_py311:
               imageName: 'windows-latest'
               python.version: '3.11'
-              
+            mac_py312:
+              imageName: 'macOS-latest'
+              python.version: '3.12'
+            linux_py312:
+              imageName: 'ubuntu-latest'
+              python.version: '3.12'
+            windows_py312:
+              imageName: 'windows-latest'
+              python.version: '3.12'
+
         pool:
           vmImage: $(imageName)
 
diff --git a/setup.py b/setup.py
@@ -16,7 +16,7 @@ def readme():
 
 configuration = {
     "name": "umap-learn",
-    "version": "0.5.5",
+    "version": "0.5.6",
     "description": "Uniform Manifold Approximation and Projection",
     "long_description": readme(),
     "long_description_content_type": "text/x-rst",
@@ -33,10 +33,10 @@ def readme():
         "Operating System :: POSIX",
         "Operating System :: Unix",
         "Operating System :: MacOS",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
     ],
     "keywords": "dimension reduction t-sne manifold",
     "url": "http://github.com/lmcinnes/umap",
diff --git a/umap/umap_.py b/umap/umap_.py
@@ -63,6 +63,7 @@
     "cosine": 2,
     "hellinger": 1,
     "jaccard": 1,
+    "bit_jaccard": 1,
     "dice": 1,
 }
 
@@ -2351,8 +2352,10 @@ def fit(self, X, y=None, force_all_finite=True):
                                    - 'allow-nan': accepts only np.nan and pd.NA values in array.
                                      Values cannot be infinite.
         """
-
-        X = check_array(X, dtype=np.float32, accept_sparse="csr", order="C", force_all_finite=force_all_finite)
+        if self.metric in ("bit_hamming", "bit_jaccard"):
+            X = check_array(X, dtype=np.uint8, order="C", force_all_finite=force_all_finite)
+        else:
+            X = check_array(X, dtype=np.float32, accept_sparse="csr", order="C", force_all_finite=force_all_finite)
         self._raw_data = X
 
         # Handle all the optional arguments, setting default
@@ -2926,7 +2929,10 @@ def transform(self, X, force_all_finite=True):
                 "Transform unavailable when model was fit with only a single data sample."
             )
         # If we just have the original input then short circuit things
-        X = check_array(X, dtype=np.float32, accept_sparse="csr", order="C", force_all_finite=force_all_finite)
+        if self.metric in ("bit_hamming", "bit_jaccard"):
+            X = check_array(X, dtype=np.uint8, order="C", force_all_finite=force_all_finite)
+        else:
+            X = check_array(X, dtype=np.float32, accept_sparse="csr", order="C", force_all_finite=force_all_finite)
         x_hash = joblib.hash(X)
         if x_hash == self._input_hash:
             if self.transform_mode == "embedding":
@@ -3297,7 +3303,10 @@ def _output_dist_only(x, y, *kwds):
         return inv_transformed_points
 
     def update(self, X, force_all_finite=True):
-        X = check_array(X, dtype=np.float32, accept_sparse="csr", order="C", force_all_finite=force_all_finite)
+        if self.metric in ("bit_hamming", "bit_jaccard"):
+            X = check_array(X, dtype=np.uint8, order="C", force_all_finite=force_all_finite)
+        else:
+            X = check_array(X, dtype=np.float32, accept_sparse="csr", order="C", force_all_finite=force_all_finite)
         random_state = check_random_state(self.transform_seed)
         rng_state = random_state.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)