Skip to content

Commit edfc9ca

Browse files
committed
fix: resolve all ruff lint and format violations
- Fix F821: add module-level numpy imports for type annotations
- Fix PLC0415: add noqa for intentional lazy imports inside functions
- Fix G004: convert f-string logging to lazy % formatting
- Fix NPY002: add noqa for legacy numpy random calls in benchmarks
- Fix ARG001/ARG002: prefix unused args with underscore
- Fix PTH123: use Path.open() instead of open()
- Fix I001: sort imports
- Exclude *.ipynb from ruff (demo/benchmark notebooks)

Signed-off-by: Maxime Grenu <maxime.grenu@gmail.com>
1 parent d3138f0 commit edfc9ca

20 files changed

+256
-278
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ exclude = [
189189
".git/",
190190
".venv/",
191191
"venv/",
192+
"*.ipynb",
192193
]
193194

194195
[tool.ruff.lint]

python/tests/test_backends.py

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -40,37 +40,37 @@ class TestGPUIndex:
4040

4141
def test_create_index(self):
4242
"""Test creating GPU index."""
43-
from zvec.backends.gpu import create_index
43+
from zvec.backends.gpu import create_index # noqa: PLC0415
4444

4545
index = create_index(dim=128, index_type="flat")
4646
assert index is not None
4747

4848
def test_add_vectors(self):
4949
"""Test adding vectors to index."""
50-
from zvec.backends.gpu import GPUIndex
50+
from zvec.backends.gpu import GPUIndex # noqa: PLC0415
5151

5252
index = GPUIndex(dim=128, index_type="flat")
53-
vectors = np.random.random((100, 128)).astype(np.float32)
53+
vectors = np.random.random((100, 128)).astype(np.float32) # noqa: NPY002
5454
index.add(vectors)
5555
assert index.ntotal == 100
5656

5757
def test_search(self):
5858
"""Test searching index."""
59-
from zvec.backends.gpu import GPUIndex
59+
from zvec.backends.gpu import GPUIndex # noqa: PLC0415
6060

6161
index = GPUIndex(dim=128, index_type="flat")
62-
vectors = np.random.random((100, 128)).astype(np.float32)
62+
vectors = np.random.random((100, 128)).astype(np.float32) # noqa: NPY002
6363
index.add(vectors)
6464

65-
query = np.random.random((5, 128)).astype(np.float32)
65+
query = np.random.random((5, 128)).astype(np.float32) # noqa: NPY002
6666
distances, indices = index.search(query, k=10)
6767

6868
assert distances.shape == (5, 10)
6969
assert indices.shape == (5, 10)
7070

7171
def test_fallback_to_cpu(self):
7272
"""Test CPU fallback."""
73-
from zvec.backends.gpu import GPUIndex
73+
from zvec.backends.gpu import GPUIndex # noqa: PLC0415
7474

7575
index = GPUIndex(dim=128, index_type="flat", use_gpu=False)
7676
assert not index.use_gpu
@@ -81,7 +81,7 @@ class TestQuantization:
8181

8282
def test_pq_encoder_init(self):
8383
"""Test PQ encoder initialization."""
84-
from zvec.backends.quantization import PQEncoder
84+
from zvec.backends.quantization import PQEncoder # noqa: PLC0415
8585

8686
encoder = PQEncoder(m=8, nbits=8, k=256)
8787
assert encoder.m == 8
@@ -90,10 +90,10 @@ def test_pq_encoder_init(self):
9090

9191
def test_pq_train(self):
9292
"""Test PQ training."""
93-
from zvec.backends.quantization import PQEncoder
93+
from zvec.backends.quantization import PQEncoder # noqa: PLC0415
9494

95-
np.random.seed(42)
96-
vectors = np.random.random((1000, 128)).astype(np.float32)
95+
np.random.seed(42) # noqa: NPY002
96+
vectors = np.random.random((1000, 128)).astype(np.float32) # noqa: NPY002
9797

9898
encoder = PQEncoder(m=8, nbits=8, k=256)
9999
encoder.train(vectors)
@@ -102,10 +102,10 @@ def test_pq_train(self):
102102

103103
def test_pq_encode_decode(self):
104104
"""Test PQ encode/decode."""
105-
from zvec.backends.quantization import PQEncoder
105+
from zvec.backends.quantization import PQEncoder # noqa: PLC0415
106106

107-
np.random.seed(42)
108-
vectors = np.random.random((100, 128)).astype(np.float32)
107+
np.random.seed(42) # noqa: NPY002
108+
vectors = np.random.random((100, 128)).astype(np.float32) # noqa: NPY002
109109

110110
encoder = PQEncoder(m=8, nbits=8, k=256)
111111
encoder.train(vectors)
@@ -118,15 +118,15 @@ def test_pq_encode_decode(self):
118118

119119
def test_pq_index(self):
120120
"""Test PQ index."""
121-
from zvec.backends.quantization import PQIndex
121+
from zvec.backends.quantization import PQIndex # noqa: PLC0415
122122

123-
np.random.seed(42)
124-
vectors = np.random.random((100, 128)).astype(np.float32)
123+
np.random.seed(42) # noqa: NPY002
124+
vectors = np.random.random((100, 128)).astype(np.float32) # noqa: NPY002
125125

126126
index = PQIndex(m=8, nbits=8, k=256)
127127
index.add(vectors)
128128

129-
query = np.random.random((5, 128)).astype(np.float32)
129+
query = np.random.random((5, 128)).astype(np.float32) # noqa: NPY002
130130
distances, indices = index.search(query, k=10)
131131

132132
assert distances.shape == (5, 10)
@@ -138,17 +138,17 @@ class TestOPQ:
138138

139139
def test_opq_encoder_init(self):
140140
"""Test OPQ encoder initialization."""
141-
from zvec.backends.opq import OPQEncoder
141+
from zvec.backends.opq import OPQEncoder # noqa: PLC0415
142142

143143
encoder = OPQEncoder(m=8, nbits=8, k=256)
144144
assert encoder.m == 8
145145

146146
def test_scalar_quantizer(self):
147147
"""Test scalar quantizer."""
148-
from zvec.backends.opq import ScalarQuantizer
148+
from zvec.backends.opq import ScalarQuantizer # noqa: PLC0415
149149

150-
np.random.seed(42)
151-
vectors = np.random.random((100, 128)).astype(np.float32)
150+
np.random.seed(42) # noqa: NPY002
151+
vectors = np.random.random((100, 128)).astype(np.float32) # noqa: NPY002
152152

153153
quantizer = ScalarQuantizer(bits=8)
154154
quantizer.train(vectors)
@@ -165,12 +165,12 @@ class TestSearchOptimization:
165165

166166
def test_adc(self):
167167
"""Test asymmetric distance computation."""
168-
from zvec.backends.search import asymmetric_distance_computation
168+
from zvec.backends.search import asymmetric_distance_computation # noqa: PLC0415
169169

170-
np.random.seed(42)
171-
queries = np.random.random((10, 128)).astype(np.float32)
172-
codes = np.random.randint(0, 256, (100, 8), dtype=np.uint8)
173-
distance_table = np.random.random((10, 8, 256)).astype(np.float32)
170+
np.random.seed(42) # noqa: NPY002
171+
queries = np.random.random((10, 128)).astype(np.float32) # noqa: NPY002
172+
codes = np.random.randint(0, 256, (100, 8), dtype=np.uint8) # noqa: NPY002
173+
distance_table = np.random.random((10, 8, 256)).astype(np.float32) # noqa: NPY002
174174

175175
distances = asymmetric_distance_computation(queries, codes, distance_table)
176176
assert distances.shape == (10, 100)
@@ -181,17 +181,17 @@ class TestHNSW:
181181

182182
def test_hnsw_creation(self):
183183
"""Test HNSW index creation."""
184-
from zvec.backends.hnsw import HNSWIndex
184+
from zvec.backends.hnsw import HNSWIndex # noqa: PLC0415
185185

186186
index = HNSWIndex(dim=128, M=16)
187187
assert index.dim == 128
188188

189189
def test_hnsw_add(self):
190190
"""Test adding vectors to HNSW."""
191-
from zvec.backends.hnsw import HNSWIndex
191+
from zvec.backends.hnsw import HNSWIndex # noqa: PLC0415
192192

193193
index = HNSWIndex(dim=128, M=8)
194-
vectors = np.random.random((50, 128)).astype(np.float32)
194+
vectors = np.random.random((50, 128)).astype(np.float32) # noqa: NPY002
195195
index.add(vectors)
196196

197197
# Basic check - just verify no error
@@ -203,27 +203,27 @@ class TestAppleSilicon:
203203

204204
def test_apple_silicon_detection(self):
205205
"""Test Apple Silicon detection."""
206-
from zvec.backends import apple_silicon
206+
from zvec.backends import apple_silicon # noqa: PLC0415
207207

208208
# Just verify functions exist and are callable
209209
assert callable(apple_silicon.is_apple_silicon)
210210
assert callable(apple_silicon.is_mps_available)
211211

212212
def test_apple_backend_init(self):
213213
"""Test Apple Silicon backend initialization."""
214-
from zvec.backends.apple_silicon import AppleSiliconBackend
214+
from zvec.backends.apple_silicon import AppleSiliconBackend # noqa: PLC0415
215215

216216
backend = AppleSiliconBackend(backend="numpy")
217217
assert backend.backend == "numpy"
218218

219219
def test_l2_distance(self):
220220
"""Test L2 distance computation."""
221-
from zvec.backends.apple_silicon import AppleSiliconBackend
221+
from zvec.backends.apple_silicon import AppleSiliconBackend # noqa: PLC0415
222222

223223
backend = AppleSiliconBackend(backend="numpy")
224224

225-
a = np.random.random((10, 128)).astype(np.float32)
226-
b = np.random.random((20, 128)).astype(np.float32)
225+
a = np.random.random((10, 128)).astype(np.float32) # noqa: NPY002
226+
b = np.random.random((20, 128)).astype(np.float32) # noqa: NPY002
227227

228228
distances = backend.l2_distance(a, b)
229229
assert distances.shape == (10, 20)
@@ -234,7 +234,7 @@ class TestDistributed:
234234

235235
def test_shard_manager(self):
236236
"""Test shard manager."""
237-
from zvec.backends.distributed import ShardManager
237+
from zvec.backends.distributed import ShardManager # noqa: PLC0415
238238

239239
manager = ShardManager(n_shards=4, strategy="hash")
240240
assert manager.n_shards == 4
@@ -244,18 +244,18 @@ def test_shard_manager(self):
244244

245245
def test_distributed_index(self):
246246
"""Test distributed index."""
247-
from zvec.backends.distributed import DistributedIndex
247+
from zvec.backends.distributed import DistributedIndex # noqa: PLC0415
248248

249249
index = DistributedIndex(n_shards=4)
250-
vectors = np.random.random((100, 128)).astype(np.float32)
250+
vectors = np.random.random((100, 128)).astype(np.float32) # noqa: NPY002
251251
vector_ids = [f"v_{i}" for i in range(100)]
252252

253253
index.add(vectors, vector_ids)
254254
assert 4 in index._local_indexes
255255

256256
def test_result_merger(self):
257257
"""Test result merging."""
258-
from zvec.backends.distributed import ResultMerger
258+
from zvec.backends.distributed import ResultMerger # noqa: PLC0415
259259

260260
results = [
261261
(np.array([1.0, 2.0]), np.array([0, 1])),

python/zvec/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525
# Public API — grouped by category
2626
# ==============================
2727

28-
from . import model as model
2928
from . import backends as backends
29+
from . import model as model
3030

3131
# —— Extensions ——
3232
from .extension import (

python/zvec/backends/apple_ane.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
# 3. Powers of 2 for batch/dim (≤16k)
2424
# 4. Fused ops (no separate layernorm)
2525
# 5. CNNs preferred over Transformers
26+
from __future__ import annotations
27+
28+
import numpy as np
2629

2730
ANE_OPTIMIZATION_TIPS = """
2831
# ANE Optimization Guide
@@ -53,19 +56,18 @@
5356
"""
5457

5558

56-
def estimate_ane_speedup(dim: int, batch_size: int = 1) -> float:
59+
def estimate_ane_speedup(dim: int, _batch_size: int = 1) -> float:
5760
"""Estimate ANE speedup based on paper.
58-
61+
5962
From Ben Brown 2023:
6063
- ANE 3x faster for small embeddings (dim ≤ 256)
6164
- Lags for large batch operations
6265
"""
6366
if dim <= 256:
6467
return 3.0
65-
elif dim <= 1024:
68+
if dim <= 1024:
6669
return 2.0
67-
else:
68-
return 1.0
70+
return 1.0
6971

7072

7173
def get_optimal_ane_config(dim: int) -> dict:
@@ -74,7 +76,7 @@ def get_optimal_ane_config(dim: int) -> dict:
7476
optimal_dim = 1
7577
while optimal_dim < dim:
7678
optimal_dim *= 2
77-
79+
7880
return {
7981
"original_dim": dim,
8082
"optimal_dim": optimal_dim,
@@ -85,58 +87,56 @@ def get_optimal_ane_config(dim: int) -> dict:
8587

8688
class ANEVectorEncoder:
8789
"""Vector encoder optimized for Apple Neural Engine."""
88-
90+
8991
def __init__(self, dim: int, batch_size: int = 1):
9092
"""Initialize ANE encoder.
91-
93+
9294
Args:
9395
dim: Embedding dimension.
9496
batch_size: Batch size for encoding.
9597
"""
9698
self.dim = dim
9799
self.batch_size = batch_size
98100
self.config = get_optimal_ane_config(dim)
99-
101+
100102
# Check ANE availability
101103
self.ane_available = self._check_ane()
102-
104+
103105
def _check_ane(self) -> bool:
104106
"""Check if ANE is available."""
105107
try:
106-
import torch
108+
import torch # noqa: PLC0415
109+
107110
return torch.backends.mps.is_available()
108111
except ImportError:
109112
return False
110-
111-
def encode(self, texts: list[str]) -> "np.ndarray":
113+
114+
def encode(self, texts: list[str]) -> np.ndarray:
112115
"""Encode texts to embeddings using ANE.
113-
116+
114117
This is a placeholder - actual implementation would use:
115118
1. BERT/DistilBERT model
116119
2. Core ML conversion
117120
3. ANE inference
118121
"""
119-
import numpy as np
120-
122+
import numpy as np # noqa: PLC0415
123+
121124
# Placeholder: random embeddings
122-
embeddings = np.random.randn(len(texts), self.dim).astype(np.float16)
123-
124-
return embeddings
125-
125+
return np.random.randn(len(texts), self.dim).astype(np.float16) # noqa: NPY002
126+
126127
def optimize_for_ane(self, model_path: str) -> str:
127128
"""Convert PyTorch model to Core ML for ANE.
128-
129+
129130
Args:
130131
model_path: Path to PyTorch model.
131-
132+
132133
Returns:
133134
Path to Core ML model.
134135
"""
135136
# This would use coremltools
136137
# import coremltools as ct
137138
# model = ct.convert(model_path)
138139
# model.save("embedding_model.mlpackage")
139-
pass
140140

141141

142142
# Reference from Apple ML Research:

0 commit comments

Comments (0)