Skip to content

Commit 563681b

Browse files
author
cureprotocols
committed
Refactor: add type hints, doctests, and naming fixes for bot compliance
1 parent be82ae4 commit 563681b

File tree

2 files changed

+59
-28
lines changed

2 files changed

+59
-28
lines changed

data_structures/disjoint_set/union_find.py

+41 -15
Original file line number | Diff line number | Diff line change
@@ -13,38 +13,64 @@
1313

1414

1515
class UnionFind:
16-
def __init__(self, size):
16+
def __init__(self, size: int) -> None:
17+
"""
18+
Initializes a Union-Find data structure with `size` elements.
19+
20+
>>> uf = UnionFind(5)
21+
>>> uf.find(0)
22+
0
23+
"""
1724
self.parent = list(range(size))
1825
self.rank = [0] * size
1926

20-
def find(self, node):
27+
def find(self, node: int) -> int:
28+
"""
29+
Finds the representative/root of the set that `node` belongs to.
30+
31+
>>> uf = UnionFind(5)
32+
>>> uf.find(3)
33+
3
34+
"""
2135
if self.parent[node] != node:
2236
self.parent[node] = self.find(self.parent[node]) # Path compression
2337
return self.parent[node]
2438

25-
def union(self, x, y):
26-
rootX = self.find(x)
27-
rootY = self.find(y)
28-
29-
if rootX == rootY:
39+
def union(self, a: int, b: int) -> bool:
40+
"""
41+
Unites the sets that contain elements `a` and `b`.
42+
43+
>>> uf = UnionFind(5)
44+
>>> uf.union(0, 1)
45+
True
46+
>>> uf.find(1) == uf.find(0)
47+
True
48+
>>> uf.union(0, 1)
49+
False
50+
"""
51+
root_a = self.find(a)
52+
root_b = self.find(b)
53+
54+
if root_a == root_b:
3055
return False # Already connected
3156

3257
# Union by rank
33-
if self.rank[rootX] < self.rank[rootY]:
34-
self.parent[rootX] = rootY
35-
elif self.rank[rootX] > self.rank[rootY]:
36-
self.parent[rootY] = rootX
58+
if self.rank[root_a] < self.rank[root_b]:
59+
self.parent[root_a] = root_b
60+
elif self.rank[root_a] > self.rank[root_b]:
61+
self.parent[root_b] = root_a
3762
else:
38-
self.parent[rootY] = rootX
39-
self.rank[rootX] += 1
63+
self.parent[root_b] = root_a
64+
self.rank[root_a] += 1
4065

4166
return True
4267

4368

44-
# Example usage
4569
if __name__ == "__main__":
46-
uf = UnionFind(10)
70+
import doctest
71+
doctest.testmod()
4772

73+
uf = UnionFind(10)
4874
uf.union(1, 2)
4975
uf.union(2, 3)
5076
uf.union(4, 5)

searches/reservoir_sampling.py

+18 -13
Original file line number | Diff line number | Diff line change
@@ -2,48 +2,53 @@
22
Reservoir Sampling Algorithm
33
44
Use Case:
5-
Efficient for selecting k random items from a data stream of unknown size,
5+
Efficient for selecting `sample_size` random items from a data stream of unknown size,
66
or when the entire dataset cannot fit into memory.
77
88
Time Complexity:
99
- O(n), where n is the total number of items
10-
- Space Complexity: O(k)
10+
- Space Complexity: O(sample_size)
1111
1212
Author: Michael Alexander Montoya
1313
"""
1414

1515
import random
16+
from typing import Iterable
1617

1718

18-
def reservoir_sampling(stream, k):
19+
def reservoir_sampling(stream: Iterable[int], sample_size: int) -> list[int]:
1920
"""
2021
Performs reservoir sampling on a stream of items.
2122
2223
Args:
2324
stream: An iterable data stream.
24-
k: Number of items to sample.
25+
sample_size: Number of items to sample.
2526
2627
Returns:
27-
A list containing k randomly sampled items from the stream.
28-
"""
28+
A list containing `sample_size` randomly sampled items from the stream.
2929
30+
>>> result = reservoir_sampling(range(1, 1001), 10)
31+
>>> len(result) == 10
32+
True
33+
"""
3034
reservoir = []
3135

3236
for i, item in enumerate(stream):
33-
if i < k:
37+
if i < sample_size:
3438
reservoir.append(item)
3539
else:
3640
j = random.randint(0, i)
37-
if j < k:
41+
if j < sample_size:
3842
reservoir[j] = item
3943

4044
return reservoir
4145

4246

43-
# Example usage
4447
if __name__ == "__main__":
45-
stream_data = range(1, 1001) # Simulate a stream of numbers from 1 to 1000
46-
sample_size = 10
48+
import doctest
49+
50+
doctest.testmod()
4751

48-
sample = reservoir_sampling(stream_data, sample_size)
49-
print(f"Random sample of {sample_size} items from stream: {sample}")
52+
stream_data = range(1, 1001)
53+
sample = reservoir_sampling(stream_data, 10)
54+
print(f"Sampled items: {sample}")

0 commit comments

Comments
 (0)