Skip to content

Commit 17d7275

Browse files
Zikenkeon
authored andcommitted
Add b-tree (keon#501)
* Add huffman coding (encode and decode file), add tests for huffman coding * Add b-tree algorithm (insertion and deletion), add tests for b-tree
1 parent 47f6013 commit 17d7275

File tree

3 files changed

+294
-0
lines changed

3 files changed

+294
-0
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ If you want to uninstall algorithms, it is as simple as:
342342
- [trie](algorithms/tree/trie)
343343
- [add_and_search](algorithms/tree/trie/add_and_search.py)
344344
- [trie](algorithms/tree/trie/trie.py)
345+
- [b_tree](algorithms/tree/b_tree.py)
345346
- [binary_tree_paths](algorithms/tree/binary_tree_paths.py)
346347
- [bin_tree_to_list](algorithms/tree/bin_tree_to_list.py)
347348
- [deepest_left](algorithms/tree/deepest_left.py)

algorithms/tree/b_tree.py

+235
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
"""
2+
A B-tree is designed for disk-based storage operations. Each node (except the root) contains
3+
at least t-1 keys (t children) and at most 2*t - 1 keys (2*t children)
4+
where t is the minimum degree of the B-tree. It is not a typical binary search tree, because
5+
this tree grows upward: its height increases only when the root is split.
6+
A B-tree is perfectly balanced, which means that all leaves are at the same depth.
7+
8+
Complexity
9+
n - number of elements
10+
t - degree of tree
11+
The tree always has height at most log_t((n + 1) / 2)
12+
Algorithm Average Worst case
13+
Space O(n) O(n)
14+
Search O(log n) O(log n)
15+
Insert O(log n) O(log n)
16+
Delete O(log n) O(log n)
17+
"""
18+
19+
20+
class Node:
    """A single B-tree node: a list of keys plus a list of child nodes."""

    def __init__(self):
        # Keys held by this node; BTree keeps them in ascending order.
        self.keys = []
        # Child nodes; empty for a leaf.
        self.children = []

    def __repr__(self):
        return "<id_node: {0}>".format(self.keys)

    @property
    def is_leaf(self):
        """Return True when the node has no children."""
        return not self.children


class BTree:
    """B-tree with minimum degree ``t`` supporting insert, find and remove.

    Every node holds at most ``2 * t - 1`` keys; deletion keeps every
    non-root node at ``t - 1`` keys or more by borrowing from or merging
    with a sibling before descending into it.
    """

    def __init__(self, t=2):
        """Create an empty tree.

        :param t: minimum degree of the tree; must be at least 2.
        :raises ValueError: if ``t`` is smaller than 2, because such a tree
            would split full nodes into empty halves.
        """
        if t < 2:
            raise ValueError(
                "B-tree degree t must be at least 2, got {0}".format(t))
        self.min_numbers_of_keys = t - 1
        self.max_number_of_keys = 2 * t - 1

        self.root = Node()

    def _split_child(self, parent: Node, child_index: int):
        """Split the full child ``parent.children[child_index]`` in two.

        The middle key moves up into ``parent``; the keys (and children)
        after it move into a new right sibling inserted next to the child.
        """
        new_right_child = Node()
        half_max = self.max_number_of_keys // 2
        child = parent.children[child_index]
        middle_key = child.keys[half_max]
        new_right_child.keys = child.keys[half_max + 1:]
        # The original child becomes the left half after the split.
        child.keys = child.keys[:half_max]

        if not child.is_leaf:
            new_right_child.children = child.children[half_max + 1:]
            child.children = child.children[:half_max + 1]

        parent.keys.insert(child_index, middle_key)
        parent.children.insert(child_index + 1, new_right_child)

    def insert_key(self, key):
        """Insert ``key`` into the tree (duplicates are allowed)."""
        if len(self.root.keys) >= self.max_number_of_keys:
            # The root is full: grow the tree by one level before inserting.
            new_root = Node()
            new_root.children.append(self.root)
            self.root = new_root
            self._split_child(new_root, 0)
        self._insert_to_nonfull_node(self.root, key)

    def _insert_to_nonfull_node(self, node: Node, key):
        """Insert ``key`` below ``node``, which must not be full."""
        i = len(node.keys) - 1
        # Walk left past every key that is >= key; key belongs at i + 1.
        while i >= 0 and node.keys[i] >= key:
            i -= 1

        if node.is_leaf:
            node.keys.insert(i + 1, key)
            return

        if len(node.children[i + 1].keys) >= self.max_number_of_keys:
            # Split the full child first so the recursion never meets a
            # full node, then pick the half that should receive the key.
            self._split_child(node, i + 1)
            if node.keys[i + 1] < key:
                i += 1

        self._insert_to_nonfull_node(node.children[i + 1], key)

    def find(self, key) -> bool:
        """Return True if ``key`` is stored in the tree, otherwise False."""
        current_node = self.root
        while True:
            i = len(current_node.keys) - 1
            # Find the last key that is <= key.
            while i >= 0 and current_node.keys[i] > key:
                i -= 1

            if i >= 0 and current_node.keys[i] == key:
                return True
            if current_node.is_leaf:
                return False
            current_node = current_node.children[i + 1]

    def remove_key(self, key):
        """Remove one occurrence of ``key`` from the tree.

        :return: True if a key was removed, False if it was not found.
        """
        return self._remove_key(self.root, key)

    def _remove_key(self, node: Node, key) -> bool:
        """Remove ``key`` from the subtree rooted at ``node``.

        :return: True if the key was found and removed, False otherwise.
        """
        # Only the lookup is guarded, so a ValueError raised deeper in the
        # deletion is never mistaken for "key not in this node".
        try:
            key_index = node.keys.index(key)
        except ValueError:
            key_index = None

        if key_index is not None:
            if node.is_leaf:
                node.keys.remove(key)
            else:
                self._remove_from_nonleaf_node(node, key_index)
            return True

        if node.is_leaf:
            # Message kept for backward compatibility with existing output.
            print("Key not found.")
            return False

        # Decide which subtree may contain the key.
        i = 0
        number_of_keys = len(node.keys)
        while i < number_of_keys and key > node.keys[i]:
            i += 1

        if self._repair_tree(node, i):
            # A rotation or merge changed this node's layout, so search
            # again from the same node.
            return self._remove_key(node, key)
        return self._remove_key(node.children[i], key)

    def _repair_tree(self, node: Node, child_index: int) -> bool:
        """Make sure ``node.children[child_index]`` can afford to lose a key.

        Borrows a key from a sibling that has one to spare, otherwise
        merges the child with a sibling.

        :return: True if a rotation or merge was performed, False if the
            child already held more than the minimum number of keys.
        """
        child = node.children[child_index]
        if self.min_numbers_of_keys < len(child.keys) <= self.max_number_of_keys:
            return False

        if (child_index > 0
                and len(node.children[child_index - 1].keys) > self.min_numbers_of_keys):
            self._rotate_right(node, child_index)
            return True

        if (child_index < len(node.children) - 1
                and len(node.children[child_index + 1].keys) > self.min_numbers_of_keys):
            self._rotate_left(node, child_index)
            return True

        if child_index > 0:
            # Merge the child into its left sibling.
            self._merge(node, child_index - 1, child_index)
        else:
            # Merge the right sibling into the child.
            self._merge(node, child_index, child_index + 1)

        return True

    def _rotate_left(self, parent_node: Node, child_index: int):
        """
        Take key from right brother of the child and transfer to the child
        """
        child = parent_node.children[child_index]
        right_brother = parent_node.children[child_index + 1]

        # The separator moves down; the brother's smallest key replaces it.
        child.keys.append(parent_node.keys[child_index])
        parent_node.keys[child_index] = right_brother.keys.pop(0)

        if not right_brother.is_leaf:
            # The brother's first subtree becomes the child's last subtree.
            child.children.append(right_brother.children.pop(0))

    def _rotate_right(self, parent_node: Node, child_index: int):
        """
        Take key from left brother of the child and transfer to the child
        """
        child = parent_node.children[child_index]
        left_brother = parent_node.children[child_index - 1]

        # The separator moves down; the brother's largest key replaces it.
        child.keys.insert(0, parent_node.keys[child_index - 1])
        parent_node.keys[child_index - 1] = left_brother.keys.pop()

        if not left_brother.is_leaf:
            # The brother's last subtree becomes the child's first subtree.
            child.children.insert(0, left_brother.children.pop())

    def _merge(self, parent_node: Node, to_merge_index: int, transfered_child_index: int):
        """Merge two adjacent children of ``parent_node`` into one node.

        The child at ``transfered_child_index`` and the separating parent
        key are appended to the child at ``to_merge_index``. If this leaves
        the root without keys, the merged node becomes the new root.
        """
        from_merge_node = parent_node.children.pop(transfered_child_index)
        parent_key_to_merge = parent_node.keys.pop(to_merge_index)
        to_merge_node = parent_node.children[to_merge_index]
        to_merge_node.keys.append(parent_key_to_merge)
        to_merge_node.keys.extend(from_merge_node.keys)

        if not to_merge_node.is_leaf:
            to_merge_node.children.extend(from_merge_node.children)

        if parent_node is self.root and not parent_node.keys:
            # The tree shrinks by one level.
            self.root = to_merge_node

    def _remove_from_nonleaf_node(self, node: Node, key_index: int):
        """Remove ``node.keys[key_index]`` from the internal node ``node``.

        The key is replaced by its in-order predecessor or successor when
        the neighbouring subtree can spare a key; otherwise both
        neighbouring children are merged and the removal continues there.
        """
        key = node.keys[key_index]
        left_subtree = node.children[key_index]
        right_subtree = node.children[key_index + 1]
        if len(left_subtree.keys) > self.min_numbers_of_keys:
            replacement = self._find_largest_and_delete_in_left_subtree(left_subtree)
        elif len(right_subtree.keys) > self.min_numbers_of_keys:
            replacement = self._find_largest_and_delete_in_right_subtree(right_subtree)
        else:
            self._merge(node, key_index, key_index + 1)
            return self._remove_key(node, key)

        node.keys[key_index] = replacement

    def _find_largest_and_delete_in_left_subtree(self, node: Node):
        """Remove and return the largest key in the subtree at ``node``."""
        if node.is_leaf:
            return node.keys.pop()

        # Top up the last child before descending so it can lose a key.
        self._repair_tree(node, len(node.children) - 1)
        # Re-read the last index: a merge may have removed a child.
        return self._find_largest_and_delete_in_left_subtree(
            node.children[len(node.children) - 1])

    def _find_largest_and_delete_in_right_subtree(self, node: Node):
        """Remove and return the smallest key in the subtree at ``node``.

        Despite the historical name, this walks the leftmost path and
        yields the in-order successor of the key being deleted.
        """
        if node.is_leaf:
            return node.keys.pop(0)

        # Top up the first child before descending so it can lose a key.
        self._repair_tree(node, 0)
        return self._find_largest_and_delete_in_right_subtree(node.children[0])

    def traverse_tree(self):
        """Print the keys of the tree in order, followed by a newline."""
        self._traverse_tree(self.root)
        print()

    def _traverse_tree(self, node: Node):
        """Print the subtree at ``node`` in order.

        Leaf keys are printed as a list, internal keys one at a time.
        """
        if node.is_leaf:
            print(node.keys, end=" ")
        else:
            for i, key in enumerate(node.keys):
                self._traverse_tree(node.children[i])
                print(key, end=" ")
            self._traverse_tree(node.children[-1])

tests/test_tree.py

+58
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
inorder,
77
inorder_rec
88
)
9+
from algorithms.tree.b_tree import BTree
910

1011
import unittest
1112

@@ -50,5 +51,62 @@ def create_tree():
5051
return n1
5152

5253

54+
class TestBTree(unittest.TestCase):
    """Insertion, lookup and deletion tests for BTree with even and odd degrees."""

    @classmethod
    def setUpClass(cls):
        import random
        # A private generator keeps the tests reproducible without reseeding
        # the process-wide ``random`` module that other tests may share.
        cls.random = random.Random(18719)
        cls.range = 10000

    def setUp(self):
        # Fresh batch of keys (duplicates possible) for every test.
        self.keys_to_insert = [
            self.random.randrange(-self.range, self.range)
            for _ in range(self.range)
        ]

    def _check_insertion_and_find(self, degree):
        """Insert every key, then verify that sampled keys can be found."""
        btree = BTree(degree)
        for key in self.keys_to_insert:
            btree.insert_key(key)

        for _ in range(100):
            key = self.random.choice(self.keys_to_insert)
            self.assertTrue(btree.find(key))

    def _check_deletion(self, degree):
        """Insert unique keys, remove them all and expect an empty tree."""
        btree = BTree(degree)
        key_list = set(self.keys_to_insert)
        for key in key_list:
            btree.insert_key(key)

        for key in key_list:
            btree.remove_key(key)
            self.assertFalse(btree.find(key))

        self.assertEqual(btree.root.keys, [])
        self.assertEqual(btree.root.children, [])

    def test_insertion_and_find_even_degree(self):
        self._check_insertion_and_find(4)

    def test_insertion_and_find_odd_degree(self):
        self._check_insertion_and_find(3)

    def test_deletion_even_degree(self):
        self._check_deletion(4)

    def test_deletion_odd_degree(self):
        self._check_deletion(3)
109+
110+
53111
if __name__ == '__main__':
54112
unittest.main()

0 commit comments

Comments
 (0)