"""
B-trees are designed for storage systems where disk operations dominate.
Each node (except the root) contains at least t - 1 keys (t children) and
at most 2*t - 1 keys (2*t children), where t is the degree of the B-tree.
It is not a typical binary search tree, because a node can hold many keys and
the tree grows upward: a new level is added at the root, not at the leaves.
A B-tree is balanced, which means that all of its leaves are at the same depth.

Complexity
    n - number of elements
    t - degree of tree
    Tree always has height at most log_t((n + 1) / 2)
    Algorithm        Average        Worst case
    Space            O(n)           O(n)
    Search           O(log n)       O(log n)
    Insert           O(log n)       O(log n)
    Delete           O(log n)       O(log n)
"""
| 18 | + |
| 19 | + |
class Node:
    """A single B-tree node: a list of keys plus a list of child nodes."""

    def __init__(self):
        # Keys stored in this node; the tree code keeps them in ascending order.
        self.keys = []
        # Child nodes; stays empty for as long as the node is a leaf.
        self.children = []

    def __repr__(self):
        return "<id_node: {0}>".format(self.keys)

    @property
    def is_leaf(self):
        """Return True when the node has no children."""
        return not self.children
| 32 | + |
| 33 | + |
class BTree:
    """B-tree of degree ``t`` storing mutually comparable keys.

    Every node other than the root is kept between ``t - 1`` and ``2 * t - 1``
    keys. Insertion splits a full node before descending into it; removal
    tops up (by rotation or merge) a minimal node before descending into it.
    """

    def __init__(self, t=2):
        """Create an empty tree.

        Args:
            t: Degree of the tree; must be at least 2.

        Raises:
            ValueError: If ``t`` is smaller than 2.
        """
        # With t < 2 a split would leave nodes without any key.
        if t < 2:
            raise ValueError(
                "B-tree degree t must be at least 2, got {0}".format(t))

        self.min_numbers_of_keys = t - 1
        self.max_number_of_keys = 2 * t - 1

        self.root = Node()

    def _split_child(self, parent: Node, child_index: int):
        """Split the full child ``parent.children[child_index]`` in two.

        The child's middle key moves up into ``parent``; the keys (and, for
        an internal node, the children) to the right of it move into a new
        node that is inserted in ``parent`` right after the child.
        """
        child = parent.children[child_index]
        half_max = self.max_number_of_keys // 2
        middle_key = child.keys[half_max]

        new_right_child = Node()
        new_right_child.keys = child.keys[half_max + 1:]
        # The existing child object is reused as the left half.
        child.keys = child.keys[:half_max]

        if not child.is_leaf:
            new_right_child.children = child.children[half_max + 1:]
            child.children = child.children[:half_max + 1]

        parent.keys.insert(child_index, middle_key)
        parent.children.insert(child_index + 1, new_right_child)

    def insert_key(self, key):
        """Insert ``key`` into the tree (duplicates are not rejected)."""
        if len(self.root.keys) >= self.max_number_of_keys:
            # Root is full: put a new empty root above it and split the old
            # one, which is the only way the tree gains height.
            new_root = Node()
            new_root.children.append(self.root)
            self.root = new_root
            self._split_child(new_root, 0)

        self._insert_to_nonfull_node(self.root, key)

    def _insert_to_nonfull_node(self, node: Node, key):
        """Insert ``key`` into the subtree rooted at ``node``.

        ``node`` itself must not be full; a full child is split before the
        recursion steps into it.
        """
        # After the loop, i + 1 is the number of keys in node smaller than key.
        i = len(node.keys) - 1
        while i >= 0 and node.keys[i] >= key:
            i -= 1

        if node.is_leaf:
            node.keys.insert(i + 1, key)
            return

        if len(node.children[i + 1].keys) >= self.max_number_of_keys:
            self._split_child(node, i + 1)
            # The split pushed a key up to position i + 1; pick the half
            # of the former child that should receive the new key.
            if node.keys[i + 1] < key:
                i += 1

        self._insert_to_nonfull_node(node.children[i + 1], key)

    def find(self, key) -> bool:
        """Return True if ``key`` is stored in the tree, False otherwise."""
        current_node = self.root
        while True:
            # Walk left from the last key to the last one not greater than key.
            i = len(current_node.keys) - 1
            while i >= 0 and current_node.keys[i] > key:
                i -= 1

            if i >= 0 and current_node.keys[i] == key:
                return True
            if current_node.is_leaf:
                return False
            current_node = current_node.children[i + 1]

    def remove_key(self, key):
        """Remove ``key`` from the tree.

        Returns:
            True if the key was found and removed, False otherwise (in which
            case "Key not found." is also printed).
        """
        return self._remove_key(self.root, key)

    def _remove_key(self, node: Node, key) -> bool:
        """Remove ``key`` from the subtree rooted at ``node``.

        Returns:
            True if the key was removed, False if it is not in the subtree.
        """
        # Only the lookup may legitimately raise ValueError, so only the
        # lookup is guarded.
        try:
            key_index = node.keys.index(key)
        except ValueError:
            key_index = None

        if key_index is not None:
            if node.is_leaf:
                node.keys.pop(key_index)
            else:
                self._remove_from_nonleaf_node(node, key_index)
            return True

        if node.is_leaf:
            print("Key not found.")
            return False

        # Index of the child whose key range may contain the key.
        i = 0
        number_of_keys = len(node.keys)
        while i < number_of_keys and key > node.keys[i]:
            i += 1

        if self._repair_tree(node, i):
            # A rotation or merge rearranged this node; search it again.
            return self._remove_key(node, key)
        return self._remove_key(node.children[i], key)

    def _repair_tree(self, node: Node, child_index: int) -> bool:
        """Make sure ``node.children[child_index]`` can afford to lose a key.

        If the child holds more than the minimum number of keys nothing is
        done. Otherwise a key is borrowed from a sibling that has one to
        spare, or, failing that, the child is merged with a sibling.

        Returns:
            True if the tree was modified, False if no action was needed.
        """
        child = node.children[child_index]
        if self.min_numbers_of_keys < len(child.keys) <= self.max_number_of_keys:
            return False

        has_left_sibling = child_index > 0
        has_right_sibling = child_index < len(node.children) - 1

        if (has_left_sibling and
                len(node.children[child_index - 1].keys) > self.min_numbers_of_keys):
            self._rotate_right(node, child_index)
            return True

        if (has_right_sibling and
                len(node.children[child_index + 1].keys) > self.min_numbers_of_keys):
            self._rotate_left(node, child_index)
            return True

        if has_left_sibling:
            # merge child with brother on the left
            self._merge(node, child_index - 1, child_index)
        else:
            # merge child with brother on the right
            self._merge(node, child_index, child_index + 1)

        return True

    def _rotate_left(self, parent_node: Node, child_index: int):
        """
        Take key from right brother of the child and transfer to the child
        """
        child = parent_node.children[child_index]
        right_brother = parent_node.children[child_index + 1]

        # The separating parent key moves down to the child; the brother's
        # smallest key replaces it in the parent.
        child.keys.append(parent_node.keys[child_index])
        parent_node.keys[child_index] = right_brother.keys.pop(0)

        if not right_brother.is_leaf:
            # The brother's first subtree follows its key and becomes the
            # child's last subtree.
            child.children.append(right_brother.children.pop(0))

    def _rotate_right(self, parent_node: Node, child_index: int):
        """
        Take key from left brother of the child and transfer to the child
        """
        child = parent_node.children[child_index]
        left_brother = parent_node.children[child_index - 1]

        # The separating parent key moves down to the child; the brother's
        # largest key replaces it in the parent.
        child.keys.insert(0, parent_node.keys[child_index - 1])
        parent_node.keys[child_index - 1] = left_brother.keys.pop()

        if not left_brother.is_leaf:
            # The brother's last subtree follows its key and becomes the
            # child's first subtree.
            child.children.insert(0, left_brother.children.pop())

    def _merge(self, parent_node: Node, to_merge_index: int, transfered_child_index: int):
        """Merge two adjacent children of ``parent_node`` into one node.

        The child at ``transfered_child_index`` is removed from the parent and
        appended, together with the parent key at ``to_merge_index``, to the
        child at ``to_merge_index``. If this leaves the root without keys,
        the merged node becomes the new root.
        """
        from_merge_node = parent_node.children.pop(transfered_child_index)
        parent_key_to_merge = parent_node.keys.pop(to_merge_index)
        to_merge_node = parent_node.children[to_merge_index]
        to_merge_node.keys.append(parent_key_to_merge)
        to_merge_node.keys.extend(from_merge_node.keys)

        if not to_merge_node.is_leaf:
            to_merge_node.children.extend(from_merge_node.children)

        # Identity check: we care whether this very node is the root.
        if parent_node is self.root and not parent_node.keys:
            self.root = to_merge_node

    def _remove_from_nonleaf_node(self, node: Node, key_index: int):
        """Remove ``node.keys[key_index]`` from the internal node ``node``.

        The key is overwritten with the largest key of its left subtree or,
        if that subtree has no key to spare, with the smallest key of its
        right subtree. When neither can spare one, both subtrees are merged
        around the key and the removal is retried from ``node``.
        """
        key = node.keys[key_index]
        left_subtree = node.children[key_index]
        right_subtree = node.children[key_index + 1]

        if len(left_subtree.keys) > self.min_numbers_of_keys:
            replacement = self._find_largest_and_delete_in_left_subtree(left_subtree)
        elif len(right_subtree.keys) > self.min_numbers_of_keys:
            replacement = self._find_largest_and_delete_in_right_subtree(right_subtree)
        else:
            self._merge(node, key_index, key_index + 1)
            return self._remove_key(node, key)

        node.keys[key_index] = replacement

    def _find_largest_and_delete_in_left_subtree(self, node: Node):
        """Remove and return the largest key of the subtree rooted at ``node``."""
        if node.is_leaf:
            return node.keys.pop()

        self._repair_tree(node, len(node.children) - 1)
        # A merge may have shortened the child list, so take the last child
        # only after the repair.
        return self._find_largest_and_delete_in_left_subtree(node.children[-1])

    def _find_largest_and_delete_in_right_subtree(self, node: Node):
        """Remove and return the smallest key of the subtree rooted at ``node``.

        Despite the method name, this follows the leftmost path.
        """
        if node.is_leaf:
            return node.keys.pop(0)

        self._repair_tree(node, 0)
        return self._find_largest_and_delete_in_right_subtree(node.children[0])

    def traverse_tree(self):
        """Print the keys of the tree in order, followed by a newline."""
        self._traverse_tree(self.root)
        print()

    def _traverse_tree(self, node: Node):
        """Print the subtree rooted at ``node`` in order.

        A leaf is printed as its whole key list; keys of internal nodes are
        printed one by one between their subtrees.
        """
        if node.is_leaf:
            print(node.keys, end=" ")
        else:
            for i, key in enumerate(node.keys):
                self._traverse_tree(node.children[i])
                print(key, end=" ")
            self._traverse_tree(node.children[-1])
0 commit comments