Skip to content

Commit 53d4a92

Browse files
authored
Merge pull request #88 from life4/small-fixes
Small fixes
2 parents 5f793e1 + 03be186 commit 53d4a92

File tree

4 files changed

+5
-6
lines changed

4 files changed

+5
-6
lines changed

tests/test_compression/test_entropy_ncd.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def test_simmetry_compressor(text):
3131
@hypothesis.given(text=hypothesis.strategies.text(min_size=1))
3232
def test_idempotency_compressor(text):
3333
# I've modified idempotency to some kind of distributivity for constant.
34-
# Now it indicates that compressor really compress.
34+
# Now it indicates that compressor actually does compression.
3535
assert ALG._get_size(text * 2) < ALG._get_size(text) * 2
3636

3737

textdistance/algorithms/base.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,7 @@ def _sum_counters(self, *sequences: Counter[T]) -> Counter[T]:
156156
result += s
157157
return result
158158

159-
def _count_counters(self, counter: Counter) -> float:
159+
def _count_counters(self, counter: Counter) -> int:
160160
"""Return all elements count from Counter
161161
"""
162162
if getattr(self, 'as_set', False):

textdistance/algorithms/compression_based.py

+2 -3
Original file line number | Diff line number | Diff line change
@@ -108,8 +108,7 @@ def _make_probs(self, *sequences) -> dict[str, tuple[Fraction, Fraction]]:
108108

109109
prob_pairs = {}
110110
cumulative_count = 0
111-
counts = sorted(counts.items(), key=lambda x: (x[1], x[0]), reverse=True)
112-
for char, current_count in counts:
111+
for char, current_count in counts.most_common():
113112
prob_pairs[char] = (
114113
Fraction(cumulative_count, total_letters),
115114
Fraction(current_count, total_letters),
@@ -216,7 +215,7 @@ def _get_size(self, data: Sequence) -> float:
216215
class EntropyNCD(_NCDBase):
217216
"""Entropy based NCD
218217
219-
Get Entropy of input secueance as a size of compressed data.
218+
Get Entropy of input sequence as a size of compressed data.
220219
221220
https://en.wikipedia.org/wiki/Entropy_(information_theory)
222221
https://en.wikipedia.org/wiki/Entropy_encoding

textdistance/algorithms/token_based.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ def __call__(self, *sequences: Sequence) -> float:
123123
sequences = [self._count_counters(s) for s in sequences] # ints
124124
ks = list(islice(self.ks, len(sequences)))
125125

126-
if len(sequences) == 2 or self.bias is None:
126+
if len(sequences) != 2 or self.bias is None:
127127
result = intersection
128128
for k, s in zip(ks, sequences):
129129
result += k * (s - intersection)

0 commit comments

Comments
 (0)