Skip to content

Commit aa7a971

Browse files
authored
fix #50 partition by single expression raise TypeError (#52)
* fix #50 partition by single expression raise TypeError * fix testpypi release version
1 parent 1ebe7ed commit aa7a971

File tree

15 files changed

+166
-45
lines changed

15 files changed

+166
-45
lines changed

.github/workflows/base-test.yml

+5
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ jobs:
4545
flag-name: run-${{ join(matrix.*, '-') }}
4646
parallel: true
4747

48+
release:
49+
50+
needs: test
51+
uses: ./.github/workflows/release.yml
52+
4853
coveralls-finish:
4954

5055
needs: test

.github/workflows/release.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
22

3-
on: push
3+
on:
4+
workflow_call:
45

56
jobs:
67
build:

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
.coverage
22
.idea
3+
.vscode
34
.tox
45
venv
56
**/__pycache__

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
### 1.1.3
2+
- Fix #50 partition by single expression raise TypeError.
3+
14
### 1.1.2
25
- Use [flake8](https://flake8.pycqa.org/) to lint code.
36
- Add GitHub action which runs tests.

README.md

+8-9
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ Notices about model definition:
168168
- need to specify the engine for clickhouse, specify the order_by for clickhouse order and the partition_by argument
169169

170170
```python
171-
from django.db.models import CheckConstraint, Func, IntegerChoices, Q
171+
from django.db.models import CheckConstraint, IntegerChoices, Q
172172
from django.utils import timezone
173173

174174
from clickhouse_backend import models
@@ -180,22 +180,21 @@ class Event(models.ClickhouseModel):
180180
DROP = 2
181181
ALERT = 3
182182
ip = models.GenericIPAddressField(default="::")
183-
ipv4 = models.GenericIPAddressField(default="127.0.0.1")
183+
ipv4 = models.IPv4Field(default="127.0.0.1")
184184
ip_nullable = models.GenericIPAddressField(null=True)
185185
port = models.UInt16Field(default=0)
186186
protocol = models.StringField(default="", low_cardinality=True)
187-
content = models.StringField(default="")
187+
content = models.JSONField(default=dict)
188188
timestamp = models.DateTime64Field(default=timezone.now)
189189
created_at = models.DateTime64Field(auto_now_add=True)
190190
action = models.EnumField(choices=Action.choices, default=Action.PASS)
191191

192192
class Meta:
193-
verbose_name = "Network event"
194-
ordering = ["-id"]
195-
db_table = "event"
196-
engine = models.ReplacingMergeTree(
197-
order_by=["id"],
198-
partition_by=Func("timestamp", function="toYYYYMMDD"),
193+
ordering = ["-timestamp"]
194+
engine = models.MergeTree(
195+
primary_key="timestamp",
196+
order_by=("timestamp", "id"),
197+
partition_by=models.toYYYYMMDD("timestamp"),
199198
index_granularity=1024,
200199
index_granularity_bytes=1 << 20,
201200
enable_mixed_granularity_parts=1,

clickhouse_backend/__init__.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1-
VERSION = (1, 1, 2)
1+
from clickhouse_backend.utils.version import get_version
22

3-
__version__ = ".".join(map(str, VERSION))
3+
VERSION = (1, 1, 3, "alpha", 0)
4+
5+
__version__ = get_version(VERSION)

clickhouse_backend/backend/operations.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from clickhouse_backend import compat
66
from clickhouse_backend.driver import JSON
77
from clickhouse_backend.driver.client import insert_pattern
8-
from clickhouse_backend.utils import get_timezone
8+
from clickhouse_backend.utils.timezone import get_timezone
99

1010

1111
class DatabaseOperations(BaseDatabaseOperations):

clickhouse_backend/models/engines.py

+18-20
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from itertools import zip_longest
2-
31
from django.db.models import Func, Value
42
from django.utils.itercompat import is_iterable
53

@@ -133,31 +131,31 @@ def __init__(
133131
):
134132
assert (
135133
order_by is not None or primary_key is not None
136-
), "order_by or primary_key is missing."
134+
), "At least one of order_by or primary_key must be provided"
137135
self.order_by = order_by
138136
self.primary_key = primary_key
139137
self.partition_by = partition_by
140138

141-
if order_by is not None:
142-
if isinstance(order_by, str) or not is_iterable(order_by):
143-
self.order_by = [order_by]
144-
if any(o is None for o in self.order_by):
145-
raise ValueError("None is not allowed in order_by")
146-
147-
if primary_key is not None:
148-
if isinstance(primary_key, str) or not is_iterable(primary_key):
149-
self.primary_key = [primary_key]
150-
if any(o is None for o in self.primary_key):
151-
raise ValueError("None is not allowed in primary_key")
139+
for key in ["order_by", "primary_key", "partition_by"]:
140+
value = getattr(self, key)
141+
if value is not None:
142+
if isinstance(value, str) or not is_iterable(value):
143+
value = (value,)
144+
setattr(self, key, value)
145+
elif not isinstance(value, tuple):
146+
value = tuple(value)
147+
setattr(self, key, value)
148+
if any(i is None for i in value):
149+
raise ValueError(f"None is not allowed in {key}")
152150

153151
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#choosing-a-primary-key-that-differs-from-the-sorting-key
154152
# primary key expression tuple must be a prefix of the sorting key expression tuple.
155-
if self.order_by is not None and self.primary_key is not None:
156-
for o, p in zip_longest(self.order_by, self.primary_key):
157-
if p is None:
158-
break
159-
if p != o:
160-
raise ValueError("primary_key must be a prefix of order_by")
153+
if (
154+
self.order_by is not None
155+
and self.primary_key is not None
156+
and self.order_by[: len(self.primary_key)] != self.primary_key
157+
):
158+
raise ValueError("primary_key must be a prefix of order_by")
161159

162160
super().__init__(*expressions, **settings)
163161

clickhouse_backend/utils/__init__.py

Whitespace-only changes.

clickhouse_backend/utils.py renamed to clickhouse_backend/utils/timezone.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ def get_timezone():
77
"""Guess what timezone the user is in.
88
99
Just remember, no matter USE_TZ or not,
10-
clickhouse always store UNIX timestamp (which is in UTC timezone).
10+
ClickHouse always store UNIX timestamp (which is in UTC timezone).
1111
1212
Results of lookups turning datetime to date are relevant to timezone.
13-
We should always pass timezone parameter to clickhouse functions like
13+
We should always pass timezone parameter to ClickHouse functions like
1414
toDate, toStartOfMonth.
1515
"""
1616
if settings.USE_TZ:

clickhouse_backend/utils/version.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import datetime
2+
import functools
3+
import os
4+
import subprocess
5+
6+
7+
# Borrowed from django.utils.version
8+
def get_version(version):
9+
"""Return a PEP 440-compliant version number from VERSION."""
10+
11+
# Now build the two parts of the version number:
12+
# main = X.Y[.Z]
13+
# sub = .devN - for pre-alpha releases
14+
# | {a|b|rc}N - for alpha, beta, and rc releases
15+
16+
main = get_main_version(version)
17+
18+
sub = ""
19+
if version[3] == "alpha" and version[4] == 0:
20+
git_changeset = get_git_changeset()
21+
if git_changeset:
22+
sub = ".dev%s" % git_changeset
23+
24+
elif version[3] != "final":
25+
mapping = {"alpha": "a", "beta": "b", "rc": "rc"}
26+
sub = mapping[version[3]] + str(version[4])
27+
28+
return main + sub
29+
30+
31+
def get_main_version(version):
32+
"""Return main version (X.Y[.Z]) from VERSION."""
33+
parts = 2 if version[2] == 0 else 3
34+
return ".".join(str(x) for x in version[:parts])
35+
36+
37+
@functools.lru_cache(maxsize=128)
38+
def get_git_changeset():
39+
"""Return a numeric identifier of the latest git changeset.
40+
41+
The result is the UTC timestamp of the changeset in YYYYMMDDHHMMSS format.
42+
This value isn't guaranteed to be unique, but collisions are very unlikely,
43+
so it's sufficient for generating the development version numbers.
44+
"""
45+
# Repository may not be found if __file__ is undefined, e.g. in a frozen
46+
# module.
47+
if "__file__" not in globals():
48+
return None
49+
repo_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
50+
git_log = subprocess.run(
51+
"git log --pretty=format:%ct --quiet -1 HEAD",
52+
capture_output=True,
53+
shell=True,
54+
cwd=repo_dir,
55+
text=True,
56+
)
57+
timestamp = git_log.stdout
58+
tz = datetime.timezone.utc
59+
try:
60+
timestamp = datetime.datetime.fromtimestamp(int(timestamp), tz=tz)
61+
except ValueError:
62+
return None
63+
return timestamp.strftime("%Y%m%d%H%M%S")

example/testapp/models.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,16 @@ class Action(IntegerChoices):
1515
ip_nullable = models.GenericIPAddressField(null=True)
1616
port = models.UInt16Field(default=0)
1717
protocol = models.StringField(default="", low_cardinality=True)
18-
content = models.JSONField()
18+
content = models.JSONField(default=dict)
1919
timestamp = models.DateTime64Field(default=timezone.now)
2020
created_at = models.DateTime64Field(auto_now_add=True)
2121
action = models.EnumField(choices=Action.choices, default=Action.PASS)
2222

2323
class Meta:
24-
verbose_name = "Network event"
25-
ordering = ["-id"]
26-
db_table = "event"
27-
engine = models.ReplacingMergeTree(
28-
order_by=["id"],
24+
ordering = ["-timestamp"]
25+
engine = models.MergeTree(
26+
primary_key="timestamp",
27+
order_by=("timestamp", "id"),
2928
partition_by=models.toYYYYMMDD("timestamp"),
3029
index_granularity=1024,
3130
index_granularity_bytes=1 << 20,
@@ -36,8 +35,8 @@ class Meta:
3635
fields=["ip"], name="ip_set_idx", type=models.Set(1000), granularity=4
3736
),
3837
models.Index(
39-
fields=["port"],
40-
name="port_bloom_idx",
38+
fields=["ipv4"],
39+
name="ipv4_bloom_idx",
4140
type=models.BloomFilter(0.001),
4241
granularity=1,
4342
),

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ dependencies = [
3838
dynamic = ["version", "readme"]
3939

4040
[tool.setuptools.dynamic]
41-
version = {attr = "clickhouse_backend.VERSION"}
41+
version = {attr = "clickhouse_backend.__version__"}
4242
readme = {file = ["README.md"], content-type = "text/markdown"}
4343

4444
[tool.isort]

tests/clickhouse_table_engine/models.py

+18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,24 @@
1+
from django.utils import timezone
2+
13
from clickhouse_backend import models
24

35

6+
class Event(models.ClickhouseModel):
7+
ip = models.GenericIPAddressField(default="::")
8+
port = models.UInt16Field(default=0)
9+
protocol = models.StringField(default="", low_cardinality=True)
10+
content = models.StringField(default="")
11+
timestamp = models.DateTime64Field(default=timezone.now)
12+
13+
class Meta:
14+
ordering = ["-timestamp"]
15+
engine = models.MergeTree(
16+
primary_key="timestamp",
17+
order_by=("timestamp", "id"),
18+
partition_by=models.toYYYYMMDD("timestamp"),
19+
)
20+
21+
422
class EngineWithSettings(models.ClickhouseModel):
523
class Meta:
624
engine = models.MergeTree(

tests/clickhouse_table_engine/tests.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,39 @@
11
from django.db import connection
22
from django.test import TestCase
33

4-
from .models import EngineWithSettings
4+
from clickhouse_backend import models
5+
6+
from .models import EngineWithSettings, Event
7+
8+
9+
class TestMergeTree(TestCase):
10+
def test_table(self):
11+
opts = Event._meta
12+
with connection.cursor() as cursor:
13+
cursor.execute(
14+
f"select engine_full from system.tables where table='{opts.db_table}'"
15+
)
16+
engine_full = cursor.fetchone()[0]
17+
self.assertEqual(
18+
engine_full.partition(" SETTINGS ")[0],
19+
"MergeTree PARTITION BY toYYYYMMDD(timestamp) PRIMARY KEY timestamp ORDER BY (timestamp, id)",
20+
)
21+
22+
def test_mergetree_init_exception(self):
23+
with self.assertRaisesMessage(
24+
AssertionError, "At least one of order_by or primary_key must be provided"
25+
):
26+
models.MergeTree()
27+
with self.assertRaisesMessage(ValueError, "None is not allowed in order_by"):
28+
models.MergeTree(order_by=(None, "a"))
29+
with self.assertRaisesMessage(
30+
ValueError, "primary_key must be a prefix of order_by"
31+
):
32+
models.MergeTree(order_by=("a", "b"), primary_key=["b"])
33+
with self.assertRaisesMessage(
34+
ValueError, "primary_key must be a prefix of order_by"
35+
):
36+
models.MergeTree(order_by=("a", "b"), primary_key=["a", "b", "c"])
537

638

739
class TestEngineSettings(TestCase):

0 commit comments

Comments
 (0)