Skip to content

Commit 566188a

Browse files
authored
Feat JSONField (#21)
#17 JSON field support
1 parent bca7a05 commit 566188a

File tree

25 files changed

+279
-41
lines changed

25 files changed

+279
-41
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
- Fix [bug when save django model instance](https://github.com/jayvynl/django-clickhouse-backend/issues/9).
66
- Support [clickhouse-driver 0.2.6](https://github.com/mymarilyn/clickhouse-driver), drop support for python3.6.
77
- Support [Django 4.2](https://docs.djangoproject.com).
8+
- Support [clickhouse JSON type](https://clickhouse.com/docs/en/sql-reference/data-types/json).
89

910
### 1.0.2 (2023-02-28)
1011
- Fix test db name when NAME not provided in DATABASES setting.

clickhouse_backend/backend/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class DatabaseWrapper(BaseDatabaseWrapper):
2828
"BigAutoField": "Int64",
2929
"IPAddressField": "IPv4",
3030
"GenericIPAddressField": "IPv6",
31+
"JSONField": "JSON",
3132
"BinaryField": "String",
3233
"CharField": "FixedString(%(max_length)s)",
3334
"DateField": "Date32",

clickhouse_backend/backend/features.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,23 @@ def uses_savepoints(self):
100100
supports_partial_indexes = False
101101

102102
# Does the backend support JSONField?
103-
supports_json_field = False
103+
supports_json_field = True
104+
# Can the backend introspect a JSONField?
105+
can_introspect_json_field = True
106+
# Does the backend support primitives in JSONField?
107+
supports_primitives_in_json_field = False
108+
# Is there a true datatype for JSON?
109+
has_native_json_field = True
110+
# Does the backend use PostgreSQL-style JSON operators like '->'?
111+
has_json_operators = False
112+
# Does the backend support __contains and __contained_by lookups for
113+
# a JSONField?
114+
supports_json_field_contains = False
115+
# Does value__d__contains={'f': 'g'} (without a list around the dict) match
116+
# {'d': [{'f': 'g'}]}?
117+
json_key_contains_list_matching_requires_list = False
118+
# Does the backend support JSONObject() database function?
119+
has_json_object_function = False
104120

105121
# Does the backend support column collations?
106122
supports_collation_on_charfield = False

clickhouse_backend/backend/introspection.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ def get_field_type(self, data_type, description):
3939
return "TupleField"
4040
elif data_type.startswith("Map"):
4141
return "MapField"
42+
elif data_type == "Object('json')":
43+
return "JSONField"
4244

4345
return f"{data_type}Field" # Int8
4446

clickhouse_backend/backend/operations.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
from django.conf import settings
21
from django.db.backends.base.operations import BaseDatabaseOperations
32

43
from clickhouse_backend import compat
5-
from clickhouse_backend.utils import get_timezone
4+
from clickhouse_backend.driver import JSON
65
from clickhouse_backend.driver.client import insert_pattern
6+
from clickhouse_backend.utils import get_timezone
77

88

99
class DatabaseOperations(BaseDatabaseOperations):
@@ -294,6 +294,9 @@ def adapt_datetimefield_value(self, value):
294294
def adapt_decimalfield_value(self, value, max_digits=None, decimal_places=None):
295295
return value
296296

297+
def adapt_json_value(self, value, encoder):
298+
return JSON(value)
299+
297300
def explain_query_prefix(self, format=None, **options):
298301
# bypass normal explain prefix insert in compiler.as_sql
299302
return ""

clickhouse_backend/driver/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from .connection import Connection
1111
# Binary is compatible for django's BinaryField.
12-
from .types import Binary # NOQA
12+
from .types import Binary, JSON # NOQA
1313

1414

1515
def connect(dsn=None, host=None,

clickhouse_backend/driver/escape.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def escape_param(item, context, for_server=False):
6161
return "[%s]" % ', '.join(str(escape_param(x, context, for_server=for_server)) for x in item)
6262

6363
elif isinstance(item, tuple):
64-
return "(%s)" % ', '.join(str(escape_param(x, context, for_server=for_server)) for x in item)
64+
return "tuple(%s)" % ', '.join(str(escape_param(x, context, for_server=for_server)) for x in item)
6565

6666
elif isinstance(item, Enum):
6767
return escape_param(item.value, context, for_server=for_server)
@@ -72,6 +72,15 @@ def escape_param(item, context, for_server=False):
7272
elif isinstance(item, types.Binary):
7373
return escape_binary(item, context)
7474

75+
elif isinstance(item, types.JSON):
76+
value = item.value
77+
if isinstance(value, list):
78+
return escape_param([types.JSON(v) for v in value], context, for_server=for_server)
79+
elif isinstance(value, dict):
80+
return escape_param(tuple(types.JSON(v) for v in value.values()), context, for_server=for_server)
81+
else:
82+
return escape_param(value, context, for_server=for_server)
83+
7584
else:
7685
return item
7786

clickhouse_backend/driver/types.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,6 @@
11
Binary = bytes
2+
3+
4+
class JSON:
    """Thin wrapper that tags a Python value as a JSON query parameter.

    The wrapped object is stored unchanged on ``value``; code that needs to
    treat JSON parameters specially can test ``isinstance(item, JSON)`` and
    read ``item.value``.
    """

    def __init__(self, value):
        # Keep the original object as-is; no copying or serialization here.
        self.value = value

    def __repr__(self):
        # Show the wrapped value so logged query parameters stay readable
        # instead of the default "<JSON object at 0x...>".
        return f"{type(self).__name__}({self.value!r})"

clickhouse_backend/models/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from clickhouse_backend.patch import patch_all
12
from .base import ClickhouseModel
23
from .engines import *
34
from .engines import __all__ as engines_all # NOQA
@@ -15,3 +16,4 @@
1516
*fucntions_all,
1617
*indexes_all,
1718
]
19+
patch_all()

clickhouse_backend/models/base.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from django.db import models
22
from django.db.migrations import state
3-
from django.db.models import functions
43
from django.db.models import options
5-
from django.db.models.manager import BaseManager
64

75
from .query import QuerySet
86
from .sql import Query
@@ -16,15 +14,6 @@
1614
state.DEFAULT_NAMES = options.DEFAULT_NAMES
1715

1816

19-
def as_clickhouse(self, compiler, connection, **extra_context):
20-
return functions.Random.as_sql(
21-
self, compiler, connection, function="rand64", **extra_context
22-
)
23-
24-
25-
functions.Random.as_clickhouse = as_clickhouse
26-
27-
2817
class ClickhouseManager(models.Manager):
2918
_queryset_class = QuerySet
3019

clickhouse_backend/models/fields/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .array import *
1414
from .base import FieldMixin
1515
from .integer import *
16+
from .json import *
1617
from .map import *
1718
from .tuple import *
1819

@@ -30,7 +31,7 @@
3031
"DateField", "Date32Field", "DateTimeField", "DateTime64Field",
3132
"Enum8Field", "Enum16Field", "EnumField",
3233
"IPv4Field", "IPv6Field", "GenericIPAddressField",
33-
"ArrayField", "TupleField", "MapField",
34+
"ArrayField", "TupleField", "MapField", "JSONField",
3435
]
3536

3637

clickhouse_backend/models/fields/base.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@ class FieldMixin:
66
"""All clickhouse field should inherit this mixin.
77
88
1. Remove unsupported arguments: unique, db_index, unique_for_date,
9-
unique_for_month, unique_for_year, db_tablespace.
9+
unique_for_month, unique_for_year, db_tablespace, db_collation.
1010
2. Return shortened name in deconstruct method.
11-
3. Add low_cardinality attribute, corresponding to clickhouse LowCardinality Data Type.
1211
1312
low_cardinality argument is added separately in every specific field that support LowCardinality.
1413
If added in this mixin, then PyCharm will not supply argument hints.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from django.db.models.fields import json
2+
from django.db.models.fields import Field
3+
4+
from clickhouse_backend.driver import JSON
5+
from .base import FieldMixin
6+
7+
__all__ = ["JSONField"]
8+
9+
10+
class JSONField(FieldMixin, json.JSONField):
    """JSON model field combining ``FieldMixin`` with Django's ``JSONField``.

    Values read from the database are returned untouched, values used as
    query parameters are wrapped in ``JSON``, and values being saved are
    passed on unwrapped.
    """

    # The project docs state that Nullable is not supported for JSON.
    nullable_allowed = False

    def deconstruct(self):
        """Return the deconstruction tuple with the ``json`` segment removed from the path."""
        name, path, args, kwargs = super().deconstruct()
        long_prefix = "clickhouse_backend.models.json"
        if not path.startswith(long_prefix):
            return name, path, args, kwargs
        short_path = path.replace(long_prefix, "clickhouse_backend.models")
        return name, short_path, args, kwargs

    def from_db_value(self, value, expression, connection):
        """Hand back the database value unchanged."""
        return value

    def get_prep_value(self, value):
        """Delegate straight to the base ``Field`` implementation."""
        # django 4.1 and below dumps value as json string, so the
        # JSONField implementation is bypassed on purpose.
        return Field.get_prep_value(self, value)

    def get_db_prep_value(self, value, connection, prepared=False):
        """Return the prepared value, guaranteed to be wrapped in ``JSON``."""
        prepared_value = super().get_db_prep_value(value, connection, prepared)
        # django 4.1 and below does not call connection.ops.adapt_json_value,
        # so the wrapper may still be missing at this point.
        if isinstance(prepared_value, JSON):
            return prepared_value
        return JSON(prepared_value)

    def get_db_prep_save(self, value, connection):
        """Return the value to save, stripped of any ``JSON`` wrapper."""
        saved = super().get_db_prep_save(value, connection)
        return saved.value if isinstance(saved, JSON) else saved

clickhouse_backend/models/fields/map.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ def get_db_prep_value(self, value, connection, prepared=False):
163163
return value
164164

165165
def get_db_prep_save(self, value, connection):
166+
if hasattr(value, "as_sql"):
167+
return value
166168
if isinstance(value, collections.abc.Mapping):
167169
return {
168170
self.key_field.get_db_prep_save(k, connection):

clickhouse_backend/models/fields/tuple.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,9 @@ def _from_db_value(self, value, expression, connection):
230230
return value
231231
self._validate_length(value)
232232
values = []
233+
# when set allow_experimental_object_type=1 at session level, value will be a dict.
234+
if isinstance(value, dict):
235+
value = value.values()
233236
for i, field in zip(value, self._base_fields):
234237
if hasattr(field, "from_db_value"):
235238
values.append(field.from_db_value(i, expression, connection))

clickhouse_backend/patch/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .fields import patch_jsonfield
2+
from .functions import patch_functions
3+
4+
5+
def patch_all():
    """Run every patch function of this package, JSON field patch first."""
    for apply_patch in (patch_jsonfield, patch_functions):
        apply_patch()

clickhouse_backend/patch/fields.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from django.db.models.fields import json
2+
3+
4+
def key_transform_as_clickhouse(self, compiler, connection):
    """Compile a chain of JSON key transforms into SQL and parameters.

    ``self.preprocess_lhs`` supplies the left-hand SQL, its parameters and
    the list of keys to apply. Each key that is a decimal integer becomes an
    array subscript ``<sql>[%s]``; any other key becomes
    ``tupleElement(<sql>, %s)``.

    Returns:
        A ``(sql, params)`` pair where ``params`` is a tuple holding the
        left-hand parameters followed by one parameter per key.
    """
    lhs, params, key_transforms = self.preprocess_lhs(compiler, connection)
    sql = lhs
    params = list(params)
    for key in key_transforms:
        # str.isdecimal() matches exactly the digit characters int() can
        # parse. str.isdigit() also accepts e.g. superscript digits ("²"),
        # for which int() raises ValueError; such keys are treated as names.
        if key.isdecimal():
            sql = f"{sql}[%s]"
            # Shift the 0-based lookup index by one: ClickHouse array
            # subscripts start at 1.
            params.append(int(key) + 1)
        else:
            sql = f"tupleElement({sql}, %s)"
            params.append(key)
    return sql, tuple(params)
16+
17+
18+
def patch_jsonfield():
    """Install ``key_transform_as_clickhouse`` as ``KeyTransform.as_clickhouse``."""
    setattr(json.KeyTransform, "as_clickhouse", key_transform_as_clickhouse)

clickhouse_backend/patch/functions.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from django.db.models import functions
2+
3+
4+
def random_as_clickhouse(self, compiler, connection, **extra_context):
    """Render ``Random`` through its regular ``as_sql`` using the ``rand64`` function name."""
    render = functions.Random.as_sql
    return render(self, compiler, connection, function="rand64", **extra_context)
8+
9+
10+
def patch_functions():
    """Install ``random_as_clickhouse`` as ``Random.as_clickhouse``."""
    setattr(functions.Random, "as_clickhouse", random_as_clickhouse)

docs/Fields.md

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ Supported date types are:
7373
- LowCardinality(T)
7474
- Tuple(T1, T2, ...)
7575
- Map(key, value)
76+
- JSON
7677

7778

7879
### [U]Int(8|16|32|64|128|256)
@@ -551,7 +552,7 @@ Valid key fields are:
551552
- GenericIPAddressField
552553

553554

554-
When query from the database, MapFile get dict.
555+
When queried from the database, a MapField returns a dict.
555556

556557

557558
#### Lookups
@@ -567,10 +568,10 @@ class MapModel(models.ClickhouseModel):
567568

568569
MapModel.objects.create(
569570
map={
570-
"baidu": "39.156.66.10",
571-
"bing.com": "13.107.21.200",
572-
"google.com": "172.217.163.46"
573-
}
571+
"baidu": "39.156.66.10",
572+
"bing.com": "13.107.21.200",
573+
"google.com": "172.217.163.46"
574+
}
574575
)
575576
```
576577

@@ -634,3 +635,74 @@ MapModel.objects.annotate(
634635
).values('value')
635636
# <QuerySet [{'value': '::ffff:d6b:15c8'}]>
636637
```
638+
639+
640+
### JSON
641+
642+
Field importing path: `clickhouse_backend.models.JSONField`.
643+
644+
Neither Nullable nor LowCardinality is supported.
645+
646+
When queried from the database, a JSONField returns a dict or a list.
647+
648+
The JSON data type is an experimental feature. To use it, set `allow_experimental_object_type = 1` in the database settings.
649+
For example:
650+
651+
```python
652+
DATABASES = {
653+
'default': {
654+
'ENGINE': 'clickhouse_backend.backend',
655+
'OPTIONS': {
656+
'settings': {
657+
'allow_experimental_object_type': 1,
658+
}
659+
}
660+
}
661+
}
662+
```
663+
664+
#### Lookups
665+
666+
Currently only key lookup is supported.
667+
668+
```python
669+
from clickhouse_backend import models
670+
671+
class JSONModel(models.ClickhouseModel):
672+
json = models.JSONField()
673+
674+
v = {'a': [1, 2, 3], 'b': [{'c': 1}, {'d': 2}], 'c': {'d': 'e'}}
675+
instance = JSONModel.objects.create(json=v)
676+
instance.refresh_from_db()
677+
instance.json
678+
# {'a': [1, 2, 3], 'b': [{'c': 1, 'd': 0}, {'c': 0, 'd': 2}], 'c': {'d': 'e'}}
679+
```
680+
681+
**Note:** A JSONField value may change after it is saved to the database. This is because ClickHouse internally stores [JSON](https://clickhouse.com/docs/en/sql-reference/data-types/json)
682+
as [Tuple](https://clickhouse.com/docs/en/sql-reference/data-types/tuple) and [Array](https://clickhouse.com/docs/en/sql-reference/data-types/array).
683+
[ClickHouse tries its best to store JSON in a uniform schema](https://clickhouse.com/docs/en/integrations/data-formats/json#handling-data-changes).
684+
If you insert a JSON value that is not compatible with the existing schema, the insert will fail.
685+
686+
687+
##### key
688+
689+
Get the value of a specific key.
690+
691+
```python
692+
JSONModel.objects.values('json__a')
693+
# <QuerySet [{'json__a': [1, 2, 3]}]>
694+
JSONModel.objects.values('json__b__0__c')
695+
# <QuerySet [{'json__b__0__c': 1}]>
696+
JSONModel.objects.values('json__c__d')
697+
# <QuerySet [{'json__c__d': 'e'}]>
698+
JSONModel.objects.values('json__c')
699+
# <QuerySet [{'json__c': {'d': 'e'}}]>
700+
701+
JSONModel.objects.filter(json__c={'any_key': 'e'}).exists()
702+
# True
703+
JSONModel.objects.filter(json__c=('e',)).exists()
704+
# True
705+
```
706+
707+
Note the surprising behavior of the last two examples. This is because ClickHouse stores a Python dict as a [named tuple](https://clickhouse.com/docs/en/sql-reference/data-types/tuple#addressing-tuple-elements),
708+
and named tuples are compared like ordinary tuples: the element names are not taken into account.

example/config/settings.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@
7171
DATABASES = {
7272
'default': {
7373
'ENGINE': 'clickhouse_backend.backend',
74+
'OPTIONS': {
75+
'settings': {
76+
'allow_experimental_object_type': 1,
77+
}
78+
},
7479
'TEST': {
7580
'MIGRATE': False
7681
}

0 commit comments

Comments
 (0)