Skip to content

Commit 075ca77

Browse files
committed
support multidimensional arrays
needed for functions like extractAllGroupsHorizontal returning Array(Array(String))
1 parent 223c2bc commit 075ca77

11 files changed

Lines changed: 590 additions & 13 deletions

File tree

src/binary.cpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,13 @@ extern "C"
326326
return UUIDOID;
327327
case Type::Code::Array:
328328
{
329-
Oid array_type
330-
= get_array_type(get_corr_postgres_type(type->As<clickhouse::ArrayType>()->GetItemType()));
329+
/* postgres uses one array type for any number of dimensions, so
330+
* walk past nested Array layers to the leaf element type. */
331+
auto leaf = type->As<clickhouse::ArrayType>()->GetItemType();
332+
while (leaf->GetCode() == Type::Code::Array)
333+
leaf = leaf->As<clickhouse::ArrayType>()->GetItemType();
334+
335+
Oid array_type = get_array_type(get_corr_postgres_type(leaf));
331336
if (array_type == InvalidOid)
332337
throw std::runtime_error("pg_clickhouse: could not find array "
333338
" type for column type "
@@ -1075,14 +1080,25 @@ extern "C"
10751080
size_t len = arr->Size();
10761081
auto slot = (ch_binary_array_t *)exc_palloc(sizeof(ch_binary_array_t));
10771082

1078-
Oid item_type = get_corr_postgres_type(arr->Type());
1083+
/* find leaf scalar type & nesting depth, since postgres has one
1084+
* array type per element type regardless of ndim */
1085+
int ndim = 1;
1086+
auto leaf_type = arr->Type();
1087+
while (leaf_type->GetCode() == Type::Code::Array)
1088+
{
1089+
leaf_type = leaf_type->As<clickhouse::ArrayType>()->GetItemType();
1090+
ndim++;
1091+
}
1092+
1093+
Oid item_type = get_corr_postgres_type(leaf_type);
10791094
Oid array_type = get_array_type(item_type);
10801095

10811096
if (array_type == InvalidOid)
10821097
throw std::runtime_error(std::string("pg_clickhouse: could not") + " find array type for "
10831098
+ std::to_string(item_type));
10841099

10851100
slot->len = len;
1101+
slot->ndim = ndim;
10861102
slot->array_type = array_type;
10871103
slot->item_type = item_type;
10881104

@@ -1091,8 +1107,13 @@ extern "C"
10911107
slot->datums = (Datum *)exc_palloc0(sizeof(Datum) * len);
10921108
slot->nulls = (bool *)exc_palloc0(sizeof(bool) * len);
10931109

1110+
/* For ndim==1 inner make_datum returns leaf scalars; for
1111+
* ndim>1 it recurses into the Array branch and produces
1112+
* nested ch_binary_array_t* values. Use a scratch valtype
1113+
* to avoid clobbering slot->item_type. */
1114+
Oid scratch;
10941115
for (size_t i = 0; i < len; ++i)
1095-
slot->datums[i] = make_datum(arr, i, &slot->item_type, &slot->nulls[i]);
1116+
slot->datums[i] = make_datum(arr, i, &scratch, &slot->nulls[i]);
10961117
}
10971118

10981119
/* this one will need additional work, since we just return raw slot */

src/convert.c

Lines changed: 143 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,20 +138,160 @@ convert_out_generic(ch_convert_output_state * state, Datum val)
138138
return val;
139139
}
140140

141+
/*
142+
* Walk nested ch_binary_array_t into a flat datum buffer, verifying each
143+
* level matches the dims taken from the first child. Returns false if the
144+
* shape is jagged so the caller can fall back to a slower path.
145+
*/
146+
static bool
147+
flatten_nested_array(ch_binary_array_t * slot, int *dims, int level,
148+
Datum * values, bool *nulls, size_t * idx)
149+
{
150+
if ((int) slot->len != dims[level])
151+
return false;
152+
153+
if (slot->ndim == 1)
154+
{
155+
for (size_t i = 0; i < slot->len; i++)
156+
{
157+
values[*idx] = slot->datums[i];
158+
nulls[*idx] = slot->nulls[i];
159+
(*idx)++;
160+
}
161+
}
162+
else
163+
{
164+
for (size_t i = 0; i < slot->len; i++)
165+
{
166+
ch_binary_array_t *child = (ch_binary_array_t *) DatumGetPointer(slot->datums[i]);
167+
168+
if (!flatten_nested_array(child, dims, level + 1, values, nulls, idx))
169+
return false;
170+
}
171+
}
172+
return true;
173+
}
174+
175+
/*
176+
* Emit a nested ch_binary_array_t as a postgres array text literal, quoting
177+
* each leaf and escaping `\` and `"`. Used as the jagged fallback so binary
178+
* surfaces the same array_in malformed-literal error as the http path.
179+
*/
180+
static void
181+
emit_nested_array_text(ch_binary_array_t * slot, FmgrInfo * outfn, StringInfo buf)
182+
{
183+
appendStringInfoChar(buf, '{');
184+
for (size_t i = 0; i < slot->len; i++)
185+
{
186+
if (i > 0)
187+
appendStringInfoChar(buf, ',');
188+
189+
if (slot->ndim > 1)
190+
{
191+
ch_binary_array_t *child = (ch_binary_array_t *) DatumGetPointer(slot->datums[i]);
192+
193+
emit_nested_array_text(child, outfn, buf);
194+
}
195+
else if (slot->nulls[i])
196+
appendStringInfoString(buf, "NULL");
197+
else
198+
{
199+
char *s = OutputFunctionCall(outfn, slot->datums[i]);
200+
201+
appendStringInfoChar(buf, '"');
202+
for (char *p = s; *p; p++)
203+
{
204+
if (*p == '"' || *p == '\\')
205+
appendStringInfoChar(buf, '\\');
206+
appendStringInfoChar(buf, *p);
207+
}
208+
appendStringInfoChar(buf, '"');
209+
pfree(s);
210+
}
211+
}
212+
appendStringInfoChar(buf, '}');
213+
}
214+
141215
static Datum
142216
convert_array(ch_convert_state * state, Datum val)
143217
{
144218
ch_binary_array_t *slot = (ch_binary_array_t *) DatumGetPointer(val);
145219

146220
if (slot->len == 0)
147-
val = PointerGetDatum(construct_empty_array(state->intype));
148-
else
221+
val = PointerGetDatum(construct_empty_array(slot->item_type));
222+
else if (slot->ndim == 1)
149223
{
150224
void *arrout = construct_array(slot->datums, slot->len, slot->item_type,
151225
state->typlen, state->typbyval, state->typalign);
152226

153227
val = PointerGetDatum(arrout);
154228
}
229+
else
230+
{
231+
int dims[MAXDIM];
232+
int lbs[MAXDIM];
233+
size_t total = 1;
234+
size_t idx = 0;
235+
Datum *flat;
236+
bool *flatnulls;
237+
ch_binary_array_t *probe = slot;
238+
239+
if (slot->ndim > MAXDIM)
240+
ereport(ERROR,
241+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
242+
errmsg("pg_clickhouse: nested array depth %d exceeds maximum %d",
243+
slot->ndim, MAXDIM)));
244+
245+
for (int d = 0; d < slot->ndim; d++)
246+
{
247+
dims[d] = (int) probe->len;
248+
lbs[d] = 1;
249+
total *= probe->len;
250+
if (probe->ndim > 1 && probe->len > 0)
251+
probe = (ch_binary_array_t *) DatumGetPointer(probe->datums[0]);
252+
}
253+
254+
if (total == 0)
255+
val = PointerGetDatum(construct_empty_array(slot->item_type));
256+
else
257+
{
258+
flat = palloc(sizeof(Datum) * total);
259+
flatnulls = palloc0(sizeof(bool) * total);
260+
261+
if (flatten_nested_array(slot, dims, 0, flat, flatnulls, &idx))
262+
val = PointerGetDatum(construct_md_array(flat, flatnulls, slot->ndim,
263+
dims, lbs, slot->item_type,
264+
state->typlen, state->typbyval,
265+
state->typalign));
266+
else
267+
{
268+
/*
269+
* Jagged shape: format as text and route through array_in so
270+
* binary surfaces the same malformed-literal error as http.
271+
*/
272+
StringInfoData buf;
273+
FmgrInfo outfn;
274+
Oid out_func;
275+
Oid in_func;
276+
Oid ioparam;
277+
bool varlena;
278+
279+
pfree(flat);
280+
pfree(flatnulls);
281+
282+
getTypeOutputInfo(slot->item_type, &out_func, &varlena);
283+
fmgr_info(out_func, &outfn);
284+
285+
initStringInfo(&buf);
286+
emit_nested_array_text(slot, &outfn, &buf);
287+
288+
getTypeInputInfo(state->intype, &in_func, &ioparam);
289+
val = OidInputFunctionCall(in_func, buf.data, ioparam, -1);
290+
291+
pfree(buf.data);
292+
}
293+
}
294+
}
155295

156296
return convert_generic(state, val);
157297
}
@@ -501,6 +641,7 @@ ch_binary_do_output_conversion(ch_binary_insert_state * insert_state,
501641

502642
arr = palloc(sizeof(ch_binary_array_t));
503643
arr->len = ArrayGetNItems(AARR_NDIM(v), AARR_DIMS(v));
644+
arr->ndim = 1;
504645
arr->datums = palloc(sizeof(Datum) * arr->len);
505646
arr->nulls = palloc(sizeof(bool) * arr->len);
506647
arr->item_type = cstate->innertype;

src/include/binary.hh

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,21 @@ extern "C"
4141
Oid *types;
4242
} ch_binary_tuple_t;
4343

44+
/*
45+
* Holds one level of an array read from ClickHouse. For nested arrays
46+
* (Array(Array(...))) ndim > 1 and datums[i] points to a child
47+
* ch_binary_array_t with ndim-1. item_type is the leaf scalar type,
48+
* array_type is the postgres array type (same at every level since
49+
* postgres uses one array type per element type regardless of
50+
* dimensionality).
51+
*/
4452
typedef struct
4553
{
4654
Datum *datums; /* len entries: leaf values when ndim == 1, child ch_binary_array_t pointers when ndim > 1 */
4755
bool *nulls; /* len null flags, parallel to datums */
4856
size_t len; /* number of entries at this nesting level */
49-
Oid item_type; /* used on selects */
57+
int ndim; /* nesting depth, >= 1 (used on selects) */
58+
Oid item_type; /* leaf element type (used on selects) */
5059
Oid array_type; /* used on selects */
5160
} ch_binary_array_t;
5261

test/expected/binary.out

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,41 @@ SELECT clickhouse_raw_query('INSERT INTO binary_test.arrays SELECT
8080

8181
(1 row)
8282

83+
-- nested arrays
84+
SELECT clickhouse_raw_query('CREATE TABLE binary_test.nested_arrays (
85+
c1 Int8, c2 Array(Array(Int32)), c3 Array(Array(String))
86+
) ENGINE = MergeTree PARTITION BY c1 ORDER BY (c1);
87+
');
88+
clickhouse_raw_query
89+
----------------------
90+
91+
(1 row)
92+
93+
SELECT clickhouse_raw_query('INSERT INTO binary_test.nested_arrays VALUES
94+
(1, [[1,2],[3,4]], [[''a'',''b''],[''c'',''d'']]),
95+
(2, [[5,6],[7,8]], [[''e'',''f''],[''g'',''h'']]);
96+
');
97+
clickhouse_raw_query
98+
----------------------
99+
100+
(1 row)
101+
102+
-- ragged nested arrays must error: postgres requires hyper-rectangles
103+
SELECT clickhouse_raw_query('CREATE TABLE binary_test.ragged_arrays (
104+
c1 Int8, c2 Array(Array(Int32))
105+
) ENGINE = MergeTree PARTITION BY c1 ORDER BY (c1);
106+
');
107+
clickhouse_raw_query
108+
----------------------
109+
110+
(1 row)
111+
112+
SELECT clickhouse_raw_query('INSERT INTO binary_test.ragged_arrays VALUES (1, [[1,2,3],[4]]);');
113+
clickhouse_raw_query
114+
----------------------
115+
116+
(1 row)
117+
83118
SELECT clickhouse_raw_query('CREATE TABLE binary_test.tuples (
84119
c1 Int8,
85120
c2 Tuple(Int, String, Float32),
@@ -152,6 +187,15 @@ CREATE FOREIGN TABLE farrays2 (
152187
c1 int8[],
153188
c2 text[]
154189
) SERVER binary_loopback OPTIONS (table_name 'arrays');
190+
CREATE FOREIGN TABLE fnested_arrays (
191+
c1 int2,
192+
c2 int[],
193+
c3 text[]
194+
) SERVER binary_loopback OPTIONS (table_name 'nested_arrays');
195+
CREATE FOREIGN TABLE fragged_arrays (
196+
c1 int2,
197+
c2 int[]
198+
) SERVER binary_loopback OPTIONS (table_name 'ragged_arrays');
155199
CREATE TYPE tupformat AS (a int, b text, c float4);
156200
CREATE FOREIGN TABLE ftuples (
157201
c1 int,
@@ -265,6 +309,17 @@ SELECT * FROM farrays ORDER BY c1;
265309

266310
SELECT * FROM farrays2 ORDER BY c1;
267311
ERROR: pg_clickhouse: could not cast value from integer[] to bigint[]
312+
-- nested arrays
313+
SELECT * FROM fnested_arrays ORDER BY c1;
314+
c1 | c2 | c3
315+
----+---------------+---------------
316+
1 | {{1,2},{3,4}} | {{a,b},{c,d}}
317+
2 | {{5,6},{7,8}} | {{e,f},{g,h}}
318+
(2 rows)
319+
320+
SELECT * FROM fragged_arrays ORDER BY c1;
321+
ERROR: malformed array literal: "{{"1","2","3"},{"4"}}"
322+
DETAIL: Multidimensional arrays must have sub-arrays with matching dimensions.
268323
-- tuples
269324
SELECT * FROM ftuples ORDER BY c1;
270325
c1 | c2 | c3
@@ -305,10 +360,12 @@ SELECT clickhouse_raw_query('DROP DATABASE binary_test');
305360
(1 row)
306361

307362
DROP SERVER binary_loopback CASCADE;
308-
NOTICE: drop cascades to 6 other objects
363+
NOTICE: drop cascades to 8 other objects
309364
DETAIL: drop cascades to foreign table fints
310365
drop cascades to foreign table ftypes
311366
drop cascades to foreign table farrays
312367
drop cascades to foreign table farrays2
368+
drop cascades to foreign table fnested_arrays
369+
drop cascades to foreign table fragged_arrays
313370
drop cascades to foreign table ftuples
314371
drop cascades to foreign table fbytes

0 commit comments

Comments
 (0)