Skip to content

Commit a829ad9

Browse files
rustyconover and claude committed
fix: Preserve numRows for zero-column RecordBatch in IPC
When a zero-column RecordBatch is deserialized from IPC, ensureSameLengthData in the RecordBatch constructor recomputes length from children via chunks.reduce((max, col) => Math.max(max, col.length), 0). With zero children, this always returns 0 — discarding the original length from the IPC message header.

Pass this.data.length to ensureSameLengthData as the explicit maxLength parameter, which the function already accepts as an optional third argument. For batches with columns, this.data.length already matches the max column length, so there is no behavior change.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a743bdd commit a829ad9

4 files changed

Lines changed: 127 additions & 1 deletion

File tree

src/recordbatch.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ export class RecordBatch<T extends TypeMap = any> {
8181
if (!(this.data instanceof Data)) {
8282
throw new TypeError('RecordBatch constructor expects a [Schema, Data] pair.');
8383
}
84-
[this.schema, this.data] = ensureSameLengthData<T>(this.schema, this.data.children as Data<T[keyof T]>[]);
84+
[this.schema, this.data] = ensureSameLengthData<T>(this.schema, this.data.children as Data<T[keyof T]>[], this.data.length);
8585
break;
8686
}
8787
case 1: {
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
"""Generate a zero-column IPC stream with 100 rows for testing."""
19+
20+
import pyarrow as pa
21+
import pyarrow.ipc as ipc
22+
23+
# Create a zero-column RecordBatch with 100 rows by creating a batch
24+
# with a dummy column and then dropping it.
25+
schema_with_col = pa.schema([("_dummy", pa.int32())])
26+
batch_with_col = pa.RecordBatch.from_arrays(
27+
[pa.array([0] * 100, type=pa.int32())], schema=schema_with_col
28+
)
29+
batch = batch_with_col.drop_columns(["_dummy"])
30+
assert batch.num_rows == 100
31+
assert batch.num_columns == 0
32+
33+
with open("zero_column_batch.arrow", "wb") as f:
34+
writer = ipc.new_stream(f, batch.schema)
35+
writer.write_batch(batch)
36+
writer.close()

test/data/zero_column_batch.arrow

144 Bytes
Binary file not shown.
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
import '../../../jest-extensions.js';
19+
import { readFileSync } from 'node:fs';
20+
import path from 'node:path';
21+
import {
22+
makeData,
23+
RecordBatch,
24+
RecordBatchStreamWriter,
25+
Schema,
26+
Struct,
27+
tableFromIPC,
28+
} from 'apache-arrow';
29+
30+
const testDataDir = path.resolve(process.cwd(), 'test/data');
31+
32+
describe('Zero-column RecordBatch numRows preservation', () => {
33+
34+
describe('PyArrow interop', () => {
35+
36+
test('should read PyArrow zero-column stream and preserve numRows', () => {
37+
const buffer = readFileSync(path.resolve(testDataDir, 'zero_column_batch.arrow'));
38+
const table = tableFromIPC(buffer);
39+
40+
expect(table.numRows).toBe(100);
41+
expect(table.numCols).toBe(0);
42+
expect(table.batches).toHaveLength(1);
43+
expect(table.batches[0].numRows).toBe(100);
44+
});
45+
});
46+
47+
describe('JS round-trip', () => {
48+
49+
test('zero-column batch should round-trip through IPC stream writer', () => {
50+
const schema = new Schema([]);
51+
const data = makeData({
52+
type: new Struct([]),
53+
length: 100,
54+
nullCount: 0,
55+
children: [],
56+
});
57+
const batch = new RecordBatch(schema, data);
58+
expect(batch.numRows).toBe(100);
59+
expect(batch.numCols).toBe(0);
60+
61+
const writer = new RecordBatchStreamWriter();
62+
writer.write(batch);
63+
writer.finish();
64+
const buffer = writer.toUint8Array(true);
65+
66+
const table = tableFromIPC(buffer);
67+
expect(table.numRows).toBe(100);
68+
expect(table.numCols).toBe(0);
69+
expect(table.batches).toHaveLength(1);
70+
expect(table.batches[0].numRows).toBe(100);
71+
});
72+
});
73+
74+
describe('Direct constructor', () => {
75+
76+
test('RecordBatch constructor preserves length for zero-column data', () => {
77+
const schema = new Schema([]);
78+
const data = makeData({
79+
type: new Struct([]),
80+
length: 100,
81+
nullCount: 0,
82+
children: [],
83+
});
84+
const batch = new RecordBatch(schema, data);
85+
86+
expect(batch.numRows).toBe(100);
87+
expect(batch.numCols).toBe(0);
88+
});
89+
});
90+
});

0 commit comments

Comments
 (0)