Skip to content

Commit 64ff6a2

Browse files
authored
fix(NODE-6735, NODE-6711): add BSON vector validation to EJSON stringification, serialization and conversion to native types (#748)
1 parent 97a21df commit 64ff6a2

File tree

5 files changed

+226
-92
lines changed

5 files changed

+226
-92
lines changed

src/binary.ts

+23-3
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,8 @@ export class Binary extends BSONValue {
341341
throw new BSONError('Binary datatype field is not Int8');
342342
}
343343

344+
validateBinaryVector(this);
345+
344346
return new Int8Array(
345347
this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
346348
);
@@ -361,6 +363,8 @@ export class Binary extends BSONValue {
361363
throw new BSONError('Binary datatype field is not Float32');
362364
}
363365

366+
validateBinaryVector(this);
367+
364368
const floatBytes = new Uint8Array(
365369
this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
366370
);
@@ -387,6 +391,8 @@ export class Binary extends BSONValue {
387391
throw new BSONError('Binary datatype field is not packed bit');
388392
}
389393

394+
validateBinaryVector(this);
395+
390396
return new Uint8Array(
391397
this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
392398
);
@@ -409,6 +415,8 @@ export class Binary extends BSONValue {
409415
throw new BSONError('Binary datatype field is not packed bit');
410416
}
411417

418+
validateBinaryVector(this);
419+
412420
const byteCount = this.length() - 2;
413421
const bitCount = byteCount * 8 - this.buffer[1];
414422
const bits = new Int8Array(bitCount);
@@ -434,7 +442,9 @@ export class Binary extends BSONValue {
434442
buffer[1] = 0;
435443
const intBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength);
436444
buffer.set(intBytes, 2);
437-
return new this(buffer, this.SUBTYPE_VECTOR);
445+
const bin = new this(buffer, this.SUBTYPE_VECTOR);
446+
validateBinaryVector(bin);
447+
return bin;
438448
}
439449

440450
/** Constructs a Binary representing a Float32 Vector. */
@@ -448,7 +458,9 @@ export class Binary extends BSONValue {
448458

449459
if (NumberUtils.isBigEndian) ByteUtils.swap32(new Uint8Array(binaryBytes.buffer, 2));
450460

451-
return new this(binaryBytes, this.SUBTYPE_VECTOR);
461+
const bin = new this(binaryBytes, this.SUBTYPE_VECTOR);
462+
validateBinaryVector(bin);
463+
return bin;
452464
}
453465

454466
/**
@@ -461,7 +473,9 @@ export class Binary extends BSONValue {
461473
buffer[0] = Binary.VECTOR_TYPE.PackedBit;
462474
buffer[1] = padding;
463475
buffer.set(array, 2);
464-
return new this(buffer, this.SUBTYPE_VECTOR);
476+
const bin = new this(buffer, this.SUBTYPE_VECTOR);
477+
validateBinaryVector(bin);
478+
return bin;
465479
}
466480

467481
/**
@@ -517,6 +531,12 @@ export function validateBinaryVector(vector: Binary): void {
517531
throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
518532
}
519533

534+
if (datatype === Binary.VECTOR_TYPE.Float32) {
535+
if (size !== 0 && size - 2 !== 0 && (size - 2) % 4 !== 0) {
536+
throw new BSONError('Invalid Vector: Float32 vector must contain a multiple of 4 bytes');
537+
}
538+
}
539+
520540
if (datatype === Binary.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) {
521541
throw new BSONError(
522542
'Invalid Vector: padding must be zero for packed bit vectors that are empty'

test/node/bson_binary_vector.spec.test.ts

+180-65
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
1+
import * as util from 'util';
12
import * as fs from 'fs';
23
import * as path from 'path';
3-
import { BSON, BSONError, Binary } from '../register-bson';
4+
import { BSON, BSONError, Binary, EJSON } from '../register-bson';
45
import { expect } from 'chai';
56

67
const { toHex, fromHex } = BSON.onDemand.ByteUtils;
78

89
type VectorHexType = '0x03' | '0x27' | '0x10';
910
type VectorTest = {
1011
description: string;
11-
vector: (number | string)[];
12+
vector?: number[];
1213
valid: boolean;
1314
dtype_hex: VectorHexType;
1415
padding?: number;
@@ -17,15 +18,11 @@ type VectorTest = {
1718
type VectorSuite = { description: string; test_key: string; tests: VectorTest[] };
1819

1920
function fixFloats(f: string | number): number {
21+
// Should be nothing to "fix" but validates we didn't get
22+
// an unexpected type so we don't silently fail on it during the test
2023
if (typeof f === 'number') {
2124
return f;
2225
}
23-
if (f === 'inf') {
24-
return Infinity;
25-
}
26-
if (f === '-inf') {
27-
return -Infinity;
28-
}
2926
throw new Error(`test format error: unknown float value: ${f}`);
3027
}
3128

@@ -49,7 +46,20 @@ function fixBits(f: number | string): number {
4946
return f;
5047
}
5148

52-
function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary {
49+
/**
 * Maps a vector dtype hex string to the name of the `Binary` static helper
 * used to build that kind of vector. The returned name is interpolated into
 * test titles and, with `from` replaced by `to`, used as the name of the
 * matching conversion method.
 *
 * @param dtype_hex - the dtype as a hex string: '0x10', '0x03' or '0x27'
 * @returns 'fromPackedBits', 'fromInt8Array' or 'fromFloat32Array'
 * @throws Error when `dtype_hex` is not one of the three known values
 */
function dtypeToHelper(dtype_hex: string) {
50+
switch (dtype_hex) {
51+
case '0x10' /* packed_bit */:
52+
return 'fromPackedBits';
53+
case '0x03' /* int8 */:
54+
return 'fromInt8Array';
55+
case '0x27' /* float32 */:
56+
return 'fromFloat32Array';
57+
default:
58+
// Fail loudly so an unrecognised dtype in a test file is not silently ignored.
59+
throw new Error(`Unknown dtype_hex: ${dtype_hex}`);
60+
}
61+
62+
function make(vector: number[], dtype_hex: VectorHexType, padding?: number): Binary {
5363
let binary: Binary;
5464
switch (dtype_hex) {
5565
case '0x10' /* packed_bit */:
@@ -87,21 +97,152 @@ const invalidTestExpectedError = new Map()
8797
'Invalid Vector: padding must be a value between 0 and 7'
8898
)
8999
.set('Negative padding PACKED_BIT', 'Invalid Vector: padding must be a value between 0 and 7')
90-
// skipped
91-
.set('Overflow Vector PACKED_BIT', false)
92-
.set('Underflow Vector PACKED_BIT', false)
93-
.set('Overflow Vector INT8', false)
94-
.set('Underflow Vector INT8', false)
95-
.set('INT8 with float inputs', false)
96-
// duplicate test! but also skipped.
97-
.set('Vector with float values PACKED_BIT', false)
98-
.set('Vector with float values PACKED_BIT', false);
100+
.set(
101+
'Insufficient vector data FLOAT32',
102+
'Invalid Vector: Float32 vector must contain a multiple of 4 bytes'
103+
)
104+
// These are not possible given the constraints of the input types allowed:
105+
// our helpers will throw an "unsupported_error" for these
106+
.set('Overflow Vector PACKED_BIT', 'unsupported_error')
107+
.set('Underflow Vector PACKED_BIT', 'unsupported_error')
108+
.set('Overflow Vector INT8', 'unsupported_error')
109+
.set('Underflow Vector INT8', 'unsupported_error')
110+
.set('INT8 with float inputs', 'unsupported_error')
111+
.set('Vector with float values PACKED_BIT', 'unsupported_error');
112+
113+
/**
 * Invokes `fn` and reports its outcome as a tagged object instead of letting
 * an exception propagate.
 *
 * @param fn - zero-argument function to run
 * @returns `{ status: 'returned', result }` holding the return value of `fn`,
 *   or `{ status: 'thrown', result }` holding whatever `fn` threw
 */
function catchError<T>(
114+
fn: () => T
115+
): { status: 'returned'; result: T } | { status: 'thrown'; result: Error } {
116+
try {
117+
return { status: 'returned', result: fn() };
118+
} catch (error) {
119+
// The caught value is passed through unchanged; it is not checked to be an Error instance.
120+
return { status: 'thrown', result: error };
121+
}
122+
123+
/**
 * Registers tests for an invalid spec case that supplies input numbers (`test.vector`).
 *
 * The vector is built, serialized and stringified immediately when this function is
 * called; the outcomes are captured with `catchError` and only asserted on inside the
 * registered `it` callbacks.
 *
 * @param test - the invalid test case; `test.vector` is asserted non-null here
 * @param expectedErrorMessage - pattern the error must match, or the sentinel
 *   'unsupported_error', in which case a plain `Error` is expected instead of a `BSONError`
 */
function testVectorInvalidInputValues(test: VectorTest, expectedErrorMessage: string) {
124+
const binaryCreation = catchError(make.bind(null, test.vector!, test.dtype_hex, test.padding));
125+
// Serialization and stringification are only attempted when the Binary was created;
126+
// otherwise these stay undefined.
const bsonBytesCreation =
126+
binaryCreation.status !== 'thrown'
127+
? catchError(BSON.serialize.bind(null, { bin: binaryCreation.result }))
128+
: undefined;
129+
const ejsonStringCreation =
130+
binaryCreation.status !== 'thrown'
131+
? catchError(BSON.EJSON.stringify.bind(null, { bin: binaryCreation.result }))
132+
: undefined;
133+
134+
// Descriptions of the cases expected to fail while creating the Binary itself:
// the three padding cases plus every case mapped to 'unsupported_error'.
const binaryHelperValidations = [
135+
'Padding specified with no vector data PACKED_BIT',
136+
'Exceeding maximum padding PACKED_BIT',
137+
'Negative padding PACKED_BIT',
138+
...Array.from(invalidTestExpectedError.entries())
139+
.filter(([, v]) => v === 'unsupported_error')
140+
.map(([k]) => k)
141+
];
142+
143+
const errorType = expectedErrorMessage === 'unsupported_error' ? Error : BSONError;
144+
const errorName = expectedErrorMessage === 'unsupported_error' ? 'Error' : 'BSONError';
145+
146+
// Asserts that a captured outcome exists, was a throw, has the expected error type,
// and matches the expected message pattern.
const check = outcome => {
147+
expect(outcome).to.exist;
148+
expect(outcome.status).to.equal('thrown');
149+
expect(outcome.result).to.be.instanceOf(errorType);
150+
expect(outcome.result).to.match(new RegExp(expectedErrorMessage));
151+
};
152+
153+
if (binaryHelperValidations.includes(test.description)) {
154+
describe('when creating a BSON Vector given invalid input values', () => {
155+
it(`Binary.${dtypeToHelper(test.dtype_hex)}() throws a ${errorName}`, function () {
156+
check(binaryCreation);
157+
});
158+
});
159+
} else {
160+
// NOTE: this expectation runs when the function is called (test registration),
// not inside an `it` callback.
expect(errorName).to.equal('BSONError'); // unsupported_error are only when making vectors
161+
162+
describe('when encoding a BSON Vector given invalid input values', () => {
163+
it(`Binary.${dtypeToHelper(test.dtype_hex)}() does not throw`, function () {
164+
expect(binaryCreation).to.have.property('status', 'returned');
165+
});
166+
167+
it(`BSON.serialize() throws a BSONError`, function () {
168+
check(bsonBytesCreation);
169+
});
170+
171+
it(`EJSON.stringify() throws a BSONError`, function () {
172+
check(ejsonStringCreation);
173+
});
174+
});
175+
}
176+
}
177+
178+
/**
 * Registers tests for an invalid spec case that supplies raw BSON bytes
 * (`test.canonical_bson`, a hex string).
 *
 * Each test deserializes the bytes, then expects a `BSONError` matching
 * `expectedErrorMessage` from `BSON.serialize`, from the dtype-specific conversion
 * method, from `toBits()` (packed bit vectors only), and from `EJSON.stringify`.
 *
 * @param test - the invalid test case; `test.canonical_bson` is asserted non-null here
 * @param expectedErrorMessage - pattern the thrown error's message must match
 */
function testVectorInvalidBSONBytes(test: VectorTest, expectedErrorMessage: string) {
179+
describe('when encoding a Binary Vector made from invalid bytes', () => {
180+
it(`BSON.serialize() throw a BSONError`, function () {
181+
let thrownError: Error | undefined;
182+
// Deserialization happens outside the try block, so only re-serialization is expected to throw.
// `bin` holds the whole deserialized document, not just the Binary value.
const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex'));
183+
184+
try {
185+
BSON.serialize(bin);
186+
} catch (error) {
187+
thrownError = error;
188+
}
189+
190+
// The stack is passed as the assertion message so a failure shows where the wrong error came from.
expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError);
191+
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
192+
});
193+
194+
// Derive the conversion method name from the construction helper name,
// e.g. 'fromInt8Array' becomes 'toInt8Array'.
const toHelper = dtypeToHelper(test.dtype_hex).replace('from', 'to');
195+
it(`Binary.${toHelper}() throw a BSONError`, function () {
196+
let thrownError: Error | undefined;
197+
const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex'));
198+
199+
try {
200+
// NOTE(review): reads the Binary from the `vector` key — presumably the spec files' test key; confirm.
bin.vector[toHelper]();
201+
} catch (error) {
202+
thrownError = error;
203+
}
204+
205+
expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError);
206+
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
207+
});
208+
209+
// Packed bit vectors have a second conversion method, toBits(), which is checked as well.
if (toHelper === 'toPackedBits') {
210+
it(`Binary.toBits() throw a BSONError`, function () {
211+
let thrownError: Error | undefined;
212+
const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex'));
213+
214+
try {
215+
bin.vector.toBits();
216+
} catch (error) {
217+
thrownError = error;
218+
}
219+
220+
expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError);
221+
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
222+
});
223+
}
224+
225+
it(`EJSON.stringify() throw a BSONError`, function () {
226+
let thrownError: Error | undefined;
227+
const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex'));
228+
229+
try {
230+
EJSON.stringify(bin);
231+
} catch (error) {
232+
thrownError = error;
233+
}
234+
235+
expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError);
236+
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
237+
});
238+
});
239+
}
99240

100241
describe('BSON Binary Vector spec tests', () => {
101242
const tests: Record<string, VectorSuite> = Object.create(null);
102243

103244
for (const file of fs.readdirSync(path.join(__dirname, 'specs/bson-binary-vector'))) {
104-
tests[path.basename(file, '.json')] = JSON.parse(
245+
tests[path.basename(file, '.json')] = EJSON.parse(
105246
fs.readFileSync(path.join(__dirname, 'specs/bson-binary-vector', file), 'utf8')
106247
);
107248
}
@@ -120,20 +261,22 @@ describe('BSON Binary Vector spec tests', () => {
120261
* > MUST assert that the input float array is the same after encoding and decoding.
121262
*/
122263
for (const test of valid) {
123-
it(`encode ${test.description}`, function () {
124-
const bin = make(test.vector, test.dtype_hex, test.padding);
264+
describe(test.description, () => {
265+
it(`calling Binary.${dtypeToHelper(test.dtype_hex)}() with input numbers and serializing it does not throw`, function () {
266+
const bin = make(test.vector!, test.dtype_hex, test.padding);
125267

126-
const buffer = BSON.serialize({ [suite.test_key]: bin });
127-
expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase());
128-
});
268+
const buffer = BSON.serialize({ [suite.test_key]: bin });
269+
expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase());
270+
});
129271

130-
it(`decode ${test.description}`, function () {
131-
const canonical_bson = fromHex(test.canonical_bson!.toLowerCase());
132-
const doc = BSON.deserialize(canonical_bson);
272+
it(`creating a Binary instance from BSON bytes does not throw`, function () {
273+
const canonical_bson = fromHex(test.canonical_bson!.toLowerCase());
274+
const doc = BSON.deserialize(canonical_bson);
133275

134-
expect(doc[suite.test_key].sub_type).to.equal(0x09);
135-
expect(doc[suite.test_key].buffer[0]).to.equal(+test.dtype_hex);
136-
expect(doc[suite.test_key].buffer[1]).to.equal(test.padding);
276+
expect(doc[suite.test_key].sub_type).to.equal(0x09);
277+
expect(doc[suite.test_key].buffer[0]).to.equal(+test.dtype_hex);
278+
expect(doc[suite.test_key].buffer[1]).to.equal(test.padding);
279+
});
137280
});
138281
}
139282
});
@@ -147,46 +290,18 @@ describe('BSON Binary Vector spec tests', () => {
147290
for (const test of invalid) {
148291
const expectedErrorMessage = invalidTestExpectedError.get(test.description);
149292

150-
it(`bson: ${test.description}`, function () {
151-
let thrownError: Error | undefined;
152-
try {
153-
const bin = make(test.vector, test.dtype_hex, test.padding);
154-
BSON.serialize({ bin });
155-
} catch (error) {
156-
thrownError = error;
293+
describe(test.description, () => {
294+
if (test.canonical_bson != null) {
295+
testVectorInvalidBSONBytes(test, expectedErrorMessage);
157296
}
158297

159-
if (thrownError?.message.startsWith('unsupported_error')) {
160-
expect(
161-
expectedErrorMessage,
162-
'We expect a certain error message but got an unsupported error'
163-
).to.be.false;
164-
this.skip();
298+
if (test.vector != null) {
299+
testVectorInvalidInputValues(test, expectedErrorMessage);
165300
}
166301

167-
expect(thrownError).to.be.instanceOf(BSONError);
168-
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
169-
});
170-
171-
it(`extended json: ${test.description}`, function () {
172-
let thrownError: Error | undefined;
173-
try {
174-
const bin = make(test.vector, test.dtype_hex, test.padding);
175-
BSON.EJSON.stringify({ bin });
176-
} catch (error) {
177-
thrownError = error;
302+
if (test.vector == null && test.canonical_bson == null) {
303+
throw new Error('not testing anything for: ' + util.inspect(test));
178304
}
179-
180-
if (thrownError?.message.startsWith('unsupported_error')) {
181-
expect(
182-
expectedErrorMessage,
183-
'We expect a certain error message but got an unsupported error'
184-
).to.be.false;
185-
this.skip();
186-
}
187-
188-
expect(thrownError).to.be.instanceOf(BSONError);
189-
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
190305
});
191306
}
192307
});

0 commit comments

Comments
 (0)