Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(NODE-6735, NODE-6711): add BSON vector validation to EJSON stringification, serialization and conversion to native types #748

Merged
merged 13 commits into from
Apr 3, 2025
Merged
26 changes: 23 additions & 3 deletions src/binary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ export class Binary extends BSONValue {
throw new BSONError('Binary datatype field is not Int8');
}

validateBinaryVector(this);

return new Int8Array(
this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
);
Expand All @@ -361,6 +363,8 @@ export class Binary extends BSONValue {
throw new BSONError('Binary datatype field is not Float32');
}

validateBinaryVector(this);

const floatBytes = new Uint8Array(
this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
);
Expand All @@ -387,6 +391,8 @@ export class Binary extends BSONValue {
throw new BSONError('Binary datatype field is not packed bit');
}

validateBinaryVector(this);

return new Uint8Array(
this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
);
Expand All @@ -409,6 +415,8 @@ export class Binary extends BSONValue {
throw new BSONError('Binary datatype field is not packed bit');
}

validateBinaryVector(this);

const byteCount = this.length() - 2;
const bitCount = byteCount * 8 - this.buffer[1];
const bits = new Int8Array(bitCount);
Expand All @@ -434,7 +442,9 @@ export class Binary extends BSONValue {
buffer[1] = 0;
const intBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength);
buffer.set(intBytes, 2);
return new this(buffer, this.SUBTYPE_VECTOR);
const bin = new this(buffer, this.SUBTYPE_VECTOR);
validateBinaryVector(bin);
return bin;
}

/** Constructs a Binary representing an Float32 Vector. */
Expand All @@ -448,7 +458,9 @@ export class Binary extends BSONValue {

if (NumberUtils.isBigEndian) ByteUtils.swap32(new Uint8Array(binaryBytes.buffer, 2));

return new this(binaryBytes, this.SUBTYPE_VECTOR);
const bin = new this(binaryBytes, this.SUBTYPE_VECTOR);
validateBinaryVector(bin);
return bin;
}

/**
Expand All @@ -461,7 +473,9 @@ export class Binary extends BSONValue {
buffer[0] = Binary.VECTOR_TYPE.PackedBit;
buffer[1] = padding;
buffer.set(array, 2);
return new this(buffer, this.SUBTYPE_VECTOR);
const bin = new this(buffer, this.SUBTYPE_VECTOR);
validateBinaryVector(bin);
return bin;
}

/**
Expand Down Expand Up @@ -517,6 +531,12 @@ export function validateBinaryVector(vector: Binary): void {
throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
}

if (datatype === Binary.VECTOR_TYPE.Float32) {
if (size !== 0 && size - 2 !== 0 && (size - 2) % 4 !== 0) {
throw new BSONError('Invalid Vector: Float32 vector must contain a multiple of 4 bytes');
}
}

if (datatype === Binary.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) {
throw new BSONError(
'Invalid Vector: padding must be zero for packed bit vectors that are empty'
Expand Down
245 changes: 180 additions & 65 deletions test/node/bson_binary_vector.spec.test.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import * as util from 'util';
import * as fs from 'fs';
import * as path from 'path';
import { BSON, BSONError, Binary } from '../register-bson';
import { BSON, BSONError, Binary, EJSON } from '../register-bson';
import { expect } from 'chai';

const { toHex, fromHex } = BSON.onDemand.ByteUtils;

type VectorHexType = '0x03' | '0x27' | '0x10';
type VectorTest = {
description: string;
vector: (number | string)[];
vector?: number[];
valid: boolean;
dtype_hex: VectorHexType;
padding?: number;
Expand All @@ -17,15 +18,11 @@ type VectorTest = {
type VectorSuite = { description: string; test_key: string; tests: VectorTest[] };

function fixFloats(f: string | number): number {
// Should be nothing to "fix" but validates we didn't get
// an unexpected type so we don't silently fail on it during the test
if (typeof f === 'number') {
return f;
}
if (f === 'inf') {
return Infinity;
}
if (f === '-inf') {
return -Infinity;
}
throw new Error(`test format error: unknown float value: ${f}`);
}

Expand All @@ -49,7 +46,20 @@ function fixBits(f: number | string): number {
return f;
}

function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary {
function dtypeToHelper(dtype_hex: string) {
switch (dtype_hex) {
case '0x10' /* packed_bit */:
return 'fromPackedBits';
case '0x03' /* int8 */:
return 'fromInt8Array';
case '0x27' /* float32 */:
return 'fromFloat32Array';
default:
throw new Error(`Unknown dtype_hex: ${dtype_hex}`);
}
}

function make(vector: number[], dtype_hex: VectorHexType, padding?: number): Binary {
let binary: Binary;
switch (dtype_hex) {
case '0x10' /* packed_bit */:
Expand Down Expand Up @@ -87,21 +97,152 @@ const invalidTestExpectedError = new Map()
'Invalid Vector: padding must be a value between 0 and 7'
)
.set('Negative padding PACKED_BIT', 'Invalid Vector: padding must be a value between 0 and 7')
// skipped
.set('Overflow Vector PACKED_BIT', false)
.set('Underflow Vector PACKED_BIT', false)
.set('Overflow Vector INT8', false)
.set('Underflow Vector INT8', false)
.set('INT8 with float inputs', false)
// duplicate test! but also skipped.
.set('Vector with float values PACKED_BIT', false)
.set('Vector with float values PACKED_BIT', false);
.set(
'Insufficient vector data FLOAT32',
'Invalid Vector: Float32 vector must contain a multiple of 4 bytes'
)
// These are not possible given the constraints of the input types allowed:
// our helpers will throw an "unsupported_error" for these
.set('Overflow Vector PACKED_BIT', 'unsupported_error')
.set('Underflow Vector PACKED_BIT', 'unsupported_error')
.set('Overflow Vector INT8', 'unsupported_error')
.set('Underflow Vector INT8', 'unsupported_error')
.set('INT8 with float inputs', 'unsupported_error')
.set('Vector with float values PACKED_BIT', 'unsupported_error');

function catchError<T>(
fn: () => T
): { status: 'returned'; result: T } | { status: 'thrown'; result: Error } {
try {
return { status: 'returned', result: fn() };
} catch (error) {
return { status: 'thrown', result: error };
}
}

function testVectorInvalidInputValues(test: VectorTest, expectedErrorMessage: string) {
const binaryCreation = catchError(make.bind(null, test.vector!, test.dtype_hex, test.padding));
const bsonBytesCreation =
binaryCreation.status !== 'thrown'
? catchError(BSON.serialize.bind(null, { bin: binaryCreation.result }))
: undefined;
const ejsonStringCreation =
binaryCreation.status !== 'thrown'
? catchError(BSON.EJSON.stringify.bind(null, { bin: binaryCreation.result }))
: undefined;

const binaryHelperValidations = [
'Padding specified with no vector data PACKED_BIT',
'Exceeding maximum padding PACKED_BIT',
'Negative padding PACKED_BIT',
...Array.from(invalidTestExpectedError.entries())
.filter(([, v]) => v === 'unsupported_error')
.map(([k]) => k)
];

const errorType = expectedErrorMessage === 'unsupported_error' ? Error : BSONError;
const errorName = expectedErrorMessage === 'unsupported_error' ? 'Error' : 'BSONError';

const check = outcome => {
expect(outcome).to.exist;
expect(outcome.status).to.equal('thrown');
expect(outcome.result).to.be.instanceOf(errorType);
expect(outcome.result).to.match(new RegExp(expectedErrorMessage));
};

if (binaryHelperValidations.includes(test.description)) {
describe('when creating a BSON Vector given invalid input values', () => {
it(`Binary.${dtypeToHelper(test.dtype_hex)}() throws a ${errorName}`, function () {
check(binaryCreation);
});
});
} else {
expect(errorName).to.equal('BSONError'); // unsupported_error are only when making vectors

describe('when encoding a BSON Vector given invalid input values', () => {
it(`Binary.${dtypeToHelper(test.dtype_hex)}() does not throw`, function () {
expect(binaryCreation).to.have.property('status', 'returned');
});

it(`BSON.serialize() throws a BSONError`, function () {
check(bsonBytesCreation);
});

it(`EJSON.stringify() throws a BSONError`, function () {
check(ejsonStringCreation);
});
});
}
}

function testVectorInvalidBSONBytes(test: VectorTest, expectedErrorMessage: string) {
describe('when encoding a Binary Vector made from invalid bytes', () => {
it(`BSON.serialize() throw a BSONError`, function () {
let thrownError: Error | undefined;
const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex'));

try {
BSON.serialize(bin);
} catch (error) {
thrownError = error;
}

expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError);
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
});

const toHelper = dtypeToHelper(test.dtype_hex).replace('from', 'to');
it(`Binary.${toHelper}() throw a BSONError`, function () {
let thrownError: Error | undefined;
const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex'));

try {
bin.vector[toHelper]();
} catch (error) {
thrownError = error;
}

expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError);
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
});

if (toHelper === 'toPackedBits') {
it(`Binary.toBits() throw a BSONError`, function () {
let thrownError: Error | undefined;
const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex'));

try {
bin.vector.toBits();
} catch (error) {
thrownError = error;
}

expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError);
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
});
}

it(`EJSON.stringify() throw a BSONError`, function () {
let thrownError: Error | undefined;
const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex'));

try {
EJSON.stringify(bin);
} catch (error) {
thrownError = error;
}

expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError);
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
});
});
}

describe('BSON Binary Vector spec tests', () => {
const tests: Record<string, VectorSuite> = Object.create(null);

for (const file of fs.readdirSync(path.join(__dirname, 'specs/bson-binary-vector'))) {
tests[path.basename(file, '.json')] = JSON.parse(
tests[path.basename(file, '.json')] = EJSON.parse(
fs.readFileSync(path.join(__dirname, 'specs/bson-binary-vector', file), 'utf8')
);
}
Expand All @@ -120,20 +261,22 @@ describe('BSON Binary Vector spec tests', () => {
* > MUST assert that the input float array is the same after encoding and decoding.
*/
for (const test of valid) {
it(`encode ${test.description}`, function () {
const bin = make(test.vector, test.dtype_hex, test.padding);
describe(test.description, () => {
it(`calling Binary.${dtypeToHelper(test.dtype_hex)}() with input numbers and serializing it does not throw`, function () {
const bin = make(test.vector!, test.dtype_hex, test.padding);

const buffer = BSON.serialize({ [suite.test_key]: bin });
expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase());
});
const buffer = BSON.serialize({ [suite.test_key]: bin });
expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase());
});

it(`decode ${test.description}`, function () {
const canonical_bson = fromHex(test.canonical_bson!.toLowerCase());
const doc = BSON.deserialize(canonical_bson);
it(`creating a Binary instance from BSON bytes does not throw`, function () {
const canonical_bson = fromHex(test.canonical_bson!.toLowerCase());
const doc = BSON.deserialize(canonical_bson);

expect(doc[suite.test_key].sub_type).to.equal(0x09);
expect(doc[suite.test_key].buffer[0]).to.equal(+test.dtype_hex);
expect(doc[suite.test_key].buffer[1]).to.equal(test.padding);
expect(doc[suite.test_key].sub_type).to.equal(0x09);
expect(doc[suite.test_key].buffer[0]).to.equal(+test.dtype_hex);
expect(doc[suite.test_key].buffer[1]).to.equal(test.padding);
});
});
}
});
Expand All @@ -147,46 +290,18 @@ describe('BSON Binary Vector spec tests', () => {
for (const test of invalid) {
const expectedErrorMessage = invalidTestExpectedError.get(test.description);

it(`bson: ${test.description}`, function () {
let thrownError: Error | undefined;
try {
const bin = make(test.vector, test.dtype_hex, test.padding);
BSON.serialize({ bin });
} catch (error) {
thrownError = error;
describe(test.description, () => {
if (test.canonical_bson != null) {
testVectorInvalidBSONBytes(test, expectedErrorMessage);
}

if (thrownError?.message.startsWith('unsupported_error')) {
expect(
expectedErrorMessage,
'We expect a certain error message but got an unsupported error'
).to.be.false;
this.skip();
if (test.vector != null) {
testVectorInvalidInputValues(test, expectedErrorMessage);
}

expect(thrownError).to.be.instanceOf(BSONError);
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
});

it(`extended json: ${test.description}`, function () {
let thrownError: Error | undefined;
try {
const bin = make(test.vector, test.dtype_hex, test.padding);
BSON.EJSON.stringify({ bin });
} catch (error) {
thrownError = error;
if (test.vector == null && test.canonical_bson == null) {
throw new Error('not testing anything for: ' + util.inspect(test));
}

if (thrownError?.message.startsWith('unsupported_error')) {
expect(
expectedErrorMessage,
'We expect a certain error message but got an unsupported error'
).to.be.false;
this.skip();
}

expect(thrownError).to.be.instanceOf(BSONError);
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
});
}
});
Expand Down
Loading