From 40d13d326a6a32e0778562033fae7ebc411f909c Mon Sep 17 00:00:00 2001 From: Jason Paulos Date: Mon, 30 Oct 2023 12:07:35 -0400 Subject: [PATCH 1/2] Add decoding option for raw strings --- README.md | 26 +++++++++++--------- src/Decoder.ts | 28 ++++++++++++++++++--- test/decode-raw-strings.test.ts | 43 +++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 15 deletions(-) create mode 100644 test/decode-raw-strings.test.ts diff --git a/README.md b/README.md index 5cebb1f..08c1fce 100644 --- a/README.md +++ b/README.md @@ -148,12 +148,15 @@ Name|Type|Default extensionCodec | ExtensionCodec | `ExtensionCodec.defaultCodec` context | user-defined | - useBigInt64 | boolean | false +rawStrings | boolean | false maxStrLength | number | `4_294_967_295` (UINT32_MAX) maxBinLength | number | `4_294_967_295` (UINT32_MAX) maxArrayLength | number | `4_294_967_295` (UINT32_MAX) maxMapLength | number | `4_294_967_295` (UINT32_MAX) maxExtLength | number | `4_294_967_295` (UINT32_MAX) +To skip UTF-8 decoding of strings, `rawStrings` can be set to `true`. In this case, strings are decoded into `Uint8Array`. + You can use `max${Type}Length` to limit the length of each type decoded. 
### `decodeMulti(buffer: ArrayLike | BufferSource, options?: DecoderOptions): Generator` @@ -498,18 +501,19 @@ null, undefined|nil|null (*1) boolean (true, false)|bool family|boolean (true, false) number (53-bit int)|int family|number number (64-bit float)|float family|number -string|str family|string -ArrayBufferView |bin family|Uint8Array (*2) +string|str family|string (*2) +ArrayBufferView |bin family|Uint8Array (*3) Array|array family|Array -Object|map family|Object (*3) -Date|timestamp ext family|Date (*4) -bigint|N/A|N/A (*5) +Object|map family|Object (4) +Date|timestamp ext family|Date (*5) +bigint|N/A|N/A (*6) * *1 Both `null` and `undefined` are mapped to `nil` (`0xC0`) type, and are decoded into `null` -* *2 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array` -* *3 In handling `Object`, it is regarded as `Record` in terms of TypeScript -* *4 MessagePack timestamps may have nanoseconds, which will lost when it is decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec. -* *5 bigint is not supported in `useBigInt64: false` mode, but you can define an extension codec for it. +* *2 If you'd like to skip UTF-8 decoding of strings, set `rawStrings: true`. In this case, strings are decoded into `Uint8Array`. +* *3 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array` +* *4 In handling `Object`, it is regarded as `Record` in terms of TypeScript +* *5 MessagePack timestamps may have nanoseconds, which will be lost when it is decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec. +* *6 bigint is not supported in `useBigInt64: false` mode, but you can define an extension codec for it.
If you set `useBigInt64: true`, the following mapping is used: @@ -519,7 +523,7 @@ null, undefined|nil|null boolean (true, false)|bool family|boolean (true, false) **number (32-bit int)**|int family|number **number (except for the above)**|float family|number -**bigint**|int64 / uint64|bigint (*6) +**bigint**|int64 / uint64|bigint (*7) string|str family|string ArrayBufferView |bin family|Uint8Array Array|array family|Array @@ -527,7 +531,7 @@ Object|map family|Object Date|timestamp ext family|Date -* *6 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined. +* *7 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined. ## Prerequisites diff --git a/src/Decoder.ts b/src/Decoder.ts index eedb0fb..fd8a6cb 100644 --- a/src/Decoder.ts +++ b/src/Decoder.ts @@ -20,6 +20,17 @@ export type DecoderOptions = Readonly< */ useBigInt64: boolean; + /** + * By default, string values will be decoded as UTF-8 strings. However, if this option is true, + * string values will be returned as Uint8Arrays without additional decoding. + * + * This is useful if the strings may contain invalid UTF-8 sequences. + * + * Note that this option only applies to string values, not map keys. Additionally, when + * enabled, raw string length is limited by the maxBinLength option. + */ + rawStrings: boolean; + /** * Maximum string length. 
* @@ -195,6 +206,7 @@ export class Decoder { private readonly extensionCodec: ExtensionCodecType; private readonly context: ContextType; private readonly useBigInt64: boolean; + private readonly rawStrings: boolean; private readonly maxStrLength: number; private readonly maxBinLength: number; private readonly maxArrayLength: number; @@ -215,6 +227,7 @@ export class Decoder { this.context = (options as { context: ContextType } | undefined)?.context as ContextType; // needs a type assertion because EncoderOptions has no context property when ContextType is undefined this.useBigInt64 = options?.useBigInt64 ?? false; + this.rawStrings = options?.rawStrings ?? false; this.maxStrLength = options?.maxStrLength ?? UINT32_MAX; this.maxBinLength = options?.maxBinLength ?? UINT32_MAX; this.maxArrayLength = options?.maxArrayLength ?? UINT32_MAX; @@ -399,7 +412,7 @@ export class Decoder { } else { // fixstr (101x xxxx) 0xa0 - 0xbf const byteLength = headByte - 0xa0; - object = this.decodeUtf8String(byteLength, 0); + object = this.decodeString(byteLength, 0); } } else if (headByte === 0xc0) { // nil @@ -451,15 +464,15 @@ export class Decoder { } else if (headByte === 0xd9) { // str 8 const byteLength = this.lookU8(); - object = this.decodeUtf8String(byteLength, 1); + object = this.decodeString(byteLength, 1); } else if (headByte === 0xda) { // str 16 const byteLength = this.lookU16(); - object = this.decodeUtf8String(byteLength, 2); + object = this.decodeString(byteLength, 2); } else if (headByte === 0xdb) { // str 32 const byteLength = this.lookU32(); - object = this.decodeUtf8String(byteLength, 4); + object = this.decodeString(byteLength, 4); } else if (headByte === 0xdc) { // array 16 const size = this.readU16(); @@ -637,6 +650,13 @@ export class Decoder { this.stack.pushArrayState(size); } + private decodeString(byteLength: number, headerOffset: number): string | Uint8Array { + if (!this.rawStrings || this.stateIsMapKey()) { + return this.decodeUtf8String(byteLength, 
headerOffset); + } + return this.decodeBinary(byteLength, headerOffset); + } + private decodeUtf8String(byteLength: number, headerOffset: number): string { if (byteLength > this.maxStrLength) { throw new DecodeError( diff --git a/test/decode-raw-strings.test.ts b/test/decode-raw-strings.test.ts new file mode 100644 index 0000000..cdaaa1b --- /dev/null +++ b/test/decode-raw-strings.test.ts @@ -0,0 +1,43 @@ +import assert from "assert"; +import { encode, decode } from "../src"; +import type { DecoderOptions } from "../src"; + +describe("decode with rawStrings specified", () => { + const options = { rawStrings: true } satisfies DecoderOptions; + + it("decodes string as binary", () => { + const actual = decode(encode("foo"), options); + const expected = Uint8Array.from([0x66, 0x6f, 0x6f]); + assert.deepStrictEqual(actual, expected); + }); + + it("decodes invalid UTF-8 string as binary", () => { + const invalidUtf8String = Uint8Array.from([61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176, 184, 221, 66, 188, 171, 36, 135, 121]); + const encoded = Uint8Array.from([217 /* 0xd9: str 8 header (196/0xc4 would be bin 8 and bypass rawStrings) */, 32 /* byte length */, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176, 184, 221, 66, 188, 171, 36, 135, 121]); + + const actual = decode(encoded, options); + assert.deepStrictEqual(actual, invalidUtf8String); + }); + + it("decodes object keys as strings", () => { + const actual = decode(encode({ key: "foo" }), options); + const expected = { key: Uint8Array.from([0x66, 0x6f, 0x6f]) }; + assert.deepStrictEqual(actual, expected); + }); + + it("ignores maxStrLength", () => { + const lengthLimitedOptions = { ...options, maxStrLength: 1 } satisfies DecoderOptions; + + const actual = decode(encode("foo"), lengthLimitedOptions); + const expected = Uint8Array.from([0x66, 0x6f, 0x6f]); + assert.deepStrictEqual(actual, expected); + }); + + it("respects maxBinLength", () => { + const lengthLimitedOptions = { 
...options, maxBinLength: 1 } satisfies DecoderOptions; + + assert.throws(() => { + decode(encode("foo"), lengthLimitedOptions); + }, /max length exceeded/i); + }); +}); From 373ec36a6a8658a491882cecb5bd3748391aba48 Mon Sep 17 00:00:00 2001 From: Jason Paulos Date: Mon, 30 Oct 2023 17:08:40 -0400 Subject: [PATCH 2/2] fixup --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 08c1fce..37b6b45 100644 --- a/README.md +++ b/README.md @@ -504,7 +504,7 @@ number (64-bit float)|float family|number string|str family|string (*2) ArrayBufferView |bin family|Uint8Array (*3) Array|array family|Array -Object|map family|Object (4) +Object|map family|Object (*4) Date|timestamp ext family|Date (*5) bigint|N/A|N/A (*6)