Skip to content

Commit 0bb101d

Browse files
authored
Merge pull request #235 from jasonpaulos/decode-raw-strings
Allow decoding raw strings
2 parents 2209b7e + 3b6ef80 commit 0bb101d

File tree

3 files changed

+82
-15
lines changed

3 files changed

+82
-15
lines changed

README.md

+15-11
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,15 @@ Name|Type|Default
148148
extensionCodec | ExtensionCodec | `ExtensionCodec.defaultCodec`
149149
context | user-defined | -
150150
useBigInt64 | boolean | false
151+
rawStrings | boolean | false
151152
maxStrLength | number | `4_294_967_295` (UINT32_MAX)
152153
maxBinLength | number | `4_294_967_295` (UINT32_MAX)
153154
maxArrayLength | number | `4_294_967_295` (UINT32_MAX)
154155
maxMapLength | number | `4_294_967_295` (UINT32_MAX)
155156
maxExtLength | number | `4_294_967_295` (UINT32_MAX)
156157

158+
To skip UTF-8 decoding of strings, `rawStrings` can be set to `true`. In this case, strings are decoded into `Uint8Array`.
159+
157160
You can use `max${Type}Length` to limit the length of each type decoded.
158161

159162
### `decodeMulti(buffer: ArrayLike<number> | BufferSource, options?: DecoderOptions): Generator<unknown, void, unknown>`
@@ -498,18 +501,19 @@ null, undefined|nil|null (*1)
498501
boolean (true, false)|bool family|boolean (true, false)
499502
number (53-bit int)|int family|number
500503
number (64-bit float)|float family|number
501-
string|str family|string
502-
ArrayBufferView |bin family|Uint8Array (*2)
504+
string|str family|string (*2)
505+
ArrayBufferView |bin family|Uint8Array (*3)
503506
Array|array family|Array
504-
Object|map family|Object (*3)
505-
Date|timestamp ext family|Date (*4)
506-
bigint|N/A|N/A (*5)
507+
Object|map family|Object (*4)
508+
Date|timestamp ext family|Date (*5)
509+
bigint|N/A|N/A (*6)
507510

508511
* *1 Both `null` and `undefined` are mapped to `nil` (`0xC0`) type, and are decoded into `null`
509-
* *2 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array`
510-
* *3 In handling `Object`, it is regarded as `Record<string, unknown>` in terms of TypeScript
511-
* *4 MessagePack timestamps may have nanoseconds, which will be lost when they are decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec.
512-
* *5 bigint is not supported in `useBigInt64: false` mode, but you can define an extension codec for it.
512+
* *2 If you'd like to skip UTF-8 decoding of strings, set `rawStrings: true`. In this case, strings are decoded into `Uint8Array`.
513+
* *3 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array`
514+
* *4 In handling `Object`, it is regarded as `Record<string, unknown>` in terms of TypeScript
515+
* *5 MessagePack timestamps may have nanoseconds, which will be lost when they are decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec.
516+
* *6 bigint is not supported in `useBigInt64: false` mode, but you can define an extension codec for it.
513517

514518
If you set `useBigInt64: true`, the following mapping is used:
515519

@@ -519,15 +523,15 @@ null, undefined|nil|null
519523
boolean (true, false)|bool family|boolean (true, false)
520524
**number (32-bit int)**|int family|number
521525
**number (except for the above)**|float family|number
522-
**bigint**|int64 / uint64|bigint (*6)
526+
**bigint**|int64 / uint64|bigint (*7)
523527
string|str family|string
524528
ArrayBufferView |bin family|Uint8Array
525529
Array|array family|Array
526530
Object|map family|Object
527531
Date|timestamp ext family|Date
528532

529533

530-
* *6 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined.
534+
* *7 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined.
531535

532536
## Prerequisites
533537

src/Decoder.ts

+24-4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,17 @@ export type DecoderOptions<ContextType = undefined> = Readonly<
2020
*/
2121
useBigInt64: boolean;
2222

23+
/**
24+
* By default, string values will be decoded as UTF-8 strings. However, if this option is true,
25+
* string values will be returned as Uint8Arrays without additional decoding.
26+
*
27+
* This is useful if the strings may contain invalid UTF-8 sequences.
28+
*
29+
* Note that this option only applies to string values, not map keys. Additionally, when
30+
* enabled, raw string length is limited by the maxBinLength option.
31+
*/
32+
rawStrings: boolean;
33+
2334
/**
2435
* Maximum string length.
2536
*
@@ -195,6 +206,7 @@ export class Decoder<ContextType = undefined> {
195206
private readonly extensionCodec: ExtensionCodecType<ContextType>;
196207
private readonly context: ContextType;
197208
private readonly useBigInt64: boolean;
209+
private readonly rawStrings: boolean;
198210
private readonly maxStrLength: number;
199211
private readonly maxBinLength: number;
200212
private readonly maxArrayLength: number;
@@ -215,6 +227,7 @@ export class Decoder<ContextType = undefined> {
215227
this.context = (options as { context: ContextType } | undefined)?.context as ContextType; // needs a type assertion because DecoderOptions has no context property when ContextType is undefined
216228

217229
this.useBigInt64 = options?.useBigInt64 ?? false;
230+
this.rawStrings = options?.rawStrings ?? false;
218231
this.maxStrLength = options?.maxStrLength ?? UINT32_MAX;
219232
this.maxBinLength = options?.maxBinLength ?? UINT32_MAX;
220233
this.maxArrayLength = options?.maxArrayLength ?? UINT32_MAX;
@@ -399,7 +412,7 @@ export class Decoder<ContextType = undefined> {
399412
} else {
400413
// fixstr (101x xxxx) 0xa0 - 0xbf
401414
const byteLength = headByte - 0xa0;
402-
object = this.decodeUtf8String(byteLength, 0);
415+
object = this.decodeString(byteLength, 0);
403416
}
404417
} else if (headByte === 0xc0) {
405418
// nil
@@ -451,15 +464,15 @@ export class Decoder<ContextType = undefined> {
451464
} else if (headByte === 0xd9) {
452465
// str 8
453466
const byteLength = this.lookU8();
454-
object = this.decodeUtf8String(byteLength, 1);
467+
object = this.decodeString(byteLength, 1);
455468
} else if (headByte === 0xda) {
456469
// str 16
457470
const byteLength = this.lookU16();
458-
object = this.decodeUtf8String(byteLength, 2);
471+
object = this.decodeString(byteLength, 2);
459472
} else if (headByte === 0xdb) {
460473
// str 32
461474
const byteLength = this.lookU32();
462-
object = this.decodeUtf8String(byteLength, 4);
475+
object = this.decodeString(byteLength, 4);
463476
} else if (headByte === 0xdc) {
464477
// array 16
465478
const size = this.readU16();
@@ -637,6 +650,13 @@ export class Decoder<ContextType = undefined> {
637650
this.stack.pushArrayState(size);
638651
}
639652

653+
private decodeString(byteLength: number, headerOffset: number): string | Uint8Array {
654+
if (!this.rawStrings || this.stateIsMapKey()) {
655+
return this.decodeUtf8String(byteLength, headerOffset);
656+
}
657+
return this.decodeBinary(byteLength, headerOffset);
658+
}
659+
640660
private decodeUtf8String(byteLength: number, headerOffset: number): string {
641661
if (byteLength > this.maxStrLength) {
642662
throw new DecodeError(

test/decode-raw-strings.test.ts

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import assert from "assert";
2+
import { encode, decode } from "../src";
3+
import type { DecoderOptions } from "../src";
4+
5+
describe("decode with rawStrings specified", () => {
6+
const options = { rawStrings: true } satisfies DecoderOptions;
7+
8+
it("decodes string as binary", () => {
9+
const actual = decode(encode("foo"), options);
10+
const expected = Uint8Array.from([0x66, 0x6f, 0x6f]);
11+
assert.deepStrictEqual(actual, expected);
12+
});
13+
14+
it("decodes invalid UTF-8 string as binary", () => {
15+
const invalidUtf8String = Uint8Array.from([61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176, 184, 221, 66, 188, 171, 36, 135, 121]);
16+
const encoded = Uint8Array.from([196, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176, 184, 221, 66, 188, 171, 36, 135, 121]);
17+
18+
const actual = decode(encoded, options);
19+
assert.deepStrictEqual(actual, invalidUtf8String);
20+
});
21+
22+
it("decodes object keys as strings", () => {
23+
const actual = decode(encode({ key: "foo" }), options);
24+
const expected = { key: Uint8Array.from([0x66, 0x6f, 0x6f]) };
25+
assert.deepStrictEqual(actual, expected);
26+
});
27+
28+
it("ignores maxStrLength", () => {
29+
const lengthLimitedOptions = { ...options, maxStrLength: 1 } satisfies DecoderOptions;
30+
31+
const actual = decode(encode("foo"), lengthLimitedOptions);
32+
const expected = Uint8Array.from([0x66, 0x6f, 0x6f]);
33+
assert.deepStrictEqual(actual, expected);
34+
});
35+
36+
it("respects maxBinLength", () => {
37+
const lengthLimitedOptions = { ...options, maxBinLength: 1 } satisfies DecoderOptions;
38+
39+
assert.throws(() => {
40+
decode(encode("foo"), lengthLimitedOptions);
41+
}, /max length exceeded/i);
42+
});
43+
});

0 commit comments

Comments
 (0)