Skip to content

Commit 244360f

Browse files
committed
do more UTF-8 validation if turned on
1 parent dad7547 commit 244360f

File tree

3 files changed

+107
-6
lines changed

3 files changed

+107
-6
lines changed

VelocyPack.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,11 @@ reference, for arrays and objects see below for details:
8181
- 0x40-0xbe : UTF-8-string, using V - 0x40 bytes (not Unicode characters!),
8282
length 0 is possible, so 0x40 is the empty string,
8383
maximal length is 126, note that strings here are not
84-
zero-terminated
84+
zero-terminated and may contain NUL bytes
8585
- 0xbf : long UTF-8-string, next 8 bytes are length of string in
8686
bytes (not Unicode characters) as little endian unsigned
8787
integer, note that long strings are not zero-terminated
88-
and may contain zero bytes
88+
and may contain NUL bytes
8989
- 0xc0-0xc7 : binary blob, next V - 0xbf bytes are the length of blob in
9090
bytes, note that binary blobs are not zero-terminated
9191
- 0xc8-0xcf : positive long packed BCD-encoded float, V - 0xc7 bytes follow
@@ -389,7 +389,7 @@ entries, as in this example:
389389
Similarly with type 0x0c and 2-byte offsets, byte length and number of
390390
subvalues, or with type 0x0e and 8-byte numbers.
391391

392-
Note that it is not recommended to encode short arrays with too long
392+
Note that it is not recommended to encode short objects with too long
393393
index tables.
394394

395395
### Special compact objects

src/Validator.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -456,12 +456,18 @@ void Validator::validateCompactObject(uint8_t const* ptr, std::size_t length) {
456456
// validate key
457457
validate(p, e - p, true);
458458
Slice key(p);
459-
if (!key.isString() && !key.isInteger()) {
459+
bool isString = key.isString();
460+
if (!isString && !key.isInteger()) {
460461
throw Exception(Exception::ValidatorInvalidLength, "Invalid object key type");
461462
}
463+
ValueLength keySize = key.byteSize();
464+
// validate key
465+
if (isString && options->validateUtf8Strings) {
466+
validate(p, keySize, true);
467+
}
462468

463469
// validate value
464-
p += key.byteSize();
470+
p += keySize;
465471
validate(p, e - p, true);
466472
p += Slice(p).byteSize();
467473
}
@@ -555,11 +561,16 @@ void Validator::validateIndexedObject(uint8_t const* ptr, std::size_t length) {
555561
validate(member, indexTable - member, true);
556562

557563
Slice key(member);
558-
if (!key.isString() && !key.isInteger()) {
564+
bool const isString = key.isString();
565+
if (!isString && !key.isInteger()) {
559566
throw Exception(Exception::ValidatorInvalidLength, "Invalid object key type");
560567
}
561568

562569
ValueLength const keySize = key.byteSize();
570+
if (isString && options->validateUtf8Strings) {
571+
validate(member, keySize, true);
572+
}
573+
563574
uint8_t const* value = member + keySize;
564575
if (value >= indexTable) {
565576
throw Exception(Exception::ValidatorInvalidLength, "Object value leaking into index table");

tests/testsValidator.cpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,96 @@ TEST(ValidatorTest, StringInvalidUtf8Long) {
513513
ASSERT_VELOCYPACK_EXCEPTION(validator.validate(value.c_str(), value.size()), Exception::InvalidUtf8Sequence);
514514
}
515515

516+
// Positive case: with Options::validateUtf8Strings enabled, an 8-byte object
// payload containing two short strings (head 0x41 = string of 1 byte, payload
// 0x41) must be accepted by Validator::validate().
// NOTE(review): 0x0b / 0x08 / 0x01 and the trailing 0x03 look like the indexed
// object header (type, byte length, item count) and a one-entry index table
// pointing at offset 3 -- confirm against the VelocyPack format spec.
TEST(ValidatorTest, StringValidUtf8ObjectWithValidation) {
517+
std::string const value("\x0b\x08\x01\x41\x41\x41\x41\x03", 8);
518+
519+
Options options;
520+
options.validateUtf8Strings = true;
521+
Validator validator(&options);
522+
ASSERT_TRUE(validator.validate(value.c_str(), value.size()));
523+
}
524+
525+
// Negative case: same 8-byte object layout as the valid variant, but the first
// short string (presumably the key, per the test name) carries the payload
// byte 0x80, a lone UTF-8 continuation byte. With validateUtf8Strings enabled,
// validation is expected to fail with Exception::InvalidUtf8Sequence.
TEST(ValidatorTest, StringInvalidUtf8ObjectKeyWithValidation) {
526+
std::string const value("\x0b\x08\x01\x41\x80\x41\x41\x03", 8);
527+
528+
Options options;
529+
options.validateUtf8Strings = true;
530+
Validator validator(&options);
531+
ASSERT_VELOCYPACK_EXCEPTION(validator.validate(value.c_str(), value.size()), Exception::InvalidUtf8Sequence);
532+
}
533+
534+
// Negative case: the first short string is valid ("\x41\x41"), but the second
// one (presumably the value, per the test name) carries the payload byte 0x80,
// a lone UTF-8 continuation byte. With validateUtf8Strings enabled, validation
// is expected to fail with Exception::InvalidUtf8Sequence.
TEST(ValidatorTest, StringInvalidUtf8ObjectValueWithValidation) {
535+
std::string const value("\x0b\x08\x01\x41\x41\x41\x80\x03", 8);
536+
537+
Options options;
538+
options.validateUtf8Strings = true;
539+
Validator validator(&options);
540+
ASSERT_VELOCYPACK_EXCEPTION(validator.validate(value.c_str(), value.size()), Exception::InvalidUtf8Sequence);
541+
}
542+
543+
// Negative case using the long-string encoding for the first string
// (presumably the key, per the test name): head 0xbf, followed by an 8-byte
// little-endian length of 1, followed by the single payload byte 0x80 (a lone
// UTF-8 continuation byte). The second string is the valid short string
// "\x41\x41". The whole payload is 16 bytes. With validateUtf8Strings enabled,
// validation is expected to fail with Exception::InvalidUtf8Sequence.
TEST(ValidatorTest, StringInvalidUtf8ObjectLongKeyWithValidation) {
544+
std::string const value("\x0b\x10\x01\xbf\x01\x00\x00\x00\x00\x00\x00\x00\x80\x41\x41\x03", 16);
545+
546+
Options options;
547+
options.validateUtf8Strings = true;
548+
Validator validator(&options);
549+
ASSERT_VELOCYPACK_EXCEPTION(validator.validate(value.c_str(), value.size()), Exception::InvalidUtf8Sequence);
550+
}
551+
552+
// Negative case using the long-string encoding for the second string
// (presumably the value, per the test name): the first string is the valid
// short string "\x41\x41"; the second has head 0xbf, an 8-byte little-endian
// length of 1, and the single payload byte 0x80 (a lone UTF-8 continuation
// byte). The whole payload is 16 bytes. With validateUtf8Strings enabled,
// validation is expected to fail with Exception::InvalidUtf8Sequence.
TEST(ValidatorTest, StringInvalidUtf8ObjectLongValueWithValidation) {
553+
std::string const value("\x0b\x10\x01\x41\x41\xbf\x01\x00\x00\x00\x00\x00\x00\x00\x80\x03", 16);
554+
555+
Options options;
556+
options.validateUtf8Strings = true;
557+
Validator validator(&options);
558+
ASSERT_VELOCYPACK_EXCEPTION(validator.validate(value.c_str(), value.size()), Exception::InvalidUtf8Sequence);
559+
}
560+
561+
// Positive case for the compact object form: with validateUtf8Strings enabled,
// a 7-byte payload containing two valid short strings ("\x41\x41" twice) must
// be accepted by Validator::validate().
// NOTE(review): 0x14 is presumably the compact object type byte, 0x07 the
// total byte length, and the trailing 0x01 the item count -- confirm against
// the VelocyPack format spec.
TEST(ValidatorTest, StringValidUtf8CompactObjectWithValidation) {
562+
std::string const value("\x14\x07\x41\x41\x41\x41\x01", 7);
563+
564+
Options options;
565+
options.validateUtf8Strings = true;
566+
Validator validator(&options);
567+
ASSERT_TRUE(validator.validate(value.c_str(), value.size()));
568+
}
569+
570+
// Negative case for the compact object form: the first short string
// (presumably the key, per the test name) carries the payload byte 0x80, a
// lone UTF-8 continuation byte; the second string is valid. With
// validateUtf8Strings enabled, validation is expected to fail with
// Exception::InvalidUtf8Sequence.
TEST(ValidatorTest, StringInvalidUtf8CompactObjectKeyWithValidation) {
571+
std::string const value("\x14\x07\x41\x80\x41\x41\x01", 7);
572+
573+
Options options;
574+
options.validateUtf8Strings = true;
575+
Validator validator(&options);
576+
ASSERT_VELOCYPACK_EXCEPTION(validator.validate(value.c_str(), value.size()), Exception::InvalidUtf8Sequence);
577+
}
578+
579+
// Negative case for the compact object form with a long-string first member
// (presumably the key, per the test name): head 0xbf, an 8-byte little-endian
// length of 1, then the single payload byte 0x80 (a lone UTF-8 continuation
// byte), followed by the valid short string "\x41\x41". The whole payload is
// 15 bytes. With validateUtf8Strings enabled, validation is expected to fail
// with Exception::InvalidUtf8Sequence.
TEST(ValidatorTest, StringInvalidUtf8CompactObjectLongKeyWithValidation) {
580+
std::string const value("\x14\x0f\xbf\x01\x00\x00\x00\x00\x00\x00\x00\x80\x41\x41\x01", 15);
581+
582+
Options options;
583+
options.validateUtf8Strings = true;
584+
Validator validator(&options);
585+
ASSERT_VELOCYPACK_EXCEPTION(validator.validate(value.c_str(), value.size()), Exception::InvalidUtf8Sequence);
586+
}
587+
588+
// Negative case for the compact object form: the first short string is valid
// ("\x41\x41"), but the second one (presumably the value, per the test name)
// carries the payload byte 0x80, a lone UTF-8 continuation byte. With
// validateUtf8Strings enabled, validation is expected to fail with
// Exception::InvalidUtf8Sequence.
TEST(ValidatorTest, StringInvalidUtf8CompactObjectValueWithValidation) {
589+
std::string const value("\x14\x07\x41\x41\x41\x80\x01", 7);
590+
591+
Options options;
592+
options.validateUtf8Strings = true;
593+
Validator validator(&options);
594+
ASSERT_VELOCYPACK_EXCEPTION(validator.validate(value.c_str(), value.size()), Exception::InvalidUtf8Sequence);
595+
}
596+
597+
// Negative case for the compact object form with a long-string second member
// (presumably the value, per the test name): the first string is the valid
// short string "\x41\x41"; the second has head 0xbf, an 8-byte little-endian
// length of 1, and the single payload byte 0x80 (a lone UTF-8 continuation
// byte). The whole payload is 15 bytes. With validateUtf8Strings enabled,
// validation is expected to fail with Exception::InvalidUtf8Sequence.
TEST(ValidatorTest, StringInvalidUtf8CompactLongObjectValueWithValidation) {
598+
std::string const value("\x14\x0f\x41\x41\xbf\x01\x00\x00\x00\x00\x00\x00\x00\x80\x01", 15);
599+
600+
Options options;
601+
options.validateUtf8Strings = true;
602+
Validator validator(&options);
603+
ASSERT_VELOCYPACK_EXCEPTION(validator.validate(value.c_str(), value.size()), Exception::InvalidUtf8Sequence);
604+
}
605+
516606
TEST(ValidatorTest, LongStringEmpty) {
517607
std::string const value("\xbf\x00\x00\x00\x00\x00\x00\x00\x00", 9);
518608

0 commit comments

Comments
 (0)