Skip to content

Commit e5fbafb

Browse files
committed
src: use stack allocation for small string encoding
Use stack-allocated buffers in StringBytes::Encode() for small inputs instead of heap-allocating via UncheckedMalloc for every call. Refs: nodejs/performance#194
1 parent 0d7e4b1 commit e5fbafb

File tree

1 file changed

+99
-88
lines changed

1 file changed

+99
-88
lines changed

src/string_bytes.cc

Lines changed: 99 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -146,11 +146,57 @@ class ExternString: public ResourceType {
146146
};
147147

148148

149-
typedef ExternString<String::ExternalOneByteStringResource,
150-
char> ExternOneByteString;
151-
typedef ExternString<String::ExternalStringResource,
152-
uint16_t> ExternTwoByteString;
149+
// Aliases for the two ExternString instantiations used in this file: V8's
// one-byte external string resource paired with char, and V8's two-byte
// external string resource paired with uint16_t.
using ExternOneByteString =
    ExternString<String::ExternalOneByteStringResource, char>;
using ExternTwoByteString =
    ExternString<String::ExternalStringResource, uint16_t>;
153+
154+
template <typename EncodeFn>
155+
static MaybeLocal<Value> EncodeOneByteString(Isolate* isolate,
156+
size_t length,
157+
EncodeFn encode) {
158+
// 512B: covers common small outputs (hex SHA-256/512, UUIDs).
159+
// Larger thresholds were benchmarked and regressed other paths.
160+
static constexpr size_t kStackThreshold = 512;
161+
if (length <= kStackThreshold) {
162+
char stack_buf[kStackThreshold];
163+
encode(stack_buf);
164+
return String::NewFromOneByte(isolate,
165+
reinterpret_cast<const uint8_t*>(stack_buf),
166+
v8::NewStringType::kNormal,
167+
static_cast<int>(length));
168+
}
169+
char* heap_buf = node::UncheckedMalloc<char>(length);
170+
if (heap_buf == nullptr) {
171+
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
172+
return MaybeLocal<Value>();
173+
}
174+
encode(heap_buf);
175+
return ExternOneByteString::New(isolate, heap_buf, length);
176+
}
153177

178+
template <typename EncodeFn>
179+
static MaybeLocal<Value> EncodeTwoByteString(Isolate* isolate,
180+
size_t char_length,
181+
EncodeFn encode) {
182+
// 512 bytes on the stack, matching the one-byte
183+
static constexpr size_t kStackThreshold = 256;
184+
if (char_length <= kStackThreshold) {
185+
uint16_t stack_buf[kStackThreshold];
186+
encode(stack_buf);
187+
return String::NewFromTwoByte(isolate,
188+
stack_buf,
189+
v8::NewStringType::kNormal,
190+
static_cast<int>(char_length));
191+
}
192+
uint16_t* heap_buf = node::UncheckedMalloc<uint16_t>(char_length);
193+
if (heap_buf == nullptr) {
194+
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
195+
return MaybeLocal<Value>();
196+
}
197+
encode(heap_buf);
198+
return ExternTwoByteString::New(isolate, heap_buf, char_length);
199+
}
154200

155201
template <>
156202
MaybeLocal<Value> ExternOneByteString::NewExternal(
@@ -513,27 +559,23 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
513559
MaybeLocal<String> val;
514560

515561
switch (encoding) {
516-
case BUFFER:
517-
{
518-
auto maybe_buf = Buffer::Copy(isolate, buf, buflen);
519-
Local<v8::Object> buf;
520-
if (!maybe_buf.ToLocal(&buf)) {
521-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
522-
}
523-
return buf;
562+
case BUFFER: {
563+
auto maybe_buf = Buffer::Copy(isolate, buf, buflen);
564+
Local<v8::Object> buf;
565+
if (!maybe_buf.ToLocal(&buf)) {
566+
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
524567
}
568+
return buf;
569+
}
525570

526571
case ASCII:
527572
buflen = keep_buflen_in_range(buflen);
528573
if (simdutf::validate_ascii_with_errors(buf, buflen).error) {
529574
// The input contains non-ASCII bytes.
530-
char* out = node::UncheckedMalloc(buflen);
531-
if (out == nullptr) {
532-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
533-
return MaybeLocal<Value>();
534-
}
535-
nbytes::ForceAscii(buf, out, buflen);
536-
return ExternOneByteString::New(isolate, out, buflen);
575+
576+
return EncodeOneByteString(isolate, buflen, [buf, buflen](char* dst) {
577+
nbytes::ForceAscii(buf, dst, buflen);
578+
});
537579
} else {
538580
return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
539581
}
@@ -557,14 +599,12 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
557599
isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
558600
return MaybeLocal<Value>();
559601
}
560-
uint16_t* dst = node::UncheckedMalloc<uint16_t>(u16size);
561-
if (u16size != 0 && dst == nullptr) {
562-
THROW_ERR_MEMORY_ALLOCATION_FAILED(isolate);
563-
return MaybeLocal<Value>();
564-
}
565-
size_t utf16len = simdutf::convert_valid_utf8_to_utf16(
566-
buf, buflen, reinterpret_cast<char16_t*>(dst));
567-
return ExternTwoByteString::New(isolate, dst, utf16len);
602+
return EncodeTwoByteString(
603+
isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
604+
size_t written = simdutf::convert_valid_utf8_to_utf16(
605+
buf, buflen, reinterpret_cast<char16_t*>(dst));
606+
CHECK_EQ(written, u16size);
607+
});
568608
}
569609

570610
val =
@@ -583,77 +623,52 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
583623
case BASE64: {
584624
buflen = keep_buflen_in_range(buflen);
585625
size_t dlen = simdutf::base64_length_from_binary(buflen);
586-
char* dst = node::UncheckedMalloc(dlen);
587-
if (dst == nullptr) {
588-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
589-
return MaybeLocal<Value>();
590-
}
591-
592-
size_t written = simdutf::binary_to_base64(buf, buflen, dst);
593-
CHECK_EQ(written, dlen);
594-
595-
return ExternOneByteString::New(isolate, dst, dlen);
626+
return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {
627+
size_t written = simdutf::binary_to_base64(buf, buflen, dst);
628+
CHECK_EQ(written, dlen);
629+
});
596630
}
597631

598632
case BASE64URL: {
599633
buflen = keep_buflen_in_range(buflen);
600634
size_t dlen =
601635
simdutf::base64_length_from_binary(buflen, simdutf::base64_url);
602-
char* dst = node::UncheckedMalloc(dlen);
603-
if (dst == nullptr) {
604-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
605-
return MaybeLocal<Value>();
606-
}
607-
608-
size_t written =
609-
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);
610-
CHECK_EQ(written, dlen);
611-
612-
return ExternOneByteString::New(isolate, dst, dlen);
636+
return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {
637+
size_t written =
638+
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);
639+
CHECK_EQ(written, dlen);
640+
});
613641
}
614642

615643
case HEX: {
616644
buflen = keep_buflen_in_range(buflen);
617645
size_t dlen = buflen * 2;
618-
char* dst = node::UncheckedMalloc(dlen);
619-
if (dst == nullptr) {
620-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
621-
return MaybeLocal<Value>();
622-
}
623-
size_t written = nbytes::HexEncode(buf, buflen, dst, dlen);
624-
CHECK_EQ(written, dlen);
625-
626-
return ExternOneByteString::New(isolate, dst, dlen);
646+
return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {
647+
size_t written = nbytes::HexEncode(buf, buflen, dst, dlen);
648+
CHECK_EQ(written, dlen);
649+
});
627650
}
628651

629652
case UCS2: {
630653
buflen = keep_buflen_in_range(buflen);
631654
size_t str_len = buflen / 2;
632655
if constexpr (IsBigEndian()) {
633-
uint16_t* dst = node::UncheckedMalloc<uint16_t>(str_len);
634-
if (str_len != 0 && dst == nullptr) {
635-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
636-
return MaybeLocal<Value>();
637-
}
638-
for (size_t i = 0, k = 0; k < str_len; i += 2, k += 1) {
639-
// The input is in *little endian*, because that's what Node.js
640-
// expects, so the high byte comes after the low byte.
641-
const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
642-
const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
643-
dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
644-
}
645-
return ExternTwoByteString::New(isolate, dst, str_len);
656+
return EncodeTwoByteString(
657+
isolate, str_len, [buf, str_len](uint16_t* dst) {
658+
for (size_t i = 0, k = 0; k < str_len; i += 2, k += 1) {
659+
// The input is in *little endian*, because that's what Node.js
660+
// expects, so the high byte comes after the low byte.
661+
const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
662+
const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
663+
dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
664+
}
665+
});
646666
}
647667
if (reinterpret_cast<uintptr_t>(buf) % 2 != 0) {
648-
// Unaligned data still means we can't directly pass it to V8.
649-
char* dst = node::UncheckedMalloc(buflen);
650-
if (dst == nullptr) {
651-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
652-
return MaybeLocal<Value>();
653-
}
654-
memcpy(dst, buf, buflen);
655-
return ExternTwoByteString::New(
656-
isolate, reinterpret_cast<uint16_t*>(dst), str_len);
668+
return EncodeTwoByteString(
669+
isolate, str_len, [buf, buflen](uint16_t* dst) {
670+
memcpy(dst, buf, buflen);
671+
});
657672
}
658673
return ExternTwoByteString::NewFromCopy(
659674
isolate, reinterpret_cast<const uint16_t*>(buf), str_len);
@@ -675,15 +690,11 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
675690
// https://nodejs.org/api/buffer.html regarding Node's "ucs2"
676691
// encoding specification
677692
if constexpr (IsBigEndian()) {
678-
uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen);
679-
if (dst == nullptr) {
680-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
681-
return MaybeLocal<Value>();
682-
}
683-
size_t nbytes = buflen * sizeof(uint16_t);
684-
memcpy(dst, buf, nbytes);
685-
CHECK(nbytes::SwapBytes16(reinterpret_cast<char*>(dst), nbytes));
686-
return ExternTwoByteString::New(isolate, dst, buflen);
693+
return EncodeTwoByteString(isolate, buflen, [buf, buflen](uint16_t* dst) {
694+
size_t nbytes = buflen * sizeof(uint16_t);
695+
memcpy(dst, buf, nbytes);
696+
CHECK(nbytes::SwapBytes16(reinterpret_cast<char*>(dst), nbytes));
697+
});
687698
} else {
688699
return ExternTwoByteString::NewFromCopy(isolate, buf, buflen);
689700
}

0 commit comments

Comments
 (0)