Skip to content

Commit 934880f

Browse files
committed
Auto merge of #124810 - lincot:speed-up-string-push-and-string-insert, r=tgross35
speed up `String::push` and `String::insert` Addresses the concerns described in #116235. The performance gain comes mainly from avoiding temporary buffers. Complex pattern matching in `encode_utf8` (introduced in #67569) has been simplified to a comparison and an exhaustive `match` in the `encode_utf8_raw_unchecked` helper function. It takes a slice of `MaybeUninit<u8>` because otherwise we'd have to construct a normal slice to uninitialized data, which is not desirable, I guess. Several functions still have that [unneeded zeroing](https://rust.godbolt.org/z/5oKfMPo7j), but a single instruction is not that important, I guess. `@rustbot` label T-libs C-optimization A-str
2 parents 48f89e7 + ff248de commit 934880f

File tree

6 files changed

+143
-74
lines changed

6 files changed

+143
-74
lines changed

Diff for: library/alloc/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
#![feature(async_iterator)]
105105
#![feature(bstr)]
106106
#![feature(bstr_internals)]
107+
#![feature(char_internals)]
107108
#![feature(char_max_len)]
108109
#![feature(clone_to_uninit)]
109110
#![feature(coerce_unsized)]

Diff for: library/alloc/src/string.rs

+47-18
Original file line numberDiff line numberDiff line change
@@ -1401,11 +1401,14 @@ impl String {
14011401
#[inline]
14021402
#[stable(feature = "rust1", since = "1.0.0")]
14031403
pub fn push(&mut self, ch: char) {
1404-
match ch.len_utf8() {
1405-
1 => self.vec.push(ch as u8),
1406-
_ => {
1407-
self.vec.extend_from_slice(ch.encode_utf8(&mut [0; char::MAX_LEN_UTF8]).as_bytes())
1408-
}
1404+
let len = self.len();
1405+
let ch_len = ch.len_utf8();
1406+
self.reserve(ch_len);
1407+
1408+
// SAFETY: Just reserved capacity for at least the length needed to encode `ch`.
1409+
unsafe {
1410+
core::char::encode_utf8_raw_unchecked(ch as u32, self.vec.as_mut_ptr().add(self.len()));
1411+
self.vec.set_len(len + ch_len);
14091412
}
14101413
}
14111414

@@ -1702,24 +1705,31 @@ impl String {
17021705
#[rustc_confusables("set")]
17031706
pub fn insert(&mut self, idx: usize, ch: char) {
17041707
assert!(self.is_char_boundary(idx));
1705-
let mut bits = [0; char::MAX_LEN_UTF8];
1706-
let bits = ch.encode_utf8(&mut bits).as_bytes();
17071708

1709+
let len = self.len();
1710+
let ch_len = ch.len_utf8();
1711+
self.reserve(ch_len);
1712+
1713+
// SAFETY: Move the bytes starting from `idx` to their new location `ch_len`
1714+
// bytes ahead. This is safe because sufficient capacity was reserved, and `idx`
1715+
// is a char boundary.
17081716
unsafe {
1709-
self.insert_bytes(idx, bits);
1717+
ptr::copy(
1718+
self.vec.as_ptr().add(idx),
1719+
self.vec.as_mut_ptr().add(idx + ch_len),
1720+
len - idx,
1721+
);
17101722
}
1711-
}
17121723

1713-
#[cfg(not(no_global_oom_handling))]
1714-
unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
1715-
let len = self.len();
1716-
let amt = bytes.len();
1717-
self.vec.reserve(amt);
1724+
// SAFETY: Encode the character into the vacated region if `idx != len`,
1725+
// or into the uninitialized spare capacity otherwise.
1726+
unsafe {
1727+
core::char::encode_utf8_raw_unchecked(ch as u32, self.vec.as_mut_ptr().add(idx));
1728+
}
17181729

1730+
// SAFETY: Update the length to include the newly added bytes.
17191731
unsafe {
1720-
ptr::copy(self.vec.as_ptr().add(idx), self.vec.as_mut_ptr().add(idx + amt), len - idx);
1721-
ptr::copy_nonoverlapping(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
1722-
self.vec.set_len(len + amt);
1732+
self.vec.set_len(len + ch_len);
17231733
}
17241734
}
17251735

@@ -1749,8 +1759,27 @@ impl String {
17491759
pub fn insert_str(&mut self, idx: usize, string: &str) {
17501760
assert!(self.is_char_boundary(idx));
17511761

1762+
let len = self.len();
1763+
let amt = string.len();
1764+
self.reserve(amt);
1765+
1766+
// SAFETY: Move the bytes starting from `idx` to their new location `amt` bytes
1767+
// ahead. This is safe because sufficient capacity was just reserved, and `idx`
1768+
// is a char boundary.
1769+
unsafe {
1770+
ptr::copy(self.vec.as_ptr().add(idx), self.vec.as_mut_ptr().add(idx + amt), len - idx);
1771+
}
1772+
1773+
// SAFETY: Copy the new string slice into the vacated region if `idx != len`,
1774+
// or into the uninitialized spare capacity otherwise. The borrow checker
1775+
// ensures that the source and destination do not overlap.
1776+
unsafe {
1777+
ptr::copy_nonoverlapping(string.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
1778+
}
1779+
1780+
// SAFETY: Update the length to include the newly added bytes.
17521781
unsafe {
1753-
self.insert_bytes(idx, string.as_bytes());
1782+
self.vec.set_len(len + amt);
17541783
}
17551784
}
17561785

Diff for: library/alloctests/benches/string.rs

+22-26
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@ use test::{Bencher, black_box};
44

55
#[bench]
66
fn bench_with_capacity(b: &mut Bencher) {
7-
b.iter(|| String::with_capacity(100));
7+
b.iter(|| String::with_capacity(black_box(100)));
88
}
99

1010
#[bench]
1111
fn bench_push_str(b: &mut Bencher) {
1212
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
1313
b.iter(|| {
1414
let mut r = String::new();
15-
r.push_str(s);
15+
black_box(&mut r).push_str(black_box(s));
16+
r
1617
});
1718
}
1819

@@ -24,8 +25,9 @@ fn bench_push_str_one_byte(b: &mut Bencher) {
2425
b.iter(|| {
2526
let mut r = String::new();
2627
for _ in 0..REPETITIONS {
27-
r.push_str("a")
28+
black_box(&mut r).push_str(black_box("a"));
2829
}
30+
r
2931
});
3032
}
3133

@@ -35,8 +37,9 @@ fn bench_push_char_one_byte(b: &mut Bencher) {
3537
b.iter(|| {
3638
let mut r = String::new();
3739
for _ in 0..REPETITIONS {
38-
r.push('a')
40+
black_box(&mut r).push(black_box('a'));
3941
}
42+
r
4043
});
4144
}
4245

@@ -46,8 +49,9 @@ fn bench_push_char_two_bytes(b: &mut Bencher) {
4649
b.iter(|| {
4750
let mut r = String::new();
4851
for _ in 0..REPETITIONS {
49-
r.push('â')
52+
black_box(&mut r).push(black_box('â'));
5053
}
54+
r
5155
});
5256
}
5357

@@ -57,34 +61,26 @@ fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
5761
Lorem ipsum dolor sit amet, consectetur. ";
5862

5963
assert_eq!(100, s.len());
60-
b.iter(|| {
61-
let _ = String::from_utf8_lossy(s);
62-
});
64+
b.iter(|| String::from_utf8_lossy(black_box(s)));
6365
}
6466

6567
#[bench]
6668
fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
6769
let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
6870
assert_eq!(100, s.len());
69-
b.iter(|| {
70-
let _ = String::from_utf8_lossy(s);
71-
});
71+
b.iter(|| String::from_utf8_lossy(black_box(s)));
7272
}
7373

7474
#[bench]
7575
fn from_utf8_lossy_invalid(b: &mut Bencher) {
7676
let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
77-
b.iter(|| {
78-
let _ = String::from_utf8_lossy(s);
79-
});
77+
b.iter(|| String::from_utf8_lossy(black_box(s)));
8078
}
8179

8280
#[bench]
8381
fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
8482
let s = repeat(0xf5).take(100).collect::<Vec<_>>();
85-
b.iter(|| {
86-
let _ = String::from_utf8_lossy(&s);
87-
});
83+
b.iter(|| String::from_utf8_lossy(black_box(&s)));
8884
}
8985

9086
#[bench]
@@ -96,8 +92,8 @@ fn bench_exact_size_shrink_to_fit(b: &mut Bencher) {
9692
r.push_str(s);
9793
assert_eq!(r.len(), r.capacity());
9894
b.iter(|| {
99-
let mut r = String::with_capacity(s.len());
100-
r.push_str(s);
95+
let mut r = String::with_capacity(black_box(s.len()));
96+
r.push_str(black_box(s));
10197
r.shrink_to_fit();
10298
r
10399
});
@@ -107,29 +103,29 @@ fn bench_exact_size_shrink_to_fit(b: &mut Bencher) {
107103
fn bench_from_str(b: &mut Bencher) {
108104
let s = "Hello there, the quick brown fox jumped over the lazy dog! \
109105
Lorem ipsum dolor sit amet, consectetur. ";
110-
b.iter(|| String::from(s))
106+
b.iter(|| String::from(black_box(s)))
111107
}
112108

113109
#[bench]
114110
fn bench_from(b: &mut Bencher) {
115111
let s = "Hello there, the quick brown fox jumped over the lazy dog! \
116112
Lorem ipsum dolor sit amet, consectetur. ";
117-
b.iter(|| String::from(s))
113+
b.iter(|| String::from(black_box(s)))
118114
}
119115

120116
#[bench]
121117
fn bench_to_string(b: &mut Bencher) {
122118
let s = "Hello there, the quick brown fox jumped over the lazy dog! \
123119
Lorem ipsum dolor sit amet, consectetur. ";
124-
b.iter(|| s.to_string())
120+
b.iter(|| black_box(s).to_string())
125121
}
126122

127123
#[bench]
128124
fn bench_insert_char_short(b: &mut Bencher) {
129125
let s = "Hello, World!";
130126
b.iter(|| {
131127
let mut x = String::from(s);
132-
black_box(&mut x).insert(6, black_box(' '));
128+
black_box(&mut x).insert(black_box(6), black_box(' '));
133129
x
134130
})
135131
}
@@ -139,7 +135,7 @@ fn bench_insert_char_long(b: &mut Bencher) {
139135
let s = "Hello, World!";
140136
b.iter(|| {
141137
let mut x = String::from(s);
142-
black_box(&mut x).insert(6, black_box('❤'));
138+
black_box(&mut x).insert(black_box(6), black_box('❤'));
143139
x
144140
})
145141
}
@@ -149,7 +145,7 @@ fn bench_insert_str_short(b: &mut Bencher) {
149145
let s = "Hello, World!";
150146
b.iter(|| {
151147
let mut x = String::from(s);
152-
black_box(&mut x).insert_str(6, black_box(" "));
148+
black_box(&mut x).insert_str(black_box(6), black_box(" "));
153149
x
154150
})
155151
}
@@ -159,7 +155,7 @@ fn bench_insert_str_long(b: &mut Bencher) {
159155
let s = "Hello, World!";
160156
b.iter(|| {
161157
let mut x = String::from(s);
162-
black_box(&mut x).insert_str(6, black_box(" rustic "));
158+
black_box(&mut x).insert_str(black_box(6), black_box(" rustic "));
163159
x
164160
})
165161
}

Diff for: library/core/src/char/methods.rs

+61-29
Original file line numberDiff line numberDiff line change
@@ -1806,39 +1806,71 @@ const fn len_utf16(code: u32) -> usize {
18061806
#[inline]
18071807
pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
18081808
let len = len_utf8(code);
1809-
match (len, &mut *dst) {
1810-
(1, [a, ..]) => {
1811-
*a = code as u8;
1812-
}
1813-
(2, [a, b, ..]) => {
1814-
*a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
1815-
*b = (code & 0x3F) as u8 | TAG_CONT;
1816-
}
1817-
(3, [a, b, c, ..]) => {
1818-
*a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
1819-
*b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1820-
*c = (code & 0x3F) as u8 | TAG_CONT;
1821-
}
1822-
(4, [a, b, c, d, ..]) => {
1823-
*a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
1824-
*b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
1825-
*c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1826-
*d = (code & 0x3F) as u8 | TAG_CONT;
1827-
}
1828-
_ => {
1829-
const_panic!(
1830-
"encode_utf8: buffer does not have enough bytes to encode code point",
1831-
"encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
1832-
code: u32 = code,
1833-
len: usize = len,
1834-
dst_len: usize = dst.len(),
1835-
)
1836-
}
1837-
};
1809+
if dst.len() < len {
1810+
const_panic!(
1811+
"encode_utf8: buffer does not have enough bytes to encode code point",
1812+
"encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
1813+
code: u32 = code,
1814+
len: usize = len,
1815+
dst_len: usize = dst.len(),
1816+
);
1817+
}
1818+
1819+
// SAFETY: `dst` is checked to be at least the length needed to encode the codepoint.
1820+
unsafe { encode_utf8_raw_unchecked(code, dst.as_mut_ptr()) };
1821+
18381822
// SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
18391823
unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
18401824
}
18411825

1826+
/// Encodes a raw `u32` value as UTF-8 into the byte buffer pointed to by `dst`.
1827+
///
1828+
/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1829+
/// (Creating a `char` in the surrogate range is UB.)
1830+
/// The result is valid [generalized UTF-8] but not valid UTF-8.
1831+
///
1832+
/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1833+
///
1834+
/// # Safety
1835+
///
1836+
/// The behavior is undefined if the buffer pointed to by `dst` is not
1837+
/// large enough to hold the encoded codepoint. A buffer of length four
1838+
/// is large enough to encode any `char`.
1839+
///
1840+
/// For a safe version of this function, see the [`encode_utf8_raw`] function.
1841+
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1842+
#[doc(hidden)]
1843+
#[inline]
1844+
pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) {
1845+
let len = len_utf8(code);
1846+
// SAFETY: The caller must guarantee that the buffer pointed to by `dst`
1847+
// is at least `len` bytes long.
1848+
unsafe {
1849+
match len {
1850+
1 => {
1851+
*dst = code as u8;
1852+
}
1853+
2 => {
1854+
*dst = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
1855+
*dst.add(1) = (code & 0x3F) as u8 | TAG_CONT;
1856+
}
1857+
3 => {
1858+
*dst = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
1859+
*dst.add(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1860+
*dst.add(2) = (code & 0x3F) as u8 | TAG_CONT;
1861+
}
1862+
4 => {
1863+
*dst = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
1864+
*dst.add(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
1865+
*dst.add(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1866+
*dst.add(3) = (code & 0x3F) as u8 | TAG_CONT;
1867+
}
1868+
// SAFETY: `char` always takes between 1 and 4 bytes to encode in UTF-8.
1869+
_ => crate::hint::unreachable_unchecked(),
1870+
}
1871+
}
1872+
}
1873+
18421874
/// Encodes a raw `u32` value as native endian UTF-16 into the provided `u16` buffer,
18431875
/// and then returns the subslice of the buffer that contains the encoded character.
18441876
///

Diff for: library/core/src/char/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
3838
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
3939
pub use self::methods::encode_utf16_raw; // perma-unstable
4040
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
41-
pub use self::methods::encode_utf8_raw; // perma-unstable
41+
pub use self::methods::{encode_utf8_raw, encode_utf8_raw_unchecked}; // perma-unstable
4242

4343
#[rustfmt::skip]
4444
use crate::ascii;

Diff for: tests/codegen/string-push.rs

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
//! Check that `String::push` is optimized enough not to call `memcpy`.
2+
3+
//@ compile-flags: -O
4+
#![crate_type = "lib"]
5+
6+
// CHECK-LABEL: @string_push_does_not_call_memcpy
7+
#[no_mangle]
8+
pub fn string_push_does_not_call_memcpy(s: &mut String, ch: char) {
9+
// CHECK-NOT: call void @llvm.memcpy
10+
s.push(ch);
11+
}

0 commit comments

Comments
 (0)