Skip to content

Commit 3d3d11b

Browse files
committed
Add safety comments to more granular unsafe blocks.
1 parent 8f3d10a commit 3d3d11b

File tree

1 file changed

+78
-59
lines changed

1 file changed

+78
-59
lines changed

src/byteset/scalar.rs

Lines changed: 78 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -21,44 +21,53 @@ pub fn inv_memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
2121
let vn1 = repeat_byte(n1);
2222
let confirm = |byte| byte != n1;
2323
let start_ptr = haystack.as_ptr();
24+
let mut ptr = start_ptr;
2425

25-
unsafe {
26-
let end_ptr = haystack.as_ptr().add(haystack.len());
27-
let mut ptr = start_ptr;
26+
// SAFETY: 2nd safety requirement of `add` either 1) doesn't apply when `haystack.len()` is
27+
// zero or 2) is in-bounds (i.e. one-past allocation) of the same allocated object.
28+
let end_ptr = unsafe { start_ptr.add(haystack.len()) };
2829

29-
if haystack.len() < USIZE_BYTES {
30-
return forward_search(start_ptr, end_ptr, ptr, confirm);
31-
}
30+
if haystack.len() < USIZE_BYTES {
31+
return unsafe { forward_search(start_ptr, end_ptr, ptr, confirm) };
32+
}
3233

33-
let chunk = read_unaligned_usize(ptr);
34-
if (chunk ^ vn1) != 0 {
35-
return forward_search(start_ptr, end_ptr, ptr, confirm);
36-
}
34+
let chunk = unsafe { read_unaligned_usize(ptr) };
35+
if (chunk ^ vn1) != 0 {
36+
return unsafe { forward_search(start_ptr, end_ptr, ptr, confirm) };
37+
}
3738

38-
ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & ALIGN_MASK));
39-
debug_assert!(ptr > start_ptr);
40-
debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
41-
42-
if haystack.len() >= LOOP_SIZE {
43-
// The `if` condition guarantees that `end_ptr.sub(LOOP_SIZE)` (in the loop condition)
44-
// meets the safety requirement that the result must be in bounds of the same allocated
45-
// object.
46-
while ptr <= end_ptr.sub(LOOP_SIZE) {
47-
debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
48-
49-
let a = *(ptr as *const usize);
50-
let b = *(ptr.add(USIZE_BYTES) as *const usize);
51-
let eqa = (a ^ vn1) != 0;
52-
let eqb = (b ^ vn1) != 0;
53-
if eqa || eqb {
54-
break;
55-
}
56-
ptr = ptr.add(LOOP_SIZE);
39+
// SAFETY: Adding `1..=USIZE_BYTES`. One of the `if`s above means that `haystack.len() >=
40+
// USIZE_BYTES`. So the result of `add` is in-bounds of the same allocated object.
41+
ptr = unsafe { ptr.add(USIZE_BYTES - (start_ptr as usize & ALIGN_MASK)) };
42+
debug_assert!(ptr > start_ptr);
43+
// SAFETY: One of the `if`s above means that `haystack.len() >= USIZE_BYTES`. So the result of
44+
// `sub` is in-bounds of the same allocated object.
45+
debug_assert!(unsafe { end_ptr.sub(USIZE_BYTES) } >= start_ptr);
46+
47+
if haystack.len() >= LOOP_SIZE {
48+
// SAFETY: The `if` condition above guarantees that `end_ptr.sub(LOOP_SIZE)` will
49+
// stay in bounds of the same allocated object.
50+
while ptr <= unsafe { end_ptr.sub(LOOP_SIZE) } {
51+
debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
52+
53+
// SAFETY: Loop condition (and the fact that `LOOP_SIZE` is twice the size of
54+
// `USIZE_BYTES`) together guarantee that dereferences and `add` have their
55+
// safety requirements met.
56+
let a = unsafe { *(ptr as *const usize) };
57+
let b = unsafe { *(ptr.add(USIZE_BYTES) as *const usize) };
58+
let eqa = (a ^ vn1) != 0;
59+
let eqb = (b ^ vn1) != 0;
60+
if eqa || eqb {
61+
break;
5762
}
58-
}
5963

60-
forward_search(start_ptr, end_ptr, ptr, confirm)
64+
// SAFETY: The loop condition guarantees that `add` will stay in bounds of the same
65+
// allocated object.
66+
ptr = unsafe { ptr.add(LOOP_SIZE) };
67+
}
6168
}
69+
70+
unsafe { forward_search(start_ptr, end_ptr, ptr, confirm) }
6271
}
6372

6473
/// Return the last index not matching the byte `x` in `text`.
@@ -67,40 +76,50 @@ pub fn inv_memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
6776
let confirm = |byte| byte != n1;
6877
let start_ptr = haystack.as_ptr();
6978

70-
unsafe {
71-
let end_ptr = haystack.as_ptr().add(haystack.len());
72-
let mut ptr = end_ptr;
79+
// SAFETY: 2nd safety requirement of `add` either 1) doesn't apply when `haystack.len()` is
80+
// zero or 2) is in-bounds (i.e. one-past allocation) of the same allocated object.
81+
let end_ptr = unsafe { start_ptr.add(haystack.len()) };
82+
let mut ptr = end_ptr;
7383

74-
if haystack.len() < USIZE_BYTES {
75-
return reverse_search(start_ptr, end_ptr, ptr, confirm);
76-
}
84+
if haystack.len() < USIZE_BYTES {
85+
return unsafe { reverse_search(start_ptr, end_ptr, ptr, confirm) };
86+
}
7787

78-
let chunk = read_unaligned_usize(ptr.sub(USIZE_BYTES));
79-
if (chunk ^ vn1) != 0 {
80-
return reverse_search(start_ptr, end_ptr, ptr, confirm);
81-
}
88+
let chunk = unsafe { read_unaligned_usize(ptr.sub(USIZE_BYTES)) };
89+
if (chunk ^ vn1) != 0 {
90+
return unsafe { reverse_search(start_ptr, end_ptr, ptr, confirm) };
91+
}
8292

83-
ptr = ptr.sub(end_ptr as usize & ALIGN_MASK);
84-
debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
85-
if haystack.len() >= LOOP_SIZE {
86-
// The `if` condition guarantees that `start_ptr.add(LOOP_SIZE)` (in the loop
87-
// condition) meets the safety requirement that the result must be in bounds of the same
88-
// allocated object.
89-
while ptr >= start_ptr.add(LOOP_SIZE) {
90-
debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
91-
92-
let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
93-
let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
94-
let eqa = (a ^ vn1) != 0;
95-
let eqb = (b ^ vn1) != 0;
96-
if eqa || eqb {
97-
break;
98-
}
99-
ptr = ptr.sub(LOOP_SIZE);
93+
// SAFETY: Subtracting `0..USIZE_BYTES`. One of the `if`s above means that `haystack.len() >=
94+
// USIZE_BYTES`. So the result of `sub` is in-bounds of the same allocated object.
95+
ptr = unsafe { ptr.sub(end_ptr as usize & ALIGN_MASK) };
96+
debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
97+
98+
if haystack.len() >= LOOP_SIZE {
99+
// SAFETY: The `if` condition above guarantees that `start_ptr.add(LOOP_SIZE)` will
100+
// stay in bounds of the same allocated object.
101+
while ptr >= unsafe { start_ptr.add(LOOP_SIZE) } {
102+
debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
103+
104+
// SAFETY: Loop condition (and the fact that `LOOP_SIZE` is twice the size of
105+
// `USIZE_BYTES`) together guarantee that dereferences and `sub`s have their
106+
// safety requirements met.
107+
let a = unsafe { *(ptr.sub(2 * USIZE_BYTES) as *const usize) };
108+
let b = unsafe { *(ptr.sub(1 * USIZE_BYTES) as *const usize) };
109+
110+
let eqa = (a ^ vn1) != 0;
111+
let eqb = (b ^ vn1) != 0;
112+
if eqa || eqb {
113+
break;
100114
}
115+
116+
// SAFETY: The loop condition guarantees that `sub` will stay in bounds of the same
117+
// allocated object.
118+
ptr = unsafe { ptr.sub(LOOP_SIZE) };
101119
}
102-
reverse_search(start_ptr, end_ptr, ptr, confirm)
103120
}
121+
122+
unsafe { reverse_search(start_ptr, end_ptr, ptr, confirm) }
104123
}
105124

106125
#[inline(always)]

0 commit comments

Comments
 (0)