Skip to content

Commit 2e667b0

Browse files
committed
Auto merge of #146948 - folkertdev:hint-prefetch, r=Amanieu
Add `core::hint::prefetch_{read, write}_{data, instruction}`. Tracking issue: #146941. ACP: rust-lang/libs-team#638. Note that `prefetch_write_instruction` is not exposed, since it doesn't really make sense in practice. The implementation is straightforward; the docs can probably use some tweaks, especially for the instruction version, where the wording is a little awkward. r? `@Amanieu`
2 parents 5f1173b + b9e3e41 commit 2e667b0

File tree

1 file changed

+149
-0
lines changed

1 file changed

+149
-0
lines changed

library/core/src/hint.rs

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,3 +823,152 @@ where
823823
crate::intrinsics::select_unpredictable(condition, true_val, false_val).assume_init()
824824
}
825825
}
826+
827+
/// The expected temporal locality of a memory prefetch operation.
828+
///
829+
/// Locality expresses how likely the prefetched data is to be reused soon,
830+
/// and therefore which level of cache it should be brought into.
831+
///
832+
/// The locality is just a hint, and may be ignored on some targets or by the hardware.
833+
///
834+
/// Used with functions like [`prefetch_read`] and [`prefetch_write`].
835+
///
836+
/// [`prefetch_read`]: crate::hint::prefetch_read
837+
/// [`prefetch_write`]: crate::hint::prefetch_write
838+
#[unstable(feature = "hint_prefetch", issue = "146941")]
839+
#[non_exhaustive]
840+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
841+
pub enum Locality {
842+
/// Data is expected to be reused eventually.
843+
///
844+
/// Typically prefetches into L3 cache (if the CPU supports it).
845+
L3,
846+
/// Data is expected to be reused in the near future.
847+
///
848+
/// Typically prefetches into L2 cache.
849+
L2,
850+
/// Data is expected to be reused very soon.
851+
///
852+
/// Typically prefetches into L1 cache.
853+
L1,
854+
}
855+
856+
impl Locality {
857+
/// Convert to the constant that LLVM associates with a locality.
858+
const fn to_llvm(self) -> i32 {
859+
match self {
860+
Self::L3 => 1,
861+
Self::L2 => 2,
862+
Self::L1 => 3,
863+
}
864+
}
865+
}
866+
867+
/// Prefetch the cache line containing `ptr` for a future read.
868+
///
869+
/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
870+
/// soon after, but may also increase bandwidth usage or evict other cache lines.
871+
///
872+
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
873+
///
874+
/// Passing a dangling or invalid pointer is permitted: the memory will not
875+
/// actually be dereferenced, and no faults are raised.
876+
///
877+
/// # Examples
878+
///
879+
/// ```
880+
/// #![feature(hint_prefetch)]
881+
/// use std::hint::{Locality, prefetch_read};
882+
/// use std::mem::size_of_val;
883+
///
884+
/// // Prefetch all of `slice` into the L1 cache.
885+
/// fn prefetch_slice<T>(slice: &[T]) {
886+
/// // On most systems the cache line size is 64 bytes.
887+
/// for offset in (0..size_of_val(slice)).step_by(64) {
888+
/// prefetch_read(slice.as_ptr().wrapping_add(offset), Locality::L1);
889+
/// }
890+
/// }
891+
/// ```
892+
#[inline(always)]
893+
#[unstable(feature = "hint_prefetch", issue = "146941")]
894+
pub const fn prefetch_read<T>(ptr: *const T, locality: Locality) {
895+
match locality {
896+
Locality::L3 => intrinsics::prefetch_read_data::<T, { Locality::L3.to_llvm() }>(ptr),
897+
Locality::L2 => intrinsics::prefetch_read_data::<T, { Locality::L2.to_llvm() }>(ptr),
898+
Locality::L1 => intrinsics::prefetch_read_data::<T, { Locality::L1.to_llvm() }>(ptr),
899+
}
900+
}
901+
902+
/// Prefetch the cache line containing `ptr` for a single future read, but attempt to avoid
903+
/// polluting the cache.
904+
///
905+
/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
906+
/// soon after, but may also increase bandwidth usage or evict other cache lines.
907+
///
908+
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
909+
///
910+
/// Passing a dangling or invalid pointer is permitted: the memory will not
911+
/// actually be dereferenced, and no faults are raised.
912+
#[inline(always)]
913+
#[unstable(feature = "hint_prefetch", issue = "146941")]
914+
pub const fn prefetch_read_non_temporal<T>(ptr: *const T, locality: Locality) {
915+
// The LLVM intrinsic does not currently support specifying the locality.
916+
let _ = locality;
917+
intrinsics::prefetch_read_data::<T, 0>(ptr)
918+
}
919+
920+
/// Prefetch the cache line containing `ptr` for a future write.
921+
///
922+
/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
923+
/// soon after, but may also increase bandwidth usage or evict other cache lines.
924+
///
925+
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
926+
///
927+
/// Passing a dangling or invalid pointer is permitted: the memory will not
928+
/// actually be dereferenced, and no faults are raised.
929+
#[inline(always)]
930+
#[unstable(feature = "hint_prefetch", issue = "146941")]
931+
pub const fn prefetch_write<T>(ptr: *mut T, locality: Locality) {
932+
match locality {
933+
Locality::L3 => intrinsics::prefetch_write_data::<T, { Locality::L3.to_llvm() }>(ptr),
934+
Locality::L2 => intrinsics::prefetch_write_data::<T, { Locality::L2.to_llvm() }>(ptr),
935+
Locality::L1 => intrinsics::prefetch_write_data::<T, { Locality::L1.to_llvm() }>(ptr),
936+
}
937+
}
938+
939+
/// Prefetch the cache line containing `ptr` for a single future write, but attempt to avoid
940+
/// polluting the cache.
941+
///
942+
/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
943+
/// soon after, but may also increase bandwidth usage or evict other cache lines.
944+
///
945+
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
946+
///
947+
/// Passing a dangling or invalid pointer is permitted: the memory will not
948+
/// actually be dereferenced, and no faults are raised.
949+
#[inline(always)]
950+
#[unstable(feature = "hint_prefetch", issue = "146941")]
951+
pub const fn prefetch_write_non_temporal<T>(ptr: *const T, locality: Locality) {
952+
// The LLVM intrinsic does not currently support specifying the locality.
953+
let _ = locality;
954+
intrinsics::prefetch_write_data::<T, 0>(ptr)
955+
}
956+
957+
/// Prefetch the cache line containing `ptr` into the instruction cache for a future read.
958+
///
959+
/// A strategically placed prefetch can reduce cache miss latency if the instructions are
960+
/// accessed soon after, but may also increase bandwidth usage or evict other cache lines.
961+
///
962+
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
963+
///
964+
/// Passing a dangling or invalid pointer is permitted: the memory will not
965+
/// actually be dereferenced, and no faults are raised.
966+
#[inline(always)]
967+
#[unstable(feature = "hint_prefetch", issue = "146941")]
968+
pub const fn prefetch_read_instruction<T>(ptr: *const T, locality: Locality) {
969+
match locality {
970+
Locality::L3 => intrinsics::prefetch_read_instruction::<T, { Locality::L3.to_llvm() }>(ptr),
971+
Locality::L2 => intrinsics::prefetch_read_instruction::<T, { Locality::L2.to_llvm() }>(ptr),
972+
Locality::L1 => intrinsics::prefetch_read_instruction::<T, { Locality::L1.to_llvm() }>(ptr),
973+
}
974+
}

0 commit comments

Comments
 (0)