Skip to content

Commit 6dd8b66

Browse files
Update ICU data to 78 (#7025)
#6787
1 parent 1c5c2ad commit 6dd8b66

File tree

386 files changed

+53019
-44722
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

386 files changed

+53019
-44722
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ icu_pattern = { version = "0.4.0", path = "components/pattern", default-features
140140
icu = { version = "~2.0.0", path = "components/icu", default-features = false }
141141
icu_calendar = { version = "~2.0.0", path = "components/calendar", default-features = false }
142142
icu_casemap = { version = "~2.0.0", path = "components/casemap", default-features = false }
143-
icu_collator = { version = "~2.0.0", path = "components/collator", default-features = false }
143+
icu_collator = { version = "~2.1.0-dev", path = "components/collator", default-features = false }
144144
icu_collections = { version = "~2.0.0", path = "components/collections", default-features = false }
145145
icu_codepointtrie_builder = { version = "~0.5.0", path = "components/collections/codepointtrie_builder", default-features = false }
146146
icu_datetime = { version = "~2.0.0", path = "components/datetime", default-features = false }
@@ -172,7 +172,7 @@ icu_provider_registry = { version = "~2.0.0", path = "provider/registry", defaul
172172
# Baked data
173173
icu_calendar_data = { version = "~2.0.0", path = "provider/data/calendar", default-features = false }
174174
icu_casemap_data = { version = "~2.0.0", path = "provider/data/casemap", default-features = false }
175-
icu_collator_data = { version = "~2.0.1", path = "provider/data/collator", default-features = false }
175+
icu_collator_data = { version = "~2.1.0-dev", path = "provider/data/collator", default-features = false }
176176
icu_datetime_data = { version = "~2.0.0", path = "provider/data/datetime", default-features = false }
177177
icu_decimal_data = { version = "~2.0.0", path = "provider/data/decimal", default-features = false }
178178
icu_list_data = { version = "~2.0.0", path = "provider/data/list", default-features = false }

components/collator/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
[package]
66
name = "icu_collator"
77
description = "API for comparing strings according to language-dependent conventions"
8-
version = "2.0.1"
8+
version = "2.1.0-dev"
99

1010
authors.workspace = true
1111
categories.workspace = true

components/collator/src/comparison.rs

Lines changed: 79 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ use crate::provider::CollationRootV1;
4242
use crate::provider::CollationSpecialPrimariesV1;
4343
use crate::provider::CollationSpecialPrimariesValidated;
4444
use crate::provider::CollationTailoringV1;
45-
use core::array;
4645
use core::cmp::Ordering;
4746
use core::convert::{Infallible, TryFrom};
4847
use icu_normalizer::provider::DecompositionData;
@@ -663,32 +662,25 @@ impl Collator {
663662
return Err(DataError::custom("invalid").with_marker(CollationSpecialPrimariesV1::INFO));
664663
}
665664
let special_primaries = special_primaries.map_project(|csp, _| {
666-
if csp.last_primaries.len()
667-
== (MaxVariable::Currency as usize)
668-
+ core::mem::size_of_val(
669-
&CollationSpecialPrimariesValidated::HARDCODED_FALLBACK.compressible_bytes,
670-
) / core::mem::size_of::<u16>()
671-
{
672-
CollationSpecialPrimariesValidated {
673-
compressible_bytes: array::from_fn(|i| {
674-
#[expect(clippy::unwrap_used)] // protected by the if
675-
{
676-
csp.last_primaries
677-
.get((MaxVariable::Currency as usize) + i)
678-
.unwrap()
679-
}
680-
}),
681-
last_primaries: csp.last_primaries.truncated(MaxVariable::Currency as usize),
682-
numeric_primary: csp.numeric_primary,
683-
}
684-
} else {
685-
// Data without compressible bytes, add hardcoded data
686-
CollationSpecialPrimariesValidated {
687-
last_primaries: csp.last_primaries,
688-
compressible_bytes: CollationSpecialPrimariesValidated::HARDCODED_FALLBACK
689-
.compressible_bytes,
690-
numeric_primary: csp.numeric_primary,
691-
}
665+
let compressible_bytes = (csp.last_primaries.len()
666+
== MaxVariable::Currency as usize + 16)
667+
.then(|| {
668+
csp.last_primaries
669+
.as_maybe_borrowed()?
670+
.as_ule_slice()
671+
.get((MaxVariable::Currency as usize)..)?
672+
.try_into()
673+
.ok()
674+
})
675+
.flatten()
676+
.unwrap_or(
677+
CollationSpecialPrimariesValidated::HARDCODED_COMPRESSIBLE_BYTES_FALLBACK,
678+
);
679+
680+
CollationSpecialPrimariesValidated {
681+
last_primaries: csp.last_primaries.truncated(MaxVariable::Currency as usize),
682+
numeric_primary: csp.numeric_primary,
683+
compressible_bytes,
692684
}
693685
});
694686

@@ -769,30 +761,70 @@ impl CollatorBorrowed<'static> {
769761
LocaleSpecificDataHolder::try_new_unstable_internal(provider, prefs, options)?;
770762

771763
// TODO: redesign Korean search collation handling
772-
if jamo.ce32s.len() != JAMO_COUNT {
773-
return Err(DataError::custom("invalid").with_marker(CollationJamoV1::INFO));
774-
}
764+
const _: () = assert!(
765+
crate::provider::Baked::SINGLETON_COLLATION_JAMO_V1
766+
.ce32s
767+
.as_slice()
768+
.len()
769+
== JAMO_COUNT
770+
);
775771

776-
let special_primaries = crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1;
777772
// `variant_count` isn't stable yet:
778773
// https://github.com/rust-lang/rust/issues/73662
779-
if special_primaries.last_primaries.len() <= (MaxVariable::Currency as usize) {
780-
return Err(DataError::custom("invalid").with_marker(CollationSpecialPrimariesV1::INFO));
781-
} else if CollationSpecialPrimariesValidated::HARDCODED_FALLBACK.numeric_primary
782-
!= special_primaries.numeric_primary
783-
|| CollationSpecialPrimariesValidated::HARDCODED_FALLBACK
774+
const _: () = assert!(
775+
crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
784776
.last_primaries
785-
.iter()
786-
.zip(special_primaries.last_primaries.iter())
787-
.any(|(a, b)| a != b)
788-
{
789-
// Baked data without compressible bits, but not matching hardcoded data
790-
return Err(
791-
DataError::custom("cannot fall back to hardcoded compressible data")
792-
.with_marker(CollationSpecialPrimariesV1::INFO),
793-
);
794-
}
795-
let special_primaries = CollationSpecialPrimariesValidated::HARDCODED_FALLBACK;
777+
.as_slice()
778+
.len()
779+
> (MaxVariable::Currency as usize)
780+
);
781+
782+
let special_primaries = const {
783+
&CollationSpecialPrimariesValidated {
784+
last_primaries: zerovec::ZeroSlice::from_ule_slice(
785+
crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
786+
.last_primaries
787+
.as_slice()
788+
.as_ule_slice()
789+
.split_at(MaxVariable::Currency as usize)
790+
.0,
791+
)
792+
.as_zerovec(),
793+
numeric_primary: crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
794+
.numeric_primary,
795+
compressible_bytes: {
796+
const C: &[<u16 as AsULE>::ULE] =
797+
crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
798+
.last_primaries
799+
.as_slice()
800+
.as_ule_slice();
801+
if C.len() == MaxVariable::Currency as usize + 16 {
802+
let i = MaxVariable::Currency as usize;
803+
#[allow(clippy::indexing_slicing)] // protected, const
804+
&[
805+
C[i],
806+
C[i + 1],
807+
C[i + 2],
808+
C[i + 3],
809+
C[i + 4],
810+
C[i + 5],
811+
C[i + 6],
812+
C[i + 7],
813+
C[i + 8],
814+
C[i + 9],
815+
C[i + 10],
816+
C[i + 11],
817+
C[i + 12],
818+
C[i + 13],
819+
C[i + 14],
820+
C[i + 15],
821+
]
822+
} else {
823+
CollationSpecialPrimariesValidated::HARDCODED_COMPRESSIBLE_BYTES_FALLBACK
824+
}
825+
},
826+
}
827+
};
796828

797829
// Attribute belongs closer to `unwrap`, but
798830
// https://github.com/rust-lang/rust/issues/15701

components/collator/src/provider.rs

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
use icu_collections::char16trie::Char16TrieIterator;
2222
use icu_collections::codepointtrie::CodePointTrie;
2323
use icu_provider::prelude::*;
24-
use zerovec::ule::AsULE;
24+
use zerovec::ule::{AsULE, RawBytesULE};
2525
use zerovec::ZeroVec;
2626
use zerovec::{zeroslice, ZeroSlice};
2727

@@ -577,39 +577,28 @@ pub(crate) struct CollationSpecialPrimariesValidated<'data> {
577577
pub numeric_primary: u8,
578578
/// 256 bits (packed in 16 u16s) to classify every possible
579579
/// byte into compressible or non-compressible.
580-
pub compressible_bytes: [u16; 16],
580+
pub compressible_bytes: &'data [<u16 as AsULE>::ULE; 16],
581581
}
582582

583583
impl CollationSpecialPrimariesValidated<'static> {
584-
pub(crate) const HARDCODED_FALLBACK: &Self = &Self {
585-
last_primaries: zerovec::zerovec!(u16; <u16 as AsULE>::ULE::from_aligned; [
586-
// Last primaries
587-
1286,
588-
3072,
589-
3488,
590-
3840,
591-
]),
592-
numeric_primary: 16u8,
593-
compressible_bytes: [
594-
// Compressible bytes
595-
0b0000_0000_0000_0000,
596-
0b0000_0000_0000_0000,
597-
0b0000_0000_0000_0000,
598-
0b0000_0000_0000_0000,
599-
0b0000_0000_0000_0000,
600-
0b0000_0000_0000_0000,
601-
0b1111_1111_1111_1110,
602-
0b1111_1111_1111_1111,
603-
0b0000_0000_0000_0001,
604-
0b0000_0000_0000_0000,
605-
0b0000_0000_0000_0000,
606-
0b0000_0000_0000_0000,
607-
0b0000_0000_0000_0000,
608-
0b0000_0000_0000_0000,
609-
0b0000_0000_0000_0000,
610-
0b0100_0000_0000_0000,
611-
],
612-
};
584+
pub(crate) const HARDCODED_COMPRESSIBLE_BYTES_FALLBACK: &'static [<u16 as AsULE>::ULE; 16] = &[
585+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
586+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
587+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
588+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
589+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
590+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
591+
RawBytesULE(0b1111_1111_1111_1110u16.to_le_bytes()),
592+
RawBytesULE(0b1111_1111_1111_1111u16.to_le_bytes()),
593+
RawBytesULE(0b0000_0000_0000_0001u16.to_le_bytes()),
594+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
595+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
596+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
597+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
598+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
599+
RawBytesULE(0b0000_0000_0000_0000u16.to_le_bytes()),
600+
RawBytesULE(0b0100_0000_0000_0000u16.to_le_bytes()),
601+
];
613602
}
614603

615604
icu_provider::data_struct!(
@@ -633,7 +622,7 @@ impl CollationSpecialPrimariesValidated<'_> {
633622
// into Compiler Explorer shows that the panic
634623
// is optimized away.
635624
#[expect(clippy::indexing_slicing)]
636-
let field = self.compressible_bytes[usize::from(b >> 4)];
625+
let field = u16::from_unaligned(self.compressible_bytes[usize::from(b >> 4)]);
637626
let mask = 1 << (b & 0b1111);
638627
(field & mask) != 0
639628
}

0 commit comments

Comments
 (0)