@@ -778,7 +778,72 @@ impl char {
778778 pub fn is_alphabetic ( self ) -> bool {
779779 match self {
780780 'a' ..='z' | 'A' ..='Z' => true ,
781- c => c > '\x7f' && unicode:: Alphabetic ( c) ,
781+ '\0' ..='\u{A9}' => false ,
782+ _ => unicode:: Alphabetic ( self ) ,
783+ }
784+ }
785+
786+ /// Returns `true` if this `char` has the `Cased` property.
787+ /// A character is cased if and only if it is uppercase, lowercase, or titlecase.
788+ ///
789+ /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
790+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
791+ ///
792+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
793+ /// [ucd]: https://www.unicode.org/reports/tr44/
794+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
795+ ///
796+ /// # Examples
797+ ///
798+ /// Basic usage:
799+ ///
800+ /// ```
801+ /// #![feature(titlecase)]
802+ /// assert!('A'.is_cased());
803+ /// assert!('a'.is_cased());
804+ /// assert!(!'京'.is_cased());
805+ /// ```
806+ #[ must_use]
807+ #[ unstable( feature = "titlecase" , issue = "153892" ) ]
808+ #[ inline]
809+ pub fn is_cased ( self ) -> bool {
810+ match self {
811+ 'a' ..='z' | 'A' ..='Z' => true ,
812+ '\0' ..='\u{A9}' => false ,
813+ _ => unicode:: Cased ( self ) ,
814+ }
815+ }
816+
817+ /// Returns the case of this character:
818+ /// [`Some(CharCase::Upper)`][`CharCase::Upper`] if [`self.is_uppercase()`][`char::is_uppercase`],
819+ /// [`Some(CharCase::Lower)`][`CharCase::Lower`] if [`self.is_lowercase()`][`char::is_lowercase`],
820+ /// [`Some(CharCase::Title)`][`CharCase::Title`] if [`self.is_titlecase()`][`char::is_titlecase`], and
821+ /// `None` if [`!self.is_cased()`][`char::is_cased`].
822+ ///
823+ /// # Examples
824+ ///
825+ /// ```
826+ /// #![feature(titlecase)]
827+ /// use core::char::CharCase;
828+ /// assert_eq!('a'.case(), Some(CharCase::Lower));
829+ /// assert_eq!('δ'.case(), Some(CharCase::Lower));
830+ /// assert_eq!('A'.case(), Some(CharCase::Upper));
831+ /// assert_eq!('Δ'.case(), Some(CharCase::Upper));
832+ /// assert_eq!('Dž'.case(), Some(CharCase::Title));
833+ /// assert_eq!('中'.case(), None);
834+ /// ```
835+ #[ must_use]
836+ #[ unstable( feature = "titlecase" , issue = "153892" ) ]
837+ #[ inline]
838+ pub fn case ( self ) -> Option < CharCase > {
839+ match self {
840+ 'a' ..='z' => Some ( CharCase :: Lower ) ,
841+ 'A' ..='Z' => Some ( CharCase :: Upper ) ,
842+ '\0' ..='\u{A9}' => None ,
843+ _ if !unicode:: Cased ( self ) => None ,
844+ _ if unicode:: Lowercase ( self ) => Some ( CharCase :: Lower ) ,
845+ _ if unicode:: Uppercase ( self ) => Some ( CharCase :: Upper ) ,
846+ _ => Some ( CharCase :: Title ) ,
782847 }
783848 }
784849
@@ -819,7 +884,42 @@ impl char {
819884 pub const fn is_lowercase ( self ) -> bool {
820885 match self {
821886 'a' ..='z' => true ,
822- c => c > '\x7f' && unicode:: Lowercase ( c) ,
887+ '\0' ..='\u{A9}' => false ,
888+ _ => unicode:: Lowercase ( self ) ,
889+ }
890+ }
891+
892+ /// Returns `true` if this `char` has the general category for titlecase letters.
893+ /// Conceptually, these characters consist of an uppercase portion followed by a lowercase portion.
894+ ///
895+ /// Titlecase letters (code points with the general category of `Lt`) are described in Chapter 4
896+ /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
897+ /// Database][ucd] [`UnicodeData.txt`].
898+ ///
899+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
900+ /// [ucd]: https://www.unicode.org/reports/tr44/
901+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
902+ ///
903+ /// # Examples
904+ ///
905+ /// Basic usage:
906+ ///
907+ /// ```
908+ /// #![feature(titlecase)]
909+ /// assert!('Dž'.is_titlecase());
910+ /// assert!('ῼ'.is_titlecase());
911+ /// assert!(!'D'.is_titlecase());
912+ /// assert!(!'z'.is_titlecase());
913+ /// assert!(!'中'.is_titlecase());
914+ /// assert!(!' '.is_titlecase());
915+ /// ```
916+ #[ must_use]
917+ #[ unstable( feature = "titlecase" , issue = "153892" ) ]
918+ #[ inline]
919+ pub fn is_titlecase ( self ) -> bool {
920+ match self {
921+ '\0' ..='\u{01C4}' => false ,
922+ _ => self . is_cased ( ) && !self . is_lowercase ( ) && !self . is_uppercase ( ) ,
823923 }
824924 }
825925
@@ -860,7 +960,8 @@ impl char {
860960 pub const fn is_uppercase ( self ) -> bool {
861961 match self {
862962 'A' ..='Z' => true ,
863- c => c > '\x7f' && unicode:: Uppercase ( c) ,
963+ '\0' ..='\u{BF}' => false ,
964+ _ => unicode:: Uppercase ( self ) ,
864965 }
865966 }
866967
@@ -893,7 +994,8 @@ impl char {
893994 pub const fn is_whitespace ( self ) -> bool {
894995 match self {
895996 ' ' | '\x09' ..='\x0d' => true ,
896- c => c > '\x7f' && unicode:: White_Space ( c) ,
997+ '\0' ..='\u{84}' => false ,
998+ _ => unicode:: White_Space ( self ) ,
897999 }
8981000 }
8991001
@@ -920,10 +1022,10 @@ impl char {
9201022 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
9211023 #[ inline]
9221024 pub fn is_alphanumeric ( self ) -> bool {
923- if self . is_ascii ( ) {
924- self . is_ascii_alphanumeric ( )
925- } else {
926- unicode:: Alphabetic ( self ) || unicode:: N ( self )
1025+ match self {
1026+ 'a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9' => true ,
1027+ '\0' ..= '\u{A9}' => false ,
1028+ _ => unicode:: Alphabetic ( self ) || unicode:: N ( self ) ,
9271029 }
9281030 }
9291031
@@ -969,23 +1071,7 @@ impl char {
9691071 #[ must_use]
9701072 #[ inline]
9711073 pub ( crate ) fn is_grapheme_extended ( self ) -> bool {
972- !self . is_ascii ( ) && unicode:: Grapheme_Extend ( self )
973- }
974-
975- /// Returns `true` if this `char` has the `Cased` property.
976- ///
977- /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
978- /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
979- ///
980- /// [Unicode Standard]: https://www.unicode.org/versions/latest/
981- /// [ucd]: https://www.unicode.org/reports/tr44/
982- /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
983- #[ must_use]
984- #[ inline]
985- #[ doc( hidden) ]
986- #[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
987- pub fn is_cased ( self ) -> bool {
988- if self . is_ascii ( ) { self . is_ascii_alphabetic ( ) } else { unicode:: Cased ( self ) }
1074+ self > '\u{02FF}' && unicode:: Grapheme_Extend ( self )
9891075 }
9901076
9911077 /// Returns `true` if this `char` has the `Case_Ignorable` property.
@@ -1047,7 +1133,8 @@ impl char {
10471133 pub fn is_numeric ( self ) -> bool {
10481134 match self {
10491135 '0' ..='9' => true ,
1050- c => c > '\x7f' && unicode:: N ( c) ,
1136+ '\0' ..='\u{B1}' => false ,
1137+ _ => unicode:: N ( self ) ,
10511138 }
10521139 }
10531140
@@ -1110,17 +1197,123 @@ impl char {
11101197 /// // convert into themselves.
11111198 /// assert_eq!('山'.to_lowercase().to_string(), "山");
11121199 /// ```
1113- #[ must_use = "this returns the lowercase character as a new iterator, \
1200+ #[ must_use = "this returns the lowercased character as a new iterator, \
11141201 without modifying the original"]
11151202 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
11161203 #[ inline]
11171204 pub fn to_lowercase ( self ) -> ToLowercase {
11181205 ToLowercase ( CaseMappingIter :: new ( conversions:: to_lower ( self ) ) )
11191206 }
11201207
1208+ /// Returns an iterator that yields the titlecase mapping of this `char` as one or more
1209+ /// `char`s.
1210+ ///
1211+ /// This is usually, but not always, equivalent to the uppercase mapping
1212+ /// returned by [`Self::to_uppercase`]. Prefer this method when seeking to capitalize
1213+ /// Only The First Letter of a word, but use [`Self::to_uppercase`] for ALL CAPS.
1214+ ///
1215+ /// If this `char` does not have a titlecase mapping, the iterator yields the same `char`.
1216+ ///
1217+ /// If this `char` has a one-to-one titlecase mapping given by the [Unicode Character
1218+ /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1219+ ///
1220+ /// [ucd]: https://www.unicode.org/reports/tr44/
1221+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1222+ ///
1223+ /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
1224+ /// the `char`(s) given by [`SpecialCasing.txt`].
1225+ ///
1226+ /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1227+ ///
1228+ /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1229+ /// is independent of context and language.
1230+ ///
1231+ /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1232+ /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1233+ ///
1234+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1235+ ///
1236+ /// # Examples
1237+ ///
1238+ /// As an iterator:
1239+ ///
1240+ /// ```
1241+ /// #![feature(titlecase)]
1242+ /// for c in 'ß'.to_titlecase() {
1243+ /// print!("{c}");
1244+ /// }
1245+ /// println!();
1246+ /// ```
1247+ ///
1248+ /// Using `println!` directly:
1249+ ///
1250+ /// ```
1251+ /// #![feature(titlecase)]
1252+ /// println!("{}", 'ß'.to_titlecase());
1253+ /// ```
1254+ ///
1255+ /// Both are equivalent to:
1256+ ///
1257+ /// ```
1258+ /// println!("Ss");
1259+ /// ```
1260+ ///
1261+ /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1262+ ///
1263+ /// ```
1264+ /// #![feature(titlecase)]
1265+ /// assert_eq!('c'.to_titlecase().to_string(), "C");
1266+ /// assert_eq!('dž'.to_titlecase().to_string(), "Dž");
1267+ /// assert_eq!('ῼ'.to_titlecase().to_string(), "ῼ");
1268+ ///
1269+ /// // Sometimes the result is more than one character:
1270+ /// assert_eq!('ß'.to_titlecase().to_string(), "Ss");
1271+ ///
1272+ /// // Characters that do not have separate cased forms
1273+ /// // convert into themselves.
1274+ /// assert_eq!('山'.to_titlecase().to_string(), "山");
1275+ /// ```
1276+ ///
1277+ /// # Note on locale
1278+ ///
1279+ /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1280+ ///
1281+ /// * 'Dotless': I / ı, sometimes written ï
1282+ /// * 'Dotted': İ / i
1283+ ///
1284+ /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1285+ ///
1286+ /// ```
1287+ /// #![feature(titlecase)]
1288+ /// let upper_i = 'i'.to_titlecase().to_string();
1289+ /// ```
1290+ ///
1291+ /// The value of `upper_i` here relies on the language of the text: if we're
1292+ /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
1293+ /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
1294+ ///
1295+ /// ```
1296+ /// #![feature(titlecase)]
1297+ /// let upper_i = 'i'.to_titlecase().to_string();
1298+ ///
1299+ /// assert_eq!(upper_i, "I");
1300+ /// ```
1301+ ///
1302+ /// holds across languages.
1303+ #[ must_use = "this returns the titlecased character as a new iterator, \
1304+ without modifying the original"]
1305+ #[ unstable( feature = "titlecase" , issue = "153892" ) ]
1306+ #[ inline]
1307+ pub fn to_titlecase ( self ) -> ToTitlecase {
1308+ ToTitlecase ( CaseMappingIter :: new ( conversions:: to_title ( self ) ) )
1309+ }
1310+
11211311 /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
11221312 /// `char`s.
11231313 ///
1314+ /// Prefer this method when converting a word into ALL CAPS, but consider [`Self::to_titlecase`]
1315+ /// instead if you seek to capitalize Only The First Letter.
1316+ ///
11241317 /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
11251318 ///
11261319 /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
@@ -1170,9 +1363,11 @@ impl char {
11701363 ///
11711364 /// ```
11721365 /// assert_eq!('c'.to_uppercase().to_string(), "C");
1366+ /// assert_eq!('dž'.to_uppercase().to_string(), "DŽ");
11731367 ///
11741368 /// // Sometimes the result is more than one character:
11751369 /// assert_eq!('ſt'.to_uppercase().to_string(), "ST");
1370+ /// assert_eq!('ῼ'.to_uppercase().to_string(), "ΩΙ");
11761371 ///
11771372 /// // Characters that do not have both uppercase and lowercase
11781373 /// // convert into themselves.
@@ -1181,7 +1376,7 @@ impl char {
11811376 ///
11821377 /// # Note on locale
11831378 ///
1184- /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
1379+ /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
11851380 ///
11861381 /// * 'Dotless': I / ı, sometimes written ï
11871382 /// * 'Dotted': İ / i
@@ -1193,7 +1388,7 @@ impl char {
11931388 /// ```
11941389 ///
11951390 /// The value of `upper_i` here relies on the language of the text: if we're
1196- /// in `en-US`, it should be `"I"`, but if we're in `tr_TR `, it should
1391+ /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
11971392 /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
11981393 ///
11991394 /// ```
@@ -1203,7 +1398,7 @@ impl char {
12031398 /// ```
12041399 ///
12051400 /// holds across languages.
1206- #[ must_use = "this returns the uppercase character as a new iterator, \
1401+ #[ must_use = "this returns the uppercased character as a new iterator, \
12071402 without modifying the original"]
12081403 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
12091404 #[ inline]
@@ -1446,7 +1641,7 @@ impl char {
14461641 #[ rustc_const_stable( feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" ) ]
14471642 #[ inline]
14481643 pub const fn is_ascii_alphabetic ( & self ) -> bool {
1449- matches ! ( * self , 'A ' ..='Z ' | 'a ' ..='z ' )
1644+ matches ! ( * self , 'a ' ..='z ' | 'A ' ..='Z ' )
14501645 }
14511646
14521647 /// Checks if the value is an ASCII uppercase character:
0 commit comments