@@ -186,17 +186,12 @@ public static function platformNewLines(string $s): string
186186 */
187187 public static function toAscii (string $ s ): string
188188 {
189- $ iconv = defined ('ICONV_IMPL ' ) ? trim (ICONV_IMPL , '" \'' ) : null ;
190- static $ transliterator = null ;
191- if ($ transliterator === null ) {
192- if (class_exists ('Transliterator ' , false )) {
193- $ transliterator = \Transliterator::create ('Any-Latin; Latin-ASCII ' );
194- } else {
195- trigger_error (__METHOD__ . "(): it is recommended to enable PHP extensions 'intl'. " , E_USER_NOTICE );
196- $ transliterator = false ;
197- }
189+ if (!extension_loaded ('intl ' )) {
190+ throw new Nette \NotSupportedException (__METHOD__ . '() requires INTL extension that is not loaded. ' );
198191 }
199192
193+ $ iconv = defined ('ICONV_IMPL ' ) ? trim (ICONV_IMPL , '" \'' ) : null ;
194+
200195 // remove control characters and check UTF-8 validity
201196 $ s = self ::pcre ('preg_replace ' , ['#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u ' , '' , $ s ]);
202197
@@ -206,39 +201,15 @@ public static function toAscii(string $s): string
206201 $ s = strtr ($ s , ["\u{AE}" => '(R) ' , "\u{A9}" => '(c) ' , "\u{2026}" => '... ' , "\u{AB}" => '<< ' , "\u{BB}" => '>> ' , "\u{A3}" => 'lb ' , "\u{A5}" => 'yen ' , "\u{B2}" => '^2 ' , "\u{B3}" => '^3 ' , "\u{B5}" => 'u ' , "\u{B9}" => '^1 ' , "\u{BA}" => 'o ' , "\u{BF}" => '? ' , "\u{2CA}" => "' " , "\u{2CD}" => '_ ' , "\u{2DD}" => '" ' , "\u{1FEF}" => '' , "\u{20AC}" => 'EUR ' , "\u{2122}" => 'TM ' , "\u{212E}" => 'e ' , "\u{2190}" => '<- ' , "\u{2191}" => '^ ' , "\u{2192}" => '-> ' , "\u{2193}" => 'V ' , "\u{2194}" => '<-> ' ]); // ® © … « » £ ¥ ² ³ µ ¹ º ¿ ˊ ˍ ˝ ` € ™ ℮ ← ↑ → ↓ ↔
207202 }
208203
209- if ($ transliterator ) {
210- $ s = $ transliterator ->transliterate ($ s );
211- // use iconv because The transliterator leaves some characters out of ASCII, eg → ʾ
212- if ($ iconv === 'glibc ' ) {
213- $ s = strtr ($ s , '? ' , "\x01" ); // temporarily hide ? to distinguish them from the garbage that iconv creates
214- $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
215- $ s = str_replace (['? ' , "\x01" ], ['' , '? ' ], $ s ); // remove garbage and restore ? characters
216- } elseif ($ iconv === 'libiconv ' ) {
217- $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
218- } else { // null or 'unknown' (#216)
219- $ s = self ::pcre ('preg_replace ' , ['#[^\x00-\x7F]++# ' , '' , $ s ]); // remove non-ascii chars
220- }
221- } elseif ($ iconv === 'glibc ' || $ iconv === 'libiconv ' ) {
222- // temporarily hide these characters to distinguish them from the garbage that iconv creates
223- $ s = strtr ($ s , '` \'"^~? ' , "\x01\x02\x03\x04\x05\x06" );
224- if ($ iconv === 'glibc ' ) {
225- // glibc implementation is very limited. transliterate into Windows-1250 and then into ASCII, so most Eastern European characters are preserved
226- $ s = iconv ('UTF-8 ' , 'WINDOWS-1250//TRANSLIT//IGNORE ' , $ s );
227- $ s = strtr (
228- $ s ,
229- "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96\xa0\x8b\x97\x9b\xa6\xad\xb7" ,
230- 'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-. ' ,
231- );
232- $ s = self ::pcre ('preg_replace ' , ['#[^\x00-\x7F]++# ' , '' , $ s ]);
233- } else {
234- $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
235- }
236-
237- // remove garbage that iconv creates during transliteration (eg Ý -> Y')
238- $ s = str_replace (['` ' , "' " , '" ' , '^ ' , '~ ' , '? ' ], '' , $ s );
239- // restore temporarily hidden characters
240- $ s = strtr ($ s , "\x01\x02\x03\x04\x05\x06" , '` \'"^~? ' );
241- } else {
204+ $ s = \Transliterator::create ('Any-Latin; Latin-ASCII ' )->transliterate ($ s );
205+ // use iconv because The transliterator leaves some characters out of ASCII, eg → ʾ
206+ if ($ iconv === 'glibc ' ) {
207+ $ s = strtr ($ s , '? ' , "\x01" ); // temporarily hide ? to distinguish them from the garbage that iconv creates
208+ $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
209+ $ s = str_replace (['? ' , "\x01" ], ['' , '? ' ], $ s ); // remove garbage and restore ? characters
210+ } elseif ($ iconv === 'libiconv ' ) {
211+ $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
212+ } else { // null or 'unknown' (#216)
242213 $ s = self ::pcre ('preg_replace ' , ['#[^\x00-\x7F]++# ' , '' , $ s ]); // remove non-ascii chars
243214 }
244215
0 commit comments