@@ -9451,11 +9451,32 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
9451
9451
#undef GET_DESCRIPTION
9452
9452
#ifdef USE_LOCALE_COLLATE
9453
9453
9454
- STATIC void
9454
+ STATIC bool
9455
9455
S_compute_collxfrm_coefficients (pTHX )
9456
9456
{
9457
-
9458
- /* A locale collation definition includes primary, secondary, tertiary,
9457
+ /* This is called from mem_collxfrm() the first time the latter is called
9458
+ * on the current locale to do initialization for it.
9459
+ *
9460
+ * This returns true and initializes the coefficients for a linear equation
9461
+ * that, given a string of some length, predicts how much memory it will
9462
+ * take to hold the result of calling mem_collxfrm() on that string. The
9463
+ * equation is of the form:
9464
+ * m * length + b
9465
+ * where m = PL_collxfrm_mult and b = PL_collxfrm_base
9466
+ *
9467
+ * It returns false if the locale does not appear to be sane.
9468
+ *
9469
+ * The prediction is just an educated guess to save time, and
9470
+ * mem_collxfrm() may adjust it based on experience with strings it
9471
+ * encounters.
9472
+ *
9473
+ * This function also:
9474
+ * sets 'PL_in_utf8_COLLATE_locale' to indicate if the locale is a
9475
+ * UTF-8 one
9476
+ * initializes 'PL_strxfrm_NUL_replacement' to NUL
9477
+ * initializes 'PL_strxfrm_max_cp' to 0
9478
+ *
9479
+ * A locale collation definition includes primary, secondary, tertiary,
9459
9480
* etc. weights for each character. To sort, the primary weights are used,
9460
9481
* and only if they compare equal, then the secondary weights are used, and
9461
9482
* only if they compare equal, then the tertiary, etc.
@@ -9564,8 +9585,9 @@ S_compute_collxfrm_coefficients(pTHX)
9564
9585
"Disabling locale collation for LC_COLLATE='%s';"
9565
9586
" length for shorter sample=%zu; longer=%zu\n" ,
9566
9587
PL_collation_name , x_len_shorter , x_len_longer ));
9588
+ return false;
9567
9589
}
9568
- else {
9590
+
9569
9591
SSize_t base ; /* Temporary */
9570
9592
9571
9593
/* We have both: m * strlen(longer) + b = x_len_longer
@@ -9597,7 +9619,6 @@ S_compute_collxfrm_coefficients(pTHX)
9597
9619
9598
9620
/* Add 1 for the trailing NUL */
9599
9621
PL_collxfrm_base = base + 1 ;
9600
- }
9601
9622
9602
9623
DEBUG_L (PerlIO_printf (Perl_debug_log ,
9603
9624
"?UTF-8 locale=%d; x_len_shorter=%zu, "
@@ -9606,6 +9627,7 @@ S_compute_collxfrm_coefficients(pTHX)
9606
9627
PL_in_utf8_COLLATE_locale ,
9607
9628
x_len_shorter , x_len_longer ,
9608
9629
PL_collxfrm_mult , PL_collxfrm_base ));
9630
+ return true;
9609
9631
}
9610
9632
9611
9633
char *
@@ -9668,7 +9690,9 @@ Perl_mem_collxfrm_(pTHX_ const char *input_string,
9668
9690
9669
9691
/* (mult, base) == (0,0) means we need to calculate mult and base
9670
9692
* before proceeding */
9671
- S_compute_collxfrm_coefficients (aTHX );
9693
+ if (! S_compute_collxfrm_coefficients (aTHX )) {
9694
+ return NULL ; /* locale collation not sane */
9695
+ }
9672
9696
}
9673
9697
9674
9698
/* Replace any embedded NULs with the control that sorts before any others.
0 commit comments