Skip to content

Commit 28f675e

Browse files
committed
mem_collxfrm(): Return early if locale collation not sane
This changes a subsidiary function's return value from void to bool, returning false if it finds the locale doesn't have sane collation. The calling function is changed to check this, and give up immediately if the locale isn't sane.
1 parent 185010b commit 28f675e

File tree

1 file changed

+30
-6
lines changed

1 file changed

+30
-6
lines changed

locale.c

+30-6
Original file line numberDiff line numberDiff line change
@@ -9451,11 +9451,32 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
94519451
#undef GET_DESCRIPTION
94529452
#ifdef USE_LOCALE_COLLATE
94539453

9454-
STATIC void
9454+
STATIC bool
94559455
S_compute_collxfrm_coefficients(pTHX)
94569456
{
9457-
9458-
/* A locale collation definition includes primary, secondary, tertiary,
9457+
/* This is called from mem_collxfrm() the first time the latter is called
9458+
* on the current locale to do initialization for it.
9459+
*
9460+
* This returns true and initializes the coefficients for a linear equation
9461+
* that, given a string of some length, predicts how much memory it will
9462+
* take to hold the result of calling mem_collxfrm() on that string. The
9463+
* equation is of the form:
9464+
* m * length + b
9465+
* where m = PL_collxfrm_mult and b = PL_collxfrm_base
9466+
*
9467+
* It returns false if the locale does not appear to be sane.
9468+
*
9469+
* The prediction is just an educated guess to save time, and
9470+
* mem_collxfrm() may adjust it based on experience with strings it
9471+
* encounters.
9472+
*
9473+
* This function also:
9474+
* sets 'PL_in_utf8_COLLATE_locale' to indicate if the locale is a
9475+
* UTF-8 one
9476+
* initializes 'PL_strxfrm_NUL_replacement' to NUL
9477+
* initializes 'PL_strxfrm_max_cp' to 0
9478+
*
9479+
* A locale collation definition includes primary, secondary, tertiary,
94599480
* etc. weights for each character. To sort, the primary weights are used,
94609481
* and only if they compare equal, then the secondary weights are used, and
94619482
* only if they compare equal, then the tertiary, etc.
@@ -9564,8 +9585,9 @@ S_compute_collxfrm_coefficients(pTHX)
95649585
"Disabling locale collation for LC_COLLATE='%s';"
95659586
" length for shorter sample=%zu; longer=%zu\n",
95669587
PL_collation_name, x_len_shorter, x_len_longer));
9588+
return false;
95679589
}
9568-
else {
9590+
95699591
SSize_t base; /* Temporary */
95709592

95719593
/* We have both: m * strlen(longer) + b = x_len_longer
@@ -9597,7 +9619,6 @@ S_compute_collxfrm_coefficients(pTHX)
95979619

95989620
/* Add 1 for the trailing NUL */
95999621
PL_collxfrm_base = base + 1;
9600-
}
96019622

96029623
DEBUG_L(PerlIO_printf(Perl_debug_log,
96039624
"?UTF-8 locale=%d; x_len_shorter=%zu, "
@@ -9606,6 +9627,7 @@ S_compute_collxfrm_coefficients(pTHX)
96069627
PL_in_utf8_COLLATE_locale,
96079628
x_len_shorter, x_len_longer,
96089629
PL_collxfrm_mult, PL_collxfrm_base));
9630+
return true;
96099631
}
96109632

96119633
char *
@@ -9668,7 +9690,9 @@ Perl_mem_collxfrm_(pTHX_ const char *input_string,
96689690

96699691
/* (mult, base) == (0,0) means we need to calculate mult and base
96709692
* before proceeding */
9671-
S_compute_collxfrm_coefficients(aTHX);
9693+
if (! S_compute_collxfrm_coefficients(aTHX)) {
9694+
return NULL; /* locale collation not sane */
9695+
}
96729696
}
96739697

96749698
/* Replace any embedded NULs with the control that sorts before any others.

0 commit comments

Comments
 (0)