-
Notifications
You must be signed in to change notification settings - Fork 6.3k
8385834: Duplicate placeholders behavior of ListFormat #31377
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -84,9 +84,9 @@ | |
| * Note: these examples are from CLDR, there could be different results from other locale providers. | ||
| * <p> | ||
| * Alternatively, Locale, Type, and/or Style independent instances | ||
| * can be created with {@link #getInstance(String[])}. The String array to the | ||
| * method specifies the delimiting patterns for the start/middle/end portion of | ||
| * the formatted string, as well as optional specialized patterns for two or three | ||
| * can be created with {@link #getInstance(String[])}. The String array passed to the | ||
| * method specifies the delimiting patterns for the {@code start}/{@code middle}/{@code end} | ||
| * portion of the formatted string, as well as optional specialized patterns for two or three | ||
| * elements. Refer to the method description for more detail. | ||
| * <p> | ||
| * On parsing, if some ambiguity is found in the input string, such as delimiting | ||
|
|
@@ -121,7 +121,8 @@ public final class ListFormat extends Format { | |
|
|
||
| /** | ||
| * The array of five pattern Strings. Each element corresponds to the Unicode LDML's | ||
| * `listPatternsPart` type, i.e, start/middle/end/two/three. | ||
| * {@code listPatternPart} type, i.e, | ||
| * {@code start}/{@code middle}/{@code end}/{@code two}/{@code three}. | ||
| * @serial | ||
| */ | ||
| private final String[] patterns; | ||
|
|
@@ -153,6 +154,7 @@ private void init() { | |
| var pattern = patterns[START]; | ||
| var placeholderPositions = findPlaceholders(pattern); | ||
| if (placeholderPositions != null && | ||
| placeholderPositions[2] == -1 && | ||
| placeholderPositions[1] + PLACEHOLDER_LENGTH == pattern.length()) { | ||
| startBefore = pattern.substring(0, placeholderPositions[0]); | ||
| startBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH, | ||
|
|
@@ -164,6 +166,7 @@ private void init() { | |
| pattern = patterns[MIDDLE]; | ||
| placeholderPositions = findPlaceholders(pattern); | ||
| if (placeholderPositions != null && | ||
| placeholderPositions[2] == -1 && | ||
| placeholderPositions[0] == 0 && | ||
| placeholderPositions[1] + PLACEHOLDER_LENGTH == pattern.length()) { | ||
| middleBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH, | ||
|
|
@@ -174,7 +177,9 @@ private void init() { | |
|
|
||
| pattern = patterns[END]; | ||
| placeholderPositions = findPlaceholders(pattern); | ||
| if (placeholderPositions != null && placeholderPositions[0] == 0) { | ||
| if (placeholderPositions != null && | ||
| placeholderPositions[2] == -1 && | ||
| placeholderPositions[0] == 0) { | ||
| endBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH, | ||
| placeholderPositions[1]); | ||
| endAfter = pattern.substring(placeholderPositions[1] + PLACEHOLDER_LENGTH); | ||
|
|
@@ -185,7 +190,8 @@ private void init() { | |
| // Validate two/three patterns, if given. Otherwise, generate them | ||
| pattern = patterns[TWO]; | ||
| if (!pattern.isEmpty()) { | ||
| if (findPlaceholders(pattern) == null) { | ||
| placeholderPositions = findPlaceholders(pattern); | ||
| if (placeholderPositions == null || placeholderPositions[2] >= 0) { | ||
| throw new IllegalArgumentException("pattern for two is incorrect: " + pattern); | ||
| } | ||
| } else { | ||
|
|
@@ -245,36 +251,43 @@ public static ListFormat getInstance(Locale locale, Type type, Style style) { | |
| * instead of letting the runtime provide appropriate patterns for the {@code Locale}, | ||
| * {@code Type}, or {@code Style}. | ||
| * <p> | ||
| * The patterns array should contain five String patterns, each corresponding to the Unicode LDML's | ||
| * {@code listPatternPart}, i.e., "start", "middle", "end", two element, and three element patterns | ||
| * in this order. Each pattern contains "{0}" and "{1}" (and "{2}" for the three element pattern) | ||
| * placeholders that are substituted with the passed input strings on formatting. | ||
| * If the length of the patterns array is not 5, an {@code IllegalArgumentException} | ||
| * is thrown. | ||
| * The patterns array should contain five String patterns, each corresponding | ||
| * to the Unicode LDML's {@code listPatternPart}, i.e., {@code start}, | ||
| * {@code middle}, {@code end}, {@code two} element, and {@code three} | ||
| * element patterns in this order. Each pattern contains "{0}" and "{1}" | ||
| * (and "{2}" for the {@code three} element pattern) placeholders that are | ||
| * substituted with the passed input strings on formatting. If the length of | ||
| * the patterns array is not 5, an {@code IllegalArgumentException} is thrown. | ||
| * <p> | ||
| * Each pattern string is first parsed as follows. Literals in parentheses, such as | ||
| * "start_before", are optional: | ||
| * <blockquote><pre> | ||
| * {@snippet : | ||
| * start := (start_before){0}start_between{1} | ||
| * middle := {0}middle_between{1} | ||
| * end := {0}end_between{1}(end_after) | ||
| * two := (two_before){0}two_between{1}(two_after) | ||
| * three := (three_before){0}three_between1{1}three_between2{2}(three_after) | ||
| * </pre></blockquote> | ||
| * If two or three pattern string is empty, it falls back to | ||
| * {@code "(start_before){0}end_between{1}(end_after)"}, | ||
| * {@code "(start_before){0}start_between{1}end_between{2}(end_after)"} respectively. | ||
| * If parsing of any pattern string for start, middle, end, two, or three fails, | ||
| * } | ||
| * If the {@code two} or {@code three} pattern string is empty, it falls back to | ||
| * {@snippet : | ||
| * (start_before){0}end_between{1}(end_after) | ||
| * (start_before){0}start_between{1}end_between{2}(end_after) | ||
| * } | ||
| * respectively. | ||
| * If parsing of any pattern string for {@code start}, {@code middle}, | ||
| * {@code end}, {@code two}, or {@code three} fails, including duplicate | ||
| * placeholders, "{2}" in patterns other than the {@code three} element | ||
| * pattern, or any use of "{" or "}" other than "{0}", "{1}", or "{2}", | ||
| * it throws an {@code IllegalArgumentException}. | ||
| * <p> | ||
| * On formatting, the input string list with {@code n} elements substitutes above | ||
| * placeholders based on the number of elements: | ||
| * <blockquote><pre> | ||
| * {@snippet : | ||
| * n = 1: {0} | ||
| * n = 2: parsed pattern for "two" | ||
| * n = 3: parsed pattern for "three" | ||
| * n > 3: (start_before){0}start_between{1}middle_between{2} ... middle_between{m}end_between{n}(end_after) | ||
| * </pre></blockquote> | ||
| * } | ||
| * As an example, the following table shows a pattern array which is equivalent to | ||
| * {@code STANDARD} type, {@code FULL} style in US English: | ||
| * <table class="striped"> | ||
|
|
@@ -670,9 +683,20 @@ public enum Style { | |
| * @param pattern pattern string to parse | ||
| */ | ||
| private static int[] findPlaceholders(String pattern) { | ||
| if (!validatePlaceholders(pattern)) { | ||
| return null; | ||
| } | ||
|
|
||
| var positions = new int[3]; | ||
| for (int i = 0; i < positions.length; i++) { | ||
| positions[i] = pattern.indexOf("{" + i + "}"); | ||
| var ph = "{" + i + "}"; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
What would we expect to happen if In the current form, depending on the number of elements, it could either be printed out literally, or it could be interpreted as a I would not be opposed to rejecting any placeholders that are not in the form of only 0, 1, and 2. I am going to guess that no locales rely on {HARD_CODED_NUMBER} as being a part of their list patterns. What do you think?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point. I think rejecting is fine. In fact, I made it stricter as any curly brace use other than 0/1/2 now throws an exception because MessageFormat has its own parsing, which prevents "{foo}" from working as a literal. |
||
| positions[i] = pattern.indexOf(ph); | ||
| if (positions[i] >= 0) { | ||
| // Check for duplicate placeholders | ||
| if (pattern.indexOf(ph, positions[i] + PLACEHOLDER_LENGTH) >= 0) { | ||
| return null; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Check the existence and order of the placeholders | ||
|
|
@@ -707,4 +731,30 @@ private static int[] findPattern(String source, int pos, String prefix, String s | |
| return prefixPos < suffixPos ? | ||
| new int[] {prefixPos, suffixPos + suffix.length()} : null; | ||
| } | ||
|
|
||
| /** | ||
| * Validates placeholders within the input pattern. Only | ||
| * "{0}", "{1}", or "{2}" are allowed. Any other use of | ||
| * curly braces is not allowed. | ||
| * | ||
| * @param pattern input pattern | ||
| * @return validation result | ||
| */ | ||
| private static boolean validatePlaceholders(String pattern) { | ||
| for (int i = 0; i < pattern.length(); i++) { | ||
| var ch = pattern.charAt(i); | ||
| if (ch == '{') { | ||
| if (i + PLACEHOLDER_LENGTH > pattern.length() || | ||
| pattern.charAt(i + 1) < '0' || | ||
| pattern.charAt(i + 1) > '2' || | ||
| pattern.charAt(i + 2) != '}') { | ||
| return false; | ||
| } | ||
| i += PLACEHOLDER_LENGTH - 1; | ||
| } else if (ch == '}') { | ||
| return false; | ||
| } | ||
| } | ||
| return true; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,7 @@ | |
|
|
||
| /* | ||
| * @test | ||
| * @bug 8041488 8316974 8318569 8306116 8385736 | ||
| * @bug 8041488 8316974 8318569 8306116 8385736 8385834 | ||
| * @summary Tests for ListFormat class | ||
| * @run junit TestListFormat | ||
| */ | ||
|
|
@@ -210,6 +210,10 @@ private static Arguments[] parseObject_parsePos() { | |
| arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE2), | ||
| arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE3), | ||
| arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE4), | ||
| arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE1), | ||
| arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE2), | ||
| arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE3), | ||
| arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE4), | ||
| }; | ||
| } | ||
|
|
||
|
|
@@ -237,13 +241,37 @@ private static Arguments[] getLocale_localeDependent() { | |
| }; | ||
| } | ||
|
|
||
| private static Arguments[] getInstance_1Arg_InvalidLongPattern() { | ||
| private static final String ZERO_REPEAT = "{0}".repeat(100_000); | ||
| private static Arguments[] getInstance_1Arg_InvalidPlaceholder() { | ||
| return new Arguments[] { | ||
| arguments(0, "start pattern is incorrect:"), | ||
| arguments(1, "middle pattern is incorrect:"), | ||
| arguments(2, "end pattern is incorrect:"), | ||
| arguments(3, "pattern for two is incorrect:"), | ||
| arguments(4, "pattern for three is incorrect:"), | ||
| // Duplicate placeholders | ||
| arguments(0, "{0} {0} {1}", "start pattern is incorrect: {0} {0} {1}"), | ||
| arguments(0, "{0} {1} {1}", "start pattern is incorrect: {0} {1} {1}"), | ||
| arguments(0, "{0} {1} {2}", "start pattern is incorrect: {0} {1} {2}"), | ||
| arguments(1, "{0} {0} {1}", "middle pattern is incorrect: {0} {0} {1}"), | ||
| arguments(1, "{0} {1} {1}", "middle pattern is incorrect: {0} {1} {1}"), | ||
| arguments(1, "{0} {1} {2}", "middle pattern is incorrect: {0} {1} {2}"), | ||
| arguments(2, "{0} {0} {1}", "end pattern is incorrect: {0} {0} {1}"), | ||
| arguments(2, "{0} {1} {1}", "end pattern is incorrect: {0} {1} {1}"), | ||
| arguments(2, "{0} {1} {2}", "end pattern is incorrect: {0} {1} {2}"), | ||
| arguments(3, "{0} {0} {1}", "pattern for two is incorrect: {0} {0} {1}"), | ||
| arguments(3, "{0} {1} {1}", "pattern for two is incorrect: {0} {1} {1}"), | ||
| arguments(3, "{0} {1} {2}", "pattern for two is incorrect: {0} {1} {2}"), | ||
| arguments(4, "{0} {2} {1}", "pattern for three is incorrect: {0} {2} {1}"), | ||
| arguments(4, "{0} {0} {1} {2}", "pattern for three is incorrect: {0} {0} {1} {2}"), | ||
| arguments(4, "{0} {1} {1} {2}", "pattern for three is incorrect: {0} {1} {1} {2}"), | ||
| arguments(4, "{0} {1} {2} {2}", "pattern for three is incorrect: {0} {1} {2} {2}"), | ||
| arguments(4, ZERO_REPEAT + " {1} {2}", "pattern for three is incorrect: " + ZERO_REPEAT + " {1} {2}"), | ||
|
|
||
| // invalid placeholders | ||
| arguments(0, "{0} {1} {", "start pattern is incorrect: {0} {1} {"), | ||
| arguments(0, "{0} {1} }", "start pattern is incorrect: {0} {1} }"), | ||
| arguments(3, "{0} {1} {3}", "pattern for two is incorrect: {0} {1} {3}"), | ||
| arguments(4, "{3} {0} {1}", "pattern for three is incorrect: {3} {0} {1}"), | ||
| arguments(4, "{333} {0} {1}", "pattern for three is incorrect: {333} {0} {1}"), | ||
| arguments(4, "{0} {1} {abc}", "pattern for three is incorrect: {0} {1} {abc}"), | ||
| arguments(4, "{0} {1} {2, number}", "pattern for three is incorrect: {0} {1} {2, number}"), | ||
| arguments(4, "{0} {1} {2} {3}", "pattern for three is incorrect: {0} {1} {2} {3}"), | ||
| }; | ||
| } | ||
|
|
||
|
|
@@ -264,21 +292,20 @@ void getInstance_1Arg_IAE(String[] invalidPatterns, String errorMsg) { | |
|
|
||
| @ParameterizedTest | ||
| @MethodSource | ||
| void getInstance_1Arg_InvalidLongPattern(int index, String expected) { | ||
| void getInstance_1Arg_InvalidPlaceholder(int index, String invalidPattern, String expected) { | ||
| var patterns = new String[]{ | ||
| "{0}, {1}", | ||
| "{0}, {1}", | ||
| "{0}, and {1}", | ||
| "{0} and {1}", | ||
| "{0} {1} {2}" | ||
| }; | ||
| patterns[index] = "{0}".repeat(100_000); | ||
| patterns[index] = invalidPattern; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand that the current implementation rejects this input immediately because it contains duplicate placeholders, but I think this |
||
|
|
||
| // Ensures validation of invalid long patterns completes without timing out | ||
| var msg = assertThrows(IllegalArgumentException.class, | ||
| () -> ListFormat.getInstance(patterns)) | ||
| .getMessage(); | ||
| assertEquals(expected, msg.substring(0, Math.min(msg.length(), expected.length()))); | ||
| assertEquals(expected, msg); | ||
| } | ||
|
|
||
| @ParameterizedTest | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think "The string array passed to the method" reads better.