diff --git a/docs/changelog/128362.yaml b/docs/changelog/128362.yaml new file mode 100644 index 0000000000000..89a59a18f645a --- /dev/null +++ b/docs/changelog/128362.yaml @@ -0,0 +1,5 @@ +pr: 128362 +summary: Avoid unnecessary determinization in index pattern conflict checks +area: Indices APIs +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java index b688177611f91..1c438b993cd6a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java @@ -946,12 +946,15 @@ public static Map> findConflictingV1Templates( final String candidateName, final List indexPatterns ) { - Automaton v2automaton = Regex.simpleMatchToAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY)); + // No need to determinize the automaton, as it is only used to check for intersection with another automaton. + // Determinization is avoided because it can fail or become very costly due to state explosion. + Automaton v2automaton = Regex.simpleMatchToNonDeterminizedAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY)); Map> overlappingTemplates = new HashMap<>(); for (Map.Entry cursor : project.templates().entrySet()) { String name = cursor.getKey(); IndexTemplateMetadata template = cursor.getValue(); - Automaton v1automaton = Regex.simpleMatchToAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY)); + // No need to determinize the automaton, as it is only used to check for intersection with another automaton. 
+ Automaton v1automaton = Regex.simpleMatchToNonDeterminizedAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY)); if (Operations.isEmpty(Operations.intersection(v2automaton, v1automaton)) == false) { logger.debug( "composable template {} and legacy template {} would overlap: {} <=> {}", diff --git a/server/src/main/java/org/elasticsearch/common/regex/Regex.java b/server/src/main/java/org/elasticsearch/common/regex/Regex.java index aaaab78b71736..4b7de8787b683 100644 --- a/server/src/main/java/org/elasticsearch/common/regex/Regex.java +++ b/server/src/main/java/org/elasticsearch/common/regex/Regex.java @@ -59,8 +59,15 @@ public static boolean isSuffixWildcard(String str) { return isSuffixMatchPattern(str) && str.endsWith(".*"); } - /** Return an {@link Automaton} that matches the given pattern. */ - public static Automaton simpleMatchToAutomaton(String pattern) { + /** + * Return a non-determinized {@link Automaton} that matches the given pattern. + * WARNING: Use this method only when the resulting {@link Automaton} is used in contexts + * that do not require determinism (e.g., checking the intersection of automatons). + * + * For pattern matching with {@link CharacterRunAutomaton}, a deterministic automaton is required. + * In that case, use {@link Regex#simpleMatchToAutomaton} instead. + */ + public static Automaton simpleMatchToNonDeterminizedAutomaton(String pattern) { List automata = new ArrayList<>(); int previous = 0; for (int i = pattern.indexOf('*'); i != -1; i = pattern.indexOf('*', i + 1)) { @@ -69,13 +76,24 @@ public static Automaton simpleMatchToAutomaton(String pattern) { previous = i + 1; } automata.add(Automata.makeString(pattern.substring(previous))); - return Operations.determinize(Operations.concatenate(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); + return Operations.concatenate(automata); + } + + /** Return a deterministic {@link Automaton} that matches the given pattern. 
*/ + public static Automaton simpleMatchToAutomaton(String pattern) { + return Operations.determinize(simpleMatchToNonDeterminizedAutomaton(pattern), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); } /** - * Return an Automaton that matches the union of the provided patterns. + * Return a non-determinized {@link Automaton} that matches the union of the given patterns. + * + * WARNING: Use this method only when the resulting {@link Automaton} is used in contexts + * that do not require determinism (e.g., checking the intersection of automatons). + * + * For pattern matching with {@link CharacterRunAutomaton}, a deterministic automaton is required. + * In that case, use {@link Regex#simpleMatchToAutomaton} instead. */ - public static Automaton simpleMatchToAutomaton(String... patterns) { + public static Automaton simpleMatchToNonDeterminizedAutomaton(String... patterns) { if (patterns.length < 1) { throw new IllegalArgumentException("There must be at least one pattern, zero given"); } @@ -88,7 +106,7 @@ public static Automaton simpleMatchToAutomaton(String... patterns) { if (isSuffixWildcard(pattern) && pattern.length() < 1000) { prefixes.add(new BytesRef(pattern.substring(0, pattern.length() - 1))); } else if (isSimpleMatchPattern(pattern) || pattern.length() >= 1000) { - automata.add(simpleMatchToAutomaton(pattern)); + automata.add(simpleMatchToNonDeterminizedAutomaton(pattern)); } else { simpleStrings.add(new BytesRef(pattern)); } @@ -113,7 +131,14 @@ public static Automaton simpleMatchToAutomaton(String... patterns) { prefixAutomaton.add(Automata.makeAnyString()); automata.add(Operations.concatenate(prefixAutomaton)); } - return Operations.determinize(Operations.union(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); + return Operations.union(automata); + } + + /** + * Return a deterministic Automaton that matches the union of the provided patterns. + */ + public static Automaton simpleMatchToAutomaton(String... 
patterns) { + return Operations.determinize(simpleMatchToNonDeterminizedAutomaton(patterns), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); } /** diff --git a/server/src/test/java/org/elasticsearch/common/regex/RegexTests.java b/server/src/test/java/org/elasticsearch/common/regex/RegexTests.java index e0b9da5eb455d..ab5b315da8490 100644 --- a/server/src/test/java/org/elasticsearch/common/regex/RegexTests.java +++ b/server/src/test/java/org/elasticsearch/common/regex/RegexTests.java @@ -10,9 +10,12 @@ import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; import org.elasticsearch.test.ESTestCase; import java.io.IOException; +import java.util.Arrays; import java.util.Locale; import java.util.Random; import java.util.function.Predicate; @@ -20,6 +23,7 @@ import static org.elasticsearch.test.LambdaMatchers.falseWith; import static org.elasticsearch.test.LambdaMatchers.trueWith; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; public class RegexTests extends ESTestCase { @@ -250,4 +254,18 @@ public void testThousandsAndLongPattern() throws IOException { assertTrue(predicate.test(patterns[i])); } } + + public void testIntersectNonDeterminizedAutomaton() { + // patterns too complex to determinize within the default limit + String[] patterns = randomArray(20, 100, size -> new String[size], () -> "*" + randomAlphanumericOfLength(10) + "*"); + Automaton a = Regex.simpleMatchToNonDeterminizedAutomaton(patterns); + assertFalse(a.isDeterministic()); + Automaton b = Regex.simpleMatchToNonDeterminizedAutomaton(Arrays.copyOfRange(patterns, patterns.length / 2, patterns.length)); + assertFalse(b.isDeterministic()); + assertFalse(Operations.isEmpty(Operations.intersection(a, b))); + IllegalArgumentException exc = 
expectThrows(IllegalArgumentException.class, () -> assertMatchesAll(a, "my_test")); + // the run automaton expects a deterministic automaton + assertThat(exc.getMessage(), containsString("deterministic")); + expectThrows(TooComplexToDeterminizeException.class, () -> Regex.simpleMatchToAutomaton(patterns)); + } }