From 300a6f690e29c1f4774fd9358d5a4d3b1db26ca8 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Tue, 21 Apr 2026 15:18:16 +0100 Subject: [PATCH] ES|QL command updates for 9.4 (#1576) (cherry picked from commit d3b04415336bbe071280f3e758ebd7bf28baca64) --- src/Helper/Esql/EsqlBase.php | 192 +++++++++++++++- src/Helper/Esql/MetricsInfoCommand.php | 33 +++ src/Helper/Esql/MmrCommand.php | 65 ++++++ src/Helper/Esql/RegisteredDomainCommand.php | 43 ++++ src/Helper/Esql/SetDirective.php | 35 +++ src/Helper/Esql/TsInfoCommand.php | 33 +++ src/Helper/Esql/UriPartsCommand.php | 53 +++++ src/Helper/Esql/UserAgentCommand.php | 59 +++++ tests/Helper/EsqlTest.php | 235 ++++++++++++++++++++ 9 files changed, 746 insertions(+), 2 deletions(-) create mode 100644 src/Helper/Esql/MetricsInfoCommand.php create mode 100644 src/Helper/Esql/MmrCommand.php create mode 100644 src/Helper/Esql/RegisteredDomainCommand.php create mode 100644 src/Helper/Esql/SetDirective.php create mode 100644 src/Helper/Esql/TsInfoCommand.php create mode 100644 src/Helper/Esql/UriPartsCommand.php create mode 100644 src/Helper/Esql/UserAgentCommand.php diff --git a/src/Helper/Esql/EsqlBase.php b/src/Helper/Esql/EsqlBase.php index 04347815e..b8d3c5dc5 100644 --- a/src/Helper/Esql/EsqlBase.php +++ b/src/Helper/Esql/EsqlBase.php @@ -18,6 +18,15 @@ abstract class EsqlBase { private ?EsqlBase $previous_command = null; + private array $directives = []; + + /** + * JSON formatting without escaped forward slashes. + */ + protected function esql_json_encode(mixed $data): string + { + return json_encode($data, JSON_UNESCAPED_SLASHES); + } /** * Formatting helper that renders an identifier using proper escaping rules. @@ -37,6 +46,14 @@ protected function formatId(string $id, bool $allow_patterns = false): string return "`" . str_replace("`", "``", $id) . 
"`"; } + protected function formatExpression(mixed $expr): string + { + if (is_string($expr)) { + return $expr; + } + return $this->esql_json_encode($expr); + } + /** * Formatting helper that renders an associative array as needed by ES|QL. * Used by several ES|QL commands. @@ -51,7 +68,7 @@ function(string $key, mixed $value) use ($joinText) { return $key . " " . $joinText . " " . $value; }, array_keys($map), - array_map($jsonEncode ? 'json_encode' : array($this, 'formatId'), $map), + array_map($jsonEncode ? array($this, 'esql_json_encode') : array($this, 'formatId'), $map), )); } @@ -65,7 +82,7 @@ protected function isNamedArgumentList(array $args): bool { return true; } if ($named_count != 0) { - throw new RuntimeException("foo"); + throw new RuntimeException("Mixed keyword and positional arguments found"); } return false; } @@ -87,6 +104,7 @@ protected function isForked(): bool public function __construct(?EsqlBase $previous_command) { $this->previous_command = $previous_command; + $this->directives = []; } /** @@ -98,6 +116,11 @@ public function render(): string if ($this->previous_command) { $query .= $this->previous_command->render() . "\n| "; } + else { + foreach ($this->directives as $directive) { + $query .= $directive->renderInternal() . ";\n"; + } + } $query .= $this->renderInternal(); return $query; } @@ -107,11 +130,42 @@ public function render(): string */ protected abstract function renderInternal(): string; + private function add_directive(self $directive): void + { + if ($this->previous_command == null) { + array_push($this->directives, $directive); + } + else { + $this->previous_command->add_directive($directive); + } + } + public function __toString(): string { return $this->render() . "\n"; } + /** + * `SET` can be used to specify query settings that modify the behavior of + * an ES|QL query. + * + * @param string ...$params the settings, given as keyword arguments. 
+ * + * Examples: + * + * $query1 = Query::from("many_numbers") + * ->stats(sum: "SUM(sv)") + * ->set(approximation: true); + * $query2 = Query::from("many_numbers") + * ->stats(median: "MEDIAN(sv)") + * ->set(approximation: ["rows" => 10000]); + */ + public function set(mixed ...$params): self + { + $this->add_directive(new SetDirective($params)); + return $this; + } + /** * `CHANGE_POINT` detects spikes, dips, and change points in a metric. * @@ -440,6 +494,63 @@ public function lookupJoin(string $lookup_index): LookupJoinCommand return new LookupJoinCommand($this, $lookup_index); } + /** + * The ``METRICS_INFO`` processing command retrieves information about + * the metrics available in time series data streams, along with their + * applicable dimensions and other metadata. + * + * Examples: + * + * $query1 = Query::ts("k8s") + * ->metricsInfo() + * ->sort("metric_name"); + * $query2 = Query::ts("k8s") + * ->where("cluster == \"prod\"") + * ->metricsInfo() + * ->sort("metric_name"); + */ + public function metricsInfo(): MetricsInfoCommand + { + return new MetricsInfoCommand($this); + } + + /** + * The `MMR` command reduces the result set from a set of input rows by + * applying a diversification strategy to the return rows. + * + * @param string $field The name of the field that will use its values for the + * diversification process. The field must be a dense_vector + * type. + * @param mixed $query_vector The query vector to use as part of the + * diversification algorithm for comparison. Must + * have the same number of dimensions as the vector + * field you are searching against. 
+ * + * Examples: + * + * $query1 = Query::from("mmr_text_vector_keyword") + * ->sort("keyword_field") + * ->limit(10) + * ->mmr("text_vector")->mmr_limit(3) + * ->drop("text_vector", "byte_vector", "bit_vector"); + * $query2 = Query::from("mmr_text_vector_keyword") + * ->sort("keyword_field") + * ->limit(10) + * ->mmr("text_vector", [0.1, 0.2, 0.3])->mmr_limit(3)->with(lambda: 0.1) + * ->drop("text_vector", "byte_vector", "bit_vector"); + * $query3 = Query::from("dense_vector_text")->metadata("_score") + * ->eval(query_embedding: "TEXT_EMBEDDING(\"be excellent to each other\", \"test_dense_inference\")") + * ->where("KNN(text_embedding_field, query_embedding)") + * ->sort("_score DESC") + * ->limit(10) + * ->mmr("text_embedding_field", "TEXT_EMBEDDING(\"be excellent to each other\", \"test_dense_inference\")")->mmr_limit(3)->with(lambda: 0.2) + * ->keep("text_field", "query_embedding"); + */ + public function mmr(string $field, mixed $query_vector = null): MmrCommand + { + return new MmrCommand($this, $field, $query_vector); + } + /** * The `MV_EXPAND` processing command expands multivalued columns into one row per * value, duplicating other columns. @@ -477,6 +588,22 @@ public function rename(string ...$columns): RenameCommand return new RenameCommand($this, $columns); } + /** + * The `REGISTERED_DOMAIN` processing command parses a fully qualified + * domain name (FQDN) string and extracts its parts (domain, registered + * domain, top-level domain, subdomain) into new columns using the public + * suffix list. + * + * @param string ...$prefix A keyword argument, where the argument name is + * the prefix for the output columns, and the value + * is the string expression containing the FQDN + * to parse. 
+ */ + public function registeredDomain(string ...$prefix): RegisteredDomainCommand + { + return new RegisteredDomainCommand($this, $prefix); + } + /** * The `RERANK` command uses an inference model to compute a new relevance score * for an initial set of documents, directly within your ES|QL queries. @@ -613,6 +740,67 @@ public function stats(string ...$expressions): StatsCommand return new StatsCommand($this, $expressions); } + /** + * The ``TS_INFO`` processing command retrieves information about + * individual time series available in time series data streams, along + * with the dimension values that identify each series. + * + * Examples: + * + * $query1 = Query::ts("k8s") + * ->tsInfo() + * ->sort("metric_name", "dimensions"); + * $query2 = Query::ts("k8s") + * ->where("cluster == \"prod\"") + * ->tsInfo() + * ->sort("metric_name", "dimensions"); + */ + public function tsInfo(): TsInfoCommand + { + return new TsInfoCommand($this); + } + + /** + * The `URI_PARTS` processing command parses a Uniform Resource + * Identifier (URI) string and extracts its components into new columns. + * + * @param string ...$prefix A keyword argument, where the argument name is + * the prefix for the output columns, and the value + * is the string expression containing the URI + * to parse. + * + */ + public function uriParts(string ...$prefix): UriPartsCommand + { + return new UriPartsCommand($this, $prefix); + } + + /** + * The `USER_AGENT` processing command parses a user-agent string and + * extracts its components (name, version, OS, device) into new columns. + * + * @param string ...$prefix A keyword argument, where the argument name is + * the prefix for the output columns, and the value + * is the string expression containing the user + * agent string to parse. 
+ * + * Examples: + * + * $query1 = Query::row(input: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36") + * ->userAgent(ua: "input")->with(extract_device_type: true) + * ->keep("ua.*"); + * $query2 = Query::row(input: "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15") + * ->userAgent(ua: "input")->with( + * properties: ["name", "version", "device"], + * extract_device_type: true, + * ) + * ->keep("ua.*"); + */ + public function userAgent(string ...$prefix): UserAgentCommand + { + return new UserAgentCommand($this, $prefix); + } + /** * The `WHERE` processing command produces a table that contains all the rows * from the input table for which the provided condition evaluates to `true`. diff --git a/src/Helper/Esql/MetricsInfoCommand.php b/src/Helper/Esql/MetricsInfoCommand.php new file mode 100644 index 000000000..e5eb41b0b --- /dev/null +++ b/src/Helper/Esql/MetricsInfoCommand.php @@ -0,0 +1,33 @@ +field = $field; + $this->query_vector = $query_vector; + } + + public function mmr_limit(int $max_number_of_rows): MmrCommand + { + $this->max_number_of_rows = $max_number_of_rows; + return $this; + } + + public function with(mixed ...$options): MmrCommand + { + $this->options = $options; + return $this; + } + + protected function renderInternal(): string + { + $on = ""; + if ($this->query_vector != null) { + $on .= $this->formatExpression($this->query_vector) . " "; + } + $on .= "ON " . $this->formatId($this->field); + $limit = ""; + if ($this->max_number_of_rows != null) { + $limit = " LIMIT " . json_encode($this->max_number_of_rows); + } + $with = ""; + if ($this->options != null) { + $with = " WITH " . $this->esql_json_encode($this->options); + } + return "MMR " . $on . $limit . 
$with; + } +} diff --git a/src/Helper/Esql/RegisteredDomainCommand.php b/src/Helper/Esql/RegisteredDomainCommand.php new file mode 100644 index 000000000..b56f01fa9 --- /dev/null +++ b/src/Helper/Esql/RegisteredDomainCommand.php @@ -0,0 +1,43 @@ +isNamedArgumentList($prefix) || sizeof($prefix) != 1) { + throw new RuntimeException("Only one named argument must be given"); + } + parent::__construct($previous_command); + $this->prefix = $prefix; + } + + protected function renderInternal(): string + { + $key = array_keys($this->prefix)[0]; + $value = array_values($this->prefix)[0]; + return "REGISTERED_DOMAIN " . $this->formatId($key) . " = " . $value; + } +} diff --git a/src/Helper/Esql/SetDirective.php b/src/Helper/Esql/SetDirective.php new file mode 100644 index 000000000..630459bbc --- /dev/null +++ b/src/Helper/Esql/SetDirective.php @@ -0,0 +1,35 @@ +params = $params; + } + + protected function renderInternal(): string + { + return "SET " . $this->formatKeyValues($this->params, jsonEncode: true); + } +} diff --git a/src/Helper/Esql/TsInfoCommand.php b/src/Helper/Esql/TsInfoCommand.php new file mode 100644 index 000000000..05edcc9f9 --- /dev/null +++ b/src/Helper/Esql/TsInfoCommand.php @@ -0,0 +1,33 @@ +uriParts(parts: "uri") + * ->keep("parts.*"); + * $query = Query::from("web_logs") + * ->uriParts(p: "uri") + * ->where("p.domain == \"www.example.com\"") + * ->stats("COUNT(*)")->by("p.path"); + */ +class UriPartsCommand extends EsqlBase { + private array $prefix; + + public function __construct(EsqlBase $previous_command, array $prefix) + { + if (!$this->isNamedArgumentList($prefix) || sizeof($prefix) != 1) { + throw new RuntimeException("Only one named argument must be given"); + } + parent::__construct($previous_command); + $this->prefix = $prefix; + } + + protected function renderInternal(): string + { + $key = array_keys($this->prefix)[0]; + $value = array_values($this->prefix)[0]; + return "URI_PARTS " . $this->formatId($key) . " = " . 
$value; + } +} diff --git a/src/Helper/Esql/UserAgentCommand.php b/src/Helper/Esql/UserAgentCommand.php new file mode 100644 index 000000000..3b8e20084 --- /dev/null +++ b/src/Helper/Esql/UserAgentCommand.php @@ -0,0 +1,59 @@ +isNamedArgumentList($prefix) || sizeof($prefix) != 1) { + throw new RuntimeException("Only one named argument must be given"); + } + parent::__construct($previous_command); + $this->prefix = $prefix; + } + + /** + * Continuation of the `USER_AGENT` command. + * + * @param string ...$options Parsing options. + */ + public function with(mixed ...$options): UserAgentCommand + { + $this->options = $options; + return $this; + } + + protected function renderInternal(): string + { + $key = array_keys($this->prefix)[0]; + $value = array_values($this->prefix)[0]; + $cmd = "USER_AGENT " . $this->formatId($key) . " = " . $value; + if (sizeof($this->options) > 0) { + $cmd .= " WITH " . json_encode($this->options); + } + return $cmd; + } +} diff --git a/tests/Helper/EsqlTest.php b/tests/Helper/EsqlTest.php index 9b7f3ee39..5d021ab76 100644 --- a/tests/Helper/EsqlTest.php +++ b/tests/Helper/EsqlTest.php @@ -92,6 +92,56 @@ public function testTS(): void } + public function testSet(): void + { + $query = Query::from("many_numbers") + ->stats(sum: "SUM(sv)") + ->set(approximation: true); + $this->assertEquals(<<stats(median: "MEDIAN(sv)") + ->set(approximation: ["rows" => 10000]); + $this->assertEquals(<<set(approximation: true); + $this->assertEquals(<<set(approximation: true); + $this->assertEquals(<<set(approximation: true); + $this->assertEquals(<<metricsInfo() + ->sort("metric_name"); + $this->assertEquals(<<where("cluster == \"prod\"") + ->metricsInfo() + ->sort("metric_name"); + $this->assertEquals(<<sort("keyword_field") + ->limit(10) + ->mmr("text_vector")->mmr_limit(3) + ->drop("text_vector", "byte_vector", "bit_vector"); + $this->assertEquals(<<sort("keyword_field") + ->limit(10) + ->mmr("text_vector", [0.1, 0.2, 0.3])->mmr_limit(3)->with(lambda: 
0.1) + ->drop("text_vector", "byte_vector", "bit_vector"); + $this->assertEquals(<<metadata("_score") + ->eval(query_embedding: "TEXT_EMBEDDING(\"be excellent to each other\", \"test_dense_inference\")") + ->where("KNN(text_embedding_field, query_embedding)") + ->sort("_score DESC") + ->limit(10) + ->mmr("text_embedding_field", "TEXT_EMBEDDING(\"be excellent to each other\", \"test_dense_inference\")")->mmr_limit(3)->with(lambda: 0.2) + ->keep("text_field", "query_embedding"); + $this->assertEquals(<<registeredDomain(rd: "fqdn") + ->keep("rd.*"); + $this->assertEquals(<<registeredDomain(rd: "domain") + ->where("rd.registered_domain == \"elastic.co\"") + ->stats("COUNT(*)")->by("rd.subdomain"); + $this->assertEquals(<<tsInfo() + ->sort("metric_name", "dimensions"); + $this->assertEquals(<<where("cluster == \"prod\"") + ->tsInfo() + ->sort("metric_name", "dimensions"); + $this->assertEquals(<<uriParts(parts: "uri") + ->keep("parts.*"); + $this->assertEquals(<<uriParts(p: "uri") + ->where("p.domain == \"www.example.com\"") + ->stats("COUNT(*)")->by("p.path"); + $this->assertEquals(<<userAgent(ua: "input")->with(extract_device_type: true) + ->keep("ua.*"); + $this->assertEquals(<<userAgent(ua: "input")->with( + properties: ["name", "version", "device"], + extract_device_type: true, + ) + ->keep("ua.*"); + $this->assertEquals(<<