@@ -13,7 +13,6 @@ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.parseColumnPath
13
13
import org .apache .spark .sql .connector .expressions .{Expression , FieldReference , LiteralValue }
14
14
import org .apache .spark .sql .connector .expressions .filter .{And , Predicate }
15
15
import org .apache .spark .sql .flint .datatype .FlintDataType .STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS
16
- import org .apache .spark .sql .flint .datatype .FlintMetadataExtensions
17
16
import org .apache .spark .sql .flint .datatype .FlintMetadataExtensions .MetadataExtension
18
17
import org .apache .spark .sql .internal .SQLConf
19
18
import org .apache .spark .sql .types ._
@@ -34,6 +33,14 @@ case class FlintQueryCompiler(schema: StructType) {
34
33
compile(predicates.reduce(new And (_, _)))
35
34
}
36
35
36
/**
 * Render an expression as a Flint query string.
 *
 * This is a thin wrapper over compileOpt: a successfully compiled expression is returned
 * unchanged, while an expression that cannot be compiled collapses to the empty string.
 *
 * @param expr
 *   the expression to compile.
 * @param quoteString
 *   forwarded unchanged to compileOpt.
 * @return
 *   the compiled query, or the empty string if compileOpt yields None.
 */
def compile(expr: Expression, quoteString: Boolean = true): String =
  compileOpt(expr, quoteString) match {
    case Some(query) => query
    case None => ""
  }
43
+
37
44
/**
38
45
* Compile Expression to Flint query string.
39
46
*
@@ -42,13 +49,13 @@ case class FlintQueryCompiler(schema: StructType) {
42
49
* @return
43
50
 * None if the expression is not supported.
44
51
*/
45
def compileOpt(expr: Expression, quoteString: Boolean = true): Option[String] =
  expr match {
    // Literals are rendered via quote/extract, with quoteString passed straight through.
    case LiteralValue(literal, literalType) =>
      Some(quote(extract, quoteString)(literal, literalType))
    // visitPredicate already reports support as an Option, so its result is returned as-is.
    case predicate: Predicate =>
      visitPredicate(predicate)
    // A field reference compiles to its string form.
    case reference: FieldReference =>
      Some(reference.toString())
    // Every other expression kind is unsupported.
    case _ =>
      None
  }
54
61
@@ -77,56 +84,101 @@ case class FlintQueryCompiler(schema: StructType) {
77
84
* 1. currently, we map spark contains to OpenSearch match query. Can we leverage more full
78
85
* text queries for text field. 2. configuration of expensive query.
79
86
*/
80
- def visitPredicate (p : Predicate ): String = {
81
- val name = p.name()
82
- name match {
83
- case " IS_NULL" =>
84
- s """ {"bool":{"must_not":{"exists":{"field":" ${compile(p.children()(0 ))}"}}}} """
85
- case " IS_NOT_NULL" =>
86
- s """ {"exists":{"field":" ${compile(p.children()(0 ))}"}} """
87
- case " AND" =>
88
- s """ {"bool":{"filter":[ ${compile(p.children()(0 ))}, ${compile(p.children()(1 ))}]}} """
89
- case " OR" =>
90
- s """ {"bool":{"should":[{"bool":{"filter": ${compile(
91
- p.children()(0 ))}}},{"bool":{"filter": ${compile(p.children()(1 ))}}}]}} """
92
- case " NOT" =>
93
- s """ {"bool":{"must_not": ${compile(p.children()(0 ))}}} """
94
- case " =" =>
95
- val fieldName = compile(p.children()(0 ))
96
- if (isTextField(fieldName)) {
97
- getKeywordSubfield(fieldName) match {
98
- case Some (keywordField) =>
99
- s """ {"term":{" $keywordField":{"value": ${compile(p.children()(1 ))}}}} """
100
- case None => " "
87
+ def visitPredicate (p : Predicate ): Option [String ] = p.name() match {
88
+ case " IS_NULL" =>
89
+ compileOpt(p.children()(0 )).map { field =>
90
+ s """ {"bool":{"must_not":{"exists":{"field":" $field"}}}} """
91
+ }
92
+ case " IS_NOT_NULL" =>
93
+ compileOpt(p.children()(0 )).map { field =>
94
+ s """ {"exists":{"field":" $field"}} """
95
+ }
96
+ case " AND" =>
97
+ for {
98
+ left <- compileOpt(p.children()(0 ))
99
+ right <- compileOpt(p.children()(1 ))
100
+ } yield s """ {"bool":{"filter":[ $left, $right]}} """
101
+ case " OR" =>
102
+ for {
103
+ left <- compileOpt(p.children()(0 ))
104
+ right <- compileOpt(p.children()(1 ))
105
+ } yield s """ {"bool":{"should":[{"bool":{"filter": $left}},{"bool":{"filter": $right}}]}} """
106
+ case " NOT" =>
107
+ compileOpt(p.children()(0 )).map { child =>
108
+ s """ {"bool":{"must_not": $child}} """
109
+ }
110
+ case " =" =>
111
+ for {
112
+ field <- compileOpt(p.children()(0 ))
113
+ value <- compileOpt(p.children()(1 ))
114
+ result <-
115
+ if (isTextField(field)) {
116
+ getKeywordSubfield(field) match {
117
+ case Some (keywordField) =>
118
+ Some (s """ {"term":{" $keywordField":{"value": $value}}} """ )
119
+ case None => None // Return None for unsupported text fields
120
+ }
121
+ } else {
122
+ Some (s """ {"term":{" $field":{"value": $value}}} """ )
101
123
}
124
+ } yield result
125
+ case " >" =>
126
+ for {
127
+ field <- compileOpt(p.children()(0 ))
128
+ value <- compileOpt(p.children()(1 ))
129
+ } yield s """ {"range":{" $field":{"gt": $value}}} """
130
+ case " >=" =>
131
+ for {
132
+ field <- compileOpt(p.children()(0 ))
133
+ value <- compileOpt(p.children()(1 ))
134
+ } yield s """ {"range":{" $field":{"gte": $value}}} """
135
+ case " <" =>
136
+ for {
137
+ field <- compileOpt(p.children()(0 ))
138
+ value <- compileOpt(p.children()(1 ))
139
+ } yield s """ {"range":{" $field":{"lt": $value}}} """
140
+ case " <=" =>
141
+ for {
142
+ field <- compileOpt(p.children()(0 ))
143
+ value <- compileOpt(p.children()(1 ))
144
+ } yield s """ {"range":{" $field":{"lte": $value}}} """
145
+ case " IN" =>
146
+ for {
147
+ field <- compileOpt(p.children()(0 ))
148
+ valuesList = p.children().tail.flatMap(expr => compileOpt(expr))
149
+ // Only proceed if we have values
150
+ if valuesList.nonEmpty
151
+ } yield {
152
+ val values = valuesList.mkString(" [" , " ," , " ]" )
153
+ s """ {"terms":{" $field": $values}} """
154
+ }
155
+ case " STARTS_WITH" =>
156
+ for {
157
+ field <- compileOpt(p.children()(0 ))
158
+ value <- compileOpt(p.children()(1 ))
159
+ } yield s """ {"prefix":{" $field":{"value": $value}}} """
160
+ case " CONTAINS" =>
161
+ for {
162
+ field <- compileOpt(p.children()(0 ))
163
+ quoteValue <- compileOpt(p.children()(1 ))
164
+ unQuoteValue <- compileOpt(p.children()(1 ), false )
165
+ } yield {
166
+ if (isTextField(field)) {
167
+ s """ {"match":{" $field":{"query": $quoteValue}}} """
102
168
} else {
103
- s """ {"term ":{" $fieldName ":{"value": ${compile(p.children()( 1 ))} }}} """
169
+ s """ {"wildcard ":{" $field ":{"value":"* $unQuoteValue *" }}}"""
104
170
}
105
- case " >" =>
106
- s """ {"range":{" ${compile(p.children()(0 ))}":{"gt": ${compile(p.children()(1 ))}}}} """
107
- case " >=" =>
108
- s """ {"range":{" ${compile(p.children()(0 ))}":{"gte": ${compile(p.children()(1 ))}}}} """
109
- case " <" =>
110
- s """ {"range":{" ${compile(p.children()(0 ))}":{"lt": ${compile(p.children()(1 ))}}}} """
111
- case " <=" =>
112
- s """ {"range":{" ${compile(p.children()(0 ))}":{"lte": ${compile(p.children()(1 ))}}}} """
113
- case " IN" =>
114
- val values = p.children().tail.map(expr => compile(expr)).mkString(" [" , " ," , " ]" )
115
- s """ {"terms":{" ${compile(p.children()(0 ))}": $values}} """
116
- case " STARTS_WITH" =>
117
- s """ {"prefix":{" ${compile(p.children()(0 ))}":{"value": ${compile(p.children()(1 ))}}}} """
118
- case " CONTAINS" =>
119
- val fieldName = compile(p.children()(0 ))
120
- if (isTextField(fieldName)) {
121
- s """ {"match":{" $fieldName":{"query": ${compile(p.children()(1 ))}}}} """
122
- } else {
123
- s """ {"wildcard":{" $fieldName":{"value":"* ${compile(p.children()(1 ), false )}*"}}} """
124
- }
125
- case " ENDS_WITH" =>
126
- s """ {"wildcard":{" ${compile(p.children()(0 ))}":{"value":"* ${compile(
127
- p.children()(1 ),
128
- false )}"}}} """
129
- case " BLOOM_FILTER_MIGHT_CONTAIN" =>
171
+ }
172
+ case " ENDS_WITH" =>
173
+ for {
174
+ field <- compileOpt(p.children()(0 ))
175
+ value <- compileOpt(p.children()(1 ), false )
176
+ } yield s """ {"wildcard":{" $field":{"value":"* $value"}}} """
177
+ case " BLOOM_FILTER_MIGHT_CONTAIN" =>
178
+ for {
179
+ field <- compileOpt(p.children()(0 ))
180
+ value <- compileOpt(p.children()(1 ))
181
+ } yield {
130
182
val code = Source .fromResource(" bloom_filter_query.script" ).getLines().mkString(" " )
131
183
s """
132
184
|{
@@ -137,17 +189,17 @@ case class FlintQueryCompiler(schema: StructType) {
137
189
| "lang": "painless",
138
190
| "source": " $code",
139
191
| "params": {
140
- | "fieldName": " ${compile(p.children()( 0 ))} ",
141
- | "value": ${compile(p.children()( 1 ))}
192
+ | "fieldName": " $field ",
193
+ | "value": $value
142
194
| }
143
195
| }
144
196
| }
145
197
| }
146
198
| }
147
199
|}
148
200
| """ .stripMargin
149
- case _ => " "
150
- }
201
+ }
202
+ case _ => None
151
203
}
152
204
153
205
/**
0 commit comments