Skip to content

Commit cb73b95

Browse files
committed
feat(search): add filter builder DSL and fix search payload for document-index
- Add pharia/filters.py with Filter, And, Or, Not, and ModalityCondition for Pythonic, operator-overloaded search filters
- Fix search_input_to_api to produce the document-index format: query wrapped as [{"modality": "text", "text": ...}], limit renamed to maxResults
- Change the search() return type to list[SearchResult], matching the API response
- Export the Filter DSL from pharia/__init__.py
- Add 34 unit tests in tests/test_filters.py
- Update integration tests and documentation
1 parent d011f6c commit cb73b95

9 files changed

Lines changed: 616 additions & 42 deletions

File tree

README.md

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ meta = await client.v1.search_stores(ssid).documents("my-doc").get()
200200
content = await client.v1.search_stores(ssid).documents("my-doc").get_content() # list[ContentDTO]
201201

202202
# Search
203-
results = await client.v1.search_stores(ssid).search(query="hello", limit=5)
203+
results = await client.v1.search_stores(ssid).search(query="hello", max_results=5)
204204

205205
# List, filter, batch
206206
docs = await client.v1.search_stores(ssid).documents.list(page=1, size=10, starts_with="my")
@@ -229,6 +229,51 @@ ss = await client.v1.search_stores.vllm.create(
229229
)
230230
```
231231

232+
### Search Filters (Filter DSL)
233+
234+
The SDK provides a Pythonic filter builder using operator overloading:
235+
236+
```python
237+
from pharia import Filter, And, Or, Not, ModalityCondition
238+
from datetime import datetime
239+
240+
# Metadata comparisons
241+
Filter("category") == "science" # equalTo
242+
Filter("category") == None # isNull
243+
Filter("priority") > 5 # greaterThan
244+
Filter("priority") >= 5 # greaterThanOrEqualTo
245+
Filter("priority") < 10 # lessThan
246+
Filter("priority") <= 10 # lessThanOrEqualTo
247+
248+
# Datetime comparisons (auto-detected)
249+
Filter("created") > datetime(2024, 1, 1) # after
250+
Filter("created") <= datetime(2024, 12, 31) # atOrBefore
251+
252+
# Modality filters
253+
ModalityCondition.text() # {"modality": "text"}
254+
ModalityCondition.image() # {"modality": "image"}
255+
256+
# Combine with And / Or / Not
257+
results = await client.v1.search_stores(ssid).search(
258+
query="machine learning",
259+
max_results=10,
260+
filters=[
261+
And(Filter("category") == "science", ModalityCondition.text()),
262+
Not(Filter("archived") == None),
263+
],
264+
)
265+
```
266+
267+
Raw camelCase dicts are also accepted for backward compatibility:
268+
269+
```python
270+
results = await client.v1.search_stores(ssid).search(
271+
query="hello",
272+
max_results=5,
273+
filters=[{"with": [{"metadata": {"field": "category", "equalTo": "science"}}]}],
274+
)
275+
```
276+
232277
## Type Safety
233278

234279
Full TypedDict support for type checking and IDE autocomplete:

examples/README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,21 @@ python create_stages.py
4646

4747
---
4848

49+
### 🔍 Search Stores
50+
**File:** `search_stores_usage.py`
51+
52+
Full search store lifecycle including the Filter DSL:
53+
- Create search stores (semantic, instruct)
54+
- Add documents and search with metadata filters
55+
- Use `Filter`, `And`, `Or`, `Not`, and `ModalityCondition` for Pythonic queries
56+
57+
```bash
58+
cd pharia/examples
59+
python search_stores_usage.py
60+
```
61+
62+
---
63+
4964
### 🛡️ Type-Safe Usage
5065
**File:** `typed_usage.py`
5166

examples/search_stores_usage.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,18 @@
77
- Listing search stores
88
- Getting a specific search store
99
- Updating search store metadata
10+
- Searching with the Filter DSL
1011
- Deleting a search store
1112
"""
1213

1314
import asyncio
1415
import uuid
1516

1617
from examples.helpers import ExamplePrinter
18+
from pharia import And
1719
from pharia import Client
20+
from pharia import Filter
21+
from pharia import ModalityCondition
1822

1923

2024
async def main():
@@ -95,8 +99,38 @@ async def main():
9599
{"Metadata keys": list(updated_store.get("metadata", {}).keys())},
96100
)
97101

98-
# Example 6: Delete the search stores (fluent API)
99-
p.section(6, 6, "Deleting search stores")
102+
# Example 6: Search with Filter DSL
103+
p.section(6, 7, "Searching with Filter DSL")
104+
105+
# Add a document so there's something to search
106+
doc_name = f"test-doc-{uuid.uuid4().hex[:8]}"
107+
await (
108+
client.v1.search_stores(semantic_store_id)
109+
.documents(doc_name)
110+
.create_or_update(
111+
schema_version="V1",
112+
contents=[{"modality": "text", "text": "Machine learning is a subset of AI."}],
113+
metadata={"category": "science"},
114+
)
115+
)
116+
p.info(f"Created document: {doc_name}", indent=1)
117+
118+
# Search using the Filter DSL
119+
search_result = await client.v1.search_stores(semantic_store_id).search(
120+
query="artificial intelligence",
121+
max_results=5,
122+
filters=[And(Filter("category") == "science", ModalityCondition.text())],
123+
)
124+
p.success(
125+
f"Search returned {len(search_result)} results",
126+
{"Query": "artificial intelligence", "Filter": 'category == "science"'},
127+
)
128+
129+
# Clean up document
130+
await client.v1.search_stores(semantic_store_id).documents(doc_name).delete()
131+
132+
# Example 7: Delete the search stores (fluent API)
133+
p.section(7, 7, "Deleting search stores")
100134
await client.v1.search_stores(semantic_store_id).delete()
101135
p.success(f"Deleted semantic search store: {semantic_store_id}")
102136

pharia/__init__.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@
2222
"""
2323

2424
from pharia.client import Client
25+
from pharia.filters import And
26+
from pharia.filters import Filter
27+
from pharia.filters import ModalityCondition
28+
from pharia.filters import Not
29+
from pharia.filters import Or
2530
from pharia.models import ChunkingStrategy
2631
from pharia.models import Connector
2732
from pharia.models import ConnectorFile
@@ -104,15 +109,13 @@
104109

105110

106111
__all__ = [
112+
"And",
107113
"ChunkingStrategy",
108-
# Client
109114
"Client",
110-
# Connector types
111115
"Connector",
112116
"ConnectorFile",
113117
"ConnectorFilesListResponse",
114118
"ConnectorListResponse",
115-
# Enums
116119
"ConnectorType",
117120
"ContentDTO",
118121
"CreateConnectorInput",
@@ -125,62 +128,56 @@
125128
"Cursor",
126129
"DataObjectDTO",
127130
"DataStorage",
128-
# Dataset types
129131
"Dataset",
130132
"DatasetListResponse",
131133
"DestinationConfig",
132134
"DestinationType",
133-
# Document types
134135
"Document",
135136
"DocumentContentResponse",
136137
"DocumentListResponse",
137138
"DocumentSection",
138139
"DocumentWithContents",
139-
# Other types
140140
"Download",
141141
"EmbeddingStrategy",
142142
"EmbeddingStrategyInstructConfig",
143143
"EmbeddingStrategySemanticConfig",
144144
"EmbeddingStrategyVLLMConfig",
145-
# File types
146145
"File",
147146
"FileListResponse",
147+
"Filter",
148148
"GoogleDriveSourceConfig",
149149
"IngestionContext",
150150
"MediaType",
151151
"Modality",
152+
"ModalityCondition",
153+
"Not",
154+
"Or",
152155
"PaginationBase",
153156
"Parameter",
154157
"PresignedURL",
155158
"QueryEngineCloseSessionResult",
156159
"QueryEngineCommandResult",
157160
"QueryEngineDatabaseFile",
158161
"QueryEngineQueryResult",
159-
# Query Engine types
160162
"QueryEngineSession",
161-
# Repository types
162163
"Repository",
163164
"RepositoryListResponse",
164165
"RetentionPolicy",
165166
"Run",
166167
"RunListResponse",
167168
"SchemaVersion",
168-
# Search types
169169
"SearchInput",
170170
"SearchResponse",
171171
"SearchResult",
172-
# Search Store types
173172
"SearchStore",
174173
"SearchStoreListResponse",
175174
"SharepointSourceConfig",
176175
"SourceConfig",
177-
# Stage types
178176
"Stage",
179177
"StageChunkingStrategy",
180178
"StageEmbeddingStrategy",
181179
"StageListResponse",
182180
"StageSearchStoreContext",
183-
# Transformation & Run types
184181
"Transformation",
185182
"TransformationContext",
186183
"TransformationName",

0 commit comments

Comments
 (0)