Skip to content
Merged
Show file tree
Hide file tree
Changes from 120 commits
Commits
Show all changes
121 commits
Select commit Hold shift + click to select a range
8af57d8
CU-869aa22g2: Add ES requirement
mart-r Aug 29, 2025
64f95ad
CU-869aa22g2: Add initial README
mart-r Aug 29, 2025
fcf4d96
CU-869aa22g2: Add (slightly) converted credentials
mart-r Aug 29, 2025
bfdcf82
CU-869aa22g2: Add cogstack module ported from WWC
mart-r Aug 29, 2025
2332a63
CU-869aa22g2: Add search template notebook
mart-r Aug 29, 2025
4dc92aa
CU-869aa22g2: Add ipython dependency (for cogstack and notebook)
mart-r Aug 29, 2025
4f2d463
CU-869aa22g2: Add tqdm dependency
mart-r Aug 29, 2025
61b20f0
CU-869aa22g2: Add pandas dependency
mart-r Aug 29, 2025
ec2a780
CU-869aa22g2: Add a few initial tests
mart-r Aug 29, 2025
0322164
CU-869aa22g2: Remove a bunch of extra whitespace
mart-r Aug 29, 2025
696e669
CU-869aa22g2: Add ruff dependecny
mart-r Aug 29, 2025
90d339b
CU-869aa22g2: Run ruff on cogstack module
mart-r Aug 29, 2025
77b3f3d
CU-869aa22g2: Move to primitives collections for type hinting
mart-r Aug 29, 2025
fe1656f
CU-869aa22g2: Some further linting changes
mart-r Aug 29, 2025
780a3fd
CU-869aa22g2: Refactor cogstack module to make a little more sense
mart-r Aug 29, 2025
403a2e8
CU-869aa22g2: Rename some methods for better descriptions
mart-r Aug 29, 2025
2fd8507
CU-869aa22g2: Rename a method name for better descriptions
mart-r Aug 29, 2025
7e04913
CU-869aa22g2: Minor whitespace fix
mart-r Aug 29, 2025
62b60ca
CU-869aa22g2: Remove path add in notebook
mart-r Aug 29, 2025
92aaabd
CU-869aa22g2: Remove unused import from notebook
mart-r Aug 29, 2025
651a55b
CU-869aa22g2: Import username and password from credentials in case t…
mart-r Aug 29, 2025
e98f647
CU-869aa22g2: Add nbconvert dev-dependency
mart-r Aug 29, 2025
4bd6246
CU-869aa22g2: Add default indices to get fields for
mart-r Aug 29, 2025
ab5dee8
CU-869aa22g2: Improve error handling (avoid hiding stack trace)
mart-r Aug 29, 2025
ab8da2c
CU-869aa22g2: Add default indices in notebook example
mart-r Aug 29, 2025
4b77182
CU-869aa22g2: Update progress bar handling during exception handling
mart-r Aug 29, 2025
3a604cf
CU-869aa22g2: Add default indices in notebook examples
mart-r Aug 29, 2025
921d682
CU-869aa22g2: Add data folder
mart-r Aug 29, 2025
bd4f5bd
CU-869aa22g2 Fix data folder in notebook
mart-r Aug 29, 2025
b1790e0
CU-869aa22g2: Add initial notebook tests
mart-r Aug 29, 2025
42eb024
CU-869aa22g2: Simplify test slightly
mart-r Aug 29, 2025
e570521
CU-869aa22g2: Remove test-time debug output
mart-r Aug 29, 2025
35cce29
CU-869aa22g2: Add assertion and removal of data file created by notebook
mart-r Aug 29, 2025
a0c6fa3
CU-869aa22g2: Add initial workflow
mart-r Aug 29, 2025
95fffd1
CU-869aa22g2: Fix workflow working directory
mart-r Aug 29, 2025
fdea8d8
CU-869aa22g2: Add OpenSearch dependency
mart-r Sep 23, 2025
329f047
CU-869aa22g2: Allow OpenSearch to be used instead of ES
mart-r Sep 23, 2025
379eb06
CU-869aa22g2: Add missing ES/OS helpers import
mart-r Sep 23, 2025
349b7f3
CU-869aa22g2: Fix typo in variable name
mart-r Sep 23, 2025
96857f0
CU-869aa22g2: Fix test time mocking
mart-r Sep 24, 2025
0705f43
Merge branch 'main' into CU-869aa22g2-add-es-from-wwc
mart-r Oct 22, 2025
5300625
CU-869aa22g2: Add minimal permissions to workflow
mart-r Oct 22, 2025
e6aa9fe
CU-869aa22g2: Increase flexibility of scanning
mart-r Oct 22, 2025
798974c
CU-869aa22g2: Fail upon too large a size when scanning
mart-r Oct 22, 2025
2830650
CU-869aa22g2: Increase flexibility of scrolling
mart-r Oct 22, 2025
6702652
CU-869aa22g2: Fail upon too large a size when scrolling
mart-r Oct 22, 2025
cb4d2ba
CU-869aa22g2: Increase flexibility when reading data with sorting
mart-r Oct 22, 2025
3db3907
CU-869aa22g2: Fail upon too large a size when sorting
mart-r Oct 22, 2025
074577c
CU-869aa22g2: Handle index not found better
mart-r Oct 22, 2025
58ddf44
CU-869aa22g2: Improve bad request erro handling
mart-r Oct 22, 2025
c8349be
CU-869aa22g2: Add some end to end tests
mart-r Oct 22, 2025
ebf5051
CU-869aa22g2: Remove debug file
mart-r Oct 22, 2025
abc0664
CU-869aa22g2: Fix ES9 install for local tests
mart-r Oct 22, 2025
5c487e6
CU-869aa22g2: Remove unnecessary files
mart-r Oct 22, 2025
17ad29c
CU-869aa22g2: Make OS run on same port as ES for tests
mart-r Oct 22, 2025
ea35821
Use OpenSearch when ES not available
mart-r Oct 22, 2025
29cc4bb
CU-869aa22g2: Fix OS import
mart-r Oct 22, 2025
4a71ea2
CU-869aa22g2: Add separate workflow for OS
mart-r Oct 22, 2025
06e25f5
CU-869aa22g2: Improve OS-based tests
mart-r Oct 22, 2025
2872ebf
CU-869aa22g2: Fix count for OS
mart-r Oct 22, 2025
03959ac
CU-869aa22g2: Expand OS support
mart-r Oct 22, 2025
1e573b9
CU-869aa22g2: Fix included fields for OS
mart-r Oct 22, 2025
b77e8fd
CU-869aa22g2: Update search for OS (timeout string vs number)
mart-r Oct 22, 2025
4e2d217
CU-869aa22g2: Fix some scrolling issues for OS
mart-r Oct 22, 2025
25d7f3e
CU-869aa22g2: Make scroll more flexible with OS
mart-r Oct 22, 2025
acffa88
Improve OS support when sorting
mart-r Oct 23, 2025
046f89a
Fix count when doing scan
mart-r Oct 23, 2025
0f59e19
Fix typing when scanning
mart-r Oct 23, 2025
bf9411a
Fix some minor typing issues with progress bar
mart-r Oct 23, 2025
4c2c825
CU-869aa22g2: Remove credentials module
mart-r Oct 24, 2025
9da4e11
CU-869aa22g2: Begin moving to a optional ES/OS approach.
mart-r Oct 24, 2025
995ce47
CU-869aa22g2: Move to a pyproject.toml based package
mart-r Oct 24, 2025
0e8b85d
CU-869aa22g2: Update Readme somewhat
mart-r Oct 24, 2025
a17a8ba
CU-869aa22g2: Update python versions in CI (remove 3.9, add 3.13)
mart-r Oct 24, 2025
714d343
CU-869aa22g2: Update workflow to pyproject.toml based install
mart-r Oct 24, 2025
2773d3b
CU-869aa22g2: Simplify imports
mart-r Oct 24, 2025
611f36d
CU-869aa22g2: Move to a folder structure for source
mart-r Oct 24, 2025
ce009b3
CU-869aa22g2: Expose class from package level
mart-r Oct 24, 2025
b899090
CU-869aa22g2: Add separate ES implementation
mart-r Oct 24, 2025
3eba2b9
CU-869aa22g2: Add separate OS implementation
mart-r Oct 24, 2025
c962842
CU-869aa22g2: Separate OS and ES implementation and usage
mart-r Oct 24, 2025
7c00784
CU-869aa22g2: Run mypy with OS and ES in workflow
mart-r Oct 24, 2025
3869b9a
CU-869aa22g2: Remove commented code
mart-r Oct 24, 2025
523adfd
CU-869aa22g2: Add module import time exception if no back end available
mart-r Oct 24, 2025
2d7d122
CU-869aa22g2: Expose print_dataframe from package root
mart-r Oct 24, 2025
188183e
CU-869aa22g2: Update notebook examples to newer format
mart-r Oct 24, 2025
641af56
CU-869aa22g2: Update notebook again
mart-r Oct 24, 2025
1211d7d
CU-869aa22g2: Add search results folder
mart-r Oct 24, 2025
c70f860
CU-869aa22g2: Update file naming in search template
mart-r Oct 24, 2025
dc8e174
CU-869aa22g2: Add module to read credentials from env values
mart-r Oct 24, 2025
41c83b7
CU-869aa22g2: Fix mocking and update paths in notebook tests
mart-r Oct 24, 2025
9397552
CU-869aa22g2: Update tests in line with recent changes
mart-r Oct 24, 2025
f1b4d15
CU-869aa22g2: Update local tests in line with recent changes
mart-r Oct 24, 2025
e4c5e76
CU-869aa22g2: Avoid specifying ports twice for OS
mart-r Oct 24, 2025
34fe392
CU-869aa22g2: Fix small issue with search after for OS
mart-r Oct 24, 2025
87b1f62
CU-869aa22g2: Improve OS query in scan
mart-r Oct 24, 2025
923b896
CU-869aa22g2: Fix query kwarg in scan on OS
mart-r Oct 24, 2025
d5cc4ff
CU-869aa22g2: Fix source kwarg in scan on OS
mart-r Oct 24, 2025
574bd78
CU-869aa22g2: Fix dupplicate args for search in OS
mart-r Oct 24, 2025
949665e
CU-869aa22g2: Fix sort on OS
mart-r Oct 24, 2025
f9569f9
CU-869aa22g2: Update workflow to check types nad lint for the correct…
mart-r Oct 24, 2025
df88c10
CU-869aa22g2: Whitespace change for test module
mart-r Oct 24, 2025
0f49463
CU-869aa22g2: Update mocks to work with OS
mart-r Oct 24, 2025
224d407
CU-869aa22g2: Fix typo in OS class name
mart-r Oct 24, 2025
30d1598
CU-869aa22g2: Fix OS mocking in NB tests
mart-r Oct 24, 2025
288671b
CU-869aa22g2: Expose read_from_env as package level method
mart-r Oct 24, 2025
600452e
CU-869aa22g2: Update notebook to expose username/password from env
mart-r Oct 24, 2025
af052ec
CU-869aa22g2: Fix some tests for scan and OS
mart-r Oct 24, 2025
08ea077
CU-869aa22g2: Fix setup mocks for OS
mart-r Oct 24, 2025
943e421
CU-869aa22g2: Add workflow to push to TestPyPI
mart-r Oct 24, 2025
0645949
CU-869aa22g2: Add permissions to push to TestPyPI to workflow
mart-r Oct 24, 2025
53cc4ae
CU-869aa22g2: Add full on release workflow
mart-r Oct 24, 2025
90cbc61
CU-869aa22g2: Update credentials to use ID and API key and/or encoded…
mart-r Oct 24, 2025
7b21c2f
CU-869aa22g2: Remove commented code
mart-r Oct 24, 2025
29c2531
CU-869aa22g2: Add a few doc strings
mart-r Oct 24, 2025
e590dd8
CU-869aa22g2: Update readme with credentials details
mart-r Oct 24, 2025
50b698b
CU-869aa22g2: Make version dynamic
mart-r Oct 24, 2025
39f44b2
CU-869aa22g2: Force use of setuptools_scm for dynamic versioning
mart-r Oct 24, 2025
d94355f
CU-869aa22g2: Update pyproject.toml with versioning instructions
mart-r Oct 24, 2025
b2765c0
CU-869aa22g2: Update tests for changed environmental variable names
mart-r Oct 24, 2025
ad79801
CU-869aa22g2: Specify version of OpenSearch for end to end tests
mart-r Oct 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions .github/workflows/cogstack-es_main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
name: cogstack-es - Test

# CI for the cogstack-es package: type-check against all back ends, then
# lint + test per back end, then build and upload a dev build to TestPyPI.
on:
  push:
    branches: [ main ]
  pull_request:
    paths:
      - 'cogstack-es/**'
      - '.github/workflows/cogstack-es**'

# Applies to `run:` steps only; `uses:` steps still resolve paths from the
# repository root (hence `cogstack-es/dist` in the publish step below).
defaults:
  run:
    working-directory: ./cogstack-es

# Least privilege by default; the publishing job opts in to OIDC on its own.
permissions:
  contents: read

jobs:
  types-only-with-ES-and-OS:
    runs-on: ubuntu-latest
    permissions:
      contents: read
    strategy:
      matrix:
        python-version: [ '3.10', '3.11', '3.12', '3.13' ]
      max-parallel: 4

    steps:
      - uses: actions/checkout@v5
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install ".[dev,OS,ES9]"
      - name: Check types
        run: |
          python -m mypy --follow-imports=normal src/cogstack

  types-lint-tests:
    runs-on: ubuntu-latest
    needs: types-only-with-ES-and-OS
    permissions:
      contents: read
    strategy:
      matrix:
        python-version: [ '3.10', '3.11', '3.12', '3.13' ]
        # One run per optional back end declared in pyproject.toml.
        install-target: [ "ES9", "ES8", "OS" ]
      max-parallel: 4

    steps:
      - uses: actions/checkout@v5
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install ".[dev,${{ matrix.install-target }}]"
      - name: Lint
        run: |
          ruff check src/cogstack
      - name: Test
        run: |
          pytest tests

  # NOTE(review): this job has no `if:` guard, so it also runs for
  # pull_request events (not only pushes to main) — confirm that uploading
  # a dev build to TestPyPI from pull requests is intended.
  publish-to-test-PyPI:
    runs-on: ubuntu-latest
    needs: types-lint-tests
    permissions:
      contents: read   # needed by actions/checkout
      id-token: write  # OIDC token for the PyPI publish action
    steps:
      - name: Checkout main
        uses: actions/checkout@v5
        with:
          fetch-depth: 0 # fetch all history
          fetch-tags: true # fetch tags explicitly

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade build

      # Overrides the setuptools_scm-derived version with a unique,
      # timestamped dev version for every upload.
      - name: Set timestamp-based dev version
        run: |
          TS=$(date -u +"%Y%m%d%H%M%S")
          echo "SETUPTOOLS_SCM_PRETEND_VERSION_FOR_COGSTACK_ES=0.1.1.dev${TS}" >> "$GITHUB_ENV"

      - name: Install package in development mode
        run: |
          pip install -e ".[dev,ES9,OS]"

      - name: Build package
        run: |
          python -m build

      - name: Publish distribution to TestPyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          # kebab-case inputs; the snake_case spellings are deprecated aliases
          repository-url: https://test.pypi.org/legacy/
          packages-dir: cogstack-es/dist
51 changes: 51 additions & 0 deletions .github/workflows/cogstack-es_release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
name: cogstack-es release-build

# Runs when a tag such as `cogstack-es/v0.1.2` is pushed: test, build,
# and publish the package to PyPI.
on:
  push:
    tags:
      - 'cogstack-es/v*.*.*'

permissions:
  contents: read   # needed by actions/checkout
  id-token: write  # OIDC token for the PyPI publish action

# Applies to `run:` steps only; `uses:` steps still resolve paths from the
# repository root (hence `cogstack-es/dist` in the publish step below).
defaults:
  run:
    working-directory: ./cogstack-es

jobs:
  test-and-publish-to-PyPI:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout main
        uses: actions/checkout@v5
        with:
          # The package version is derived from git tags (setuptools_scm),
          # so fetch the full history instead of a shallow clone.
          fetch-depth: 0

      - name: Release Tag
        # `##refs/*/` strips the longest matching prefix, so
        # GITHUB_REF=refs/tags/cogstack-es/v0.1.2 yields "v0.1.2"
        # (the leading "v" is kept, though it probably shouldn't be).
        # RELEASE_VERSION is not referenced by any later step in this workflow.
        run: echo "RELEASE_VERSION=${GITHUB_REF##refs/*/}" >> "$GITHUB_ENV"

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade build

      - name: Install client package in development mode
        run: |
          pip install -e ".[dev,ES9,OS]"

      - name: Test
        run: |
          pytest tests

      - name: Build client package
        run: |
          python -m build

      - name: Publish production distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          # kebab-case input; `packages_dir` is a deprecated alias
          packages-dir: cogstack-es/dist
50 changes: 50 additions & 0 deletions cogstack-es/ReadMe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

# Login and search
This package handles logging in to, and running searches against, Elasticsearch or OpenSearch.

# Installation

This package is distributed through PyPI and can be installed using one of:
```
pip install cogstack-es[ES9] # For Elasticsearch 9
pip install cogstack-es[ES8] # For Elasticsearch 8
pip install cogstack-es[OS] # For Opensearch
```

PS:
After installation, the import still remains `import cogstack` even though the installed package is called `cogstack-es`.

## Login details
You need to get your login details and host from your administrator.
This is usually an API key.
There is also a mechanism for reading hosts and credentials from environmental variables:
```python
from cogstack import read_from_env, CogStack
hosts, api_key, (username, password) = read_from_env()
# subsequently use one of
cs = CogStack.with_api_key_auth(hosts=hosts, api_key=api_key)
#cs = CogStack.with_basic_auth(hosts=hosts, username=username, password=password)
```
The `read_from_env` method will read the data from the following environmental variables:

| Environmental variable name | Description | Example value |
| ------------------------------ | ----------------------------------- | ------------- |
| `COGSTACK_HOSTS` | The host addresses, comma separated | `http://localhost:9200,http://localhost:9201` |
| `COGSTACK_USERNAME` | The username for basic auth | `user123` |
| `COGSTACK_PASSWORD` | The password for basic auth | `sup3rsecur3-pw#946` |
| `COGSTACK_API_KEY_ID` | The API key ID for authentication | `l0cGtvtlw1lbsyClOm6w` |
| `COGSTACK_API_KEY` | The unencoded API key for authentication with the ID | `I01NJf4Z6yvXyXThh1676g` |
| `COGSTACK_API_KEY_ENCODED` | The encoded API key for authentication with just the API key | `ZZpwMtW3ky6Tw9KEtfavVzTP0JcrC7iLnVf7zXbqAh70A15VKJwHd5YX3J==` |


__Note__: If these fields are left blank then the user will be prompted to enter the details themselves.

If you are unsure about the above information please contact your CogStack system administrator.

## How to build a Search query

A core component of cogstack is Elasticsearch which is a search engine built on top of Apache Lucene.

Lucene has a custom query syntax for querying its indexes (Lucene Query Syntax). This query syntax allows for features such as Keyword matching, Wildcard matching, Regular expression, Proximity matching, Range searches.

Full documentation for this syntax is available as part of Elasticsearch [query string syntax](https://www.elastic.co/guide/en/elasticsearch/reference/8.5/query-dsl-query-string-query.html#query-string-syntax).
Empty file added cogstack-es/data/.keep
Empty file.
Empty file.
61 changes: 61 additions & 0 deletions cogstack-es/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Build with setuptools; setuptools_scm is a build-time requirement as well.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=61.0",
    "wheel",
    "setuptools_scm>=8",
]

[project]
name = "cogstack-es"
# No hard-coded version: it is declared dynamic, so the build backend supplies it.
dynamic = ["version"]
description = "ElasticSearch or OpenSearch wrapper for CogStack deployments"
readme = "ReadMe.md"
authors = [{ name = "Mart Ratas", email = "[email protected]" }]
# NOTE(review): newer setuptools releases prefer a plain SPDX string here
# (PEP 639) — confirm the minimum setuptools version before changing.
license = { text = "Apache-2.0" }
requires-python = ">=3.10"
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "License :: OSI Approved :: Apache Software License",
]

# Runtime requirements shared by every back end; the search client itself
# is not listed here.
dependencies = [
    "tqdm>=4.64,<5.0",
    "pandas>=2.2,<3.0",
    "ipython",
]

[project.optional-dependencies]
# Tooling for type checking, linting, tests and notebook conversion.
dev = [
    "mypy",
    "pandas-stubs",
    "types-tqdm",
    "pytest",
    "ruff",
    "nbconvert",
]
# Back-end extras. ES8 and ES9 pin non-overlapping major versions of the
# same client package.
ES8 = ["elasticsearch>=8.0.0,<9.0"]
ES9 = ["elasticsearch>=9.0.0,<10.0"]
OS = ["opensearch-py>=2.0.0,<3.0"]

[project.urls]
Homepage = "https://github.com/CogStack/cogstack-nlp/tree/main/cogstack-es"
Repository = "https://github.com/CogStack/cogstack-nlp/tree/main/cogstack-es"
Issues = "https://github.com/CogStack/cogstack-nlp/issues"

# src layout: importable packages live under ./src
[tool.setuptools]
package-dir = { "" = "src" }

[tool.setuptools_scm]
# The git repository root is one directory above this file.
root = ".."
# Only tags of the form `cogstack-es/v<digits>[.<digits>...]` carry a version for this package.
tag_regex = '^cogstack-es/v(?P<version>[0-9]+(?:\.[0-9]+)*)$'
# Used when no version can be derived from git.
fallback_version = "0.1.0.dev0"
Loading
Loading