Skip to content

BM25 and Hybrid search with minimum_should_match #300

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ env:
WEAVIATE_128: 1.28.11
WEAVIATE_129: 1.29.1
WEAVIATE_130: 1.30.1
WEAVIATE_131: 1.31.0

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
Expand Down
3 changes: 3 additions & 0 deletions src/collections/query/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ export {
BaseHybridOptions,
BaseNearOptions,
BaseNearTextOptions,
Bm25OperatorOptions,
Bm25Options,
FetchObjectByIdOptions,
FetchObjectsOptions,
Expand All @@ -266,3 +267,5 @@ export {
QueryReturn,
SearchOptions,
} from './types.js';

export { Bm25Operator } from './utils.js';
28 changes: 28 additions & 0 deletions src/collections/query/integration.test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
/* eslint-disable @typescript-eslint/no-non-null-assertion */
/* eslint-disable @typescript-eslint/no-non-null-asserted-optional-chain */
import { requireAtLeast } from '../../../test/version.js';
import { WeaviateUnsupportedFeatureError } from '../../errors.js';
import weaviate, { WeaviateClient } from '../../index.js';
import { Collection } from '../collection/index.js';
import { CrossReference, Reference } from '../references/index.js';
import { GroupByOptions } from '../types/index.js';
import { Bm25Operator } from './utils.js';

describe('Testing of the collection.query methods with a simple collection', () => {
let client: WeaviateClient;
Expand Down Expand Up @@ -132,6 +134,32 @@ describe('Testing of the collection.query methods with a simple collection', ()
expect(ret.objects[0].uuid).toEqual(id);
});

// Suite covering the BM25 search operator options on `query.bm25` and `query.hybrid`.
// NOTE(review): `requireAtLeast(1, 31, 0)` presumably gates the suite on a Weaviate
// server version of at least 1.31.0 — confirm against test/version.js.
requireAtLeast(
1,
31,
0
)('bm25 search operator (minimum_should_match)', () => {
it('should query with bm25 + operator', async () => {
// `Or` operator with `minimumMatch: 1`, passed through the `operator` option of bm25.
const ret = await collection.query.bm25('carrot', {
limit: 1,
operator: Bm25Operator.or({ minimumMatch: 1 }),
});
expect(ret.objects.length).toEqual(1);
expect(ret.objects[0].properties.testProp).toEqual('carrot');
expect(ret.objects[0].uuid).toEqual(id);
});

it('should query with hybrid + bm25Operator', async () => {
// `And` operator, passed through the hybrid-specific `bm25Operator` option.
const ret = await collection.query.hybrid('carrot', {
limit: 1,
bm25Operator: Bm25Operator.and(),
});
expect(ret.objects.length).toEqual(1);
expect(ret.objects[0].properties.testProp).toEqual('carrot');
expect(ret.objects[0].uuid).toEqual(id);
});
});

it('should query with hybrid and vector', async () => {
const ret = await collection.query.hybrid('carrot', {
limit: 1,
Expand Down
7 changes: 7 additions & 0 deletions src/collections/query/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,15 @@ export type Bm25QueryProperty<T> = {
weight: number;
};

/** The `Or` variant of the BM25 search operator; `minimumMatch` is required for this variant. */
export type Bm25OperatorOr = { operator: 'Or'; minimumMatch: number };
/** The `And` variant of the BM25 search operator; it carries no additional settings. */
export type Bm25OperatorAnd = { operator: 'And' };

/** Either BM25 search operator variant, discriminated by the `operator` field. */
export type Bm25OperatorOptions = Bm25OperatorOr | Bm25OperatorAnd;

export type Bm25SearchOptions<T> = {
/** Which properties of the collection to perform the keyword search on. */
queryProperties?: (PrimitiveKeys<T> | Bm25QueryProperty<T>)[];
/** Optional search operator for the keyword search: `And`, or `Or` together with a `minimumMatch` value. */
operator?: Bm25OperatorOptions;
};

/** Base options available in the `query.bm25` method */
Expand Down Expand Up @@ -115,6 +121,7 @@ export type HybridSearchOptions<T> = {
targetVector?: TargetVectorInputType;
/** The specific vector to search for or a specific vector subsearch. If not specified, the query is vectorized and used in the similarity search. */
vector?: NearVectorInputType | HybridNearTextSubSearch | HybridNearVectorSubSearch;
bm25Operator?: Bm25OperatorOptions;
};

/** Base options available in the `query.hybrid` method */
Expand Down
12 changes: 11 additions & 1 deletion src/collections/query/utils.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { MultiTargetVectorJoin } from '../index.js';
import { NearVectorInputType, TargetVectorInputType } from './types.js';
import { Bm25OperatorOptions, Bm25OperatorOr, NearVectorInputType, TargetVectorInputType } from './types.js';

export class NearVectorInputGuards {
public static is1DArray(input: NearVectorInputType): input is number[] {
Expand Down Expand Up @@ -34,3 +34,13 @@ export class TargetVectorInputGuards {
return i.combination !== undefined && i.targetVectors !== undefined;
}
}

/**
 * Static factories for the operator options accepted by the BM25 `operator`
 * option and the hybrid `bm25Operator` option.
 */
export class Bm25Operator {
  /**
   * Build the `And` operator options.
   *
   * @returns An options object whose `operator` is `'And'`.
   */
  static and(): Bm25OperatorOptions {
    const andOperator: Bm25OperatorOptions = { operator: 'And' };
    return andOperator;
  }

  /**
   * Build the `Or` operator options.
   *
   * @param opts The `Or` settings without the `operator` tag (i.e. `minimumMatch`).
   * @returns A new object holding the fields of `opts` plus `operator: 'Or'`.
   */
  static or(opts: Omit<Bm25OperatorOr, 'operator'>): Bm25OperatorOptions {
    // `operator` is written after the spread so it always ends up as 'Or'.
    const orOperator: Bm25OperatorOr = { ...opts, operator: 'Or' };
    return orOperator;
  }
}
20 changes: 20 additions & 0 deletions src/collections/serialize/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import {
NearThermalSearch,
NearVector,
NearVideoSearch,
SearchOperatorOptions,
SearchOperatorOptions_Operator,
Targets,
VectorForTarget,
WeightsForTarget,
Expand Down Expand Up @@ -115,6 +117,7 @@ import {
import {
BaseHybridOptions,
BaseNearOptions,
Bm25OperatorOptions,
Bm25Options,
Bm25QueryProperty,
Bm25SearchOptions,
Expand Down Expand Up @@ -960,10 +963,26 @@ export class Serialize {
});
};

/**
 * Translate the client-side BM25 operator options into a `SearchOperatorOptions`
 * message built with `fromPartial`.
 *
 * @param searchOperator The operator options supplied by the caller, if any.
 * @returns The message for the given operator, or `undefined` when none was supplied.
 */
private static bm25SearchOperator = (
  searchOperator?: Bm25OperatorOptions
): SearchOperatorOptions | undefined => {
  if (!searchOperator) {
    return undefined;
  }
  if (searchOperator.operator === 'And') {
    return SearchOperatorOptions.fromPartial({
      operator: SearchOperatorOptions_Operator.OPERATOR_AND,
    });
  }
  // Anything that is not `And` is sent as `Or`, carrying `minimumMatch` along.
  return SearchOperatorOptions.fromPartial({
    operator: SearchOperatorOptions_Operator.OPERATOR_OR,
    minimumOrTokensMatch: searchOperator.minimumMatch,
  });
};

/**
 * Build the `BM25` message for a keyword search.
 *
 * @param args The query string together with the optional `queryProperties` and `operator`.
 * @returns A `BM25` message created with `fromPartial`.
 */
public static bm25Search = <T>(args: { query: string } & Bm25SearchOptions<T>): BM25 => {
  const { query, queryProperties, operator } = args;
  const properties = this.bm25QueryProperties(queryProperties);
  const searchOperator = this.bm25SearchOperator(operator);
  return BM25.fromPartial({ query, properties, searchOperator });
};

Expand Down Expand Up @@ -1074,6 +1093,7 @@ export class Serialize {
vectorBytes: vectorBytes,
vectorDistance: args.maxVectorDistance,
fusionType: fusionType(args.fusionType),
bm25SearchOperator: this.bm25SearchOperator(args.bm25Operator),
targetVectors,
targets,
nearText,
Expand Down
Loading