|
6 | 6 | */
|
7 | 7 | package org.hibernate.search.integrationtest.mapper.pojo.standalone.realbackend.mapping;
|
8 | 8 |
|
| 9 | +import static org.assertj.core.api.Assertions.assertThat; |
9 | 10 | import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
10 | 11 |
|
11 | 12 | import java.lang.invoke.MethodHandles;
|
12 | 13 | import java.util.ArrayList;
|
| 14 | +import java.util.Arrays; |
13 | 15 | import java.util.Collection;
|
14 | 16 | import java.util.List;
|
15 | 17 | import java.util.Objects;
|
16 | 18 |
|
| 19 | +import org.hibernate.search.backend.elasticsearch.ElasticsearchDistributionName; |
| 20 | +import org.hibernate.search.engine.backend.types.VectorSimilarity; |
| 21 | +import org.hibernate.search.engine.search.projection.dsl.SearchProjectionFactory; |
17 | 22 | import org.hibernate.search.integrationtest.mapper.pojo.standalone.realbackend.testsupport.BackendConfigurations;
|
18 | 23 | import org.hibernate.search.mapper.pojo.bridge.ValueBridge;
|
19 | 24 | import org.hibernate.search.mapper.pojo.bridge.binding.ValueBindingContext;
|
|
24 | 29 | import org.hibernate.search.mapper.pojo.mapping.definition.annotation.DocumentId;
|
25 | 30 | import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed;
|
26 | 31 | import org.hibernate.search.mapper.pojo.mapping.definition.annotation.VectorField;
|
| 32 | +import org.hibernate.search.mapper.pojo.mapping.definition.programmatic.TypeMappingStep; |
| 33 | +import org.hibernate.search.mapper.pojo.standalone.cfg.StandalonePojoMapperSettings; |
| 34 | +import org.hibernate.search.mapper.pojo.standalone.mapping.SearchMapping; |
| 35 | +import org.hibernate.search.mapper.pojo.standalone.mapping.StandalonePojoMappingConfigurer; |
| 36 | +import org.hibernate.search.mapper.pojo.standalone.session.SearchSession; |
| 37 | +import org.hibernate.search.mapper.pojo.standalone.work.SearchIndexingPlan; |
27 | 38 | import org.hibernate.search.util.common.SearchException;
|
| 39 | +import org.hibernate.search.util.impl.integrationtest.backend.elasticsearch.dialect.ElasticsearchTestDialect; |
| 40 | +import org.hibernate.search.util.impl.integrationtest.common.extension.BackendConfiguration; |
| 41 | +import org.hibernate.search.util.impl.integrationtest.common.reporting.FailureReportChecker; |
28 | 42 | import org.hibernate.search.util.impl.integrationtest.common.reporting.FailureReportUtils;
|
29 | 43 | import org.hibernate.search.util.impl.integrationtest.mapper.pojo.standalone.StandalonePojoMappingSetupHelper;
|
30 | 44 |
|
31 | 45 | import org.junit.jupiter.api.Test;
|
32 | 46 | import org.junit.jupiter.api.extension.RegisterExtension;
|
| 47 | +import org.junit.jupiter.params.ParameterizedTest; |
| 48 | +import org.junit.jupiter.params.provider.ValueSource; |
33 | 49 |
|
34 | 50 | class VectorFieldIT {
|
35 | 51 |
|
36 | 52 | private static final String INDEX_NAME = "IndexName";
|
| 53 | + private static final int BATCHES = 20; |
| 54 | + private static final int BATCH_SIZE = 1_000; |
37 | 55 |
|
38 | 56 | @RegisterExtension
|
39 | 57 | public StandalonePojoMappingSetupHelper setupHelper = StandalonePojoMappingSetupHelper.withSingleBackend(
|
40 | 58 | MethodHandles.lookup(), BackendConfigurations.simple() );
|
41 | 59 |
|
	/*
	 * While for the test of the max-allowed dimension it would be enough to index a single document and then search for it,
	 * in this case we want to generate more than a few documents to see how the backends
	 * would handle the relatively high number of large vectors.
	 * For Lucene-specific limit tests see LuceneVectorFieldIT.
	 * As for Elasticsearch/OpenSearch -- we only transmit the error response from the backend to the user,
	 * so there's no need for extensive testing of this backend; `vectorSizeLimits_more_than_max` covers the basics.
	 */
| 68 | + @Test |
| 69 | + void vectorSizeLimits_max_allowed_dimension_with_lots_of_documents() { |
| 70 | + int maxDimension = maxDimension(); |
| 71 | + @Indexed(index = INDEX_NAME) |
| 72 | + class IndexedEntity { |
| 73 | + @DocumentId |
| 74 | + Integer id; |
| 75 | + float[] floats; |
| 76 | + byte[] bytes; |
| 77 | + |
| 78 | + public IndexedEntity(Integer id) { |
| 79 | + this.id = id; |
| 80 | + this.floats = new float[maxDimension]; |
| 81 | + this.bytes = new byte[maxDimension]; |
| 82 | + Arrays.fill( floats, id / (float) maxDimension ); |
| 83 | + Arrays.fill( floats, id * maxDimension % Byte.MAX_VALUE ); |
| 84 | + } |
| 85 | + } |
| 86 | + |
| 87 | + SearchMapping setup = setupHelper.start().expectCustomBeans().withProperty( |
| 88 | + StandalonePojoMapperSettings.MAPPING_CONFIGURER, |
| 89 | + (StandalonePojoMappingConfigurer) context -> { |
| 90 | + TypeMappingStep book = context.programmaticMapping() |
| 91 | + .type( IndexedEntity.class ); |
| 92 | + book.property( "floats" ) |
| 93 | + .vectorField( maxDimension ).vectorSimilarity( VectorSimilarity.L2 ); |
| 94 | + book.property( "bytes" ) |
| 95 | + .vectorField( maxDimension ).vectorSimilarity( VectorSimilarity.L2 ); |
| 96 | + } |
| 97 | + ).setup( IndexedEntity.class ); |
| 98 | + |
| 99 | + for ( int j = 0; j < BATCHES; j++ ) { |
| 100 | + try ( SearchSession session = setup.createSession() ) { |
| 101 | + SearchIndexingPlan searchIndexingPlan = session.indexingPlan(); |
| 102 | + for ( int i = 0; i < BATCH_SIZE; i++ ) { |
| 103 | + searchIndexingPlan.add( new IndexedEntity( i + j * BATCH_SIZE ) ); |
| 104 | + } |
| 105 | + } |
| 106 | + } |
| 107 | + |
| 108 | + try ( SearchSession session = setup.createSession() ) { |
| 109 | + List<Object> bytes = session.search( IndexedEntity.class ).select( SearchProjectionFactory::id ) |
| 110 | + .where( f -> f.knn( BATCHES ).field( "bytes" ).matching( new byte[maxDimension] ) ) |
| 111 | + .fetchAllHits(); |
| 112 | + assertThat( bytes ).hasSizeGreaterThanOrEqualTo( BATCHES ); |
| 113 | + |
| 114 | + List<Object> floats = session.search( IndexedEntity.class ).select( SearchProjectionFactory::id ) |
| 115 | + .where( f -> f.knn( BATCHES ).field( "floats" ).matching( new float[maxDimension] ) ) |
| 116 | + .fetchAllHits(); |
| 117 | + assertThat( floats ).hasSizeGreaterThanOrEqualTo( BATCHES ); |
| 118 | + } |
| 119 | + } |
| 120 | + |
| 121 | + @ParameterizedTest |
| 122 | + @ValueSource(ints = { 1, 2, 5, 10, 150, 500, 500000000 }) |
| 123 | + void vectorSizeLimits_more_than_max(int increment) { |
| 124 | + int dimension = maxDimension() + increment; |
| 125 | + @Indexed(index = INDEX_NAME) |
| 126 | + class IndexedEntity { |
| 127 | + @DocumentId |
| 128 | + Integer id; |
| 129 | + float[] floats; |
| 130 | + byte[] bytes; |
| 131 | + |
| 132 | + public IndexedEntity(Integer id) { |
| 133 | + this.id = id; |
| 134 | + this.floats = new float[dimension]; |
| 135 | + this.bytes = new byte[dimension]; |
| 136 | + Arrays.fill( floats, id / (float) dimension ); |
| 137 | + Arrays.fill( floats, id * dimension % Byte.MAX_VALUE ); |
| 138 | + } |
| 139 | + } |
| 140 | + |
| 141 | + FailureReportChecker failure; |
| 142 | + if ( BackendConfiguration.isLucene() ) { |
| 143 | + String[] message = new String[] { |
| 144 | + "Vector 'dimension' cannot be equal to", |
| 145 | + Objects.toString( dimension ), |
| 146 | + "It must be a positive integer value lesser than or equal to" |
| 147 | + }; |
| 148 | + failure = FailureReportUtils.hasFailureReport() |
| 149 | + .typeContext( IndexedEntity.class.getName() ) |
| 150 | + .pathContext( ".floats" ) |
| 151 | + .failure( message ) |
| 152 | + .pathContext( ".bytes" ) |
| 153 | + .failure( message ); |
| 154 | + } |
| 155 | + else { |
| 156 | + failure = FailureReportUtils.hasFailureReport() |
| 157 | + .typeContext( IndexedEntity.class.getName() ); |
| 158 | + } |
| 159 | + assertThatThrownBy( () -> setupHelper.start().expectCustomBeans().withProperty( |
| 160 | + StandalonePojoMapperSettings.MAPPING_CONFIGURER, |
| 161 | + (StandalonePojoMappingConfigurer) context -> { |
| 162 | + TypeMappingStep book = context.programmaticMapping() |
| 163 | + .type( IndexedEntity.class ); |
| 164 | + book.property( "floats" ) |
| 165 | + .vectorField( dimension ).vectorSimilarity( VectorSimilarity.L2 ); |
| 166 | + book.property( "bytes" ) |
| 167 | + .vectorField( dimension ).vectorSimilarity( VectorSimilarity.L2 ); |
| 168 | + } |
| 169 | + ).setup( IndexedEntity.class ) ) |
| 170 | + .isInstanceOf( SearchException.class ) |
| 171 | + .satisfies( failure ); |
| 172 | + } |
| 173 | + |
42 | 174 | /*
|
43 | 175 | * This test relies on a backend implementation to make sure that the vector dimension was somehow set for the field.
|
44 | 176 | * hence it requires a real backend.
|
@@ -67,6 +199,22 @@ class IndexedEntity {
|
67 | 199 | ) );
|
68 | 200 | }
|
69 | 201 |
|
| 202 | + private static int maxDimension() { |
| 203 | + if ( BackendConfiguration.isLucene() ) { |
| 204 | + return 4096; |
| 205 | + } |
| 206 | + else { |
| 207 | + ElasticsearchDistributionName distribution = ElasticsearchTestDialect.getActualVersion().distribution(); |
| 208 | + if ( ElasticsearchDistributionName.ELASTIC.equals( distribution ) ) { |
| 209 | + return 4096; |
| 210 | + } |
| 211 | + else { |
| 212 | + // looks like there's a bug in OpenSearch? it won't accept 16000 |
| 213 | + return 1024; |
| 214 | + } |
| 215 | + } |
| 216 | + } |
| 217 | + |
70 | 218 | @SuppressWarnings("rawtypes")
|
71 | 219 | public static class ValidImplicitTypeBridge implements ValueBridge<Collection, float[]> {
|
72 | 220 |
|
|
0 commit comments