|
1 | 1 | # magda-csv-semantic-indexer |
2 | | -A Magda semantic indexer can index CSV files |
| 2 | + |
| 3 | + |
| 4 | + |
| 5 | +A Helm chart for Magda CSV Semantic Indexer |
| 6 | + |
| 7 | +**Homepage:** <https://github.com/magda-io/magda-csv-semantic-indexer> |
| 8 | + |
| 9 | +## Source Code |
| 10 | + |
| 11 | +* <https://github.com/magda-io/magda-csv-semantic-indexer> |
| 12 | + |
| 13 | +## Requirements |
| 14 | + |
| 15 | +Kubernetes: `>= 1.14.0-0` |
| 16 | + |
| 17 | +| Repository | Name | Version | |
| 18 | +|------------|------|---------| |
| 19 | +| oci://ghcr.io/magda-io/charts | magda-common | 5.2.0 | |
| 20 | + |
| 21 | +## Values |
| 22 | + |
| 23 | +| Key | Type | Default | Description | |
| 24 | +|-----|------|---------|-------------| |
| 25 | +| defaultAdminUserId | string | `"00000000-0000-4000-8000-000000000000"` | | |
| 26 | +| defaultSemanticIndexerConfig.bulkEmbeddingsSize | int | `1` | | |
| 27 | +| defaultSemanticIndexerConfig.bulkIndexSize | int | `50` | | |
| 28 | +| defaultSemanticIndexerConfig.chunkSizeLimit | int | `512` | | |
| 29 | +| defaultSemanticIndexerConfig.chunkSizeLimit | int | `512` | | |
| 30 | +| defaultSemanticIndexerConfig.id | string | `"csv-semantic-indexer"` | | |
| 31 | +| defaultSemanticIndexerConfig.indexName | string | `"semantic-index"` | | |
| 32 | +| defaultSemanticIndexerConfig.indexVersion | int | `1` | | |
| 33 | +| defaultSemanticIndexerConfig.overlap | int | `50` | | |
| 34 | +| defaultSemanticIndexerConfig.overlap | int | `50` | | |
| 35 | +| embeddingApiURL | string | `"http://magda-embedding-api"` | | |
| 36 | +| global | object | `{"image":{},"rollingUpdate":{},"searchEngine":{"defaultDatasetBucket":"magda-datasets","semanticIndexer":{"indexName":null,"indexVersion":null,"knnVectorFieldConfig":{"compressionLevel":null,"dimension":768,"efConstruction":100,"efSearch":100,"encoder":{"clip":false,"name":"sq","type":"fp16"},"m":16,"mode":"in_memory","spaceType":"l2"},"numberOfReplicas":0,"numberOfShards":1}}}` | Only used to provide appropriate default values for `helm lint`. | |
| 37 | +| global.searchEngine.semanticIndexer.knnVectorFieldConfig.compressionLevel | string | `nil` | The compression_level mapping parameter selects a quantization encoder that reduces vector memory consumption by the given factor. | |
| 38 | +| global.searchEngine.semanticIndexer.knnVectorFieldConfig.dimension | int | `768` | Dimension of the embedding vectors. | |
| 39 | +| global.searchEngine.semanticIndexer.knnVectorFieldConfig.efConstruction | int | `100` | Similar to efSearch but used during index construction. Higher values improve search quality but increase index build time. | |
| 40 | +| global.searchEngine.semanticIndexer.knnVectorFieldConfig.efSearch | int | `100` | The size of the candidate queue during search. Larger values may improve search quality but increase search latency. | |
| 41 | +| global.searchEngine.semanticIndexer.knnVectorFieldConfig.encoder | object | `{"clip":false,"name":"sq","type":"fp16"}` | FAISS encoder configuration (ignored if `compressionLevel` is set). | |
| 42 | +| global.searchEngine.semanticIndexer.knnVectorFieldConfig.m | int | `16` | The maximum number of graph edges per vector. Higher values increase memory usage but may improve search quality. | |
| 43 | +| global.searchEngine.semanticIndexer.knnVectorFieldConfig.mode | string | `"in_memory"` | Vector workload mode: `on_disk` or `in_memory`. | |
| 44 | +| image.name | string | `"data61/magda-csv-semantic-indexer"` | | |
| 45 | +| image.pullPolicy | string | `"IfNotPresent"` | | |
| 46 | +| image.repository | string | `"localhost:5000"` | | |
| 47 | +| image.tag | string | `"latest"` | | |
| 48 | +| minioConfig.defaultDatasetBucket | string | `""` | | |
| 49 | +| minioConfig.endPoint | string | `"magda-minio"` | | |
| 50 | +| minioConfig.port | int | `9000` | | |
| 51 | +| minioConfig.region | string | `""` | | |
| 52 | +| minioConfig.useSSL | bool | `false` | | |
| 53 | +| opensearchURL | string | `"http://opensearch:9200"` | | |
| 54 | +| port | int | `6305` | Service port configuration | |
| 55 | +| resources.limits.cpu | string | `"100m"` | | |
| 56 | +| resources.requests.cpu | string | `"50m"` | | |
| 57 | +| resources.requests.memory | string | `"200Mi"` | | |
| 58 | +| semanticIndexer.bulkEmbeddingsSize | int | `nil` | Number of strings we send to the embedding API for processing in a single request | |
| 59 | +| semanticIndexer.bulkIndexSize | int | `nil` | Number of documents we send to OpenSearch for bulk processing in a single request | |
| 60 | +| semanticIndexer.chunkSizeLimit | int | `nil` | The maximum number of tokens in a single chunk. | |
| 61 | +| semanticIndexer.id | string | `"csv-semantic-indexer-5"` | Semantic indexer ID | |
| 62 | +| semanticIndexer.indexName | string | `nil` | Index name | |
| 63 | +| semanticIndexer.indexVersion | int | `nil` | Index version | |
| 64 | +| semanticIndexer.overlap | int | `nil` | The number of overlapping tokens between chunks. | |
| 65 | + |
| 66 | +---------------------------------------------- |
| 67 | +Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0) |
0 commit comments