Skip to content

Commit cc76edc

Browse files
GT-176 Add support for Inverted index (#425)
* Inverted index * Add support for Inverted index fix linter warnings * Adjust inverted index test not to use 'nested' on CE * Rename field primarySortCompression -> primarySortCompression for inverted index field primary sort * Add more docs and update Features field type Co-authored-by: Nikita Vaniasin <[email protected]>
1 parent 31d3f9f commit cc76edc

10 files changed

+309
-11
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
- New `LegacyPolygons` parameter for Geo Indexes
1212
- New parameters (`cacheEnabled` and `storedValues`) for Persistent Indexes
1313
- New analyzers: `classification`, `nearest neighbors`, `minhash`
14+
- Add support for Inverted index
1415

1516
## [1.3.3](https://github.com/arangodb/go-driver/tree/v1.3.3) (2022-07-27)
1617
- Fix `lastValue` field type

collection_indexes.go

+55
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ type CollectionIndexes interface {
7474
// EnsureZKDIndex creates a ZKD multi-dimensional index for the collection, if it does not already exist.
7575
// Note that zkd indexes are an experimental feature in ArangoDB 3.9.
7676
EnsureZKDIndex(ctx context.Context, fields []string, options *EnsureZKDIndexOptions) (Index, bool, error)
77+
78+
// EnsureInvertedIndex creates an inverted index in the collection, if it does not already exist.
79+
// Available in ArangoDB 3.10 and later.
80+
EnsureInvertedIndex(ctx context.Context, options *InvertedIndexOptions) (Index, bool, error)
7781
}
7882

7983
// EnsureFullTextIndexOptions contains specific options for creating a full text index.
@@ -196,3 +200,54 @@ type EnsureZKDIndexOptions struct {
196200
// TODO: The sparse property is not supported yet
197201
// Sparse bool
198202
}
203+
204+
// InvertedIndexOptions provides specific options for creating an inverted index
205+
// Available since ArangoDB 3.10
206+
type InvertedIndexOptions struct {
207+
// Name optional user defined name used for hints in AQL queries
208+
Name string `json:"name"`
209+
// InBackground if true will not hold an exclusive collection lock for the entire index creation period (rocksdb only).
210+
InBackground bool `json:"inBackground,omitempty"`
211+
IsNewlyCreated bool `json:"isNewlyCreated,omitempty"`
212+
213+
Parallelism int `json:"parallelism,omitempty"`
214+
// PrimarySort describes how individual fields are sorted
215+
PrimarySort InvertedIndexPrimarySort `json:"primarySort,omitempty"`
216+
// StoredValues these values specifies how the index should track values.
217+
StoredValues []StoredValue `json:"storedValues,omitempty"`
218+
// Analyzer to be used for indexing
219+
Analyzer ArangoSearchAnalyzerType `json:"analyzer,omitempty"`
220+
// Features list of analyzer features, default []
221+
Features []ArangoSearchAnalyzerFeature `json:"features,omitempty"`
222+
// IncludeAllFields If set to true, all fields of this element will be indexed. Defaults to false.
223+
IncludeAllFields bool `json:"includeAllFields,omitempty"`
224+
// TrackListPositions If set to true, values in a listed are treated as separate values. Defaults to false.
225+
TrackListPositions bool `json:"trackListPositions,omitempty"`
226+
// Fields contains the properties for individual fields of the element.
227+
// The key of the map are field names.
228+
Fields []InvertedIndexField `json:"fields,omitempty"`
229+
}
230+
231+
// InvertedIndexPrimarySort defines compression and list of fields to be sorted.
232+
type InvertedIndexPrimarySort struct {
233+
Fields []ArangoSearchPrimarySortEntry `json:"fields,omitempty"`
234+
// Compression optional
235+
Compression PrimarySortCompression `json:"compression,omitempty"`
236+
}
237+
238+
// InvertedIndexField contains configuration for indexing of the field
239+
type InvertedIndexField struct {
240+
// Name of the field
241+
Name string `json:"name"`
242+
// Analyzer optional
243+
Analyzer ArangoSearchAnalyzerType `json:"analyzer,omitempty"`
244+
// IncludeAllFields If set to true, all fields of this element will be indexed. Defaults to false.
245+
IncludeAllFields bool `json:"includeAllFields,omitempty"`
246+
// TrackListPositions If set to true, values in a listed are treated as separate values. Defaults to false.
247+
TrackListPositions bool `json:"trackListPositions,omitempty"`
248+
// Features list of analyzer features, default [].
249+
Features []ArangoSearchAnalyzerFeature `json:"features,omitempty"`
250+
// Nested
251+
// Enterprise-only feature
252+
Nested []InvertedIndexField `json:"nested,omitempty"`
253+
}

collection_indexes_impl.go

+45-3
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package driver
2424

2525
import (
2626
"context"
27+
"encoding/json"
2728
"path"
2829
)
2930

@@ -54,8 +55,7 @@ type indexData struct {
5455
}
5556

5657
type indexListResponse struct {
57-
Indexes []indexData `json:"indexes,omitempty"`
58-
Identifiers map[string]indexData `json:"identifiers,omitempty"`
58+
Indexes []json.RawMessage `json:"indexes,omitempty"`
5959
ArangoError
6060
}
6161

@@ -123,7 +123,7 @@ func (c *collection) Indexes(ctx context.Context) ([]Index, error) {
123123
}
124124
result := make([]Index, 0, len(data.Indexes))
125125
for _, x := range data.Indexes {
126-
idx, err := newIndex(x, c)
126+
idx, err := newIndexFromMap(x, c)
127127
if err != nil {
128128
return nil, WithStack(err)
129129
}
@@ -313,6 +313,48 @@ func (c *collection) EnsureZKDIndex(ctx context.Context, fields []string, option
313313
return idx, created, nil
314314
}
315315

316+
type invertedIndexData struct {
317+
InvertedIndexOptions
318+
Type string `json:"type"`
319+
ID string `json:"id,omitempty"`
320+
321+
ArangoError `json:",inline"`
322+
}
323+
324+
// EnsureInvertedIndex creates an inverted index in the collection, if it does not already exist.
325+
// Available in ArangoDB 3.10 and later.
326+
func (c *collection) EnsureInvertedIndex(ctx context.Context, options *InvertedIndexOptions) (Index, bool, error) {
327+
req, err := c.conn.NewRequest("POST", path.Join(c.db.relPath(), "_api/index"))
328+
if err != nil {
329+
return nil, false, WithStack(err)
330+
}
331+
if options == nil {
332+
options = &InvertedIndexOptions{}
333+
}
334+
req.SetQuery("collection", c.name)
335+
if _, err := req.SetBody(invertedIndexData{InvertedIndexOptions: *options, Type: string(InvertedIndex)}); err != nil {
336+
return nil, false, WithStack(err)
337+
}
338+
resp, err := c.conn.Do(ctx, req)
339+
if err != nil {
340+
return nil, false, WithStack(err)
341+
}
342+
if err := resp.CheckStatus(200, 201); err != nil {
343+
return nil, false, WithStack(err)
344+
}
345+
created := resp.StatusCode() == 201
346+
347+
var data invertedIndexData
348+
if err := resp.ParseBody("", &data); err != nil {
349+
return nil, false, WithStack(err)
350+
}
351+
idx, err := newInvertedIndex(data, c)
352+
if err != nil {
353+
return nil, false, WithStack(err)
354+
}
355+
return idx, created, nil
356+
}
357+
316358
// ensureIndex creates a persistent index in the collection, if it does not already exist.
317359
// Fields is a slice of attribute paths.
318360
// The index is returned, together with a boolean indicating if the index was newly created (true) or pre-existing (false).

edge_collection_indexes_impl.go

+10
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,13 @@ func (c *edgeCollection) EnsureZKDIndex(ctx context.Context, fields []string, op
135135
}
136136
return result, created, nil
137137
}
138+
139+
// EnsureInvertedIndex creates an inverted index in the collection, if it does not already exist.
140+
// Available in ArangoDB 3.10 and later.
141+
func (c *edgeCollection) EnsureInvertedIndex(ctx context.Context, options *InvertedIndexOptions) (Index, bool, error) {
142+
result, created, err := c.rawCollection().EnsureInvertedIndex(ctx, options)
143+
if err != nil {
144+
return nil, false, WithStack(err)
145+
}
146+
return result, created, nil
147+
}

index.go

+4
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ const (
3838
EdgeIndex = IndexType("edge")
3939
TTLIndex = IndexType("ttl")
4040
ZKDIndex = IndexType("zkd")
41+
InvertedIndex = IndexType("inverted")
4142
)
4243

4344
// Index provides access to a single index in a single collection.
@@ -95,4 +96,7 @@ type Index interface {
9596

9697
// StoredValues returns a list of stored values for this index - PersistentIndex only
9798
StoredValues() []string
99+
100+
// InvertedIndexOptions returns the inverted index options for this index - InvertedIndex only
101+
InvertedIndexOptions() InvertedIndexOptions
98102
}

index_impl.go

+76-4
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package driver
2424

2525
import (
2626
"context"
27+
"encoding/json"
2728
"path"
2829
"strings"
2930
)
@@ -49,6 +50,8 @@ func indexStringToType(indexTypeString string) (IndexType, error) {
4950
return TTLIndex, nil
5051
case string(ZKDIndex):
5152
return ZKDIndex, nil
53+
case string(InvertedIndex):
54+
return InvertedIndex, nil
5255
default:
5356
return "", WithStack(InvalidArgumentError{Message: "unknown index type"})
5457
}
@@ -79,12 +82,76 @@ func newIndex(data indexData, col *collection) (Index, error) {
7982
}, nil
8083
}
8184

85+
// newIndex creates a new Index implementation.
86+
func newInvertedIndex(data invertedIndexData, col *collection) (Index, error) {
87+
if data.ID == "" {
88+
return nil, WithStack(InvalidArgumentError{Message: "id is empty"})
89+
}
90+
parts := strings.Split(data.ID, "/")
91+
if len(parts) != 2 {
92+
return nil, WithStack(InvalidArgumentError{Message: "id must be `collection/name`"})
93+
}
94+
if col == nil {
95+
return nil, WithStack(InvalidArgumentError{Message: "col is nil"})
96+
}
97+
indexType, err := indexStringToType(data.Type)
98+
if err != nil {
99+
return nil, WithStack(err)
100+
}
101+
102+
dataIndex := indexData{
103+
ID: data.ID,
104+
Type: data.Type,
105+
InBackground: &data.InvertedIndexOptions.InBackground,
106+
IsNewlyCreated: &data.InvertedIndexOptions.IsNewlyCreated,
107+
Name: data.InvertedIndexOptions.Name,
108+
ArangoError: data.ArangoError,
109+
}
110+
return &index{
111+
indexData: dataIndex,
112+
invertedDataIndex: data,
113+
indexType: indexType,
114+
col: col,
115+
db: col.db,
116+
conn: col.conn,
117+
}, nil
118+
}
119+
120+
// newIndexFrom map returns Index implementation based on index type extracted from rawData
121+
func newIndexFromMap(rawData json.RawMessage, col *collection) (Index, error) {
122+
type generalIndexData struct {
123+
Type string `json:"type"`
124+
}
125+
var gen generalIndexData
126+
err := json.Unmarshal(rawData, &gen)
127+
if err != nil {
128+
return nil, WithStack(err)
129+
}
130+
131+
if IndexType(gen.Type) == InvertedIndex {
132+
var idxData invertedIndexData
133+
err = json.Unmarshal(rawData, &idxData)
134+
if err != nil {
135+
return nil, WithStack(err)
136+
}
137+
return newInvertedIndex(idxData, col)
138+
}
139+
140+
var idxData indexData
141+
err = json.Unmarshal(rawData, &idxData)
142+
if err != nil {
143+
return nil, WithStack(err)
144+
}
145+
return newIndex(idxData, col)
146+
}
147+
82148
type index struct {
83149
indexData
84-
indexType IndexType
85-
db *database
86-
col *collection
87-
conn Connection
150+
invertedDataIndex invertedIndexData
151+
indexType IndexType
152+
db *database
153+
col *collection
154+
conn Connection
88155
}
89156

90157
// relPath creates the relative path to this index (`_db/<db-name>/_api/index`)
@@ -197,6 +264,11 @@ func (i *index) StoredValues() []string {
197264
return i.indexData.StoredValues
198265
}
199266

267+
// InvertedIndexOptions returns the inverted index options for this index - InvertedIndex only
268+
func (i *index) InvertedIndexOptions() InvertedIndexOptions {
269+
return i.invertedDataIndex.InvertedIndexOptions
270+
}
271+
200272
// Remove removes the entire index.
201273
// If the index does not exist, a NotFoundError is returned.
202274
func (i *index) Remove(ctx context.Context) error {

test/index_ensure_test.go

+103
Original file line numberDiff line numberDiff line change
@@ -501,3 +501,106 @@ func TestEnsureZKDIndexWithOptions(t *testing.T) {
501501
err = idx.Remove(nil)
502502
require.NoError(t, err)
503503
}
504+
505+
// TestEnsureInvertedIndex creates a collection with an inverted index
506+
func TestEnsureInvertedIndex(t *testing.T) {
507+
ctx := context.Background()
508+
509+
c := createClientFromEnv(t, true)
510+
EnsureVersion(t, ctx, c).CheckVersion(MinimumVersion("3.10.0"))
511+
512+
db := ensureDatabase(ctx, c, "index_test", nil, t)
513+
col := ensureCollection(ctx, db, fmt.Sprintf("inverted_index_opt_test"), nil, t)
514+
515+
type testCase struct {
516+
IsEE bool
517+
Options driver.InvertedIndexOptions
518+
}
519+
testCases := []testCase{
520+
{
521+
IsEE: false,
522+
Options: driver.InvertedIndexOptions{
523+
Name: "inverted-opt",
524+
PrimarySort: driver.InvertedIndexPrimarySort{
525+
Fields: []driver.ArangoSearchPrimarySortEntry{
526+
{Field: "test1", Ascending: newBool(true)},
527+
{Field: "test2", Ascending: newBool(false)},
528+
},
529+
Compression: driver.PrimarySortCompressionLz4,
530+
},
531+
Features: []driver.ArangoSearchAnalyzerFeature{},
532+
StoredValues: []driver.StoredValue{},
533+
Fields: []driver.InvertedIndexField{
534+
{Name: "field1", Features: []driver.ArangoSearchAnalyzerFeature{driver.ArangoSearchAnalyzerFeatureFrequency}, Nested: nil},
535+
{Name: "field2", Features: []driver.ArangoSearchAnalyzerFeature{driver.ArangoSearchAnalyzerFeaturePosition}, TrackListPositions: false, Nested: nil},
536+
},
537+
},
538+
},
539+
{
540+
IsEE: true,
541+
Options: driver.InvertedIndexOptions{
542+
Name: "inverted-opt-nested",
543+
PrimarySort: driver.InvertedIndexPrimarySort{
544+
Fields: []driver.ArangoSearchPrimarySortEntry{
545+
{Field: "test1", Ascending: newBool(true)},
546+
{Field: "test2", Ascending: newBool(false)},
547+
},
548+
Compression: driver.PrimarySortCompressionLz4,
549+
},
550+
Features: []driver.ArangoSearchAnalyzerFeature{},
551+
StoredValues: []driver.StoredValue{},
552+
Fields: []driver.InvertedIndexField{
553+
{Name: "field1", Features: []driver.ArangoSearchAnalyzerFeature{driver.ArangoSearchAnalyzerFeatureFrequency}, Nested: nil},
554+
{Name: "field2", Features: []driver.ArangoSearchAnalyzerFeature{driver.ArangoSearchAnalyzerFeaturePosition}, TrackListPositions: false,
555+
Nested: []driver.InvertedIndexField{
556+
{
557+
Name: "some-nested-field",
558+
Nested: []driver.InvertedIndexField{
559+
{Name: "test"},
560+
{Name: "bas", Nested: []driver.InvertedIndexField{
561+
{Name: "a", Features: nil},
562+
}},
563+
{Name: "kas", Nested: []driver.InvertedIndexField{
564+
{Name: "b", TrackListPositions: true},
565+
{Name: "c"},
566+
}},
567+
},
568+
},
569+
},
570+
},
571+
},
572+
},
573+
},
574+
}
575+
576+
for _, tc := range testCases {
577+
t.Run(tc.Options.Name, func(t *testing.T) {
578+
if tc.IsEE {
579+
skipNoEnterprise(t)
580+
}
581+
582+
idx, created, err := col.EnsureInvertedIndex(ctx, &tc.Options)
583+
require.NoError(t, err)
584+
require.True(t, created)
585+
586+
tc.Options.IsNewlyCreated = true
587+
tc.Options.Analyzer = driver.ArangoSearchAnalyzerTypeIdentity // default value for analyzer
588+
589+
requireIdxEquality := func(invertedIdx driver.Index) {
590+
require.Equal(t, driver.InvertedIndex, idx.Type())
591+
require.Equal(t, tc.Options.Name, idx.UserName())
592+
require.Equal(t, tc.Options, idx.InvertedIndexOptions())
593+
}
594+
requireIdxEquality(idx)
595+
596+
indexes, err := col.Indexes(ctx)
597+
require.NoError(t, err)
598+
require.NotEmpty(t, indexes)
599+
600+
requireIdxEquality(indexes[0])
601+
602+
err = idx.Remove(nil)
603+
require.NoError(t, err)
604+
})
605+
}
606+
}

0 commit comments

Comments
 (0)