Skip to content

Commit 6e98dc1

Browse files
Trigram indexing works and it's fast
Co-authored-by: Techatrix <[email protected]>
1 parent 685e28c commit 6e98dc1

File tree

3 files changed

+82
-142
lines changed

3 files changed

+82
-142
lines changed

src/DocumentStore.zig

Lines changed: 8 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,7 @@ pub const BuildFile = struct {
187187
pub const Handle = struct {
188188
uri: Uri,
189189
tree: Ast,
190+
trigram_store: TrigramStore,
190191
/// Contains one entry for every import in the document
191192
import_uris: std.ArrayListUnmanaged(Uri) = .empty,
192193
/// Contains one entry for every cimport in the document
@@ -270,13 +271,17 @@ pub const Handle = struct {
270271
var tree = try parseTree(allocator, text, mode);
271272
errdefer tree.deinit(allocator);
272273

273-
const cimports = try collectCImports(allocator, tree);
274+
var cimports = try collectCImports(allocator, tree);
274275
errdefer cimports.deinit(allocator);
275276

277+
var trigram_store: TrigramStore = try .init(allocator, tree, .@"utf-16");
278+
errdefer trigram_store.deinit();
279+
276280
return .{
277281
.uri = uri,
278282
.tree = tree,
279283
.cimports = cimports,
284+
.trigram_store = trigram_store,
280285
.impl = .{
281286
.status = .init(@bitCast(Status{
282287
.lsp_synced = lsp_synced,
@@ -307,6 +312,8 @@ pub const Handle = struct {
307312
for (self.cimports.items(.source)) |source| allocator.free(source);
308313
self.cimports.deinit(allocator);
309314

315+
self.trigram_store.deinit(allocator);
316+
310317
switch (self.impl.associated_build_file) {
311318
.none, .resolved => {},
312319
.unresolved => |*payload| payload.deinit(allocator),
@@ -697,31 +704,6 @@ pub fn getOrLoadHandle(store: *DocumentStore, uri: Uri) ?*Handle {
697704
};
698705
}
699706

700-
pub fn trigramIndexUri(
701-
store: *DocumentStore,
702-
uri: Uri,
703-
encoding: offsets.Encoding,
704-
) error{OutOfMemory}!void {
705-
const gop = try store.trigram_stores.getOrPut(store.allocator, uri);
706-
707-
if (gop.found_existing) {
708-
return;
709-
}
710-
711-
errdefer {
712-
store.allocator.free(gop.key_ptr.*);
713-
store.trigram_stores.swapRemoveAt(gop.index);
714-
}
715-
716-
gop.key_ptr.* = try store.allocator.dupe(u8, uri);
717-
gop.value_ptr.* = .empty;
718-
719-
const file_contents = store.readUri(uri) orelse return;
720-
defer store.allocator.free(file_contents);
721-
722-
try gop.value_ptr.fill(store.allocator, file_contents, encoding);
723-
}
724-
725707
/// **Thread safe** takes a shared lock
726708
/// This function does not protect against data races from modifying the BuildFile
727709
pub fn getBuildFile(self: *DocumentStore, uri: Uri) ?*BuildFile {

src/Server.zig

Lines changed: 7 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1936,36 +1936,17 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ
19361936
fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.WorkspaceSymbolParams) Error!lsp.ResultType("workspace/symbol") {
19371937
if (request.query.len < 3) return null;
19381938

1939-
for (server.workspaces.items) |workspace| {
1940-
const path = Uri.parse(arena, workspace.uri) catch return error.InternalError;
1941-
var dir = std.fs.cwd().openDir(path, .{ .iterate = true }) catch return error.InternalError;
1942-
defer dir.close();
1943-
1944-
var walker = try dir.walk(arena);
1945-
defer walker.deinit();
1946-
1947-
while (walker.next() catch return error.InternalError) |entry| {
1948-
if (std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) {
1949-
const uri = Uri.fromPath(
1950-
arena,
1951-
std.fs.path.join(arena, &.{ path, entry.path }) catch return error.InternalError,
1952-
) catch return error.InternalError;
1953-
1954-
server.document_store.trigramIndexUri(
1955-
uri,
1956-
server.offset_encoding,
1957-
) catch return error.InternalError;
1958-
}
1959-
}
1960-
}
1939+
// TODO: take this and get copy of handle ptrs
1940+
server.document_store.lock.lock();
1941+
defer server.document_store.lock.unlock();
19611942

19621943
var symbols: std.ArrayListUnmanaged(types.WorkspaceSymbol) = .empty;
19631944
var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty;
19641945

1965-
for (
1966-
server.document_store.trigram_stores.keys(),
1967-
server.document_store.trigram_stores.values(),
1968-
) |uri, trigram_store| {
1946+
for (server.document_store.handles.keys(), server.document_store.handles.values()) |uri, handle| {
1947+
const trigram_store = &handle.trigram_store;
1948+
1949+
declaration_buffer.clearRetainingCapacity();
19691950
try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer);
19701951

19711952
const slice = trigram_store.declarations.slice();

src/TrigramStore.zig

Lines changed: 67 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -22,51 +22,25 @@ pub const Declaration = struct {
2222
range: offsets.Range,
2323
};
2424

25-
pub const empty: TrigramStore = .{
26-
.has_filter = false,
27-
.filter_buckets = .empty,
28-
.trigram_to_declarations = .empty,
29-
.declarations = .empty,
30-
.names = .empty,
31-
};
32-
3325
has_filter: bool,
3426
filter_buckets: std.ArrayListUnmanaged(CuckooFilter.Bucket),
3527
trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnmanaged(Declaration.Index)),
3628
declarations: std.MultiArrayList(Declaration),
3729
names: std.ArrayListUnmanaged(u8),
3830

39-
pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void {
40-
store.filter_buckets.deinit(allocator);
41-
for (store.trigram_to_declarations.values()) |*list| {
42-
list.deinit(allocator);
43-
}
44-
store.trigram_to_declarations.deinit(allocator);
45-
store.declarations.deinit(allocator);
46-
store.names.deinit(allocator);
47-
store.* = undefined;
48-
}
49-
50-
fn clearRetainingCapacity(store: *TrigramStore) void {
51-
store.filter_buckets.clearRetainingCapacity();
52-
store.has_filter = false;
53-
for (store.trigram_to_declarations.values()) |*list| {
54-
list.clearRetainingCapacity();
55-
}
56-
store.declarations.clearRetainingCapacity();
57-
store.names.clearRetainingCapacity();
58-
}
59-
60-
pub fn fill(
61-
store: *TrigramStore,
31+
pub fn init(
6232
allocator: std.mem.Allocator,
63-
source: [:0]const u8,
33+
tree: Ast,
6434
encoding: offsets.Encoding,
65-
) error{OutOfMemory}!void {
66-
store.clearRetainingCapacity();
67-
68-
var tree = try Ast.parse(allocator, source, .zig);
69-
defer tree.deinit(allocator);
35+
) error{OutOfMemory}!TrigramStore {
36+
var store: TrigramStore = .{
37+
.has_filter = false,
38+
.filter_buckets = .empty,
39+
.trigram_to_declarations = .empty,
40+
.declarations = .empty,
41+
.names = .empty,
42+
};
43+
errdefer store.deinit(allocator);
7044

7145
const Context = struct {
7246
allocator: std.mem.Allocator,
@@ -126,15 +100,61 @@ pub fn fill(
126100
}
127101
};
128102

129-
var context = Context{
103+
var context: Context = .{
130104
.allocator = allocator,
131-
.store = store,
105+
.store = &store,
132106
.in_function = false,
133107
.encoding = encoding,
134108
};
135109
try ast.iterateChildren(tree, .root, &context, Context.Error, Context.callback);
136110

137-
try store.finalize(allocator);
111+
const lists = store.trigram_to_declarations.values();
112+
var index: usize = 0;
113+
while (index < lists.len) {
114+
if (lists[index].items.len == 0) {
115+
lists[index].deinit(allocator);
116+
store.trigram_to_declarations.swapRemoveAt(index);
117+
} else {
118+
index += 1;
119+
}
120+
}
121+
122+
const trigrams = store.trigram_to_declarations.keys();
123+
124+
if (trigrams.len > 0) {
125+
var prng = std.Random.DefaultPrng.init(0);
126+
127+
const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable;
128+
try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity);
129+
store.filter_buckets.items.len = filter_capacity;
130+
131+
const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items };
132+
filter.reset();
133+
store.has_filter = true;
134+
135+
for (trigrams) |trigram| {
136+
filter.append(prng.random(), trigram) catch |err| switch (err) {
137+
error.EvictionFailed => {
138+
// NOTE: This should generally be quite rare.
139+
store.has_filter = false;
140+
break;
141+
},
142+
};
143+
}
144+
}
145+
146+
return store;
147+
}
148+
149+
pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void {
150+
store.filter_buckets.deinit(allocator);
151+
for (store.trigram_to_declarations.values()) |*list| {
152+
list.deinit(allocator);
153+
}
154+
store.trigram_to_declarations.deinit(allocator);
155+
store.declarations.deinit(allocator);
156+
store.names.deinit(allocator);
157+
store.* = undefined;
138158
}
139159

140160
/// Caller must not submit name.len < 3.
@@ -167,53 +187,15 @@ fn appendDeclaration(
167187
}
168188
}
169189

170-
/// Must be called before any queries are executed.
171-
fn finalize(store: *TrigramStore, allocator: std.mem.Allocator) error{OutOfMemory}!void {
172-
{
173-
const lists = store.trigram_to_declarations.values();
174-
var index: usize = 0;
175-
while (index < lists.len) {
176-
if (lists[index].items.len == 0) {
177-
lists[index].deinit(allocator);
178-
store.trigram_to_declarations.swapRemoveAt(index);
179-
} else {
180-
index += 1;
181-
}
182-
}
183-
}
184-
185-
const trigrams = store.trigram_to_declarations.keys();
186-
187-
if (trigrams.len > 0) {
188-
var prng = std.Random.DefaultPrng.init(0);
189-
190-
const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable;
191-
try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity);
192-
store.filter_buckets.items.len = filter_capacity;
193-
194-
const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items };
195-
filter.reset();
196-
store.has_filter = true;
197-
198-
for (trigrams) |trigram| {
199-
filter.append(prng.random(), trigram) catch |err| switch (err) {
200-
error.EvictionFailed => {
201-
// NOTE: This should generally be quite rare.
202-
store.has_filter = false;
203-
break;
204-
},
205-
};
206-
}
207-
}
208-
}
209-
190+
/// Asserts query.len >= 3. Asserts declaration_buffer.items.len == 0.
210191
pub fn declarationsForQuery(
211192
store: *const TrigramStore,
212193
allocator: std.mem.Allocator,
213194
query: []const u8,
214195
declaration_buffer: *std.ArrayListUnmanaged(Declaration.Index),
215196
) error{OutOfMemory}!void {
216197
assert(query.len >= 3);
198+
assert(declaration_buffer.items.len == 0);
217199

218200
const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items };
219201

@@ -226,14 +208,9 @@ pub fn declarationsForQuery(
226208
}
227209
}
228210

229-
const first = (store.trigram_to_declarations.get(query[0..3].*) orelse {
230-
declaration_buffer.clearRetainingCapacity();
231-
return;
232-
}).items;
211+
const first = (store.trigram_to_declarations.get(query[0..3].*) orelse return).items;
233212

234-
declaration_buffer.clearRetainingCapacity();
235-
try declaration_buffer.ensureTotalCapacity(allocator, first.len * 2);
236-
declaration_buffer.items.len = first.len * 2;
213+
try declaration_buffer.resize(allocator, first.len * 2);
237214

238215
var len = first.len;
239216
@memcpy(declaration_buffer.items[0..len], first);
@@ -242,18 +219,18 @@ pub fn declarationsForQuery(
242219
const trigram = query[index..][0..3].*;
243220
const old_len = len;
244221
len = mergeIntersection(
245-
(store.trigram_to_declarations.get(trigram[0..3].*) orelse return {
222+
(store.trigram_to_declarations.get(trigram[0..3].*) orelse {
246223
declaration_buffer.clearRetainingCapacity();
247224
return;
248225
}).items,
249226
declaration_buffer.items[0..len],
250227
declaration_buffer.items[len..],
251228
);
252229
@memcpy(declaration_buffer.items[0..len], declaration_buffer.items[old_len..][0..len]);
253-
declaration_buffer.items.len = len * 2;
230+
declaration_buffer.shrinkRetainingCapacity(len * 2);
254231
}
255232

256-
declaration_buffer.items.len = declaration_buffer.items.len / 2;
233+
declaration_buffer.shrinkRetainingCapacity(declaration_buffer.items.len / 2);
257234
}
258235

259236
/// Asserts `@min(a.len, b.len) <= out.len`.

0 commit comments

Comments
 (0)