Skip to content

Commit 4443b25

Browse files
Fix new trigram iterator, misc bugs
Co-Authored-By: Techatrix <[email protected]>
1 parent 04476c2 commit 4443b25

File tree

2 files changed

Lines changed: 131 additions & 123 deletions

2 files changed

Lines changed: 131 additions & 123 deletions

src/TrigramStore.zig

Lines changed: 130 additions & 122 deletions
Original file line number | Diff line number | Diff line change
@@ -33,126 +33,96 @@ declarations: std.MultiArrayList(Declaration),
3333
pub const TrigramIterator = struct {
3434
buffer: []const u8,
3535
index: usize,
36-
boundary: Boundary,
36+
37+
trigram_buffer: Trigram,
38+
trigram_buffer_index: u2,
3739

3840
pub fn init(buffer: []const u8) TrigramIterator {
3941
assert(buffer.len != 0);
40-
return .{ .buffer = buffer, .index = 0, .boundary = .calculate(buffer, 0) };
42+
return .{
43+
.buffer = buffer,
44+
.index = 0,
45+
.trigram_buffer = @splat(0),
46+
.trigram_buffer_index = 0,
47+
};
4148
}
4249

43-
pub const Boundary = struct {
44-
end: usize,
45-
next_start: ?usize,
46-
47-
pub fn calculate(buffer: []const u8, index: usize) Boundary {
48-
assert(buffer[index..].len > 0);
49-
50-
if (std.ascii.isLower(buffer[index])) {
51-
// First character lowercase
52-
for (buffer[index + 1 ..], index + 1..) |c, i| {
53-
if (!std.ascii.isLower(c)) {
54-
return .{
55-
.end = i,
56-
.next_start = i,
57-
};
58-
}
59-
}
60-
} else {
61-
if (index + 1 >= buffer.len) {
62-
return .{
63-
.end = buffer.len,
64-
.next_start = null,
65-
};
66-
}
67-
68-
if (std.ascii.isLower(buffer[index + 1])) {
69-
// First char is uppercase, second char is lowercase
70-
for (buffer[index + 2 ..], index + 2..) |c, i| {
71-
if (!std.ascii.isLower(c)) {
72-
return .{
73-
.end = i,
74-
.next_start = i,
75-
};
76-
}
77-
}
78-
} else {
79-
// First and second chars are uppercase
80-
for (buffer[index + 2 ..], index + 2..) |c, i| {
81-
if (!std.ascii.isUpper(c)) {
82-
return .{
83-
.end = i,
84-
.next_start = i,
85-
};
86-
}
87-
}
88-
}
89-
}
90-
91-
return .{
92-
.end = buffer.len,
93-
.next_start = null,
94-
};
95-
}
96-
};
97-
9850
pub fn next(ti: *TrigramIterator) ?Trigram {
99-
if (ti.index == ti.buffer.len) return null;
100-
assert(ti.index < ti.boundary.end);
101-
102-
var trigram: [3]u8 = @splat(0);
103-
const unpadded = ti.buffer[ti.index..@min(ti.index + 3, ti.boundary.end)];
104-
_ = std.ascii.lowerString(&trigram, unpadded);
51+
while (ti.index < ti.buffer.len) {
52+
defer ti.index += 1;
53+
const c = std.ascii.toLower(ti.buffer[ti.index]);
54+
if (c == '_') continue;
55+
56+
if (ti.trigram_buffer_index < 3) {
57+
ti.trigram_buffer[ti.trigram_buffer_index] = c;
58+
ti.trigram_buffer_index += 1;
59+
continue;
60+
}
10561

106-
if (unpadded.len < 3 or ti.index + 3 >= ti.boundary.end) {
107-
ti.index = ti.boundary.next_start orelse {
108-
ti.index = ti.buffer.len;
109-
return trigram;
110-
};
111-
ti.boundary = .calculate(ti.buffer, ti.index);
62+
defer {
63+
@memmove(ti.trigram_buffer[0..2], ti.trigram_buffer[1..3]);
64+
ti.trigram_buffer[2] = c;
65+
}
66+
return ti.trigram_buffer;
67+
} else if (ti.trigram_buffer_index > 0) {
68+
ti.trigram_buffer_index = 0;
69+
return ti.trigram_buffer;
11270
} else {
113-
ti.index += 1;
71+
return null;
11472
}
115-
116-
return trigram;
11773
}
11874
};
11975

120-
test "TrigramIterator.Boundary.calculate" {
121-
var boundary: TrigramIterator.Boundary = .calculate("helloWORLD", 0);
122-
try std.testing.expectEqual(5, boundary.end);
123-
try std.testing.expectEqual(5, boundary.next_start.?);
124-
125-
boundary = .calculate("helloWORLD", 5);
126-
try std.testing.expectEqual(10, boundary.end);
127-
try std.testing.expectEqual(null, boundary.next_start);
76+
test TrigramIterator {
77+
try testTrigramIterator("a", &.{"a\x00\x00".*});
78+
try testTrigramIterator("ab", &.{"ab\x00".*});
79+
try testTrigramIterator("abc", &.{"abc".*});
80+
81+
try testTrigramIterator("hello", &.{ "hel".*, "ell".*, "llo".* });
82+
try testTrigramIterator("HELLO", &.{ "hel".*, "ell".*, "llo".* });
83+
try testTrigramIterator("HellO", &.{ "hel".*, "ell".*, "llo".* });
84+
85+
try testTrigramIterator("a_", &.{"a\x00\x00".*});
86+
try testTrigramIterator("ab_", &.{"ab\x00".*});
87+
try testTrigramIterator("abc_", &.{"abc".*});
88+
89+
try testTrigramIterator("_a", &.{"a\x00\x00".*});
90+
try testTrigramIterator("_a_", &.{"a\x00\x00".*});
91+
try testTrigramIterator("_a__", &.{"a\x00\x00".*});
92+
93+
try testTrigramIterator("_", &.{});
94+
try testTrigramIterator("__", &.{});
95+
try testTrigramIterator("___", &.{});
96+
97+
try testTrigramIterator("He_ll_O", &.{ "hel".*, "ell".*, "llo".* });
98+
try testTrigramIterator("He__ll___O", &.{ "hel".*, "ell".*, "llo".* });
99+
try testTrigramIterator("__He__ll__O_", &.{ "hel".*, "ell".*, "llo".* });
100+
101+
try testTrigramIterator("HellO__World___HelloWorld", &.{
102+
"hel".*, "ell".*, "llo".*,
103+
"low".*, "owo".*, "wor".*,
104+
"orl".*, "rld".*, "ldh".*,
105+
"dhe".*, "hel".*, "ell".*,
106+
"llo".*, "low".*, "owo".*,
107+
"wor".*, "orl".*, "rld".*,
108+
});
128109
}
129110

130-
test TrigramIterator {
111+
fn testTrigramIterator(
112+
input: []const u8,
113+
expected: []const Trigram,
114+
) !void {
131115
const allocator = std.testing.allocator;
132116

133-
const matrix: []const struct { []const u8, []const Trigram } = &.{
134-
.{ "a", &.{"a\x00\x00".*} },
135-
.{ "ab", &.{"ab\x00".*} },
136-
.{ "helloWORLD", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } },
137-
.{ "HelloWORLD", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } },
138-
.{ "HelloWorld", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } },
139-
};
140-
141-
var actual: std.ArrayList(Trigram) = .empty;
142-
defer actual.deinit(allocator);
117+
var actual_buffer: std.ArrayList(Trigram) = .empty;
118+
defer actual_buffer.deinit(allocator);
143119

144-
for (matrix) |entry| {
145-
const input, const expected = entry;
146-
147-
actual.clearRetainingCapacity();
148-
149-
var it: TrigramIterator = .init(input);
150-
while (it.next()) |trigram| {
151-
try actual.append(allocator, trigram);
152-
}
153-
154-
try @import("testing.zig").expectEqual(expected, actual.items);
120+
var it: TrigramIterator = .init(input);
121+
while (it.next()) |trigram| {
122+
try actual_buffer.append(allocator, trigram);
155123
}
124+
125+
try @import("testing.zig").expectEqual(expected, actual_buffer.items);
156126
}
157127

158128
pub fn init(
@@ -190,7 +160,7 @@ pub fn init(
190160

191161
try context.store.appendDeclaration(
192162
context.allocator,
193-
offsets.identifierTokenToNameSlice(cb_tree, fn_token + 1),
163+
cb_tree,
194164
fn_token + 1,
195165
.function,
196166
);
@@ -227,18 +197,18 @@ pub fn init(
227197

228198
try context.store.appendDeclaration(
229199
context.allocator,
230-
offsets.identifierTokenToNameSlice(cb_tree, main_token + 1),
200+
cb_tree,
231201
main_token + 1,
232202
kind,
233203
);
234204
},
235205

236206
.test_decl => skip: {
237-
const test_name_token, const test_name = ast.testDeclNameAndToken(cb_tree, node) orelse break :skip;
207+
const test_name_token = cb_tree.nodeData(node).opt_token_and_node[0].unwrap() orelse break :skip;
238208

239209
try context.store.appendDeclaration(
240210
context.allocator,
241-
test_name,
211+
cb_tree,
242212
test_name_token,
243213
.test_function,
244214
);
@@ -273,7 +243,7 @@ pub fn init(
273243
if (trigrams.len > 0) {
274244
var prng = std.Random.DefaultPrng.init(0);
275245

276-
const filter_capacity = CuckooFilter.capacityForCount(@intCast(store.trigram_to_declarations.count())) catch unreachable;
246+
const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable;
277247
try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity);
278248
store.filter_buckets.items.len = filter_capacity;
279249

@@ -308,21 +278,54 @@ pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void {
308278
fn appendDeclaration(
309279
store: *TrigramStore,
310280
allocator: std.mem.Allocator,
311-
name: []const u8,
281+
tree: *const Ast,
312282
name_token: Ast.TokenIndex,
313283
kind: Declaration.Kind,
314284
) error{OutOfMemory}!void {
315-
if (name.len < 3) return;
285+
const raw_name = tree.tokenSlice(name_token);
286+
287+
const strategy: enum { raw, smart }, const name = switch (tree.tokenTag(name_token)) {
288+
.string_literal => .{ .raw, raw_name[1 .. raw_name.len - 1] },
289+
.identifier => if (std.mem.startsWith(u8, raw_name, "@"))
290+
.{ .raw, raw_name[2 .. raw_name.len - 1] }
291+
else
292+
.{ .smart, raw_name },
293+
else => unreachable,
294+
};
295+
296+
switch (strategy) {
297+
.raw => {
298+
if (name.len < 3) return;
299+
for (0..name.len - 2) |index| {
300+
const trigram = name[index..][0..3].*;
301+
try store.appendOneTrigram(allocator, trigram);
302+
}
303+
},
304+
.smart => {
305+
var it: TrigramIterator = .init(name);
306+
while (it.next()) |trigram| {
307+
try store.appendOneTrigram(allocator, trigram);
308+
}
309+
},
310+
}
316311

317312
try store.declarations.append(allocator, .{
318313
.name = name_token,
319314
.kind = kind,
320315
});
316+
}
321317

322-
for (0..name.len - 2) |index| {
323-
const trigram = name[index..][0..3].*;
324-
const gop = try store.trigram_to_declarations.getOrPutValue(allocator, trigram, .empty);
325-
try gop.value_ptr.append(allocator, @enumFromInt(store.declarations.len - 1));
318+
fn appendOneTrigram(
319+
store: *TrigramStore,
320+
allocator: std.mem.Allocator,
321+
trigram: Trigram,
322+
) error{OutOfMemory}!void {
323+
const declaration_index: Declaration.Index = @enumFromInt(store.declarations.len);
324+
325+
const gop = try store.trigram_to_declarations.getOrPutValue(allocator, trigram, .empty);
326+
327+
if (gop.value_ptr.getLastOrNull() != declaration_index) {
328+
try gop.value_ptr.append(allocator, declaration_index);
326329
}
327330
}
328331

@@ -333,32 +336,33 @@ pub fn declarationsForQuery(
333336
query: []const u8,
334337
declaration_buffer: *std.ArrayListUnmanaged(Declaration.Index),
335338
) error{OutOfMemory}!void {
336-
assert(query.len >= 3);
339+
assert(query.len >= 1);
337340
assert(declaration_buffer.items.len == 0);
338341

339342
const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items };
340343

341344
if (store.has_filter) {
342-
for (0..query.len - 2) |index| {
343-
const trigram = query[index..][0..3].*;
345+
var ti: TrigramIterator = .init(query);
346+
while (ti.next()) |trigram| {
344347
if (!filter.contains(trigram)) {
345348
return;
346349
}
347350
}
348351
}
349352

350-
const first = (store.trigram_to_declarations.get(query[0..3].*) orelse return).items;
353+
var ti: TrigramIterator = .init(query);
354+
355+
const first = (store.trigram_to_declarations.get(ti.next() orelse return) orelse return).items;
351356

352357
try declaration_buffer.resize(allocator, first.len * 2);
353358

354359
var len = first.len;
355360
@memcpy(declaration_buffer.items[0..len], first);
356361

357-
for (0..query.len - 2) |index| {
358-
const trigram = query[index..][0..3].*;
362+
while (ti.next()) |trigram| {
359363
const old_len = len;
360364
len = mergeIntersection(
361-
(store.trigram_to_declarations.get(trigram[0..3].*) orelse {
365+
(store.trigram_to_declarations.get(trigram) orelse {
362366
declaration_buffer.clearRetainingCapacity();
363367
return;
364368
}).items,
@@ -493,8 +497,12 @@ pub const CuckooFilter = struct {
493497
@memset(filter.buckets, [1]Fingerprint{.none} ** @typeInfo(Bucket).array.len);
494498
}
495499

496-
pub fn capacityForCount(count: u32) error{Overflow}!u32 {
497-
return count + (count & 1);
500+
pub fn capacityForCount(count: usize) error{Overflow}!usize {
501+
const overallocated_count = std.math.divCeil(usize, try std.math.mul(usize, count, 105), 100) catch |err| switch (err) {
502+
error.DivisionByZero => unreachable,
503+
else => |e| return e,
504+
};
505+
return overallocated_count + (overallocated_count & 1);
498506
}
499507

500508
pub fn append(filter: CuckooFilter, random: std.Random, trigram: Trigram) error{EvictionFailed}!void {

src/features/workspace_symbols.zig

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ const Server = @import("../Server.zig");
1111
const TrigramStore = @import("../TrigramStore.zig");
1212

1313
pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) error{OutOfMemory}!lsp.ResultType("workspace/symbol") {
14-
if (request.query.len < 3) return null;
14+
if (request.query.len == 0) return null;
1515

1616
var workspace_paths: std.ArrayList([]const u8) = try .initCapacity(arena, server.workspaces.items.len);
1717
for (server.workspaces.items) |workspace| {

0 commit comments

Comments
 (0)