@@ -33,126 +33,96 @@ declarations: std.MultiArrayList(Declaration),
/// Iterates over the trigrams of `buffer`.
///
/// Every byte is lowercased with `std.ascii.toLower` and `'_'` bytes are
/// skipped, so `"He_ll_O"` produces the same trigrams as `"hello"`.
/// Input with only one or two significant bytes produces a single
/// zero-padded trigram; input without any significant byte produces nothing.
pub const TrigramIterator = struct {
    /// Text being scanned. Only read, never copied or freed by the iterator.
    buffer: []const u8,
    /// Offset of the next unread byte in `buffer`.
    index: usize,

    /// Sliding window over the most recent significant bytes (zero-initialized).
    trigram_buffer: Trigram,
    /// Number of bytes of `trigram_buffer` filled so far (0 to 3).
    /// Set back to 0 once the final trigram has been returned.
    trigram_buffer_index: u2,

    /// Creates an iterator over `buffer`, which must not be empty.
    pub fn init(buffer: []const u8) TrigramIterator {
        assert(buffer.len != 0);
        return .{
            .buffer = buffer,
            .index = 0,
            .trigram_buffer = @splat(0),
            .trigram_buffer_index = 0,
        };
    }

    /// Returns the next trigram, or `null` when the input is exhausted.
    pub fn next(ti: *TrigramIterator) ?Trigram {
        while (ti.index < ti.buffer.len) {
            const c = std.ascii.toLower(ti.buffer[ti.index]);
            ti.index += 1;
            if (c == '_') continue;

            // Still filling the very first window.
            if (ti.trigram_buffer_index < 3) {
                ti.trigram_buffer[ti.trigram_buffer_index] = c;
                ti.trigram_buffer_index += 1;
                continue;
            }

            // The window is full: hand out its current content and slide it
            // forward by one byte so it is ready for the following call.
            const complete = ti.trigram_buffer;
            ti.trigram_buffer = .{ complete[1], complete[2], c };
            return complete;
        }

        // Input exhausted. Whatever is left in the window (possibly
        // zero-padded) is the last trigram and is returned exactly once.
        if (ti.trigram_buffer_index == 0) return null;
        ti.trigram_buffer_index = 0;
        return ti.trigram_buffer;
    }
};
11975
test TrigramIterator {
    // Fewer than three significant bytes: a single zero-padded trigram.
    try testTrigramIterator("a", &.{"a\x00\x00".*});
    try testTrigramIterator("ab", &.{"ab\x00".*});
    try testTrigramIterator("abc", &.{"abc".*});

    // Case is folded to lowercase.
    try testTrigramIterator("hello", &.{ "hel".*, "ell".*, "llo".* });
    try testTrigramIterator("HELLO", &.{ "hel".*, "ell".*, "llo".* });
    try testTrigramIterator("HellO", &.{ "hel".*, "ell".*, "llo".* });

    // Trailing underscores are ignored.
    try testTrigramIterator("a_", &.{"a\x00\x00".*});
    try testTrigramIterator("ab_", &.{"ab\x00".*});
    try testTrigramIterator("abc_", &.{"abc".*});

    // Leading and surrounding underscores are ignored.
    try testTrigramIterator("_a", &.{"a\x00\x00".*});
    try testTrigramIterator("_a_", &.{"a\x00\x00".*});
    try testTrigramIterator("_a__", &.{"a\x00\x00".*});

    // Only underscores: no trigram at all.
    try testTrigramIterator("_", &.{});
    try testTrigramIterator("__", &.{});
    try testTrigramIterator("___", &.{});

    // Underscores inside the name do not split it.
    try testTrigramIterator("He_ll_O", &.{ "hel".*, "ell".*, "llo".* });
    try testTrigramIterator("He__ll___O", &.{ "hel".*, "ell".*, "llo".* });
    try testTrigramIterator("__He__ll__O_", &.{ "hel".*, "ell".*, "llo".* });

    try testTrigramIterator("HellO__World___HelloWorld", &.{
        "hel".*, "ell".*, "llo".*,
        "low".*, "owo".*, "wor".*,
        "orl".*, "rld".*, "ldh".*,
        "dhe".*, "hel".*, "ell".*,
        "llo".*, "low".*, "owo".*,
        "wor".*, "orl".*, "rld".*,
    });
}
129110
/// Test helper: collects every trigram `TrigramIterator` produces for `input`
/// and compares the result against `expected` using the project's
/// `testing.zig` `expectEqual`.
fn testTrigramIterator(
    input: []const u8,
    expected: []const Trigram,
) !void {
    const gpa = std.testing.allocator;

    var collected: std.ArrayList(Trigram) = .empty;
    defer collected.deinit(gpa);

    var iterator: TrigramIterator = .init(input);
    while (iterator.next()) |trigram| {
        try collected.append(gpa, trigram);
    }

    try @import("testing.zig").expectEqual(expected, collected.items);
}
157127
158128pub fn init (
@@ -190,7 +160,7 @@ pub fn init(
190160
191161 try context .store .appendDeclaration (
192162 context .allocator ,
193- offsets . identifierTokenToNameSlice ( cb_tree , fn_token + 1 ) ,
163+ cb_tree ,
194164 fn_token + 1 ,
195165 .function ,
196166 );
@@ -227,18 +197,18 @@ pub fn init(
227197
228198 try context .store .appendDeclaration (
229199 context .allocator ,
230- offsets . identifierTokenToNameSlice ( cb_tree , main_token + 1 ) ,
200+ cb_tree ,
231201 main_token + 1 ,
232202 kind ,
233203 );
234204 },
235205
236206 .test_decl = > skip : {
237- const test_name_token , const test_name = ast . testDeclNameAndToken ( cb_tree , node ) orelse break :skip ;
207+ const test_name_token = cb_tree . nodeData ( node ). opt_token_and_node [ 0 ]. unwrap ( ) orelse break :skip ;
238208
239209 try context .store .appendDeclaration (
240210 context .allocator ,
241- test_name ,
211+ cb_tree ,
242212 test_name_token ,
243213 .test_function ,
244214 );
@@ -273,7 +243,7 @@ pub fn init(
273243 if (trigrams .len > 0 ) {
274244 var prng = std .Random .DefaultPrng .init (0 );
275245
276- const filter_capacity = CuckooFilter .capacityForCount (@intCast ( store .trigram_to_declarations .count () )) catch unreachable ;
246+ const filter_capacity = CuckooFilter .capacityForCount (store .trigram_to_declarations .count ()) catch unreachable ;
277247 try store .filter_buckets .ensureTotalCapacityPrecise (allocator , filter_capacity );
278248 store .filter_buckets .items .len = filter_capacity ;
279249
@@ -308,21 +278,54 @@ pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void {
/// Records the declaration named by `name_token` and indexes its name by
/// trigram.
///
/// How the name is indexed depends on the token:
/// - `.string_literal` and `@"..."` identifiers are indexed verbatim (with the
///   surrounding quotes / `@"` removed) using every overlapping 3-byte window.
///   Such names shorter than 3 bytes are skipped: nothing is indexed and no
///   declaration is recorded.
/// - Any other identifier is fed through `TrigramIterator`.
///
/// `name_token` must be a `.string_literal` or `.identifier` token; any other
/// tag is treated as `unreachable`.
fn appendDeclaration(
    store: *TrigramStore,
    allocator: std.mem.Allocator,
    tree: *const Ast,
    name_token: Ast.TokenIndex,
    kind: Declaration.Kind,
) error{OutOfMemory}!void {
    const token_text = tree.tokenSlice(name_token);

    const strategy: enum { raw, smart }, const name = switch (tree.tokenTag(name_token)) {
        // Strip the opening and closing quote.
        .string_literal => .{ .raw, token_text[1 .. token_text.len - 1] },
        .identifier => if (std.mem.startsWith(u8, token_text, "@"))
            // Strip the leading `@"` and the closing quote.
            .{ .raw, token_text[2 .. token_text.len - 1] }
        else
            .{ .smart, token_text },
        else => unreachable,
    };

    // The trigrams are registered before the declaration itself is appended:
    // `appendOneTrigram` uses the current `store.declarations.len` as the
    // index of the declaration being added.
    switch (strategy) {
        .raw => {
            if (name.len < 3) return;
            for (0..name.len - 2) |start| {
                try store.appendOneTrigram(allocator, name[start..][0..3].*);
            }
        },
        .smart => {
            var trigrams: TrigramIterator = .init(name);
            while (trigrams.next()) |trigram| {
                try store.appendOneTrigram(allocator, trigram);
            }
        },
    }

    try store.declarations.append(allocator, .{
        .name = name_token,
        .kind = kind,
    });
}
321317
/// Associates `trigram` with the declaration that is about to be appended.
///
/// The declaration index is taken to be the current `store.declarations.len`,
/// so this must be called before the declaration itself is appended.
/// If the list for `trigram` already ends with that index, nothing is added,
/// so a trigram occurring several times in one name is recorded once.
fn appendOneTrigram(
    store: *TrigramStore,
    allocator: std.mem.Allocator,
    trigram: Trigram,
) error{OutOfMemory}!void {
    const declaration_index: Declaration.Index = @enumFromInt(store.declarations.len);

    const entry = try store.trigram_to_declarations.getOrPutValue(allocator, trigram, .empty);
    const indices = entry.value_ptr;

    if (indices.getLastOrNull() == declaration_index) return;
    try indices.append(allocator, declaration_index);
}
328331
@@ -333,32 +336,33 @@ pub fn declarationsForQuery(
333336 query : []const u8 ,
334337 declaration_buffer : * std .ArrayListUnmanaged (Declaration.Index ),
335338) error {OutOfMemory }! void {
336- assert (query .len >= 3 );
339+ assert (query .len >= 1 );
337340 assert (declaration_buffer .items .len == 0 );
338341
339342 const filter : CuckooFilter = .{ .buckets = store .filter_buckets .items };
340343
341344 if (store .has_filter ) {
342- for (0 .. query . len - 2 ) | index | {
343- const trigram = query [ index .. ][0 .. 3] .* ;
345+ var ti : TrigramIterator = . init ( query );
346+ while ( ti . next ()) | trigram | {
344347 if (! filter .contains (trigram )) {
345348 return ;
346349 }
347350 }
348351 }
349352
350- const first = (store .trigram_to_declarations .get (query [0.. 3].* ) orelse return ).items ;
353+ var ti : TrigramIterator = .init (query );
354+
355+ const first = (store .trigram_to_declarations .get (ti .next () orelse return ) orelse return ).items ;
351356
352357 try declaration_buffer .resize (allocator , first .len * 2 );
353358
354359 var len = first .len ;
355360 @memcpy (declaration_buffer .items [0.. len ], first );
356361
357- for (0.. query .len - 2 ) | index | {
358- const trigram = query [index .. ][0.. 3].* ;
362+ while (ti .next ()) | trigram | {
359363 const old_len = len ;
360364 len = mergeIntersection (
361- (store .trigram_to_declarations .get (trigram [0 .. 3] .* ) orelse {
365+ (store .trigram_to_declarations .get (trigram ) orelse {
362366 declaration_buffer .clearRetainingCapacity ();
363367 return ;
364368 }).items ,
@@ -493,8 +497,12 @@ pub const CuckooFilter = struct {
493497 @memset (filter .buckets , [1 ]Fingerprint {.none } ** @typeInfo (Bucket ).array .len );
494498 }
495499
496- pub fn capacityForCount (count : u32 ) error {Overflow }! u32 {
497- return count + (count & 1 );
500+ pub fn capacityForCount (count : usize ) error {Overflow }! usize {
501+ const overallocated_count = std .math .divCeil (usize , try std .math .mul (usize , count , 105 ), 100 ) catch | err | switch (err ) {
502+ error .DivisionByZero = > unreachable ,
503+ else = > | e | return e ,
504+ };
505+ return overallocated_count + (overallocated_count & 1 );
498506 }
499507
500508 pub fn append (filter : CuckooFilter , random : std.Random , trigram : Trigram ) error {EvictionFailed }! void {
0 commit comments