Skip to content

Commit 34835fb

Browse files
authored
Fix nested scopes with tree sitter (#241349)
* Fix nested scopes with tree sitter
* Thank you tests
1 parent 5c820ba commit 34835fb

File tree

2 files changed

+71
-35
lines changed

2 files changed

+71
-35
lines changed

src/vs/workbench/services/treeSitter/browser/treeSitterTokenizationFeature.ts

Lines changed: 55 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,10 @@ interface EndOffsetToken {
4545
interface EndOffsetAndScopes {
4646
endOffset: number;
4747
scopes: string[];
48-
bracket?: boolean;
48+
bracket?: number[];
4949
}
5050

51-
const BRACKETS = /[\{\}\[\]\<\>\(\)]/;
51+
const BRACKETS = /[\{\}\[\]\<\>\(\)]/g;
5252

5353
export class TreeSitterTokenizationFeature extends Disposable implements ITreeSitterTokenizationFeature {
5454
public _serviceBrand: undefined;
@@ -558,56 +558,76 @@ export class TreeSitterTokenizationSupport extends Disposable implements ITreeSi
558558
continue;
559559
}
560560

561-
const hasBracket = () => {
562-
return !!capture.text?.match(BRACKETS) && capture.name.includes('punctuation');
561+
const brackets = (): number[] | undefined => {
562+
return (capture.name.includes('punctuation') && capture.text) ? Array.from(capture.text.matchAll(BRACKETS)).map(match => startOffset + match.index) : undefined;
563563
};
564564

565-
const addCurrentTokenToArray = () => {
566-
endOffsetsAndScopes[tokenIndex] = { endOffset: endOffset, scopes: [capture.name], bracket: hasBracket() };
565+
const addCurrentTokenToArray = (position?: number) => {
566+
if (position !== undefined) {
567+
let oldBracket = endOffsetsAndScopes[position].bracket;
568+
// Check that the previous token ends at the same point that the current token starts
569+
const prevEndOffset = position > 0 ? endOffsetsAndScopes[position - 1].endOffset : 0;
570+
if (prevEndOffset !== startOffset) {
571+
let preInsertBracket: number[] | undefined = undefined;
572+
if (oldBracket && oldBracket.length > 0) {
573+
preInsertBracket = [];
574+
const postInsertBracket: number[] = [];
575+
for (let i = 0; i < oldBracket.length; i++) {
576+
const bracket = oldBracket[i];
577+
if (bracket < startOffset) {
578+
preInsertBracket.push(bracket);
579+
} else if (bracket > endOffset) {
580+
postInsertBracket.push(bracket);
581+
}
582+
}
583+
if (preInsertBracket.length === 0) {
584+
preInsertBracket = undefined;
585+
}
586+
if (postInsertBracket.length === 0) {
587+
oldBracket = undefined;
588+
} else {
589+
oldBracket = postInsertBracket;
590+
}
591+
}
592+
// We need to add some of the position token to cover the space
593+
endOffsetsAndScopes.splice(position, 0, { endOffset: startOffset, scopes: [...endOffsetsAndScopes[position].scopes], bracket: preInsertBracket });
594+
position++;
595+
increaseSizeOfTokensByOneToken();
596+
tokenIndex++;
597+
}
598+
599+
endOffsetsAndScopes.splice(position, 0, { endOffset: endOffset, scopes: [capture.name], bracket: brackets() });
600+
endOffsetsAndScopes[tokenIndex].bracket = oldBracket;
601+
} else {
602+
endOffsetsAndScopes[tokenIndex] = { endOffset: endOffset, scopes: [capture.name], bracket: brackets() };
603+
}
567604
tokenIndex++;
568605
};
569606

570607
if (previousEndOffset >= endOffset) {
571608
// walk back through the tokens until we find the one that contains the current token
572609
let withinTokenIndex = tokenIndex - 1;
573-
let originalPreviousTokenEndOffset;
610+
let previousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;
574611

575-
let previousTokenStartOffset;
576-
let previousPreviousTokenEndOffset;
612+
let previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
577613
do {
578-
originalPreviousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;
579-
previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
580-
previousPreviousTokenEndOffset = (withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0;
581614

582615
// Check that the current token doesn't just replace the last token
583-
if ((previousTokenStartOffset + currentTokenLength) === originalPreviousTokenEndOffset) {
616+
if ((previousTokenStartOffset + currentTokenLength) === previousTokenEndOffset) {
584617
if (previousTokenStartOffset === startOffset) {
585618
// Current token and previous token span the exact same characters, replace the last scope
586619
endOffsetsAndScopes[withinTokenIndex].scopes[endOffsetsAndScopes[withinTokenIndex].scopes.length - 1] = capture.name;
620+
endOffsetsAndScopes[withinTokenIndex].bracket = brackets();
587621
}
588-
} else if (previousPreviousTokenEndOffset <= startOffset) {
589-
let originalPreviousTokenScopes;
622+
} else if (previousTokenStartOffset <= startOffset) {
590623
// The current token is within the previous token. Adjust the end of the previous token
591-
if (previousPreviousTokenEndOffset !== startOffset) {
592-
endOffsetsAndScopes[withinTokenIndex] = { endOffset: startOffset, scopes: endOffsetsAndScopes[withinTokenIndex].scopes, bracket: hasBracket() };
593-
addCurrentTokenToArray();
594-
originalPreviousTokenScopes = [...endOffsetsAndScopes[withinTokenIndex].scopes];
595-
} else {
596-
originalPreviousTokenScopes = [...endOffsetsAndScopes[withinTokenIndex].scopes];
597-
endOffsetsAndScopes[withinTokenIndex] = { endOffset: endOffset, scopes: [capture.name], bracket: hasBracket() };
598-
}
599-
600-
// Add the rest of the previous token after the current token
601-
if (originalPreviousTokenEndOffset !== endOffset) {
602-
increaseSizeOfTokensByOneToken();
603-
endOffsetsAndScopes[tokenIndex] = { endOffset: originalPreviousTokenEndOffset, scopes: originalPreviousTokenScopes, bracket: endOffsetsAndScopes[withinTokenIndex].bracket };
604-
tokenIndex++;
605-
} else {
606-
endOffsetsAndScopes[withinTokenIndex].scopes.unshift(...originalPreviousTokenScopes);
607-
}
624+
addCurrentTokenToArray(withinTokenIndex);
625+
break;
608626
}
609627
withinTokenIndex--;
610-
} while (previousTokenStartOffset > startOffset);
628+
previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
629+
previousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;
630+
} while (previousTokenEndOffset > startOffset);
611631
} else {
612632
// Just add the token to the array
613633
addCurrentTokenToArray();
@@ -640,10 +660,10 @@ export class TreeSitterTokenizationSupport extends Disposable implements ITreeSi
640660
if (!emptyTokens) {
641661
return undefined;
642662
}
643-
const endOffsetsAndScopes: { endOffset: number; scopes: string[]; metadata?: number; bracket?: boolean }[] = emptyTokens.endOffsets;
663+
const endOffsetsAndScopes: { endOffset: number; scopes: string[]; metadata?: number; bracket?: number[] }[] = emptyTokens.endOffsets;
644664
for (let i = 0; i < endOffsetsAndScopes.length; i++) {
645665
const token = endOffsetsAndScopes[i];
646-
token.metadata = findMetadata(this._colorThemeData, token.scopes, encodedLanguageId, !!token.bracket);
666+
token.metadata = findMetadata(this._colorThemeData, token.scopes, encodedLanguageId, !!token.bracket && (token.bracket.length > 0));
647667
}
648668

649669
const metadataTime = stopwatch.elapsed();

src/vs/workbench/test/electron-main/treeSitterTokenizationFeature.test.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,4 +396,20 @@ class y {
396396
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
397397
modelService.destroyModel(model.uri);
398398
});
399+
400+
test('Many nested scopes', async () => {
401+
const content = `y = new x(ttt({
402+
message: '{0} i\\n\\n [commandName]({1}).',
403+
args: ['Test', \`command:\${openSettingsCommand}?\${encodeURIComponent('["SettingName"]')}\`],
404+
// To make sure the translators don't break the link
405+
comment: ["{Locked=']({'}"]
406+
}));`;
407+
const model = await getModelAndPrepTree(content);
408+
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 6, 5), 0, 238);
409+
verifyTokens(tokens);
410+
assert.deepStrictEqual(tokens?.length, 56);
411+
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
412+
modelService.destroyModel(model.uri);
413+
});
414+
399415
});

0 commit comments

Comments
 (0)