Skip to content

Commit

Permalink
Fix nested scopes with tree sitter (#241349)
Browse files Browse the repository at this point in the history
* Fix nested scopes with tree sitter

* Thank you tests
  • Loading branch information
alexr00 authored Feb 20, 2025
1 parent 5c820ba commit 34835fb
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ interface EndOffsetToken {
interface EndOffsetAndScopes {
endOffset: number;
scopes: string[];
bracket?: boolean;
bracket?: number[];
}

const BRACKETS = /[\{\}\[\]\<\>\(\)]/;
const BRACKETS = /[\{\}\[\]\<\>\(\)]/g;

export class TreeSitterTokenizationFeature extends Disposable implements ITreeSitterTokenizationFeature {
public _serviceBrand: undefined;
Expand Down Expand Up @@ -558,56 +558,76 @@ export class TreeSitterTokenizationSupport extends Disposable implements ITreeSi
continue;
}

const hasBracket = () => {
return !!capture.text?.match(BRACKETS) && capture.name.includes('punctuation');
const brackets = (): number[] | undefined => {
return (capture.name.includes('punctuation') && capture.text) ? Array.from(capture.text.matchAll(BRACKETS)).map(match => startOffset + match.index) : undefined;
};

const addCurrentTokenToArray = () => {
endOffsetsAndScopes[tokenIndex] = { endOffset: endOffset, scopes: [capture.name], bracket: hasBracket() };
const addCurrentTokenToArray = (position?: number) => {
if (position !== undefined) {
let oldBracket = endOffsetsAndScopes[position].bracket;
// Check that the previous token ends at the same point that the current token starts
const prevEndOffset = position > 0 ? endOffsetsAndScopes[position - 1].endOffset : 0;
if (prevEndOffset !== startOffset) {
let preInsertBracket: number[] | undefined = undefined;
if (oldBracket && oldBracket.length > 0) {
preInsertBracket = [];
const postInsertBracket: number[] = [];
for (let i = 0; i < oldBracket.length; i++) {
const bracket = oldBracket[i];
if (bracket < startOffset) {
preInsertBracket.push(bracket);
} else if (bracket > endOffset) {
postInsertBracket.push(bracket);
}
}
if (preInsertBracket.length === 0) {
preInsertBracket = undefined;
}
if (postInsertBracket.length === 0) {
oldBracket = undefined;
} else {
oldBracket = postInsertBracket;
}
}
// We need to add some of the position token to cover the space
endOffsetsAndScopes.splice(position, 0, { endOffset: startOffset, scopes: [...endOffsetsAndScopes[position].scopes], bracket: preInsertBracket });
position++;
increaseSizeOfTokensByOneToken();
tokenIndex++;
}

endOffsetsAndScopes.splice(position, 0, { endOffset: endOffset, scopes: [capture.name], bracket: brackets() });
endOffsetsAndScopes[tokenIndex].bracket = oldBracket;
} else {
endOffsetsAndScopes[tokenIndex] = { endOffset: endOffset, scopes: [capture.name], bracket: brackets() };
}
tokenIndex++;
};

if (previousEndOffset >= endOffset) {
// walk back through the tokens until we find the one that contains the current token
let withinTokenIndex = tokenIndex - 1;
let originalPreviousTokenEndOffset;
let previousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;

let previousTokenStartOffset;
let previousPreviousTokenEndOffset;
let previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
do {
originalPreviousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;
previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
previousPreviousTokenEndOffset = (withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0;

// Check that the current token doesn't just replace the last token
if ((previousTokenStartOffset + currentTokenLength) === originalPreviousTokenEndOffset) {
if ((previousTokenStartOffset + currentTokenLength) === previousTokenEndOffset) {
if (previousTokenStartOffset === startOffset) {
// Current token and previous token span the exact same characters, replace the last scope
endOffsetsAndScopes[withinTokenIndex].scopes[endOffsetsAndScopes[withinTokenIndex].scopes.length - 1] = capture.name;
endOffsetsAndScopes[withinTokenIndex].bracket = brackets();
}
} else if (previousPreviousTokenEndOffset <= startOffset) {
let originalPreviousTokenScopes;
} else if (previousTokenStartOffset <= startOffset) {
// The current token is within the previous token. Adjust the end of the previous token
if (previousPreviousTokenEndOffset !== startOffset) {
endOffsetsAndScopes[withinTokenIndex] = { endOffset: startOffset, scopes: endOffsetsAndScopes[withinTokenIndex].scopes, bracket: hasBracket() };
addCurrentTokenToArray();
originalPreviousTokenScopes = [...endOffsetsAndScopes[withinTokenIndex].scopes];
} else {
originalPreviousTokenScopes = [...endOffsetsAndScopes[withinTokenIndex].scopes];
endOffsetsAndScopes[withinTokenIndex] = { endOffset: endOffset, scopes: [capture.name], bracket: hasBracket() };
}

// Add the rest of the previous token after the current token
if (originalPreviousTokenEndOffset !== endOffset) {
increaseSizeOfTokensByOneToken();
endOffsetsAndScopes[tokenIndex] = { endOffset: originalPreviousTokenEndOffset, scopes: originalPreviousTokenScopes, bracket: endOffsetsAndScopes[withinTokenIndex].bracket };
tokenIndex++;
} else {
endOffsetsAndScopes[withinTokenIndex].scopes.unshift(...originalPreviousTokenScopes);
}
addCurrentTokenToArray(withinTokenIndex);
break;
}
withinTokenIndex--;
} while (previousTokenStartOffset > startOffset);
previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
previousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;
} while (previousTokenEndOffset > startOffset);
} else {
// Just add the token to the array
addCurrentTokenToArray();
Expand Down Expand Up @@ -640,10 +660,10 @@ export class TreeSitterTokenizationSupport extends Disposable implements ITreeSi
if (!emptyTokens) {
return undefined;
}
const endOffsetsAndScopes: { endOffset: number; scopes: string[]; metadata?: number; bracket?: boolean }[] = emptyTokens.endOffsets;
const endOffsetsAndScopes: { endOffset: number; scopes: string[]; metadata?: number; bracket?: number[] }[] = emptyTokens.endOffsets;
for (let i = 0; i < endOffsetsAndScopes.length; i++) {
const token = endOffsetsAndScopes[i];
token.metadata = findMetadata(this._colorThemeData, token.scopes, encodedLanguageId, !!token.bracket);
token.metadata = findMetadata(this._colorThemeData, token.scopes, encodedLanguageId, !!token.bracket && (token.bracket.length > 0));
}

const metadataTime = stopwatch.elapsed();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,4 +396,20 @@ class y {
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
modelService.destroyModel(model.uri);
});

test('Many nested scopes', async () => {
const content = `y = new x(ttt({
message: '{0} i\\n\\n [commandName]({1}).',
args: ['Test', \`command:\${openSettingsCommand}?\${encodeURIComponent('["SettingName"]')}\`],
// To make sure the translators don't break the link
comment: ["{Locked=']({'}"]
}));`;
const model = await getModelAndPrepTree(content);
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 6, 5), 0, 238);
verifyTokens(tokens);
assert.deepStrictEqual(tokens?.length, 56);
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
modelService.destroyModel(model.uri);
});

});

0 comments on commit 34835fb

Please sign in to comment.