Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix nested scopes with tree sitter #241349

Merged
merged 2 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ interface EndOffsetToken {
interface EndOffsetAndScopes {
endOffset: number;
scopes: string[];
bracket?: boolean;
bracket?: number[];
}

const BRACKETS = /[\{\}\[\]\<\>\(\)]/;
const BRACKETS = /[\{\}\[\]\<\>\(\)]/g;

export class TreeSitterTokenizationFeature extends Disposable implements ITreeSitterTokenizationFeature {
public _serviceBrand: undefined;
Expand Down Expand Up @@ -558,56 +558,75 @@ export class TreeSitterTokenizationSupport extends Disposable implements ITreeSi
continue;
}

const hasBracket = () => {
return !!capture.text?.match(BRACKETS) && capture.name.includes('punctuation');
const brackets = (): number[] | undefined => {
return (capture.name.includes('punctuation') && capture.text) ? Array.from(capture.text.matchAll(BRACKETS)).map(match => startOffset + match.index) : undefined;
};

const addCurrentTokenToArray = () => {
endOffsetsAndScopes[tokenIndex] = { endOffset: endOffset, scopes: [capture.name], bracket: hasBracket() };
const addCurrentTokenToArray = (position?: number) => {
if (position !== undefined) {
let oldBracket = endOffsetsAndScopes[position].bracket;
// Check that the previous token ends at the same point that the current token starts
if ((position > 0) && (endOffsetsAndScopes[position - 1].endOffset !== startOffset)) {
let preInsertBracket: number[] | undefined = undefined;
if (oldBracket && oldBracket.length > 0) {
preInsertBracket = [];
const postInsertBracket: number[] = [];
for (let i = 0; i < oldBracket.length; i++) {
const bracket = oldBracket[i];
if (bracket < startOffset) {
preInsertBracket.push(bracket);
} else if (bracket > endOffset) {
postInsertBracket.push(bracket);
}
}
if (preInsertBracket.length === 0) {
preInsertBracket = undefined;
}
if (postInsertBracket.length === 0) {
oldBracket = undefined;
} else {
oldBracket = postInsertBracket;
}
}
// We need to add some of the position token to cover the space
endOffsetsAndScopes.splice(position, 0, { endOffset: startOffset, scopes: [...endOffsetsAndScopes[position].scopes], bracket: preInsertBracket });
position++;
increaseSizeOfTokensByOneToken();
tokenIndex++;
}

endOffsetsAndScopes.splice(position, 0, { endOffset: endOffset, scopes: [capture.name], bracket: brackets() });
endOffsetsAndScopes[tokenIndex].bracket = oldBracket;
} else {
endOffsetsAndScopes[tokenIndex] = { endOffset: endOffset, scopes: [capture.name], bracket: brackets() };
}
tokenIndex++;
};

if (previousEndOffset >= endOffset) {
// walk back through the tokens until we find the one that contains the current token
let withinTokenIndex = tokenIndex - 1;
let originalPreviousTokenEndOffset;
let previousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;

let previousTokenStartOffset;
let previousPreviousTokenEndOffset;
let previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
do {
originalPreviousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;
previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
previousPreviousTokenEndOffset = (withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0;

// Check that the current token doesn't just replace the last token
if ((previousTokenStartOffset + currentTokenLength) === originalPreviousTokenEndOffset) {
if ((previousTokenStartOffset + currentTokenLength) === previousTokenEndOffset) {
if (previousTokenStartOffset === startOffset) {
// Current token and previous token span the exact same characters, replace the last scope
endOffsetsAndScopes[withinTokenIndex].scopes[endOffsetsAndScopes[withinTokenIndex].scopes.length - 1] = capture.name;
endOffsetsAndScopes[withinTokenIndex].bracket = brackets();
}
} else if (previousPreviousTokenEndOffset <= startOffset) {
let originalPreviousTokenScopes;
} else if (previousTokenStartOffset <= startOffset) {
// The current token is within the previous token. Adjust the end of the previous token
if (previousPreviousTokenEndOffset !== startOffset) {
endOffsetsAndScopes[withinTokenIndex] = { endOffset: startOffset, scopes: endOffsetsAndScopes[withinTokenIndex].scopes, bracket: hasBracket() };
addCurrentTokenToArray();
originalPreviousTokenScopes = [...endOffsetsAndScopes[withinTokenIndex].scopes];
} else {
originalPreviousTokenScopes = [...endOffsetsAndScopes[withinTokenIndex].scopes];
endOffsetsAndScopes[withinTokenIndex] = { endOffset: endOffset, scopes: [capture.name], bracket: hasBracket() };
}

// Add the rest of the previous token after the current token
if (originalPreviousTokenEndOffset !== endOffset) {
increaseSizeOfTokensByOneToken();
endOffsetsAndScopes[tokenIndex] = { endOffset: originalPreviousTokenEndOffset, scopes: originalPreviousTokenScopes, bracket: endOffsetsAndScopes[withinTokenIndex].bracket };
tokenIndex++;
} else {
endOffsetsAndScopes[withinTokenIndex].scopes.unshift(...originalPreviousTokenScopes);
}
addCurrentTokenToArray(withinTokenIndex);
break;
}
withinTokenIndex--;
} while (previousTokenStartOffset > startOffset);
previousTokenStartOffset = ((withinTokenIndex >= 2) ? endOffsetsAndScopes[withinTokenIndex - 1].endOffset : 0);
previousTokenEndOffset = endOffsetsAndScopes[withinTokenIndex].endOffset;
} while (previousTokenEndOffset > startOffset);
} else {
// Just add the token to the array
addCurrentTokenToArray();
Expand Down Expand Up @@ -640,10 +659,10 @@ export class TreeSitterTokenizationSupport extends Disposable implements ITreeSi
if (!emptyTokens) {
return undefined;
}
const endOffsetsAndScopes: { endOffset: number; scopes: string[]; metadata?: number; bracket?: boolean }[] = emptyTokens.endOffsets;
const endOffsetsAndScopes: { endOffset: number; scopes: string[]; metadata?: number; bracket?: number[] }[] = emptyTokens.endOffsets;
for (let i = 0; i < endOffsetsAndScopes.length; i++) {
const token = endOffsetsAndScopes[i];
token.metadata = findMetadata(this._colorThemeData, token.scopes, encodedLanguageId, !!token.bracket);
token.metadata = findMetadata(this._colorThemeData, token.scopes, encodedLanguageId, !!token.bracket && (token.bracket.length > 0));
}

const metadataTime = stopwatch.elapsed();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,4 +396,20 @@ class y {
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
modelService.destroyModel(model.uri);
});

// Regression test for tokenizing deeply nested captures: a call expression wrapping an
// object literal whose values contain string literals, an array, a template string with
// `${...}` substitutions, a line comment, and bracket characters inside strings.
test('Many nested scopes', async () => {
// The fixture text is part of the assertion below (its length is compared against the
// summed token sizes), so it must not be reformatted.
const content = `y = new x(ttt({
message: '{0} i\\n\\n [commandName]({1}).',
args: ['Test', \`command:\${openSettingsCommand}?\${encodeURIComponent('["SettingName"]')}\`],
// To make sure the translators don't break the link
comment: ["{Locked=']({'}"]
}));`;
const model = await getModelAndPrepTree(content);
// Requests tokens for line 1, column 1 through line 6, column 5.
// NOTE(review): the trailing 0 and 238 are presumably the start and end offsets of that range — confirm against getTokensInRange's signature.
const tokens = treeSitterTokenizationSupport.getTokensInRange(model, new Range(1, 1, 6, 5), 0, 238);
verifyTokens(tokens);
// Pins the exact number of tokens expected for this fixture.
assert.deepStrictEqual(tokens?.length, 56);
// The size reported by tokensContentSize for the tokens must equal the fixture's length.
assert.deepStrictEqual(tokensContentSize(tokens), content.length);
modelService.destroyModel(model.uri);
});

});
Loading