Move tokenizationSupport inside TokenizationStateStore (#140476)

This commit is contained in:
Alex Dima
2022-01-26 15:36:46 +01:00
parent 11468e8941
commit 44f7998196
4 changed files with 143 additions and 134 deletions

View File

@@ -12,7 +12,7 @@ import * as resources from 'vs/base/common/resources';
import * as types from 'vs/base/common/types';
import { equals as equalArray } from 'vs/base/common/arrays';
import { URI } from 'vs/base/common/uri';
import { IState, ITokenizationSupport, LanguageId, TokenMetadata, TokenizationRegistry, StandardTokenType, ITokenizationSupportFactory, TokenizationResult, EncodedTokenizationResult } from 'vs/editor/common/languages';
import { IState, ITokenizationSupport, LanguageId, TokenizationRegistry, StandardTokenType, ITokenizationSupportFactory, TokenizationResult, EncodedTokenizationResult } from 'vs/editor/common/languages';
import { nullTokenizeEncoded } from 'vs/editor/common/languages/nullMode';
import { generateTokensCSSForColorMap } from 'vs/editor/common/languages/supports/tokenization';
import { ILanguageService } from 'vs/editor/common/services/language';
@@ -29,6 +29,7 @@ import { IValidGrammarDefinition, IValidEmbeddedLanguagesMap, IValidTokenTypeMap
import { TMGrammarFactory } from 'vs/workbench/services/textMate/common/TMGrammarFactory';
import { IExtensionResourceLoaderService } from 'vs/workbench/services/extensionResourceLoader/common/extensionResourceLoader';
import { IProgressService, ProgressLocation } from 'vs/platform/progress/common/progress';
import { TMTokenization } from 'vs/workbench/services/textMate/common/TMTokenization';
export abstract class AbstractTextMateService extends Disposable implements ITextMateService {
public _serviceBrand: undefined;
@@ -263,7 +264,7 @@ export abstract class AbstractTextMateService extends Disposable implements ITex
this._onDidEncounterLanguage.fire(languageId);
}
});
return new TMTokenizationSupport(languageId, encodedLanguageId, tokenization, this._configurationService);
return new TMTokenizationSupportWithLineLimit(languageId, encodedLanguageId, tokenization, this._configurationService);
} catch (err) {
onUnexpectedError(err);
return null;
@@ -396,7 +397,7 @@ export abstract class AbstractTextMateService extends Disposable implements ITex
protected abstract _loadVSCodeOnigurumWASM(): Promise<Response | ArrayBuffer>;
}
class TMTokenizationSupport implements ITokenizationSupport {
class TMTokenizationSupportWithLineLimit implements ITokenizationSupport {
private readonly _languageId: string;
private readonly _encodedLanguageId: LanguageId;
private readonly _actual: TMTokenization;
@@ -437,66 +438,6 @@ class TMTokenizationSupport implements ITokenizationSupport {
return nullTokenizeEncoded(this._encodedLanguageId, state);
}
return this._actual.tokenizeEncoded(line, state);
}
}
/**
 * Adapts a TextMate grammar to the editor's tokenization contract.
 *
 * Responsibilities visible here:
 * - delegates line tokenization to `IGrammar.tokenizeLine2` with a 500ms
 *   per-line time budget;
 * - when the grammar embeds other languages, fires `onDidEncounterLanguage`
 *   exactly once per language id the first time it appears in a token;
 * - reuses the caller's state object when the grammar's resulting rule stack
 *   is equal to it, to avoid allocating a fresh end state per line.
 */
class TMTokenization extends Disposable {
	private readonly _grammar: IGrammar;
	private readonly _containsEmbeddedLanguages: boolean;
	// Sparse array indexed by encoded LanguageId; true once the language
	// has been reported through onDidEncounterLanguage.
	private readonly _seenLanguages: boolean[];
	private readonly _initialState: StackElement;

	private readonly _onDidEncounterLanguage: Emitter<LanguageId> = this._register(new Emitter<LanguageId>());
	public readonly onDidEncounterLanguage: Event<LanguageId> = this._onDidEncounterLanguage.event;

	constructor(grammar: IGrammar, initialState: StackElement, containsEmbeddedLanguages: boolean) {
		super();
		this._grammar = grammar;
		this._initialState = initialState;
		this._containsEmbeddedLanguages = containsEmbeddedLanguages;
		this._seenLanguages = [];
	}

	public getInitialState(): IState {
		return this._initialState;
	}

	public tokenizeEncoded(line: string, state: StackElement): EncodedTokenizationResult {
		// 500ms time limit per line to keep the UI responsive on
		// pathological input.
		const textMateResult = this._grammar.tokenizeLine2(line, state, 500);

		if (textMateResult.stoppedEarly) {
			console.warn(`Time limit reached when tokenizing line: ${line.substring(0, 100)}`);
			// return the state at the beginning of the line
			return new EncodedTokenizationResult(textMateResult.tokens, state);
		}

		if (this._containsEmbeddedLanguages) {
			const seenLanguages = this._seenLanguages;
			const tokens = textMateResult.tokens;

			// Must check if any of the embedded languages was hit.
			// tokens is a flat (startIndex, metadata) pair array, so the
			// metadata of token i lives at index (i << 1) + 1.
			for (let i = 0, len = (tokens.length >>> 1); i < len; i++) {
				const metadata = tokens[(i << 1) + 1];
				const languageId = TokenMetadata.getLanguageId(metadata);

				if (!seenLanguages[languageId]) {
					seenLanguages[languageId] = true;
					this._onDidEncounterLanguage.fire(languageId);
				}
			}
		}

		let endState: StackElement;
		// try to save an object if possible
		if (state.equals(textMateResult.ruleStack)) {
			endState = state;
		} else {
			endState = textMateResult.ruleStack;
		}

		return new EncodedTokenizationResult(textMateResult.tokens, endState);
		// NOTE(review): the original rendering contained a trailing, unreachable
		// `return this._actual.tokenizeEncoded(line, hasEOL, state);` after the
		// return above — diff contamination from the sibling
		// TMTokenizationSupportWithLineLimit class (`_actual` does not exist on
		// this class). It has been removed.
	}
}

View File

@@ -11,11 +11,12 @@ import { TMGrammarFactory, ICreateGrammarResult } from 'vs/workbench/services/te
import { IModelChangedEvent, MirrorTextModel } from 'vs/editor/common/model/mirrorTextModel';
import { TextMateWorkerHost } from 'vs/workbench/services/textMate/browser/nativeTextMateService';
import { TokenizationStateStore } from 'vs/editor/common/model/textModelTokens';
import type { IGrammar, StackElement, IRawTheme, IOnigLib } from 'vscode-textmate';
import type { IRawTheme, IOnigLib } from 'vscode-textmate';
import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
import { countEOL } from 'vs/editor/common/core/eolCounter';
import { LineTokens } from 'vs/editor/common/tokens/lineTokens';
import { FileAccess } from 'vs/base/common/network';
import { TMTokenization } from 'vs/workbench/services/textMate/common/TMTokenization';
export interface IValidGrammarDefinitionDTO {
location: UriComponents;
@@ -41,21 +42,19 @@ export interface IRawModelData {
class TextMateWorkerModel extends MirrorTextModel {
private readonly _tokenizationStateStore: TokenizationStateStore;
private _tokenizationStateStore: TokenizationStateStore | null;
private readonly _worker: TextMateWorker;
private _languageId: string;
private _encodedLanguageId: LanguageId;
private _grammar: IGrammar | null;
private _isDisposed: boolean;
constructor(uri: URI, lines: string[], eol: string, versionId: number, worker: TextMateWorker, languageId: string, encodedLanguageId: LanguageId) {
super(uri, lines, eol, versionId);
this._tokenizationStateStore = new TokenizationStateStore();
this._tokenizationStateStore = null;
this._worker = worker;
this._languageId = languageId;
this._encodedLanguageId = encodedLanguageId;
this._isDisposed = false;
this._grammar = null;
this._resetTokenization();
}
@@ -72,17 +71,18 @@ class TextMateWorkerModel extends MirrorTextModel {
override onEvents(e: IModelChangedEvent): void {
super.onEvents(e);
for (let i = 0; i < e.changes.length; i++) {
const change = e.changes[i];
const [eolCount] = countEOL(change.text);
this._tokenizationStateStore.applyEdits(change.range, eolCount);
if (this._tokenizationStateStore) {
for (let i = 0; i < e.changes.length; i++) {
const change = e.changes[i];
const [eolCount] = countEOL(change.text);
this._tokenizationStateStore.applyEdits(change.range, eolCount);
}
}
this._ensureTokens();
}
private _resetTokenization(): void {
this._grammar = null;
this._tokenizationStateStore.flush(null);
this._tokenizationStateStore = null;
const languageId = this._languageId;
const encodedLanguageId = this._encodedLanguageId;
@@ -91,14 +91,18 @@ class TextMateWorkerModel extends MirrorTextModel {
return;
}
this._grammar = r.grammar;
this._tokenizationStateStore.flush(r.initialState);
if (r.grammar) {
const tokenizationSupport = new TMTokenization(r.grammar, r.initialState, false);
this._tokenizationStateStore = new TokenizationStateStore(tokenizationSupport, tokenizationSupport.getInitialState());
} else {
this._tokenizationStateStore = null;
}
this._ensureTokens();
});
}
private _ensureTokens(): void {
if (!this._grammar) {
if (!this._tokenizationStateStore) {
return;
}
const builder = new ContiguousMultilineTokensBuilder();
@@ -109,10 +113,10 @@ class TextMateWorkerModel extends MirrorTextModel {
const text = this._lines[lineIndex];
const lineStartState = this._tokenizationStateStore.getBeginState(lineIndex);
const r = this._grammar.tokenizeLine2(text, <StackElement>lineStartState!);
const r = this._tokenizationStateStore.tokenizationSupport.tokenizeEncoded(text, true, lineStartState!);
LineTokens.convertToEndOffset(r.tokens, text.length);
builder.add(lineIndex + 1, r.tokens);
this._tokenizationStateStore.setEndState(lineCount, lineIndex, r.ruleStack);
this._tokenizationStateStore.setEndState(lineCount, lineIndex, r.endState);
lineIndex = this._tokenizationStateStore.invalidLineStartIndex - 1; // -1 because the outer loop increments it
}