Move tokenizationSupport inside TokenizationStateStore (#140476)

This commit is contained in:
Alex Dima
2022-01-26 15:36:46 +01:00
parent 11468e8941
commit 44f7998196
4 changed files with 143 additions and 134 deletions

View File

@@ -12,7 +12,7 @@ import * as resources from 'vs/base/common/resources';
import * as types from 'vs/base/common/types';
import { equals as equalArray } from 'vs/base/common/arrays';
import { URI } from 'vs/base/common/uri';
import { IState, ITokenizationSupport, LanguageId, TokenMetadata, TokenizationRegistry, StandardTokenType, ITokenizationSupportFactory, TokenizationResult, EncodedTokenizationResult } from 'vs/editor/common/languages';
import { IState, ITokenizationSupport, LanguageId, TokenizationRegistry, StandardTokenType, ITokenizationSupportFactory, TokenizationResult, EncodedTokenizationResult } from 'vs/editor/common/languages';
import { nullTokenizeEncoded } from 'vs/editor/common/languages/nullMode';
import { generateTokensCSSForColorMap } from 'vs/editor/common/languages/supports/tokenization';
import { ILanguageService } from 'vs/editor/common/services/language';
@@ -29,6 +29,7 @@ import { IValidGrammarDefinition, IValidEmbeddedLanguagesMap, IValidTokenTypeMap
import { TMGrammarFactory } from 'vs/workbench/services/textMate/common/TMGrammarFactory';
import { IExtensionResourceLoaderService } from 'vs/workbench/services/extensionResourceLoader/common/extensionResourceLoader';
import { IProgressService, ProgressLocation } from 'vs/platform/progress/common/progress';
import { TMTokenization } from 'vs/workbench/services/textMate/common/TMTokenization';
export abstract class AbstractTextMateService extends Disposable implements ITextMateService {
public _serviceBrand: undefined;
@@ -263,7 +264,7 @@ export abstract class AbstractTextMateService extends Disposable implements ITex
this._onDidEncounterLanguage.fire(languageId);
}
});
return new TMTokenizationSupport(languageId, encodedLanguageId, tokenization, this._configurationService);
return new TMTokenizationSupportWithLineLimit(languageId, encodedLanguageId, tokenization, this._configurationService);
} catch (err) {
onUnexpectedError(err);
return null;
@@ -396,7 +397,7 @@ export abstract class AbstractTextMateService extends Disposable implements ITex
protected abstract _loadVSCodeOnigurumWASM(): Promise<Response | ArrayBuffer>;
}
class TMTokenizationSupport implements ITokenizationSupport {
class TMTokenizationSupportWithLineLimit implements ITokenizationSupport {
private readonly _languageId: string;
private readonly _encodedLanguageId: LanguageId;
private readonly _actual: TMTokenization;
@@ -437,66 +438,6 @@ class TMTokenizationSupport implements ITokenizationSupport {
return nullTokenizeEncoded(this._encodedLanguageId, state);
}
return this._actual.tokenizeEncoded(line, state);
}
}
/**
 * Adapts a TextMate grammar to the editor's tokenization contract.
 *
 * Responsibilities visible here:
 * - delegates line tokenization to `IGrammar.tokenizeLine2` with a 500ms
 *   per-line time budget;
 * - when the grammar embeds other languages, fires `onDidEncounterLanguage`
 *   exactly once per language id the first time it appears in a token;
 * - reuses the caller's state object when the grammar's resulting rule stack
 *   is equal to it, to avoid allocating a fresh end state per line.
 */
class TMTokenization extends Disposable {
	private readonly _grammar: IGrammar;
	private readonly _containsEmbeddedLanguages: boolean;
	// Sparse array indexed by encoded LanguageId; true once the language
	// has been reported through onDidEncounterLanguage.
	private readonly _seenLanguages: boolean[];
	private readonly _initialState: StackElement;

	private readonly _onDidEncounterLanguage: Emitter<LanguageId> = this._register(new Emitter<LanguageId>());
	public readonly onDidEncounterLanguage: Event<LanguageId> = this._onDidEncounterLanguage.event;

	constructor(grammar: IGrammar, initialState: StackElement, containsEmbeddedLanguages: boolean) {
		super();
		this._grammar = grammar;
		this._initialState = initialState;
		this._containsEmbeddedLanguages = containsEmbeddedLanguages;
		this._seenLanguages = [];
	}

	public getInitialState(): IState {
		return this._initialState;
	}

	public tokenizeEncoded(line: string, state: StackElement): EncodedTokenizationResult {
		// 500ms time limit per line to keep the UI responsive on
		// pathological input.
		const textMateResult = this._grammar.tokenizeLine2(line, state, 500);

		if (textMateResult.stoppedEarly) {
			console.warn(`Time limit reached when tokenizing line: ${line.substring(0, 100)}`);
			// return the state at the beginning of the line
			return new EncodedTokenizationResult(textMateResult.tokens, state);
		}

		if (this._containsEmbeddedLanguages) {
			const seenLanguages = this._seenLanguages;
			const tokens = textMateResult.tokens;

			// Must check if any of the embedded languages was hit.
			// tokens is a flat (startIndex, metadata) pair array, so the
			// metadata of token i lives at index (i << 1) + 1.
			for (let i = 0, len = (tokens.length >>> 1); i < len; i++) {
				const metadata = tokens[(i << 1) + 1];
				const languageId = TokenMetadata.getLanguageId(metadata);

				if (!seenLanguages[languageId]) {
					seenLanguages[languageId] = true;
					this._onDidEncounterLanguage.fire(languageId);
				}
			}
		}

		let endState: StackElement;
		// try to save an object if possible
		if (state.equals(textMateResult.ruleStack)) {
			endState = state;
		} else {
			endState = textMateResult.ruleStack;
		}

		return new EncodedTokenizationResult(textMateResult.tokens, endState);
		// NOTE(review): the original rendering contained a trailing, unreachable
		// `return this._actual.tokenizeEncoded(line, hasEOL, state);` after the
		// return above — diff contamination from the sibling
		// TMTokenizationSupportWithLineLimit class (`_actual` does not exist on
		// this class). It has been removed.
	}
}

View File

@@ -11,11 +11,12 @@ import { TMGrammarFactory, ICreateGrammarResult } from 'vs/workbench/services/te
import { IModelChangedEvent, MirrorTextModel } from 'vs/editor/common/model/mirrorTextModel';
import { TextMateWorkerHost } from 'vs/workbench/services/textMate/browser/nativeTextMateService';
import { TokenizationStateStore } from 'vs/editor/common/model/textModelTokens';
import type { IGrammar, StackElement, IRawTheme, IOnigLib } from 'vscode-textmate';
import type { IRawTheme, IOnigLib } from 'vscode-textmate';
import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
import { countEOL } from 'vs/editor/common/core/eolCounter';
import { LineTokens } from 'vs/editor/common/tokens/lineTokens';
import { FileAccess } from 'vs/base/common/network';
import { TMTokenization } from 'vs/workbench/services/textMate/common/TMTokenization';
export interface IValidGrammarDefinitionDTO {
location: UriComponents;
@@ -41,21 +42,19 @@ export interface IRawModelData {
class TextMateWorkerModel extends MirrorTextModel {
private readonly _tokenizationStateStore: TokenizationStateStore;
private _tokenizationStateStore: TokenizationStateStore | null;
private readonly _worker: TextMateWorker;
private _languageId: string;
private _encodedLanguageId: LanguageId;
private _grammar: IGrammar | null;
private _isDisposed: boolean;
constructor(uri: URI, lines: string[], eol: string, versionId: number, worker: TextMateWorker, languageId: string, encodedLanguageId: LanguageId) {
super(uri, lines, eol, versionId);
this._tokenizationStateStore = new TokenizationStateStore();
this._tokenizationStateStore = null;
this._worker = worker;
this._languageId = languageId;
this._encodedLanguageId = encodedLanguageId;
this._isDisposed = false;
this._grammar = null;
this._resetTokenization();
}
@@ -72,17 +71,18 @@ class TextMateWorkerModel extends MirrorTextModel {
override onEvents(e: IModelChangedEvent): void {
super.onEvents(e);
for (let i = 0; i < e.changes.length; i++) {
const change = e.changes[i];
const [eolCount] = countEOL(change.text);
this._tokenizationStateStore.applyEdits(change.range, eolCount);
if (this._tokenizationStateStore) {
for (let i = 0; i < e.changes.length; i++) {
const change = e.changes[i];
const [eolCount] = countEOL(change.text);
this._tokenizationStateStore.applyEdits(change.range, eolCount);
}
}
this._ensureTokens();
}
private _resetTokenization(): void {
this._grammar = null;
this._tokenizationStateStore.flush(null);
this._tokenizationStateStore = null;
const languageId = this._languageId;
const encodedLanguageId = this._encodedLanguageId;
@@ -91,14 +91,18 @@ class TextMateWorkerModel extends MirrorTextModel {
return;
}
this._grammar = r.grammar;
this._tokenizationStateStore.flush(r.initialState);
if (r.grammar) {
const tokenizationSupport = new TMTokenization(r.grammar, r.initialState, false);
this._tokenizationStateStore = new TokenizationStateStore(tokenizationSupport, tokenizationSupport.getInitialState());
} else {
this._tokenizationStateStore = null;
}
this._ensureTokens();
});
}
private _ensureTokens(): void {
if (!this._grammar) {
if (!this._tokenizationStateStore) {
return;
}
const builder = new ContiguousMultilineTokensBuilder();
@@ -109,10 +113,10 @@ class TextMateWorkerModel extends MirrorTextModel {
const text = this._lines[lineIndex];
const lineStartState = this._tokenizationStateStore.getBeginState(lineIndex);
const r = this._grammar.tokenizeLine2(text, <StackElement>lineStartState!);
const r = this._tokenizationStateStore.tokenizationSupport.tokenizeEncoded(text, true, lineStartState!);
LineTokens.convertToEndOffset(r.tokens, text.length);
builder.add(lineIndex + 1, r.tokens);
this._tokenizationStateStore.setEndState(lineCount, lineIndex, r.ruleStack);
this._tokenizationStateStore.setEndState(lineCount, lineIndex, r.endState);
lineIndex = this._tokenizationStateStore.invalidLineStartIndex - 1; // -1 because the outer loop increments it
}