diff --git a/build/gulpfile.hygiene.js b/build/gulpfile.hygiene.js index b49bc069366..f6594804322 100644 --- a/build/gulpfile.hygiene.js +++ b/build/gulpfile.hygiene.js @@ -119,7 +119,8 @@ const copyrightFilter = [ '!resources/completions/**', '!extensions/markdown-language-features/media/highlight.css', '!extensions/html-language-features/server/src/modes/typescript/*', - '!extensions/*/server/bin/*' + '!extensions/*/server/bin/*', + '!src/vs/editor/test/node/classification/typescript-test.ts', ]; const eslintFilter = [ diff --git a/extensions/typescript-basics/package.json b/extensions/typescript-basics/package.json index f5532d15dd3..b30299e378c 100644 --- a/extensions/typescript-basics/package.json +++ b/extensions/typescript-basics/package.json @@ -53,13 +53,7 @@ { "language": "typescript", "scopeName": "source.ts", - "path": "./syntaxes/TypeScript.tmLanguage.json", - "tokenTypes": { - "entity.name.type.instance.jsdoc": "other", - "entity.name.function.tagged-template": "other", - "meta.import string.quoted": "other", - "variable.other.jsdoc": "other" - } + "path": "./syntaxes/TypeScript.tmLanguage.json" }, { "language": "typescriptreact", diff --git a/src/vs/editor/common/model/textModelTokens.ts b/src/vs/editor/common/model/textModelTokens.ts index b959ba7a1b6..4fe36090b86 100644 --- a/src/vs/editor/common/model/textModelTokens.ts +++ b/src/vs/editor/common/model/textModelTokens.ts @@ -117,6 +117,9 @@ export class TokenizationStateStore { if (deleteCount === 0) { return; } + if (start + deleteCount > this._len) { + deleteCount = this._len - start; + } this._beginState.splice(start, deleteCount); this._valid.splice(start, deleteCount); this._len -= deleteCount; diff --git a/src/vs/editor/common/model/tokensStore.ts b/src/vs/editor/common/model/tokensStore.ts index 8377ce07c3a..e0057c597ad 100644 --- a/src/vs/editor/common/model/tokensStore.ts +++ b/src/vs/editor/common/model/tokensStore.ts @@ -116,6 +116,9 @@ export class TokensStore { if (deleteCount === 0) 
/**
 * Matches when the text immediately before a `/` ends with an identifier or
 * number character (optionally followed by whitespace). In that position the
 * slash is treated as a division operator rather than the start of a regex.
 */
const ENDS_WITH_IDENTIFIER_CHAR = /[a-zA-Z0-9](\s*)$/;

/**
 * Matches when the text following a regex candidate starts (after optional
 * whitespace) with one of `. ; / , ) ] }` or is the end of the input.
 */
const FOLLOWED_BY_OPERATOR = /^(\s*)(\.|;|\/|,|\)|\]|\}|$)/;

/** How many characters before a `/` are inspected to decide whether it can start a regex. */
const REGEX_LOOK_BEHIND = 100;

/**
 * Cursor over the text being classified, together with the tokens emitted so far.
 *
 * `tokens` is a flat array of triples: start offset, length, token type.
 */
class ParserContext {
	public readonly text: string;
	public readonly len: number;
	public readonly tokens: number[];
	public pos: number;

	private currentTokenStartOffset: number;
	private currentTokenType: StandardTokenType;

	constructor(text: string) {
		this.text = text;
		this.len = this.text.length;
		this.tokens = [];
		this.pos = 0;
		this.currentTokenStartOffset = 0;
		this.currentTokenType = StandardTokenType.Other;
	}

	/** Returns the char code at `index`, or `CharCode.Null` when `index` is outside the text. */
	private _safeCharCodeAt(index: number): number {
		if (index < 0 || index >= this.len) {
			return CharCode.Null;
		}
		return this.text.charCodeAt(index);
	}

	/** Returns the char code `distance` characters after the cursor without moving it. */
	peek(distance: number = 0): number {
		return this._safeCharCodeAt(this.pos + distance);
	}

	/** Returns the char code at the cursor and moves the cursor one character forward. */
	next(): number {
		const result = this._safeCharCodeAt(this.pos);
		// never move past the end, otherwise tokens could cover non-existing text
		if (this.pos < this.len) {
			this.pos++;
		}
		return result;
	}

	/** Moves the cursor forward by `distance` characters, stopping at the end of the text. */
	advance(distance: number): void {
		// clamp: e.g. a trailing backslash asks to skip a character that does not exist
		this.pos = Math.min(this.len, this.pos + distance);
	}

	/** Returns true once the cursor has reached the end of the text. */
	eof(): boolean {
		return this.pos >= this.len;
	}

	/** Starts a token of type `tokenType` at the cursor (shifted by `deltaPos`). */
	beginToken(tokenType: StandardTokenType, deltaPos: number = 0): void {
		this.currentTokenStartOffset = this.pos + deltaPos;
		this.currentTokenType = tokenType;
	}

	/**
	 * Ends the current token at the cursor (shifted by `deltaPos`) and records it.
	 * A token that touches the previous one and has the same type is merged into it.
	 */
	endToken(deltaPos: number = 0): void {
		const length = this.pos + deltaPos - this.currentTokenStartOffset;
		const count = this.tokens.length;
		if (count > 0) {
			const previousEndOffset = this.tokens[count - 3] + this.tokens[count - 2];
			const previousTokenType = this.tokens[count - 1];
			if (this.currentTokenStartOffset === previousEndOffset && previousTokenType === this.currentTokenType) {
				// extend the previous token instead of pushing a new one
				this.tokens[count - 2] += length;
				return;
			}
		}
		this.tokens.push(this.currentTokenStartOffset, length, this.currentTokenType);
	}
}

/**
 * Classifies the strings, comments and regular expression literals in `text`.
 *
 * @param text The source text to scan.
 * @returns A flat array of triples (start offset, length, token type), in
 * increasing offset order. Text not covered by a triple was not classified.
 */
export function parse(text: string): number[] {
	const ctx = new ParserContext(text);
	while (!ctx.eof()) {
		// parseRoot returns early on an unmatched `}`; keep going until all text is consumed
		parseRoot(ctx);
	}
	return ctx.tokens;
}

/**
 * Scans code (as opposed to the inside of a string or comment) and dispatches
 * to the specialised parsers. Returns at the end of the text or right after
 * consuming a `}` that has no matching `{` within this call, which is how a
 * template literal substitution hands control back to its string.
 */
function parseRoot(ctx: ParserContext): void {
	let curlyCount = 0;
	while (!ctx.eof()) {
		const ch = ctx.peek();

		switch (ch) {
			case CharCode.SingleQuote:
				parseSimpleString(ctx, CharCode.SingleQuote);
				break;
			case CharCode.DoubleQuote:
				parseSimpleString(ctx, CharCode.DoubleQuote);
				break;
			case CharCode.BackTick:
				parseInterpolatedString(ctx);
				break;
			case CharCode.Slash:
				parseSlash(ctx);
				break;
			case CharCode.OpenCurlyBrace:
				ctx.advance(1);
				curlyCount++;
				break;
			case CharCode.CloseCurlyBrace:
				ctx.advance(1);
				curlyCount--;
				if (curlyCount < 0) {
					return;
				}
				break;
			default:
				ctx.advance(1);
		}
	}
}

/**
 * Consumes a single- or double-quoted string starting at the cursor and emits
 * it as a String token. An unterminated string runs to the end of the text.
 */
function parseSimpleString(ctx: ParserContext, closingQuote: number): void {
	ctx.beginToken(StandardTokenType.String);

	// skip the opening quote
	ctx.advance(1);

	while (!ctx.eof()) {
		const ch = ctx.next();
		if (ch === CharCode.Backslash) {
			// skip \r\n or any other character following a backslash
			const advanceCount = (ctx.peek() === CharCode.CarriageReturn && ctx.peek(1) === CharCode.LineFeed ? 2 : 1);
			ctx.advance(advanceCount);
		} else if (ch === closingQuote) {
			// hit end quote, so stop
			break;
		}
	}

	ctx.endToken();
}

/**
 * Consumes a back-tick string starting at the cursor. The literal parts are
 * emitted as String tokens; the contents of each `${ ... }` substitution are
 * scanned as code via `parseRoot`.
 */
function parseInterpolatedString(ctx: ParserContext): void {
	ctx.beginToken(StandardTokenType.String);

	// skip the opening quote
	ctx.advance(1);

	while (!ctx.eof()) {
		const ch = ctx.next();
		if (ch === CharCode.Backslash) {
			// skip \r\n or any other character following a backslash
			const advanceCount = (ctx.peek() === CharCode.CarriageReturn && ctx.peek(1) === CharCode.LineFeed ? 2 : 1);
			ctx.advance(advanceCount);
		} else if (ch === CharCode.BackTick) {
			// hit end quote, so stop
			break;
		} else if (ch === CharCode.DollarSign) {
			if (ctx.peek() === CharCode.OpenCurlyBrace) {
				// the string part ends after `${`
				ctx.advance(1);
				ctx.endToken();
				parseRoot(ctx);
				// the string resumes at the `}` that parseRoot just consumed
				ctx.beginToken(StandardTokenType.String, -1);
			}
		}
	}

	ctx.endToken();
}

/**
 * Handles a `/` at the cursor: a multi-line comment, a single-line comment, a
 * regular expression literal, or otherwise a plain operator that is skipped.
 */
function parseSlash(ctx: ParserContext): void {
	const nextCh = ctx.peek(1);
	if (nextCh === CharCode.Asterisk) {
		parseMultiLineComment(ctx);
		return;
	}

	if (nextCh === CharCode.Slash) {
		parseSingleLineComment(ctx);
		return;
	}

	if (tryParseRegex(ctx)) {
		return;
	}

	ctx.advance(1);
}

/**
 * Tries to consume a regular expression literal starting at the `/` under the
 * cursor. The decision is heuristic: the slash must not follow an identifier
 * or number, the literal must close on the same line, and it must be followed
 * by one of a few punctuation characters or the end of the text.
 *
 * See https://www.ecma-international.org/ecma-262/10.0/index.html#prod-RegularExpressionLiteral
 *
 * @returns true when a RegEx token was emitted and the cursor moved past it;
 * false when the slash is not a regex (the cursor is left untouched).
 */
function tryParseRegex(ctx: ParserContext): boolean {
	// TODO: avoid regex...
	// Only look at text *before* the slash. The start is clamped because a
	// negative start would make the look-behind read from the end of the text.
	const contentBefore = ctx.text.substring(Math.max(0, ctx.pos - REGEX_LOOK_BEHIND), ctx.pos);
	if (ENDS_WITH_IDENTIFIER_CHAR.test(contentBefore)) {
		// Cannot start after an identifier
		return false;
	}

	const len = ctx.len - ctx.pos;
	let inClass = false;

	// skip the opening /
	let pos = 1;

	while (pos < len) {
		const ch = ctx.peek(pos++);

		if (ch === CharCode.CarriageReturn || ch === CharCode.LineFeed) {
			return false;
		}

		if (ch === CharCode.Backslash) {
			// inspect the escaped character (at `pos`, not at the cursor)
			const escapedCh = ctx.peek(pos);
			if (escapedCh === CharCode.CarriageReturn || escapedCh === CharCode.LineFeed) {
				return false;
			}
			// skip the escaped character
			pos++;
			continue;
		}

		if (inClass) {
			// inside [...] a slash does not terminate the literal
			if (ch === CharCode.CloseSquareBracket) {
				inClass = false;
			}
			continue;
		}

		if (ch === CharCode.OpenSquareBracket) {
			inClass = true;
			continue;
		}

		if (ch !== CharCode.Slash) {
			continue;
		}

		// found the closing slash; it cannot be directly followed by a /
		if (ctx.peek(pos) === CharCode.Slash) {
			return false;
		}

		// consume flags
		while (true) {
			const flagCh = ctx.peek(pos);
			if (flagCh >= CharCode.a && flagCh <= CharCode.z) {
				pos++;
			} else {
				break;
			}
		}

		// TODO: avoid regex...
		if (FOLLOWED_BY_OPERATOR.test(ctx.text.substring(ctx.pos + pos))) {
			// Must be followed by an operator of kinds
			ctx.beginToken(StandardTokenType.RegEx);
			ctx.advance(pos);
			ctx.endToken();
			return true;
		}

		return false;
	}

	return false;
}

/**
 * Consumes a multi-line comment starting at the cursor and emits it as a
 * Comment token. An unterminated comment runs to the end of the text.
 */
function parseMultiLineComment(ctx: ParserContext): void {
	ctx.beginToken(StandardTokenType.Comment);

	// skip the /*
	ctx.advance(2);

	while (!ctx.eof()) {
		const ch = ctx.next();
		if (ch === CharCode.Asterisk) {
			if (ctx.peek() === CharCode.Slash) {
				ctx.advance(1);
				break;
			}
		}
	}

	ctx.endToken();
}

/**
 * Consumes a `//` comment starting at the cursor and emits it as a Comment
 * token. The first line break character that ends the comment is part of the
 * token.
 */
function parseSingleLineComment(ctx: ParserContext): void {
	ctx.beginToken(StandardTokenType.Comment);

	// skip the //
	ctx.advance(2);

	while (!ctx.eof()) {
		const ch = ctx.next();
		if (ch === CharCode.CarriageReturn || ch === CharCode.LineFeed) {
			break;
		}
	}

	ctx.endToken();
}
extractTokenTypes(model); + const expected = extractTokenTypes(model); + + const sw2 = new StopWatch(true); + const actual = parse(model.getValue()); + sw2.stop(); + console.log(`classification took ${sw2.elapsed()}`); + + let expectedIndex = 0, expectedCount = expected.length / 3; + let actualIndex = 0, actualCount = actual.length / 3; + outer: while (expectedIndex < expectedCount && actualIndex < actualCount) { + const expectedOffset = expected[3 * expectedIndex]; + const expectedLength = expected[3 * expectedIndex + 1]; + const expectedType = expected[3 * expectedIndex + 2]; + const actualOffset = actual[3 * actualIndex]; + const actualLength = actual[3 * actualIndex + 1]; + const actualType = actual[3 * actualIndex + 2]; + + // TS breaks up comments or begins them before (in case of whitespace)... + if (actualType === StandardTokenType.Comment && expectedOffset <= actualOffset && expectedType === actualType) { + const actualEndOffset = actualOffset + actualLength; + while (expectedIndex < expectedCount && expected[3 * expectedIndex] + expected[3 * expectedIndex + 1] <= actualEndOffset) { + // console.log(`(Fuzzy match):`); + // console.log(`--- Expected: ${model.getPositionAt(expected[3 * expectedIndex])} - ${expected[3 * expectedIndex]}, ${expected[3 * expectedIndex + 1]}, ${expected[3 * expectedIndex + 2]}`); + // console.log(`--- Actual: ${model.getPositionAt(actualOffset)} - ${actualOffset}, ${actualLength}, ${actualType}`); + expectedIndex++; + } + actualIndex++; + continue; + } + + // TS identifies regexes as strings and begins them before (in case of whitespace)... 
+ if (actualType === StandardTokenType.RegEx && expectedOffset <= actualOffset && expectedType === StandardTokenType.String) { + const actualEndOffset = actualOffset + actualLength; + while (expectedIndex < expectedCount && expected[3 * expectedIndex] + expected[3 * expectedIndex + 1] <= actualEndOffset) { + expectedIndex++; + } + actualIndex++; + continue; + } + + if (actualType === StandardTokenType.String && expectedType === actualType) { + const actualEndOffset = actualOffset + actualLength; + while (expectedIndex < expectedCount && expected[3 * expectedIndex] + expected[3 * expectedIndex + 1] <= actualEndOffset) { + // console.log(`(Fuzzy match):`); + // console.log(`--- Expected: ${model.getPositionAt(expected[3 * expectedIndex])} - ${expected[3 * expectedIndex]}, ${expected[3 * expectedIndex + 1]}, ${expected[3 * expectedIndex + 2]}`); + // console.log(`--- Actual: ${model.getPositionAt(actualOffset)} - ${actualOffset}, ${actualLength}, ${actualType}`); + expectedIndex++; + } + actualIndex++; + continue; + } + + if (expectedOffset === actualOffset && expectedLength === actualLength && expectedType === actualType) { + expectedIndex++; + actualIndex++; + continue; + } + + const expectedPosition = model.getPositionAt(expectedOffset); + console.error(`Missmatch at position: ${expectedPosition}`); + console.error(`Expected: ${model.getPositionAt(expectedOffset)} - ${expectedOffset}, ${expectedLength}, ${expectedType}`); + console.error(`Actual: ${model.getPositionAt(actualOffset)} - ${actualOffset}, ${actualLength}, ${actualType}`); + break; + } + + if (expectedIndex !== expectedCount || actualIndex !== actualCount) { + console.error(`Missmatch at the end`); + } + + console.log(`Finished comparison!`); } } } -function extractTokenTypes(model: ITextModel): void { +function extractTokenTypes(model: ITextModel): number[] { const eolLength = model.getEOL().length; let result: number[] = []; let resultLen: number = 0; @@ -46,6 +116,7 @@ function 
extractTokenTypes(model: ITextModel): void { let offset = 0; for (let lineNumber = 1, lineCount = model.getLineCount(); lineNumber <= lineCount; lineNumber++) { const lineTokens = model.getLineTokens(lineNumber); + const lineText = lineTokens.getLineContent(); for (let i = 0, len = lineTokens.getCount(); i < len; i++) { const tokenType = lineTokens.getStandardTokenType(i); @@ -67,7 +138,7 @@ function extractTokenTypes(model: ITextModel): void { continue; } - result[resultLen++] = startOffset; // - lastEndOffset + result[resultLen++] = startOffset; result[resultLen++] = length; result[resultLen++] = tokenType; @@ -75,8 +146,10 @@ function extractTokenTypes(model: ITextModel): void { lastEndOffset = endOffset; } - offset += lineTokens.getLineContent().length + eolLength; + offset += lineText.length + eolLength; } + + return result; } registerEditorAction(ForceRetokenizeAction); diff --git a/src/vs/editor/test/node/classification/typescript-test.ts b/src/vs/editor/test/node/classification/typescript-test.ts new file mode 100644 index 00000000000..f8c68e4ee85 --- /dev/null +++ b/src/vs/editor/test/node/classification/typescript-test.ts @@ -0,0 +1,71 @@ +/// +/* tslint:disable */ +const x01 = "string"; +/// ^^^^^^^^ string + +const x02 = '\''; +/// ^^^^ string + +const x03 = '\n\'\t'; +/// ^^^^^^^^ string + +const x04 = 'this is\ +/// ^^^^^^^^^ string\ +a multiline string'; +/// <------------------- string + +const x05 = x01;// just some text +/// ^^^^^^^^^^^^^^^^^ comment + +const x06 = x05;/* multi +/// ^^^^^^^^ comment +line *comment */ +/// <---------------- comment + +const x07 = 4 / 5; + +const x08 = `howdy`; +/// ^^^^^^^ string + +const x09 = `\'\"\``; +/// ^^^^^^^^ string + +const x10 = `$[]`; +/// ^^^^^ string + +const x11 = `${x07 +/**/3}px`; +/// ^^^ string +/// ^^^^ comment +/// ^^^^ string + +const x12 = `${x07 + (function () { return 5; })()/**/}px`; +/// ^^^ string +/// ^^^^ comment +/// ^^^^ string + +const x13 = /([\w\-]+)?(#([\w\-]+))?((.([\w\-]+))*)/; 
+/// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ regex + +const x14 = /\./g; +/// ^^^^^ regex + + +const x15 = Math.abs(x07) / x07; // speed +/// ^^^^^^^^ comment + +const x16 = / x07; /.test('3'); +/// ^^^^^^^^ regex +/// ^^^ string + +const x17 = `.dialog-modal-block${true ? '.dimmed' : ''}`; +/// ^^^^^^^^^^^^^^^^^^^^^^ string +/// ^^^^^^^^^ string +/// ^^^^ string + +const x18 = Math.min((14 <= 0.5 ? 123 / (2 * 1) : ''.length / (2 - (2 * 1))), 1); +/// ^^ string + +const x19 = `${3 / '5'.length} km/h)`; +/// ^^^ string +/// ^^^ string +/// ^^^^^^^ string diff --git a/src/vs/editor/test/node/classification/typescript.test.ts b/src/vs/editor/test/node/classification/typescript.test.ts new file mode 100644 index 00000000000..0817a93198d --- /dev/null +++ b/src/vs/editor/test/node/classification/typescript.test.ts @@ -0,0 +1,139 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
/** Signature of a classifier under test: text in, flat (offset, length, type) triples out. */
interface IParseFunc {
	(text: string): number[];
}

/** A single expectation: the given range of the content must be covered by a token of `tokenType`. */
interface IAssertion {
	/** 1-based line number of the assertion line inside the fixture file (for error messages). */
	testLineNumber: number;
	/** Offset of the asserted range inside `ITest.content`. */
	startOffset: number;
	length: number;
	tokenType: StandardTokenType;
}

/** A fixture split into the text to classify and the expectations about it. */
interface ITest {
	content: string;
	assertions: IAssertion[];
}

/**
 * Reads a fixture file and separates content lines from assertion lines.
 *
 * The first line of the file is the "magic token". Every later line starting
 * with it is an assertion about the closest preceding content line, in one of
 * two forms (shown for a magic token of `///`):
 *   `///    ^^^^ string`   - the columns under the carets
 *   `/// <------ string`   - from column 0, as many characters as there are dashes
 * A trailing backslash on an assertion line is tolerated. Any other line is
 * content. Assertion lines are not part of the returned content.
 *
 * @throws Error when a line starts with the magic token but matches neither form.
 */
function parseTest(fileName: string): ITest {
	interface ILineWithAssertions {
		line: string;
		assertions: ILineAssertion[];
	}

	interface ILineAssertion {
		testLineNumber: number;
		startOffset: number;
		length: number;
		expectedTokenType: StandardTokenType;
	}

	const testContents = fs.readFileSync(fileName).toString();
	const lines = testContents.split(/\r\n|\n/);
	const magicToken = lines[0];

	let currentElement: ILineWithAssertions = {
		line: lines[1],
		assertions: []
	};

	const parsedTest: ILineWithAssertions[] = [];
	for (let i = 2; i < lines.length; i++) {
		const line = lines[i];
		if (line.substr(0, magicToken.length) !== magicToken) {
			// this is a line to be parsed
			parsedTest.push(currentElement);
			currentElement = {
				line: line,
				assertions: []
			};
			continue;
		}

		// this is an assertion line
		const assertionText = line.substr(magicToken.length);
		const m1 = assertionText.match(/^( +)([\^]+) (\w+)\\?$/);
		if (m1) {
			currentElement.assertions.push({
				testLineNumber: i + 1,
				startOffset: magicToken.length + m1[1].length,
				length: m1[2].length,
				expectedTokenType: toStandardTokenType(m1[3])
			});
			continue;
		}

		const m2 = assertionText.match(/^( +)<(-+) (\w+)\\?$/);
		if (m2) {
			currentElement.assertions.push({
				testLineNumber: i + 1,
				startOffset: 0,
				length: m2[2].length,
				expectedTokenType: toStandardTokenType(m2[3])
			});
			continue;
		}

		throw new Error(`Invalid test line at line number ${i + 1}.`);
	}
	parsedTest.push(currentElement);

	const assertions: IAssertion[] = [];

	// translate line-relative columns into offsets inside the joined content
	let offset = 0;
	for (const parsedTestLine of parsedTest) {
		for (const assertion of parsedTestLine.assertions) {
			assertions.push({
				testLineNumber: assertion.testLineNumber,
				startOffset: offset + assertion.startOffset,
				length: assertion.length,
				tokenType: assertion.expectedTokenType
			});
		}
		// + 1 for the '\n' used to join the content lines below
		offset += parsedTestLine.line.length + 1;
	}

	const content: string = parsedTest.map(parsedTestLine => parsedTestLine.line).join('\n');

	return { content, assertions };
}

/**
 * Runs `parseFunc` over the content of the fixture `fileName` and checks every
 * assertion of the fixture: the asserted range must lie inside a single
 * produced token and that token must have the expected type.
 */
function executeTest(fileName: string, parseFunc: IParseFunc): void {
	const { content, assertions } = parseTest(fileName);
	const actual = parseFunc(content);

	let actualIndex = 0;
	const actualCount = actual.length / 3;
	for (const assertion of assertions) {
		// skip the tokens that end at or before the asserted range
		while (actualIndex < actualCount && actual[3 * actualIndex] + actual[3 * actualIndex + 1] <= assertion.startOffset) {
			actualIndex++;
		}
		// fail with a clear message instead of comparing against `undefined`
		assert.ok(
			actualIndex < actualCount,
			`Line ${assertion.testLineNumber} : no token found that ends after offset ${assertion.startOffset}`
		);
		assert.ok(
			actual[3 * actualIndex] <= assertion.startOffset,
			`Line ${assertion.testLineNumber} : startOffset : ${actual[3 * actualIndex]} <= ${assertion.startOffset}`
		);
		assert.ok(
			actual[3 * actualIndex] + actual[3 * actualIndex + 1] >= assertion.startOffset + assertion.length,
			`Line ${assertion.testLineNumber} : length : ${actual[3 * actualIndex]} + ${actual[3 * actualIndex + 1]} >= ${assertion.startOffset} + ${assertion.length}.`
		);
		assert.strictEqual(
			actual[3 * actualIndex + 2],
			assertion.tokenType,
			`Line ${assertion.testLineNumber} : tokenType`);
	}
}

suite('Classification', () => {
	test('TypeScript', () => {
		// the fixture is not compiled, so it is read from the sources instead of the build output
		executeTest(getPathFromAmdModule(require, 'vs/editor/test/node/classification/typescript-test.ts').replace(/\bout\b/, 'src'), parse);
	});
});