mirror of
https://github.com/microsoft/vscode.git
synced 2026-04-25 19:18:59 +01:00
change scanner API from class to functional style
This commit is contained in:
@@ -4,7 +4,7 @@
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
'use strict';
|
||||
|
||||
import { Scanner, TokenType } from './htmlScanner';
|
||||
import { TokenType, createScanner } from './htmlScanner';
|
||||
import { findFirst } from '../utils/arrays';
|
||||
import { isEmptyElement } from './htmlTags';
|
||||
|
||||
@@ -33,8 +33,7 @@ export interface HTMLDocument {
|
||||
}
|
||||
|
||||
export function parse(text: string) : HTMLDocument {
|
||||
let scanner = new Scanner();
|
||||
scanner.setSource(text);
|
||||
let scanner = createScanner(text);
|
||||
|
||||
let htmlDocument = new Node(0, text.length, [], null);
|
||||
let curr = htmlDocument;
|
||||
@@ -43,21 +42,21 @@ export function parse(text: string) : HTMLDocument {
|
||||
while (token !== TokenType.EOS) {
|
||||
switch (token) {
|
||||
case TokenType.StartTagOpen:
|
||||
let child = new Node(scanner.tokenOffset, text.length, [], curr);
|
||||
let child = new Node(scanner.getTokenOffset(), text.length, [], curr);
|
||||
curr.children.push(child);
|
||||
curr = child;
|
||||
break;
|
||||
case TokenType.StartTag:
|
||||
curr.tag = scanner.tokenText;
|
||||
curr.tag = scanner.getTokenText();
|
||||
break;
|
||||
case TokenType.StartTagClose:
|
||||
curr.end = scanner.position; // might be later set to end tag position
|
||||
curr.end = scanner.getTokenEnd(); // might be later set to end tag position
|
||||
if (isEmptyElement(curr.tag) && curr !== htmlDocument) {
|
||||
curr = curr.parent;
|
||||
}
|
||||
break;
|
||||
case TokenType.EndTag:
|
||||
let closeTag = scanner.tokenText;
|
||||
let closeTag = scanner.getTokenText();
|
||||
while (curr.tag !== closeTag && curr !== htmlDocument) {
|
||||
curr = curr.parent;
|
||||
}
|
||||
@@ -65,7 +64,7 @@ export function parse(text: string) : HTMLDocument {
|
||||
case TokenType.StartTagSelfClose:
|
||||
case TokenType.EndTagClose:
|
||||
if (curr !== htmlDocument) {
|
||||
curr.end = scanner.position;
|
||||
curr.end = scanner.getTokenEnd();
|
||||
curr = curr.parent;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -191,226 +191,216 @@ export enum ScannerState {
|
||||
AttributeValue
|
||||
}
|
||||
|
||||
export class Scanner {
|
||||
export interface Scanner {
|
||||
scan() : TokenType;
|
||||
getTokenType(): TokenType;
|
||||
getTokenOffset(): number;
|
||||
getTokenLength(): number;
|
||||
getTokenEnd(): number;
|
||||
getTokenText(): string;
|
||||
getScannerState(): ScannerState;
|
||||
}
|
||||
|
||||
private _stream: MultiLineStream;
|
||||
private _state: ScannerState;
|
||||
private _tokenType: TokenType;
|
||||
private _tokenOffset: number;
|
||||
export function createScanner(input: string, initialState: ScannerState = ScannerState.Content) : Scanner {
|
||||
|
||||
private _hasSpaceAfterTag: boolean;
|
||||
private _lastTag: string;
|
||||
let stream = new MultiLineStream(input);
|
||||
let state = initialState;
|
||||
let tokenOffset: number = 0;
|
||||
let tokenType: number = void 0;
|
||||
|
||||
public setSource(input: string, initialState: ScannerState = ScannerState.Content): void {
|
||||
this._stream = new MultiLineStream(input);
|
||||
this._state = initialState;
|
||||
let hasSpaceAfterTag: boolean;
|
||||
let lastTag: string;
|
||||
|
||||
function nextElementName(): string {
|
||||
return stream.advanceIfRegExp(/^[_:\w][_:\w-.\d]*/).toLowerCase();
|
||||
}
|
||||
|
||||
public get position(): number {
|
||||
return this._stream.pos();
|
||||
function nextAttributeName(): string {
|
||||
return stream.advanceIfRegExp(/^[^\s"'>/=\x00-\x0F\x7F\x80-\x9F]*/).toLowerCase();
|
||||
}
|
||||
|
||||
public get scannerState(): number {
|
||||
return this._state;
|
||||
}
|
||||
|
||||
public get tokenType(): number {
|
||||
return this._tokenType;
|
||||
}
|
||||
|
||||
public get tokenOffset(): number {
|
||||
return this._tokenOffset;
|
||||
}
|
||||
|
||||
public get tokenLength(): number {
|
||||
return this._stream.pos() - this._tokenOffset;
|
||||
}
|
||||
|
||||
public get tokenText(): string {
|
||||
return this._stream.getSource().substring(this._tokenOffset, this._stream.pos());
|
||||
}
|
||||
|
||||
private nextElementName(): string {
|
||||
return this._stream.advanceIfRegExp(/^[_:\w][_:\w-.\d]*/).toLowerCase();
|
||||
}
|
||||
|
||||
private nextAttributeName(): string {
|
||||
return this._stream.advanceIfRegExp(/^[^\s"'>/=\x00-\x0F\x7F\x80-\x9F]*/).toLowerCase();
|
||||
}
|
||||
|
||||
private finishToken(offset: number, type: TokenType): TokenType {
|
||||
this._tokenType = type;
|
||||
this._tokenOffset = offset;
|
||||
function finishToken(offset: number, type: TokenType): TokenType {
|
||||
tokenType = type;
|
||||
tokenOffset = offset;
|
||||
return type;
|
||||
}
|
||||
|
||||
public scan(): TokenType {
|
||||
let offset = this._stream.pos();
|
||||
if (this._stream.eos()) {
|
||||
return this.finishToken(offset, TokenType.EOS);
|
||||
function scan(): TokenType {
|
||||
let offset = stream.pos();
|
||||
if (stream.eos()) {
|
||||
return finishToken(offset, TokenType.EOS);
|
||||
}
|
||||
|
||||
switch (this._state) {
|
||||
switch (state) {
|
||||
case ScannerState.WithinComment:
|
||||
if (this._stream.advanceIfChars([_MIN, _MIN, _RAN])) { // -->
|
||||
this._state = ScannerState.Content;
|
||||
return this.finishToken(offset, TokenType.EndCommentTag);
|
||||
if (stream.advanceIfChars([_MIN, _MIN, _RAN])) { // -->
|
||||
state = ScannerState.Content;
|
||||
return finishToken(offset, TokenType.EndCommentTag);
|
||||
}
|
||||
this._stream.advanceUntilChars([_MIN, _MIN, _RAN]); // -->
|
||||
return this.finishToken(offset, TokenType.Comment);
|
||||
stream.advanceUntilChars([_MIN, _MIN, _RAN]); // -->
|
||||
return finishToken(offset, TokenType.Comment);
|
||||
case ScannerState.WithinDoctype:
|
||||
if (this._stream.advanceIfChar(_RAN)) {
|
||||
this._state = ScannerState.Content;
|
||||
return this.finishToken(offset, TokenType.EndDoctypeTag);
|
||||
if (stream.advanceIfChar(_RAN)) {
|
||||
state = ScannerState.Content;
|
||||
return finishToken(offset, TokenType.EndDoctypeTag);
|
||||
}
|
||||
this._stream.advanceUntilChar(_RAN); // >
|
||||
return this.finishToken(offset, TokenType.Doctype);
|
||||
stream.advanceUntilChar(_RAN); // >
|
||||
return finishToken(offset, TokenType.Doctype);
|
||||
case ScannerState.Content:
|
||||
if (this._stream.advanceIfChar(_LAN)) { // <
|
||||
if (!this._stream.eos() && this._stream.peekChar() === _BNG) { // !
|
||||
if (this._stream.advanceIfChars([_BNG, _MIN, _MIN])) { // <!--
|
||||
this._state = ScannerState.WithinComment;
|
||||
return this.finishToken(offset, TokenType.StartCommentTag);
|
||||
if (stream.advanceIfChar(_LAN)) { // <
|
||||
if (!stream.eos() && stream.peekChar() === _BNG) { // !
|
||||
if (stream.advanceIfChars([_BNG, _MIN, _MIN])) { // <!--
|
||||
state = ScannerState.WithinComment;
|
||||
return finishToken(offset, TokenType.StartCommentTag);
|
||||
}
|
||||
if (this._stream.advanceIfRegExp(/^!doctype/i)) {
|
||||
this._state = ScannerState.WithinDoctype;
|
||||
return this.finishToken(offset, TokenType.StartDoctypeTag);
|
||||
if (stream.advanceIfRegExp(/^!doctype/i)) {
|
||||
state = ScannerState.WithinDoctype;
|
||||
return finishToken(offset, TokenType.StartDoctypeTag);
|
||||
}
|
||||
}
|
||||
if (this._stream.advanceIfChar(_FSL)) { // /
|
||||
this._state = ScannerState.OpeningEndTag;
|
||||
return this.finishToken(offset, TokenType.EndTagOpen);
|
||||
if (stream.advanceIfChar(_FSL)) { // /
|
||||
state = ScannerState.OpeningEndTag;
|
||||
return finishToken(offset, TokenType.EndTagOpen);
|
||||
}
|
||||
this._state = ScannerState.OpeningStartTag;
|
||||
return this.finishToken(offset, TokenType.StartTagOpen);
|
||||
state = ScannerState.OpeningStartTag;
|
||||
return finishToken(offset, TokenType.StartTagOpen);
|
||||
}
|
||||
this._stream.advanceUntilChar(_LAN);
|
||||
return this.finishToken(offset, TokenType.Content);
|
||||
stream.advanceUntilChar(_LAN);
|
||||
return finishToken(offset, TokenType.Content);
|
||||
case ScannerState.OpeningEndTag:
|
||||
let tagName = this.nextElementName();
|
||||
let tagName = nextElementName();
|
||||
if (tagName.length > 0) {
|
||||
return this.finishToken(offset, TokenType.EndTag);
|
||||
} else if (this._stream.advanceIfChar(_RAN)) { // >
|
||||
this._state = ScannerState.Content;
|
||||
return this.finishToken(offset, TokenType.EndTagClose);
|
||||
return finishToken(offset, TokenType.EndTag);
|
||||
} else if (stream.advanceIfChar(_RAN)) { // >
|
||||
state = ScannerState.Content;
|
||||
return finishToken(offset, TokenType.EndTagClose);
|
||||
}
|
||||
this._stream.advanceUntilChar(_RAN);
|
||||
return this.finishToken(offset, TokenType.Whitespace);
|
||||
stream.advanceUntilChar(_RAN);
|
||||
return finishToken(offset, TokenType.Whitespace);
|
||||
case ScannerState.OpeningStartTag:
|
||||
this._lastTag = this.nextElementName();
|
||||
if (this._lastTag.length > 0) {
|
||||
this._hasSpaceAfterTag = false;
|
||||
this._state = ScannerState.WithinTag;
|
||||
return this.finishToken(offset, TokenType.StartTag);
|
||||
lastTag = nextElementName();
|
||||
if (lastTag.length > 0) {
|
||||
hasSpaceAfterTag = false;
|
||||
state = ScannerState.WithinTag;
|
||||
return finishToken(offset, TokenType.StartTag);
|
||||
}
|
||||
break;
|
||||
case ScannerState.WithinTag:
|
||||
if (this._stream.skipWhitespace()) {
|
||||
this._hasSpaceAfterTag = true; // remember that we have seen a whitespace
|
||||
return this.finishToken(offset, TokenType.Whitespace);
|
||||
if (stream.skipWhitespace()) {
|
||||
hasSpaceAfterTag = true; // remember that we have seen a whitespace
|
||||
return finishToken(offset, TokenType.Whitespace);
|
||||
}
|
||||
if (this._hasSpaceAfterTag) {
|
||||
let name = this.nextAttributeName();
|
||||
if (hasSpaceAfterTag) {
|
||||
let name = nextAttributeName();
|
||||
if (name.length > 0) {
|
||||
this._state = ScannerState.AttributeName;
|
||||
this._hasSpaceAfterTag = false;
|
||||
return this.finishToken(offset, TokenType.AttributeName);
|
||||
state = ScannerState.AttributeName;
|
||||
hasSpaceAfterTag = false;
|
||||
return finishToken(offset, TokenType.AttributeName);
|
||||
}
|
||||
}
|
||||
if (this._stream.advanceIfChars([_FSL, _RAN])) { // />
|
||||
this._state = ScannerState.Content;
|
||||
return this.finishToken(offset, TokenType.StartTagSelfClose);
|
||||
if (stream.advanceIfChars([_FSL, _RAN])) { // />
|
||||
state = ScannerState.Content;
|
||||
return finishToken(offset, TokenType.StartTagSelfClose);
|
||||
}
|
||||
if (this._stream.advanceIfChar(_RAN)) { // >
|
||||
if (this._lastTag === 'script') {
|
||||
this._state = ScannerState.WithinScriptContent;
|
||||
} else if (this._lastTag === 'style') {
|
||||
this._state = ScannerState.WithinStyleContent;
|
||||
if (stream.advanceIfChar(_RAN)) { // >
|
||||
if (lastTag === 'script') {
|
||||
state = ScannerState.WithinScriptContent;
|
||||
} else if (lastTag === 'style') {
|
||||
state = ScannerState.WithinStyleContent;
|
||||
} else {
|
||||
this._state = ScannerState.Content;
|
||||
state = ScannerState.Content;
|
||||
}
|
||||
return this.finishToken(offset, TokenType.StartTagClose);
|
||||
return finishToken(offset, TokenType.StartTagClose);
|
||||
}
|
||||
this._stream.advance(1);
|
||||
return this.finishToken(offset, TokenType.Unknown);
|
||||
stream.advance(1);
|
||||
return finishToken(offset, TokenType.Unknown);
|
||||
case ScannerState.AttributeName:
|
||||
if (this._stream.skipWhitespace()) {
|
||||
this._hasSpaceAfterTag = true;
|
||||
return this.finishToken(offset, TokenType.Whitespace);
|
||||
if (stream.skipWhitespace()) {
|
||||
hasSpaceAfterTag = true;
|
||||
return finishToken(offset, TokenType.Whitespace);
|
||||
}
|
||||
|
||||
if (this._stream.advanceIfChar(_EQS)) {
|
||||
this._state = ScannerState.AttributeValue;
|
||||
return this.finishToken(offset, TokenType.DelimiterAssign);
|
||||
if (stream.advanceIfChar(_EQS)) {
|
||||
state = ScannerState.AttributeValue;
|
||||
return finishToken(offset, TokenType.DelimiterAssign);
|
||||
}
|
||||
this._state = ScannerState.WithinTag;
|
||||
return this.scan(); // no advance yet - jump to WithinTag
|
||||
state = ScannerState.WithinTag;
|
||||
return scan(); // no advance yet - jump to WithinTag
|
||||
case ScannerState.AttributeValue:
|
||||
if (this._stream.skipWhitespace()) {
|
||||
return this.finishToken(offset, TokenType.Whitespace);
|
||||
if (stream.skipWhitespace()) {
|
||||
return finishToken(offset, TokenType.Whitespace);
|
||||
}
|
||||
let attributeValue = this._stream.advanceIfRegExp(/^[^\s"'`=<>]+/);
|
||||
let attributeValue = stream.advanceIfRegExp(/^[^\s"'`=<>]+/);
|
||||
if (attributeValue.length > 0) {
|
||||
this._state = ScannerState.WithinTag;
|
||||
this._hasSpaceAfterTag = false;
|
||||
return this.finishToken(offset, TokenType.AttributeValue);
|
||||
state = ScannerState.WithinTag;
|
||||
hasSpaceAfterTag = false;
|
||||
return finishToken(offset, TokenType.AttributeValue);
|
||||
}
|
||||
let ch = this._stream.peekChar();
|
||||
let ch = stream.peekChar();
|
||||
if (ch === _SQO || ch === _DQO) {
|
||||
this._stream.advance(1); // consume quote
|
||||
if (this._stream.advanceUntilChar(ch)) {
|
||||
this._stream.advance(1); // consume quote
|
||||
stream.advance(1); // consume quote
|
||||
if (stream.advanceUntilChar(ch)) {
|
||||
stream.advance(1); // consume quote
|
||||
}
|
||||
this._state = ScannerState.WithinTag;
|
||||
this._hasSpaceAfterTag = false;
|
||||
return this.finishToken(offset, TokenType.AttributeValue);
|
||||
state = ScannerState.WithinTag;
|
||||
hasSpaceAfterTag = false;
|
||||
return finishToken(offset, TokenType.AttributeValue);
|
||||
}
|
||||
this._state = ScannerState.WithinTag;
|
||||
this._hasSpaceAfterTag = false;
|
||||
return this.scan(); // no advance yet - jump to WithinTag
|
||||
state = ScannerState.WithinTag;
|
||||
hasSpaceAfterTag = false;
|
||||
return scan(); // no advance yet - jump to WithinTag
|
||||
case ScannerState.WithinScriptContent:
|
||||
// see http://stackoverflow.com/questions/14574471/how-do-browsers-parse-a-script-tag-exactly
|
||||
let state = 1;
|
||||
while (!this._stream.eos()) {
|
||||
let match = this._stream.advanceIfRegExp(/<!--|-->|<\/?script\s*\/?>?/i);
|
||||
let sciptState = 1;
|
||||
while (!stream.eos()) {
|
||||
let match = stream.advanceIfRegExp(/<!--|-->|<\/?script\s*\/?>?/i);
|
||||
if (match.length === 0) {
|
||||
this._stream.goToEnd();
|
||||
return this.finishToken(offset, TokenType.Script);
|
||||
stream.goToEnd();
|
||||
return finishToken(offset, TokenType.Script);
|
||||
} else if (match === '<!--') {
|
||||
if (state === 1) {
|
||||
state = 2;
|
||||
if (sciptState === 1) {
|
||||
sciptState = 2;
|
||||
}
|
||||
} else if (match === '-->') {
|
||||
state = 1;
|
||||
sciptState = 1;
|
||||
} else if (match[1] !== '/') { // <script
|
||||
if (state === 2) {
|
||||
state = 3;
|
||||
if (sciptState === 2) {
|
||||
sciptState = 3;
|
||||
}
|
||||
} else { // </script
|
||||
if (state === 3) {
|
||||
state = 2;
|
||||
if (sciptState === 3) {
|
||||
sciptState = 2;
|
||||
} else {
|
||||
this._stream.goBack(match.length); // to the beginning of the closing tag
|
||||
stream.goBack(match.length); // to the beginning of the closing tag
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
this._state = ScannerState.Content;
|
||||
if (offset < this._stream.pos()) {
|
||||
return this.finishToken(offset, TokenType.Script);
|
||||
state = ScannerState.Content;
|
||||
if (offset < stream.pos()) {
|
||||
return finishToken(offset, TokenType.Script);
|
||||
}
|
||||
return this.scan(); // no advance yet - jump to content
|
||||
return scan(); // no advance yet - jump to content
|
||||
case ScannerState.WithinScriptContent:
|
||||
this._stream.advanceUntilRegExp(/<\/style/i);
|
||||
this._state = ScannerState.Content;
|
||||
if (offset < this._stream.pos()) {
|
||||
return this.finishToken(offset, TokenType.Styles);
|
||||
stream.advanceUntilRegExp(/<\/style/i);
|
||||
state = ScannerState.Content;
|
||||
if (offset < stream.pos()) {
|
||||
return finishToken(offset, TokenType.Styles);
|
||||
}
|
||||
return this.scan(); // no advance yet - jump to content
|
||||
return scan(); // no advance yet - jump to content
|
||||
}
|
||||
|
||||
this._stream.advance(1);
|
||||
this._state = ScannerState.Content;
|
||||
return this.finishToken(offset, TokenType.Unknown);
|
||||
stream.advance(1);
|
||||
state = ScannerState.Content;
|
||||
return finishToken(offset, TokenType.Unknown);
|
||||
}
|
||||
return {
|
||||
scan,
|
||||
getTokenType: () => tokenType,
|
||||
getTokenOffset: () => tokenOffset,
|
||||
getTokenLength: () => stream.pos() - tokenOffset,
|
||||
getTokenEnd: () => stream.pos(),
|
||||
getTokenText: () => stream.getSource().substring(tokenOffset, stream.pos()),
|
||||
getScannerState: () => state
|
||||
};
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
'use strict';
|
||||
|
||||
import * as assert from 'assert';
|
||||
import {Scanner, TokenType, ScannerState} from '../parser/htmlScanner';
|
||||
import {Scanner, TokenType, ScannerState, createScanner} from '../parser/htmlScanner';
|
||||
|
||||
suite('HTML Scanner', () => {
|
||||
|
||||
@@ -16,22 +16,22 @@ suite('HTML Scanner', () => {
|
||||
}
|
||||
|
||||
function assertTokens(tests: {input: string; tokens: Token[]; }[]) {
|
||||
let scanner = new Scanner();
|
||||
|
||||
let scannerState = ScannerState.Content;
|
||||
for (let t of tests) {
|
||||
scanner.setSource(t.input, scannerState);
|
||||
let scanner = createScanner(t.input, scannerState);
|
||||
let tokenType = scanner.scan();
|
||||
let actual : Token[] = [];
|
||||
while (tokenType !== TokenType.EOS) {
|
||||
let actualToken : Token= { offset: scanner.tokenOffset, type: tokenType };
|
||||
let actualToken : Token= { offset: scanner.getTokenOffset(), type: tokenType };
|
||||
if (tokenType == TokenType.StartTag || tokenType == TokenType.EndTag) {
|
||||
actualToken.content = t.input.substr(scanner.tokenOffset, scanner.tokenLength);
|
||||
actualToken.content = t.input.substr(scanner.getTokenOffset(), scanner.getTokenLength());
|
||||
}
|
||||
actual.push(actualToken);
|
||||
tokenType = scanner.scan();
|
||||
}
|
||||
assert.deepEqual(actual, t.tokens);
|
||||
scannerState = scanner.scannerState;
|
||||
scannerState = scanner.getScannerState();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user