Files
vscode/extensions/json/server/src/json-toolbox/json.ts
2016-01-26 09:43:21 +01:00

790 lines
19 KiB
TypeScript

/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
'use strict';
export enum ScanError {
None,
UnexpectedEndOfComment,
UnexpectedEndOfString,
UnexpectedEndOfNumber,
InvalidUnicode,
InvalidEscapeCharacter
}
export enum SyntaxKind {
Unknown = 0,
OpenBraceToken,
CloseBraceToken,
OpenBracketToken,
CloseBracketToken,
CommaToken,
ColonToken,
NullKeyword,
TrueKeyword,
FalseKeyword,
StringLiteral,
NumericLiteral,
LineCommentTrivia,
BlockCommentTrivia,
LineBreakTrivia,
Trivia,
EOF
}
export interface JSONScanner {
scan(): SyntaxKind;
getPosition(): number;
getToken(): SyntaxKind;
getTokenValue(): string;
getTokenOffset(): number;
getTokenLength(): number;
getTokenError(): ScanError;
}
export function createScanner(text:string, ignoreTrivia:boolean = false):JSONScanner {
var pos = 0,
len = text.length,
value:string = '',
tokenOffset = 0,
token:SyntaxKind = SyntaxKind.Unknown,
scanError:ScanError = ScanError.None;
function scanHexDigits(count: number, exact?: boolean): number {
var digits = 0;
var value = 0;
while (digits < count || !exact) {
var ch = text.charCodeAt(pos);
if (ch >= CharacterCodes._0 && ch <= CharacterCodes._9) {
value = value * 16 + ch - CharacterCodes._0;
}
else if (ch >= CharacterCodes.A && ch <= CharacterCodes.F) {
value = value * 16 + ch - CharacterCodes.A + 10;
}
else if (ch >= CharacterCodes.a && ch <= CharacterCodes.f) {
value = value * 16 + ch - CharacterCodes.a + 10;
}
else {
break;
}
pos++;
digits++;
}
if (digits < count) {
value = -1;
}
return value;
}
function scanNumber(): string {
var start = pos;
if (text.charCodeAt(pos) === CharacterCodes._0) {
pos++;
} else {
pos++;
while (pos < text.length && isDigit(text.charCodeAt(pos))) {
pos++;
}
}
if (pos < text.length && text.charCodeAt(pos) === CharacterCodes.dot) {
pos++;
if (pos < text.length && isDigit(text.charCodeAt(pos))) {
pos++;
while (pos < text.length && isDigit(text.charCodeAt(pos))) {
pos++;
}
} else {
scanError = ScanError.UnexpectedEndOfNumber;
return text.substring(start, end);
}
}
var end = pos;
if (pos < text.length && (text.charCodeAt(pos) === CharacterCodes.E || text.charCodeAt(pos) === CharacterCodes.e)) {
pos++;
if (pos < text.length && text.charCodeAt(pos) === CharacterCodes.plus || text.charCodeAt(pos) === CharacterCodes.minus) {
pos++;
}
if (pos < text.length && isDigit(text.charCodeAt(pos))) {
pos++;
while (pos < text.length && isDigit(text.charCodeAt(pos))) {
pos++;
}
end = pos;
} else {
scanError = ScanError.UnexpectedEndOfNumber;
}
}
return text.substring(start, end);
}
function scanString(): string {
var result = '',
start = pos;
while (true) {
if (pos >= len) {
result += text.substring(start, pos);
scanError = ScanError.UnexpectedEndOfString;
break;
}
var ch = text.charCodeAt(pos);
if (ch === CharacterCodes.doubleQuote) {
result += text.substring(start, pos);
pos++;
break;
}
if (ch === CharacterCodes.backslash) {
result += text.substring(start, pos);
pos++;
if (pos >= len) {
scanError = ScanError.UnexpectedEndOfString;
break;
}
ch = text.charCodeAt(pos++);
switch (ch) {
case CharacterCodes.doubleQuote:
result += '\"';
break;
case CharacterCodes.backslash:
result += '\\';
break;
case CharacterCodes.slash:
result += '/';
break;
case CharacterCodes.b:
result += '\b';
break;
case CharacterCodes.f:
result += '\f';
break;
case CharacterCodes.n:
result += '\n';
break;
case CharacterCodes.r:
result += '\r';
break;
case CharacterCodes.t:
result += '\t';
break;
case CharacterCodes.u:
var ch = scanHexDigits(4, true);
if (ch >= 0) {
result += String.fromCharCode(ch);
} else {
scanError = ScanError.InvalidUnicode;
}
break;
default:
scanError = ScanError.InvalidEscapeCharacter;
}
start = pos;
continue;
}
if (isLineBreak(ch)) {
result += text.substring(start, pos);
scanError = ScanError.UnexpectedEndOfString;
break;
}
pos++;
}
return result;
}
function scanNext():SyntaxKind {
value = '';
scanError = ScanError.None;
tokenOffset = pos;
if(pos >= len) {
// at the end
tokenOffset = len;
return token = SyntaxKind.EOF;
}
var code = text.charCodeAt(pos);
// trivia: whitespace
if (isWhiteSpace(code)) {
do {
pos++;
value += String.fromCharCode(code);
code = text.charCodeAt(pos);
} while (isWhiteSpace(code));
return token = SyntaxKind.Trivia;
}
// trivia: newlines
if (isLineBreak(code)) {
pos++;
value += String.fromCharCode(code);
if (code === CharacterCodes.carriageReturn && text.charCodeAt(pos) === CharacterCodes.lineFeed) {
pos++;
value += '\n';
}
return token = SyntaxKind.LineBreakTrivia;
}
switch(code) {
// tokens: []{}:,
case CharacterCodes.openBrace:
pos++;
return token = SyntaxKind.OpenBraceToken;
case CharacterCodes.closeBrace:
pos++;
return token = SyntaxKind.CloseBraceToken;
case CharacterCodes.openBracket:
pos++;
return token = SyntaxKind.OpenBracketToken;
case CharacterCodes.closeBracket:
pos++;
return token = SyntaxKind.CloseBracketToken;
case CharacterCodes.colon:
pos++;
return token = SyntaxKind.ColonToken;
case CharacterCodes.comma:
pos++;
return token = SyntaxKind.CommaToken;
// strings
case CharacterCodes.doubleQuote:
pos++;
value = scanString();
return token = SyntaxKind.StringLiteral;
// comments
case CharacterCodes.slash:
var start = pos - 1;
// Single-line comment
if (text.charCodeAt(pos + 1) === CharacterCodes.slash) {
pos += 2;
while (pos < len) {
if (isLineBreak(text.charCodeAt(pos))) {
break;
}
pos++;
}
value = text.substring(start, pos);
return token = SyntaxKind.LineCommentTrivia;
}
// Multi-line comment
if (text.charCodeAt(pos + 1) === CharacterCodes.asterisk) {
pos += 2;
var safeLength = len - 1; // For lookahead.
var commentClosed = false;
while (pos < safeLength) {
var ch = text.charCodeAt(pos);
if (ch === CharacterCodes.asterisk && text.charCodeAt(pos + 1) === CharacterCodes.slash) {
pos += 2;
commentClosed = true;
break;
}
pos++;
}
if (!commentClosed) {
pos++;
scanError = ScanError.UnexpectedEndOfComment;
}
value = text.substring(start, pos);
return token = SyntaxKind.BlockCommentTrivia;
}
// just a single slash
value += String.fromCharCode(code);
pos++;
return token = SyntaxKind.Unknown;
// numbers
case CharacterCodes.minus:
value += String.fromCharCode(code);
pos++;
if (pos === len || !isDigit(text.charCodeAt(pos))) {
return token = SyntaxKind.Unknown;
}
// found a minus, followed by a number so
// we fall through to proceed with scanning
// numbers
case CharacterCodes._0:
case CharacterCodes._1:
case CharacterCodes._2:
case CharacterCodes._3:
case CharacterCodes._4:
case CharacterCodes._5:
case CharacterCodes._6:
case CharacterCodes._7:
case CharacterCodes._8:
case CharacterCodes._9:
value += scanNumber();
return token = SyntaxKind.NumericLiteral;
// literals and unknown symbols
default:
// is a literal? Read the full word.
while (pos < len && isUnknownContentCharacter(code)) {
pos++;
code = text.charCodeAt(pos);
}
if (tokenOffset !== pos) {
value = text.substring(tokenOffset, pos);
// keywords: true, false, null
switch (value) {
case 'true': return token = SyntaxKind.TrueKeyword;
case 'false': return token = SyntaxKind.FalseKeyword;
case 'null': return token = SyntaxKind.NullKeyword;
}
return token = SyntaxKind.Unknown;
}
// some
value += String.fromCharCode(code);
pos++;
return token = SyntaxKind.Unknown;
}
}
function isUnknownContentCharacter(code: CharacterCodes) {
if (isWhiteSpace(code) || isLineBreak(code)) {
return false;
}
switch (code) {
case CharacterCodes.closeBrace:
case CharacterCodes.closeBracket:
case CharacterCodes.openBrace:
case CharacterCodes.openBracket:
case CharacterCodes.doubleQuote:
case CharacterCodes.colon:
case CharacterCodes.comma:
return false;
}
return true;
}
function scanNextNonTrivia():SyntaxKind {
var result : SyntaxKind;
do {
result = scanNext();
} while (result >= SyntaxKind.LineCommentTrivia && result <= SyntaxKind.Trivia);
return result;
}
return {
getPosition: () => pos,
scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
getToken: () => token,
getTokenValue: () => value,
getTokenOffset: () => tokenOffset,
getTokenLength: () => pos - tokenOffset,
getTokenError: () => scanError
};
}
function isWhiteSpace(ch: number): boolean {
return ch === CharacterCodes.space || ch === CharacterCodes.tab || ch === CharacterCodes.verticalTab || ch === CharacterCodes.formFeed ||
ch === CharacterCodes.nonBreakingSpace || ch === CharacterCodes.ogham || ch >= CharacterCodes.enQuad && ch <= CharacterCodes.zeroWidthSpace ||
ch === CharacterCodes.narrowNoBreakSpace || ch === CharacterCodes.mathematicalSpace || ch === CharacterCodes.ideographicSpace || ch === CharacterCodes.byteOrderMark;
}
function isLineBreak(ch: number): boolean {
return ch === CharacterCodes.lineFeed || ch === CharacterCodes.carriageReturn || ch === CharacterCodes.lineSeparator || ch === CharacterCodes.paragraphSeparator;
}
function isDigit(ch: number): boolean {
return ch >= CharacterCodes._0 && ch <= CharacterCodes._9;
}
export function isLetter(ch: number): boolean {
return ch >= CharacterCodes.a && ch <= CharacterCodes.z || ch >= CharacterCodes.A && ch <= CharacterCodes.Z;
}
enum CharacterCodes {
nullCharacter = 0,
maxAsciiCharacter = 0x7F,
lineFeed = 0x0A, // \n
carriageReturn = 0x0D, // \r
lineSeparator = 0x2028,
paragraphSeparator = 0x2029,
// REVIEW: do we need to support this? The scanner doesn't, but our IText does. This seems
// like an odd disparity? (Or maybe it's completely fine for them to be different).
nextLine = 0x0085,
// Unicode 3.0 space characters
space = 0x0020, // " "
nonBreakingSpace = 0x00A0, //
enQuad = 0x2000,
emQuad = 0x2001,
enSpace = 0x2002,
emSpace = 0x2003,
threePerEmSpace = 0x2004,
fourPerEmSpace = 0x2005,
sixPerEmSpace = 0x2006,
figureSpace = 0x2007,
punctuationSpace = 0x2008,
thinSpace = 0x2009,
hairSpace = 0x200A,
zeroWidthSpace = 0x200B,
narrowNoBreakSpace = 0x202F,
ideographicSpace = 0x3000,
mathematicalSpace = 0x205F,
ogham = 0x1680,
_ = 0x5F,
$ = 0x24,
_0 = 0x30,
_1 = 0x31,
_2 = 0x32,
_3 = 0x33,
_4 = 0x34,
_5 = 0x35,
_6 = 0x36,
_7 = 0x37,
_8 = 0x38,
_9 = 0x39,
a = 0x61,
b = 0x62,
c = 0x63,
d = 0x64,
e = 0x65,
f = 0x66,
g = 0x67,
h = 0x68,
i = 0x69,
j = 0x6A,
k = 0x6B,
l = 0x6C,
m = 0x6D,
n = 0x6E,
o = 0x6F,
p = 0x70,
q = 0x71,
r = 0x72,
s = 0x73,
t = 0x74,
u = 0x75,
v = 0x76,
w = 0x77,
x = 0x78,
y = 0x79,
z = 0x7A,
A = 0x41,
B = 0x42,
C = 0x43,
D = 0x44,
E = 0x45,
F = 0x46,
G = 0x47,
H = 0x48,
I = 0x49,
J = 0x4A,
K = 0x4B,
L = 0x4C,
M = 0x4D,
N = 0x4E,
O = 0x4F,
P = 0x50,
Q = 0x51,
R = 0x52,
S = 0x53,
T = 0x54,
U = 0x55,
V = 0x56,
W = 0x57,
X = 0x58,
Y = 0x59,
Z = 0x5a,
ampersand = 0x26, // &
asterisk = 0x2A, // *
at = 0x40, // @
backslash = 0x5C, // \
bar = 0x7C, // |
caret = 0x5E, // ^
closeBrace = 0x7D, // }
closeBracket = 0x5D, // ]
closeParen = 0x29, // )
colon = 0x3A, // :
comma = 0x2C, // ,
dot = 0x2E, // .
doubleQuote = 0x22, // "
equals = 0x3D, // =
exclamation = 0x21, // !
greaterThan = 0x3E, // >
lessThan = 0x3C, // <
minus = 0x2D, // -
openBrace = 0x7B, // {
openBracket = 0x5B, // [
openParen = 0x28, // (
percent = 0x25, // %
plus = 0x2B, // +
question = 0x3F, // ?
semicolon = 0x3B, // ;
singleQuote = 0x27, // '
slash = 0x2F, // /
tilde = 0x7E, // ~
backspace = 0x08, // \b
formFeed = 0x0C, // \f
byteOrderMark = 0xFEFF,
tab = 0x09, // \t
verticalTab = 0x0B, // \v
}
/**
* Takes JSON with JavaScript-style comments and remove
* them. Optionally replaces every none-newline character
* of comments with a replaceCharacter
*/
export function stripComments(text:string, replaceCh?:string):string {
var _scanner = createScanner(text),
parts: string[] = [],
kind:SyntaxKind,
offset = 0,
pos:number;
do {
pos = _scanner.getPosition();
kind = _scanner.scan();
switch (kind) {
case SyntaxKind.LineCommentTrivia:
case SyntaxKind.BlockCommentTrivia:
case SyntaxKind.EOF:
if(offset !== pos) {
parts.push(text.substring(offset, pos));
}
if(replaceCh !== void 0) {
parts.push(_scanner.getTokenValue().replace(/[^\r\n]/g, replaceCh));
}
offset = _scanner.getPosition();
break;
}
} while(kind !== SyntaxKind.EOF);
return parts.join('');
}
export enum ParseErrorCode {
InvalidSymbol,
InvalidNumberFormat,
PropertyNameExpected,
ValueExpected,
ColonExpected,
CommaExpected,
CloseBraceExpected,
CloseBracketExpected,
EndOfFileExpected
}
export function getParseErrorMessage(errorCode: ParseErrorCode) : string {
switch (errorCode) {
case ParseErrorCode.InvalidSymbol: return 'Invalid symbol';
case ParseErrorCode.InvalidNumberFormat: return 'Invalid number format';
case ParseErrorCode.PropertyNameExpected: return 'Property name expected';
case ParseErrorCode.ValueExpected: return 'Value expected';
case ParseErrorCode.ColonExpected: return 'Colon expected';
case ParseErrorCode.CommaExpected: return 'Comma expected';
case ParseErrorCode.CloseBraceExpected: return 'Closing brace expected';
case ParseErrorCode.CloseBracketExpected: return 'Closing bracket expected';
case ParseErrorCode.EndOfFileExpected: return 'End of file expected';
default:
return '';
}
}
export function parse(text:string, errors: { error:ParseErrorCode; }[] = []) : any {
var noMatch = Object();
var _scanner = createScanner(text, true);
function scanNext() : SyntaxKind {
var token = _scanner.scan();
while (token === SyntaxKind.Unknown) {
handleError(ParseErrorCode.InvalidSymbol);
token = _scanner.scan();
}
return token;
}
function handleError(error:ParseErrorCode, skipUntilAfter: SyntaxKind[] = [], skipUntil: SyntaxKind[] = []) : void {
errors.push({ error: error });
if (skipUntilAfter.length + skipUntil.length > 0) {
var token = _scanner.getToken();
while (token !== SyntaxKind.EOF) {
if (skipUntilAfter.indexOf(token) !== -1) {
scanNext();
break;
} else if (skipUntil.indexOf(token) !== -1) {
break;
}
token = scanNext();
}
}
}
function parseString() : any {
if (_scanner.getToken() !== SyntaxKind.StringLiteral) {
return noMatch;
}
var value = _scanner.getTokenValue();
scanNext();
return value;
}
function parseLiteral() : any {
var value : any;
switch (_scanner.getToken()) {
case SyntaxKind.NumericLiteral:
try {
value = JSON.parse(_scanner.getTokenValue());
if (typeof value !== 'number') {
handleError(ParseErrorCode.InvalidNumberFormat);
value = 0;
}
} catch (e) {
value = 0;
}
break;
case SyntaxKind.NullKeyword:
value = null;
break;
case SyntaxKind.TrueKeyword:
value = true;
break;
case SyntaxKind.FalseKeyword:
value = false;
break;
default:
return noMatch;
}
scanNext();
return value;
}
function parseProperty(result: any) : any {
var key = parseString();
if (key === noMatch) {
handleError(ParseErrorCode.PropertyNameExpected, [], [SyntaxKind.CloseBraceToken, SyntaxKind.CommaToken] );
return false;
}
if (_scanner.getToken() === SyntaxKind.ColonToken) {
scanNext(); // consume colon
var value = parseValue();
if (value !== noMatch) {
result[key] = value;
} else {
handleError(ParseErrorCode.ValueExpected, [], [SyntaxKind.CloseBraceToken, SyntaxKind.CommaToken] );
}
} else {
handleError(ParseErrorCode.ColonExpected, [], [SyntaxKind.CloseBraceToken, SyntaxKind.CommaToken] );
}
return true;
}
function parseObject() : any {
if (_scanner.getToken() !== SyntaxKind.OpenBraceToken) {
return noMatch;
}
var obj = {};
scanNext(); // consume open brace
var needsComma = false;
while (_scanner.getToken() !== SyntaxKind.CloseBraceToken && _scanner.getToken() !== SyntaxKind.EOF) {
if (_scanner.getToken() === SyntaxKind.CommaToken) {
if (!needsComma) {
handleError(ParseErrorCode.ValueExpected, [], [] );
}
scanNext(); // consume comma
} else if (needsComma) {
handleError(ParseErrorCode.CommaExpected, [], [] );
}
var propertyParsed = parseProperty(obj);
if (!propertyParsed) {
handleError(ParseErrorCode.ValueExpected, [], [SyntaxKind.CloseBraceToken, SyntaxKind.CommaToken] );
}
needsComma = true;
}
if (_scanner.getToken() !== SyntaxKind.CloseBraceToken) {
handleError(ParseErrorCode.CloseBraceExpected, [SyntaxKind.CloseBraceToken], []);
} else {
scanNext(); // consume close brace
}
return obj;
}
function parseArray() : any {
if (_scanner.getToken() !== SyntaxKind.OpenBracketToken) {
return noMatch;
}
var arr: any[] = [];
scanNext(); // consume open bracket
var needsComma = false;
while (_scanner.getToken() !== SyntaxKind.CloseBracketToken && _scanner.getToken() !== SyntaxKind.EOF) {
if (_scanner.getToken() === SyntaxKind.CommaToken) {
if (!needsComma) {
handleError(ParseErrorCode.ValueExpected, [], [] );
}
scanNext(); // consume comma
} else if (needsComma) {
handleError(ParseErrorCode.CommaExpected, [], [] );
}
var value = parseValue();
if (value === noMatch) {
handleError(ParseErrorCode.ValueExpected, [], [SyntaxKind.CloseBracketToken, SyntaxKind.CommaToken] );
} else {
arr.push(value);
}
needsComma = true;
}
if (_scanner.getToken() !== SyntaxKind.CloseBracketToken) {
handleError(ParseErrorCode.CloseBracketExpected, [SyntaxKind.CloseBracketToken], []);
} else {
scanNext(); // consume close bracket
}
return arr;
}
function parseValue() : any {
var result = parseArray();
if (result !== noMatch) {
return result;
}
result = parseObject();
if (result !== noMatch) {
return result;
}
result = parseString();
if (result !== noMatch) {
return result;
}
return parseLiteral();
}
scanNext();
var value = parseValue();
if (value === noMatch) {
handleError(ParseErrorCode.ValueExpected, [], []);
return void 0;
}
if (_scanner.getToken() !== SyntaxKind.EOF) {
handleError(ParseErrorCode.EndOfFileExpected, [], []);
}
return value;
}