mirror of
https://github.com/microsoft/vscode.git
synced 2026-04-28 12:33:35 +01:00
* Allow configuring a list of candidate encodings to use when guessing #36951
* Bump the jschardet version to 3.1.2 #36951
* missing merge
* some polish
* renames
* some polish
* some polish
* cleanup

---------

Co-authored-by: Benjamin Pasero <benjamin.pasero@microsoft.com>
This commit is contained in:
@@ -49,15 +49,38 @@ const JSCHARDET_TO_ICONV_ENCODINGS: { [name: string]: string } = {
|
||||
'big5': 'cp950'
|
||||
};
|
||||
|
||||
export function detectEncoding(buffer: Buffer): string | null {
|
||||
/**
 * Lookup table from the encoding identifiers accepted in the
 * `candidateGuessEncodings` list to the encoding names handed to jschardet
 * through its `detectEncodings` option.
 *
 * `detectEncoding` maps every candidate through this table and filters out
 * the ones without an entry, so only identifiers listed here can restrict
 * the guess.
 *
 * NOTE(review): the keys look like normalized (lower-case, punctuation-free)
 * encoding names as used by the `files.encoding` setting — confirm against
 * the configuration schema.
 */
const MAP_CANDIDATE_GUESS_ENCODING_TO_JSCHARDET: { [key: string]: string } = {
|
||||
utf8: 'UTF-8',
|
||||
utf16le: 'UTF-16LE',
|
||||
utf16be: 'UTF-16BE',
|
||||
windows1252: 'windows-1252',
|
||||
windows1250: 'windows-1250',
|
||||
iso88592: 'ISO-8859-2',
|
||||
windows1251: 'windows-1251',
|
||||
cp866: 'IBM866',
|
||||
iso88595: 'ISO-8859-5',
|
||||
koi8r: 'KOI8-R',
|
||||
windows1253: 'windows-1253',
|
||||
iso88597: 'ISO-8859-7',
|
||||
windows1255: 'windows-1255',
|
||||
iso88598: 'ISO-8859-8',
|
||||
cp950: 'Big5',
|
||||
shiftjis: 'SHIFT_JIS',
|
||||
eucjp: 'EUC-JP',
|
||||
euckr: 'EUC-KR',
|
||||
gb2312: 'GB2312'
|
||||
};
|
||||
|
||||
export function detectEncoding(buffer: Buffer, candidateGuessEncodings: string[]): string | null {
|
||||
const result = detectEncodingByBOM(buffer);
|
||||
|
||||
if (result) {
|
||||
return result;
|
||||
}
|
||||
|
||||
const detected = jschardet.detect(buffer);
|
||||
candidateGuessEncodings = candidateGuessEncodings.map(e => MAP_CANDIDATE_GUESS_ENCODING_TO_JSCHARDET[e]).filter(e => !!e);
|
||||
|
||||
const detected = jschardet.detect(buffer, candidateGuessEncodings.length > 0 ? { detectEncodings: candidateGuessEncodings } : undefined);
|
||||
if (!detected || !detected.encoding) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -1233,11 +1233,11 @@ export class Repository {
|
||||
.filter(entry => !!entry);
|
||||
}
|
||||
|
||||
async bufferString(object: string, encoding: string = 'utf8', autoGuessEncoding = false): Promise<string> {
|
||||
async bufferString(object: string, encoding: string = 'utf8', autoGuessEncoding = false, candidateGuessEncodings: string[] = []): Promise<string> {
|
||||
const stdout = await this.buffer(object);
|
||||
|
||||
if (autoGuessEncoding) {
|
||||
encoding = detectEncoding(stdout) || encoding;
|
||||
encoding = detectEncoding(stdout, candidateGuessEncodings) || encoding;
|
||||
}
|
||||
|
||||
encoding = iconv.encodingExists(encoding) ? encoding : 'utf8';
|
||||
|
||||
@@ -1865,13 +1865,14 @@ export class Repository implements Disposable {
|
||||
const configFiles = workspace.getConfiguration('files', Uri.file(filePath));
|
||||
const defaultEncoding = configFiles.get<string>('encoding');
|
||||
const autoGuessEncoding = configFiles.get<boolean>('autoGuessEncoding');
|
||||
const candidateGuessEncodings = configFiles.get<string[]>('candidateGuessEncodings');
|
||||
|
||||
try {
|
||||
return await this.repository.bufferString(`${ref}:${path}`, defaultEncoding, autoGuessEncoding);
|
||||
return await this.repository.bufferString(`${ref}:${path}`, defaultEncoding, autoGuessEncoding, candidateGuessEncodings);
|
||||
} catch (err) {
|
||||
if (err.gitErrorCode === GitErrorCodes.WrongCase) {
|
||||
const gitRelativePath = await this.repository.getGitRelativePath(ref, path);
|
||||
return await this.repository.bufferString(`${ref}:${gitRelativePath}`, defaultEncoding, autoGuessEncoding);
|
||||
return await this.repository.bufferString(`${ref}:${gitRelativePath}`, defaultEncoding, autoGuessEncoding, candidateGuessEncodings);
|
||||
}
|
||||
|
||||
throw err;
|
||||
|
||||
Reference in New Issue
Block a user