diff --git a/js/modules/link_previews.js b/js/modules/link_previews.js index b35a89347d..8837f1d40f 100644 --- a/js/modules/link_previews.js +++ b/js/modules/link_previews.js @@ -212,134 +212,7 @@ function assembleChunks(chunkDescriptors) { return concatenateBytes(...chunks); } -const LATIN_PATTERN = new RegExp( - '[' + - '\\u0041-\\u005A' + - '\\u0061-\\u007A' + - '\\u00AA' + - '\\u00BA' + - '\\u00C0-\\u00DC' + - '\\u00D8-\\u00F6' + - '\\u00F8-\\u01BA' + - ']' -); - -const CYRILLIC_PATTERN = new RegExp( - '[' + - '\\u0400-\\u0481' + - '\\u0482' + - '\\u0483-\\u0484' + - '\\u0487' + - '\\u0488-\\u0489' + - '\\u048A-\\u052F' + - '\\u1C80-\\u1C88' + - '\\u1D2B' + - '\\u1D78' + - '\\u2DE0-\\u2DFF' + - '\\uA640-\\uA66D' + - '\\uA66E' + - '\\uA66F' + - '\\uA670-\\uA672' + - '\\uA673' + - '\\uA674-\\uA67D' + - '\\uA67E' + - '\\uA67F' + - '\\uA680-\\uA69B' + - '\\uA69C-\\uA69D' + - '\\uA69E-\\uA69F' + - '\\uFE2E-\\uFE2F' + - ']' -); - -const GREEK_PATTERN = new RegExp( - '[' + - '\\u0370-\\u0373' + - '\\u0375' + - '\\u0376-\\u0377' + - '\\u037A' + - '\\u037B-\\u037D' + - '\\u037F' + - '\\u0384' + - '\\u0386' + - '\\u0388-\\u038A' + - '\\u038C' + - '\\u038E-\\u03A1' + - '\\u03A3-\\u03E1' + - '\\u03F0-\\u03F5' + - '\\u03F6' + - '\\u03F7-\\u03FF' + - '\\u1D26-\\u1D2A' + - '\\u1D5D-\\u1D61' + - '\\u1D66-\\u1D6A' + - '\\u1DBF' + - '\\u1F00-\\u1F15' + - '\\u1F18-\\u1F1D' + - '\\u1F20-\\u1F45' + - '\\u1F48-\\u1F4D' + - '\\u1F50-\\u1F57' + - '\\u1F59' + - '\\u1F5B' + - '\\u1F5D' + - '\\u1F5F-\\u1F7D' + - '\\u1F80-\\u1FB4' + - '\\u1FB6-\\u1FBC' + - '\\u1FBD' + - '\\u1FBE' + - '\\u1FBF-\\u1FC1' + - '\\u1FC2-\\u1FC4' + - '\\u1FC6-\\u1FCC' + - '\\u1FCD-\\u1FCF' + - '\\u1FD0-\\u1FD3' + - '\\u1FD6-\\u1FDB' + - '\\u1FDD-\\u1FDF' + - '\\u1FE0-\\u1FEC' + - '\\u1FED-\\u1FEF' + - '\\u1FF2-\\u1FF4' + - '\\u1FF6-\\u1FFC' + - '\\u1FFD-\\u1FFE' + - '\\u2126' + - '\\uAB65' + - ']' -); - -const HIGH_GREEK_PATTERN = new RegExp( - '[' + - `${String.fromCodePoint(0x10140)}-${String.fromCodePoint(0x10174)}` + - `${String.fromCodePoint(0x10175)}-${String.fromCodePoint(0x10178)}` + - `${String.fromCodePoint(0x10179)}-${String.fromCodePoint(0x10189)}` + - `${String.fromCodePoint(0x1018a)}-${String.fromCodePoint(0x1018b)}` + - `${String.fromCodePoint(0x1018c)}-${String.fromCodePoint(0x1018e)}` + - `${String.fromCodePoint(0x101a0)}` + - `${String.fromCodePoint(0x1d200)}-${String.fromCodePoint(0x1d241)}` + - `${String.fromCodePoint(0x1d242)}-${String.fromCodePoint(0x1d244)}` + - `${String.fromCodePoint(0x1d245)}` + - ']', - 'u' -); - -function isChunkSneaky(chunk) { - const hasLatin = LATIN_PATTERN.test(chunk); - if (!hasLatin) { - return false; - } - - const hasCyrillic = CYRILLIC_PATTERN.test(chunk); - if (hasCyrillic) { - return true; - } - - const hasGreek = GREEK_PATTERN.test(chunk); - if (hasGreek) { - return true; - } - - const hasHighGreek = HIGH_GREEK_PATTERN.test(chunk); - if (hasHighGreek) { - return true; - } - - return false; -} +const ASCII_PATTERN = new RegExp('[\\u0000-\\u007F]', 'g'); function isLinkSneaky(link) { const domain = getDomain(link); @@ -350,12 +223,14 @@ function isLinkSneaky(link) { ? nodeUrl.domainToUnicode(domain) : domain; - const chunks = unicodeDomain.split('.'); - for (let i = 0, max = chunks.length; i < max; i += 1) { - const chunk = chunks[i]; - if (isChunkSneaky(chunk)) { - return true; - } + const withoutPeriods = unicodeDomain.replace(/\./g, ''); + + const hasASCII = ASCII_PATTERN.test(withoutPeriods); + const withoutASCII = withoutPeriods.replace(ASCII_PATTERN, ''); + + const isMixed = hasASCII && withoutASCII.length > 0; + if (isMixed) { + return true; } return false; diff --git a/test/modules/link_previews_test.js b/test/modules/link_previews_test.js index 5be7f88b7d..6054f8b5f7 100644 --- a/test/modules/link_previews_test.js +++ b/test/modules/link_previews_test.js @@ -360,6 +360,12 @@ describe('Link previews', () => { assert.strictEqual(actual, true); }); + it('returns true for ASCII and non-ASCII mix', () => { + const link = 'https://www.аррӏе.com'; + const actual = isLinkSneaky(link); + assert.strictEqual(actual, true); + }); + it('returns true for Latin + High Greek domain', () => { const link = `https://www.apple${String.fromCodePoint(0x101a0)}.com`; const actual = isLinkSneaky(link);