From 3050a314f9242e044c7647dace446d4a0433ad5a Mon Sep 17 00:00:00 2001 From: Jamie Kyle <113370520+jamiebuilds-signal@users.noreply.github.com> Date: Wed, 28 Feb 2024 16:42:43 -0800 Subject: [PATCH] Add unicode bidi isolates to i18n substitutions for strings --- app/main.ts | 2 +- package.json | 2 +- ts/scripts/test-electron.ts | 6 +- ts/test-both/util/unicodeBidi_test.ts | 98 ++++++++++++ ts/util/setupI18n.tsx | 25 ++- ts/util/unicodeBidi.ts | 218 ++++++++++++++++++++++++++ 6 files changed, 345 insertions(+), 6 deletions(-) create mode 100644 ts/test-both/util/unicodeBidi_test.ts create mode 100644 ts/util/unicodeBidi.ts diff --git a/app/main.ts b/app/main.ts index f87d8de70c..e99e1f726b 100644 --- a/app/main.ts +++ b/app/main.ts @@ -986,7 +986,7 @@ async function createWindow() { await safeLoadURL( mainWindow, - getEnvironment() === Environment.Test + process.env.TEST_ELECTRON_SCRIPT != null ? await prepareFileUrl([__dirname, '../test/index.html']) : await prepareFileUrl([__dirname, '../background.html']) ); diff --git a/package.json b/package.json index 77ba9eea19..24905a643e 100644 --- a/package.json +++ b/package.json @@ -43,7 +43,7 @@ "test-electron": "node ts/scripts/test-electron.js", "test-release": "node ts/scripts/test-release.js", "test-node": "cross-env LANG=en-us electron-mocha --timeout 10000 --file test/setup-test-node.js --recursive test/modules ts/test-node ts/test-both", - "test-mock": "mocha --require ts/test-mock/setup-ci.js ts/test-mock/**/*_test.js", + "test-mock": "cross-env NODE_ENV=test mocha --require ts/test-mock/setup-ci.js ts/test-mock/**/*_test.js", "test-eslint": "mocha .eslint/rules/**/*.test.js --ignore-leaks", "test-node-coverage": "nyc --reporter=lcov --reporter=text mocha --recursive test/modules ts/test-node ts/test-both", "test-lint-intl": "ts-node ./build/intl-linter/linter.ts --test", diff --git a/ts/scripts/test-electron.ts b/ts/scripts/test-electron.ts index c05791b246..5583608cac 100644 --- a/ts/scripts/test-electron.ts +++ b/ts/scripts/test-electron.ts @@ -29,9 +29,11 @@ function launchElectron(attempt: number): string { cwd: ROOT_DIR, env: { ...process.env, - // Setting node_env to test triggers main.ts to load 'test/index.html' instead of - // 'background.html', which loads the tests via `test.js` NODE_ENV: 'test', + // Setting TEST_ELECTRON_SCRIPT to test triggers main.ts to load + // 'test/index.html' instead of 'background.html', which loads the tests + // via `test.js` + TEST_ELECTRON_SCRIPT: 'on', TEST_QUIT_ON_COMPLETE: 'on', }, encoding: 'utf8', diff --git a/ts/test-both/util/unicodeBidi_test.ts b/ts/test-both/util/unicodeBidi_test.ts new file mode 100644 index 0000000000..edb9d4e2b8 --- /dev/null +++ b/ts/test-both/util/unicodeBidi_test.ts @@ -0,0 +1,98 @@ +// Copyright 2024 Signal Messenger, LLC +// SPDX-License-Identifier: AGPL-3.0-only + +import { assert } from 'chai'; +import { + FIRST_STRONG_ISOLATE as FSI, + POP_DIRECTIONAL_ISOLATE as PSI, + LTR_ISOLATE, + POP_DIRECTIONAL_FORMATTING, + RTL_ISOLATE, + _bidiIsolate, + LTR_EMBEDDING, + LTR_OVERRIDE, + RTL_EMBEDDING, + RTL_OVERRIDE, +} from '../../util/unicodeBidi'; + +function debugUnicode(text: string) { + return text + .split('') + .map(char => char.charCodeAt(0).toString(16)) + .join(' '); +} + +function assertEqual(actual: string, expected: string) { + assert.equal(debugUnicode(actual), debugUnicode(expected)); +} + +describe('_bidiIsolate', () => { + it('returns a string with bidi isolate characters', () => { + const actual = _bidiIsolate('Hello'); + assertEqual(actual, `${FSI}Hello${PSI}`); + }); + + it('strips unopened pop isolate characters', () => { + const actual = _bidiIsolate(`${PSI}Hello`); + assertEqual(actual, `${FSI}Hello${PSI}`); + }); + + it('strips unopened pop formatting characters', () => { + const actual = _bidiIsolate(`${POP_DIRECTIONAL_FORMATTING}Hello`); + assertEqual(actual, `${FSI}Hello${PSI}`); + }); + + it('closes isolates that were left open', () => { + const actual = _bidiIsolate(`${LTR_ISOLATE}${RTL_ISOLATE}${FSI}Hello`); + assertEqual( + actual, + `${FSI}${LTR_ISOLATE}${RTL_ISOLATE}${FSI}Hello${PSI}${PSI}${PSI}${PSI}` + ); + }); + + it('closes overrides that were left open', () => { + const actual = _bidiIsolate(`${LTR_OVERRIDE}${RTL_OVERRIDE}Hello`); + assertEqual( + actual, + `${FSI}${LTR_OVERRIDE}${RTL_OVERRIDE}Hello${POP_DIRECTIONAL_FORMATTING}${POP_DIRECTIONAL_FORMATTING}${PSI}` + ); + }); + + it('closes formatting that was left open', () => { + const actual = _bidiIsolate(`${LTR_EMBEDDING}${RTL_EMBEDDING}Hello`); + assertEqual( + actual, + `${FSI}${LTR_EMBEDDING}${RTL_EMBEDDING}Hello${POP_DIRECTIONAL_FORMATTING}${POP_DIRECTIONAL_FORMATTING}${PSI}` + ); + }); + + it('leaves properly balanced isolates alone', () => { + const actual = _bidiIsolate( + `${FSI}${LTR_ISOLATE}${RTL_ISOLATE}${PSI}${PSI}${PSI}Hello` + ); + assertEqual( + actual, + `${FSI}${FSI}${LTR_ISOLATE}${RTL_ISOLATE}${PSI}${PSI}${PSI}Hello${PSI}` + ); + }); + + it('leaves properly balanced overrides alone', () => { + const actual = _bidiIsolate( + `${LTR_OVERRIDE}${RTL_OVERRIDE}${POP_DIRECTIONAL_FORMATTING}${POP_DIRECTIONAL_FORMATTING}Hello` + ); + assertEqual( + actual, + `${FSI}${LTR_OVERRIDE}${RTL_OVERRIDE}${POP_DIRECTIONAL_FORMATTING}${POP_DIRECTIONAL_FORMATTING}Hello${PSI}` + ); + }); + + it('leaves properly balanced formatting alone', () => { + const actual = _bidiIsolate( + `${LTR_EMBEDDING}${RTL_EMBEDDING}${POP_DIRECTIONAL_FORMATTING}${POP_DIRECTIONAL_FORMATTING}Hello` + ); + assertEqual( + actual, + `${FSI}${LTR_EMBEDDING}${RTL_EMBEDDING}${POP_DIRECTIONAL_FORMATTING}${POP_DIRECTIONAL_FORMATTING}Hello${PSI}` + ); + }); +}); diff --git a/ts/util/setupI18n.tsx b/ts/util/setupI18n.tsx index 9d786212d1..d84b984bbb 100644 --- a/ts/util/setupI18n.tsx +++ b/ts/util/setupI18n.tsx @@ -5,12 +5,13 @@ import React from 'react'; import type { IntlShape } from 'react-intl'; import { createIntl, createIntlCache } from 'react-intl'; import type { LocaleMessageType, LocaleMessagesType } from '../types/I18N'; -import type { LocalizerType } from '../types/Util'; +import type { LocalizerType, ReplacementValuesType } from '../types/Util'; import { strictAssert } from './assert'; import { Emojify } from '../components/conversation/Emojify'; import * as log from '../logging/log'; import * as Errors from '../types/errors'; import { Environment, getEnvironment } from '../environment'; +import { bidiIsolate } from './unicodeBidi'; export function isLocaleMessageType( value: unknown @@ -76,6 +77,23 @@ export function createCachedIntl( return intl; } +function normalizeSubstitutions( + substitutions?: ReplacementValuesType +): ReplacementValuesType | undefined { + if (!substitutions) { + return; + } + const normalized: ReplacementValuesType = {}; + for (const [key, value] of Object.entries(substitutions)) { + if (typeof value === 'string') { + normalized[key] = bidiIsolate(value); + } else { + normalized[key] = value; + } + } + return normalized; +} + export function setupI18n( locale: string, messages: LocaleMessagesType @@ -100,7 +118,10 @@ export function setupI18n( `i18n: Substitutions must be an object for ICU message "${key}"` ); - const result = intl.formatMessage({ id: key }, substitutions); + const result = intl.formatMessage( + { id: key }, + normalizeSubstitutions(substitutions) + ); strictAssert( typeof result === 'string', diff --git a/ts/util/unicodeBidi.ts b/ts/util/unicodeBidi.ts new file mode 100644 index 0000000000..94fad516f9 --- /dev/null +++ b/ts/util/unicodeBidi.ts @@ -0,0 +1,218 @@ +// Copyright 2024 Signal Messenger, LLC +// SPDX-License-Identifier: AGPL-3.0-only + +import { Environment, getEnvironment } from '../environment'; + +/** + * Left-to-Right Isolate + * Sets direction to LTR and isolates the embedded content from the surrounding text + * @example + * ```html + *
...
+ * ``` + */ +export const LTR_ISOLATE = '\u2066'; + +/** + * Right-to-Left Isolate + * Sets direction to RTL and isolates the embedded content from the surrounding text + * @example + * ```html + *
...
+ * ``` + */ +export const RTL_ISOLATE = '\u2067'; + +/** + * First Strong Isolate + * Sets direction to the first strong directional character in the embedded + * content and isolates it from the surrounding text + * @example + * ```html + *
...
+ * ``` + */ +export const FIRST_STRONG_ISOLATE = '\u2068'; + +/** + * Pop Directional Isolate + * Terminates the scope of the last LRI, RLI, FSI, or PDI, and returns to the + * embedding level of the surrounding text + * @example + * ```html + * + * ``` + */ +export const POP_DIRECTIONAL_ISOLATE = '\u2069'; + +/** + * Left-to-Right Embedding + * Sets direction to LTR but allows embedded text to interact with + * surrounding text, so risk of spillover effects + * @example + * ```html + * ... + * ``` + */ +export const LTR_EMBEDDING = '\u202A'; + +/** + * Right-to-Left Embedding + * Sets direction to RTL but allows embedded text to interact with surrounding + * text, so risk of spillover effects + * @example + * ```html + * ... + * ``` + */ +export const RTL_EMBEDDING = '\u202B'; + +/** + * Pop Directional Formatting + * Terminates the scope of the last LRE, RLE, LRI, RLI, FSI, or PDI, and + * returns to the embedding level of the surrounding text + * @example + * ```html + * + * ``` + */ +export const POP_DIRECTIONAL_FORMATTING = '\u202C'; + +/** + * Left-to-Right Override + * Forces direction to LTR, even if the surrounding text is RTL + * @example + * ```html + * ... + * ``` + */ +export const LTR_OVERRIDE = '\u202D'; + +/** + * Right-to-Left Override + * Forces direction to RTL, even if the surrounding text is LTR + * @example + * ```html + * ... + * ``` + */ +export const RTL_OVERRIDE = '\u202E'; + +export const ANY_UNICODE_DIR_CONTROL_CHAR_REGEX = new RegExp( + [ + LTR_ISOLATE, + RTL_ISOLATE, + FIRST_STRONG_ISOLATE, + POP_DIRECTIONAL_ISOLATE, + LTR_EMBEDDING, + RTL_EMBEDDING, + POP_DIRECTIONAL_FORMATTING, + LTR_OVERRIDE, + RTL_OVERRIDE, + ].join('|') +); + +export function hasAnyUnicodeDirControlChars(input: string): boolean { + return input.match(ANY_UNICODE_DIR_CONTROL_CHAR_REGEX) != null; +} + +/** + * You probably want `bidiIsolate` instead of this function. + * + * Ensures that the input string has balanced Unicode directional control + * characters. If the input string has unbalanced control characters, this + * function will add the necessary characters to balance them. + */ +function balanceUnicodeDirControlChars(input: string): string { + // This gets called by i18n code on many strings, so we want to avoid + // as much work as possible + if (!hasAnyUnicodeDirControlChars(input)) { + return input; + } + + let result = ''; + let formattingDepth = 0; + let isolateDepth = 0; + + // We need to scan the entire input string and drop some characters as we + // go in case they are closing something that was never opened. + for (let index = 0; index < input.length; index += 1) { + const char = input[index]; + switch (char) { + case LTR_EMBEDDING: + case RTL_EMBEDDING: + case LTR_OVERRIDE: + case RTL_OVERRIDE: + formattingDepth += 1; + result += char; + break; + case POP_DIRECTIONAL_FORMATTING: + formattingDepth -= 1; + // skip if its closing formatting that was never opened + if (formattingDepth >= 0) { + result += char; + } + break; + case LTR_ISOLATE: + case RTL_ISOLATE: + case FIRST_STRONG_ISOLATE: + isolateDepth += 1; + result += char; + break; + case POP_DIRECTIONAL_ISOLATE: + isolateDepth -= 1; + // skip if its closing an isolate that was never opened + if (isolateDepth >= 0) { + result += char; + } + break; + default: + result += char; + break; + } + } + + // Ensure everything is closed + let suffix = ''; + if (formattingDepth > 0) { + suffix += POP_DIRECTIONAL_FORMATTING.repeat(formattingDepth); + } + if (isolateDepth > 0) { + suffix += POP_DIRECTIONAL_ISOLATE.repeat(isolateDepth); + } + + return result + suffix; +} + +/** + * @private + * Exported for testing + */ +export function _bidiIsolate(text: string): string { + // Wrap with with first strong isolate so directional characters appear + // correctly. + return ( + FIRST_STRONG_ISOLATE + + balanceUnicodeDirControlChars(text) + + POP_DIRECTIONAL_ISOLATE + ); +} + +/** + * BEFORE YOU USE THIS, YOU PROBABLY WANT TO USE HTML ELEMENTS WITH `dir` ATTRIBUTES + * + * Wraps the input string with Unicode directional control characters to ensure + * that the text is displayed correctly in a bidirectional context. + * + * @example + * ```ts + * bidiIsolate('Hello') === '\u2068Hello\u2069' + * ``` + */ +export function bidiIsolate(text: string): string { + if (getEnvironment() === Environment.Test) { + // Turn this off in tests to make it easier to compare strings + return text; + } + return _bidiIsolate(text); +}