From a46a4a67b9b8425708d2d6303dfc50999c73a6b3 Mon Sep 17 00:00:00 2001 From: Fedor Indutny <79877362+indutny-signal@users.noreply.github.com> Date: Tue, 25 Feb 2025 09:36:00 -0800 Subject: [PATCH] More accurate startup/backups benchmarks --- ts/test-mock/benchmarks/backup_bench.ts | 60 +++++----- ts/test-mock/benchmarks/fixtures.ts | 4 + ts/test-mock/benchmarks/startup_bench.ts | 52 ++++---- ts/test-mock/bootstrap.ts | 146 +++++++++++++++++++++-- ts/test-mock/playwright.ts | 2 + ts/util/benchmark/stats.ts | 113 ++++++++++++++++++ 6 files changed, 317 insertions(+), 60 deletions(-) diff --git a/ts/test-mock/benchmarks/backup_bench.ts b/ts/test-mock/benchmarks/backup_bench.ts index f58c0892f9..b481de721e 100644 --- a/ts/test-mock/benchmarks/backup_bench.ts +++ b/ts/test-mock/benchmarks/backup_bench.ts @@ -1,38 +1,44 @@ // Copyright 2024 Signal Messenger, LLC // SPDX-License-Identifier: AGPL-3.0-only -/* eslint-disable no-console */ -import { Bootstrap } from './fixtures'; +import { Bootstrap, MAX_CYCLES } from './fixtures'; +import { type RegressionSample } from '../bootstrap'; import { generateBackup } from '../../test-both/helpers/generateBackup'; -Bootstrap.benchmark(async (bootstrap: Bootstrap): Promise => { - const { phone, server } = bootstrap; +const INITIAL_MESSAGE_COUNT = 10000; +const FINAL_MESSAGE_COUNT = 30000; - const { backupId, stream: backupStream } = generateBackup({ - aci: phone.device.aci, - profileKey: phone.profileKey.serialize(), - accountEntropyPool: phone.accountEntropyPool, - mediaRootBackupKey: phone.mediaRootBackupKey, - conversations: 1000, - messages: 60 * 1000, - }); +Bootstrap.regressionBenchmark( + async ({ bootstrap, value: messageCount }): Promise => { + const { phone, server } = bootstrap; - await server.storeBackupOnCdn(backupId, backupStream); + const { backupId, stream: backupStream } = generateBackup({ + aci: phone.device.aci, + profileKey: phone.profileKey.serialize(), + accountEntropyPool: phone.accountEntropyPool, + mediaRootBackupKey: phone.mediaRootBackupKey, + conversations: 1000, + messages: messageCount, + }); - const app = await bootstrap.link(); - const { duration: importDuration } = await app.waitForBackupImportComplete(); + await server.storeBackupOnCdn(backupId, backupStream); - const migrateStart = Date.now(); - await app.migrateAllMessages(); - const migrateEnd = Date.now(); + const app = await bootstrap.link(); + const { duration: importDuration } = + await app.waitForBackupImportComplete(); - const exportStart = Date.now(); - await app.uploadBackup(); - const exportEnd = Date.now(); + const exportStart = Date.now(); + await app.uploadBackup(); + const exportEnd = Date.now(); - console.log('run=%d info=%j', 0, { - importDuration, - exportDuration: exportEnd - exportStart, - migrationDuration: migrateEnd - migrateStart, - }); -}); + return { + importDuration, + exportDuration: exportEnd - exportStart, + }; + }, + { + fromValue: INITIAL_MESSAGE_COUNT, + toValue: FINAL_MESSAGE_COUNT, + maxCycles: MAX_CYCLES, + } +); diff --git a/ts/test-mock/benchmarks/fixtures.ts b/ts/test-mock/benchmarks/fixtures.ts index b64df6e884..dd98b29970 100644 --- a/ts/test-mock/benchmarks/fixtures.ts +++ b/ts/test-mock/benchmarks/fixtures.ts @@ -39,6 +39,10 @@ export const BLOCKED_COUNT = process.env.BLOCKED_COUNT ? parseInt(process.env.BLOCKED_COUNT, 10) : 0; +export const MAX_CYCLES = process.env.MAX_CYCLES + ? parseInt(process.env.MAX_CYCLES, 10) + : 1; + // Can happen if electron exits prematurely process.on('unhandledRejection', reason => { console.error('Unhandled rejection:'); diff --git a/ts/test-mock/benchmarks/startup_bench.ts b/ts/test-mock/benchmarks/startup_bench.ts index 2a1fc02389..6926735c14 100644 --- a/ts/test-mock/benchmarks/startup_bench.ts +++ b/ts/test-mock/benchmarks/startup_bench.ts @@ -1,29 +1,28 @@ // Copyright 2022 Signal Messenger, LLC // SPDX-License-Identifier: AGPL-3.0-only -/* eslint-disable no-await-in-loop, no-console */ import { ReceiptType } from '@signalapp/mock-server'; +import { omit } from 'lodash'; -import { debug, Bootstrap, RUN_COUNT } from './fixtures'; -import { stats } from '../../util/benchmark/stats'; +import { debug, Bootstrap, MAX_CYCLES } from './fixtures'; +import { type RegressionSample } from '../bootstrap'; -const MESSAGE_BATCH_SIZE = 1000; // messages +const INITIAL_MESSAGE_COUNT = 1000; +const FINAL_MESSAGE_COUNT = 5000; const ENABLE_RECEIPTS = Boolean(process.env.ENABLE_RECEIPTS); -Bootstrap.benchmark(async (bootstrap: Bootstrap): Promise => { - await bootstrap.linkAndClose(); +Bootstrap.regressionBenchmark( + async ({ bootstrap, value: messageCount }): Promise => { + await bootstrap.linkAndClose(); - const { server, contacts, phone, desktop } = bootstrap; + const { server, contacts, phone, desktop } = bootstrap; - const messagesPerSec = new Array(); - - for (let runId = 0; runId < RUN_COUNT; runId += 1) { // Generate messages const messagePromises = new Array>(); debug('started generating messages'); - for (let i = 0; i < MESSAGE_BATCH_SIZE; i += 1) { + for (let i = 0; i < messageCount; i += 1) { const contact = contacts[Math.floor(i / 2) % contacts.length]; const direction = i % 2 ? 'message' : 'reply'; @@ -33,7 +32,7 @@ Bootstrap.benchmark(async (bootstrap: Bootstrap): Promise => { messagePromises.push( contact.encryptText( desktop, - `Ping from mock server ${i + 1} / ${MESSAGE_BATCH_SIZE}`, + `Ping from mock server ${i + 1} / ${messageCount}`, { timestamp: messageTimestamp, sealed: true, @@ -60,7 +59,7 @@ Bootstrap.benchmark(async (bootstrap: Bootstrap): Promise => { messagePromises.push( phone.encryptSyncSent( desktop, - `Pong from mock server ${i + 1} / ${MESSAGE_BATCH_SIZE}`, + `Pong from mock server ${i + 1} / ${messageCount}`, { timestamp: messageTimestamp, destinationServiceId: contact.device.aci, @@ -103,23 +102,26 @@ Bootstrap.benchmark(async (bootstrap: Bootstrap): Promise => { ); }; - const run = async (): Promise => { + const run = async () => { const app = await bootstrap.startApp(); const appLoadedInfo = await app.waitUntilLoaded(); - console.log('run=%d info=%j', runId, appLoadedInfo); - - messagesPerSec.push(appLoadedInfo.messagesPerSec); - await app.close(); + + return appLoadedInfo; }; - await Promise.all([queue(), run()]); - } - } + const [, info] = await Promise.all([queue(), run()]); - // Compute human-readable statistics - if (messagesPerSec.length !== 0) { - console.log('stats info=%j', { messagesPerSec: stats(messagesPerSec) }); + const { loadTime, preloadTime, connectTime } = info; + const messagesDuration = loadTime - preloadTime - connectTime; + + return { messagesDuration, metrics: omit(info, 'messagesPerSec') }; + } + }, + { + fromValue: INITIAL_MESSAGE_COUNT, + toValue: FINAL_MESSAGE_COUNT, + maxCycles: MAX_CYCLES, } -}); +); diff --git a/ts/test-mock/bootstrap.ts b/ts/test-mock/bootstrap.ts index 81f0c37aa6..83f16a5186 100644 --- a/ts/test-mock/bootstrap.ts +++ b/ts/test-mock/bootstrap.ts @@ -22,8 +22,9 @@ import { loadCertificates, } from '@signalapp/mock-server'; import { MAX_READ_KEYS as MAX_STORAGE_READ_KEYS } from '../services/storageConstants'; -import * as durations from '../util/durations'; +import { SECOND, MINUTE, WEEK, MONTH } from '../util/durations'; import { drop } from '../util/drop'; +import { regress } from '../util/benchmark/stats'; import type { RendererConfigType } from '../types/RendererConfig'; import type { MIMEType } from '../types/MIME'; import { App } from './playwright'; @@ -138,6 +139,28 @@ type BootstrapInternalOptions = BootstrapOptions & contactNames: ReadonlyArray; }>; +export type RegressionBenchmarkOptions = Readonly<{ + fromValue: number; + toValue: number; + iterationCount?: number; + maxCycles?: number; + maxError?: number; + timeout?: number; +}>; + +export type RegressionBenchmarkFnOptions = Readonly<{ + bootstrap: Bootstrap; + iteration: number; + value: number; +}>; + +export type RegressionSample = Readonly<{ + [key: `${string}Duration`]: number; + + // Metrics independent of the regressed value + metrics?: Record; +}>; + function sanitizePathComponent(component: string): string { return normalizePath(component.replace(/[^a-z]+/gi, '-')); } @@ -187,7 +210,7 @@ export class Bootstrap { #privPhone?: PrimaryDevice; #privDesktop?: Device; #storagePath?: string; - #timestamp: number = Date.now() - durations.WEEK; + #timestamp: number = Date.now() - WEEK; #lastApp?: App; readonly #randomId = crypto.randomBytes(8).toString('hex'); @@ -284,11 +307,18 @@ export class Bootstrap { public static benchmark( fn: (bootstrap: Bootstrap) => Promise, - timeout = 5 * durations.MINUTE + timeout = 5 * MINUTE ): void { drop(Bootstrap.runBenchmark(fn, timeout)); } + public static regressionBenchmark( + fn: (fnOptions: RegressionBenchmarkFnOptions) => Promise, + options: RegressionBenchmarkOptions + ): void { + drop(Bootstrap.runRegressionBenchmark(fn, options)); + } + public get logsDir(): string { assert( this.#storagePath !== undefined, @@ -697,18 +727,19 @@ export class Bootstrap { return outDir; } - private static async runBenchmark( - fn: (bootstrap: Bootstrap) => Promise, + private static async runBenchmark( + fn: (bootstrap: Bootstrap) => Promise, timeout: number - ): Promise { + ): Promise { const bootstrap = new Bootstrap({ benchmark: true, }); await bootstrap.init(); + let result: Result; try { - await pTimeout(fn(bootstrap), timeout); + result = await pTimeout(fn(bootstrap), timeout); if (process.env.FORCE_ARTIFACT_SAVE) { await bootstrap.saveLogs(); } @@ -718,6 +749,105 @@ export class Bootstrap { } finally { await bootstrap.teardown(); } + + return result; + } + + private static async runRegressionBenchmark( + fn: (fnOptions: RegressionBenchmarkFnOptions) => Promise, + { + iterationCount = 10, + maxCycles = 1, + maxError = 0.025 /* 2.5% */, + fromValue, + toValue, + timeout = 5 * MINUTE, + }: RegressionBenchmarkOptions + ): Promise { + if (iterationCount <= 1) { + throw new Error('Not enough iterations'); + } + + const samples = new Array<{ value: number; data: RegressionSample }>(); + let lineNum = 0; + for (let cycle = 0; cycle < maxCycles; cycle += 1) { + for (let iteration = 0; iteration < iterationCount; iteration += 1) { + const progress = (iteration % iterationCount) / (iterationCount - 1); + const value = Math.round( + fromValue * (1 - progress) + toValue * progress + ); + + // eslint-disable-next-line no-await-in-loop + const data = await Bootstrap.runBenchmark(bootstrap => { + return fn({ bootstrap, iteration, value }); + }, timeout); + + if (data.metrics) { + // eslint-disable-next-line no-console + console.log(`run=${lineNum} info=%j`, data.metrics); + lineNum += 1; + } + + samples.push({ + value, + data, + }); + + // eslint-disable-next-line no-console + console.log( + 'cycle=%d iteration=%d value=%d data=%j', + cycle, + iteration, + value, + data + ); + } + + const result: Record = Object.create(null); + const keys = Object.keys(samples[0].data).filter( + (key: string): key is `${string}Duration` => key.endsWith('Duration') + ); + const human = new Array(); + + let worstError = 0; + for (const key of keys) { + const { yIntercept, slope, confidence, outliers, severeOutliers } = + regress(samples.map(s => ({ y: s.value, x: s.data[key] }))); + + const delay = -yIntercept / slope; + const perSecond = slope * SECOND; + const error = confidence * SECOND; + + const valueType = key.replace(/Duration$/, ''); + + human.push( + `cycle=${cycle} ${valueType}PerSecond=` + + `${perSecond.toFixed(2)}±${error.toFixed(2)} ` + + `outliers=${outliers + severeOutliers} delay=${delay.toFixed(2)}ms` + ); + + result[`${valueType}PerSec`] = perSecond; + result[`${valueType}Delay`] = delay; + result[`${valueType}Error`] = error; + + worstError = Math.max(worstError, error / perSecond); + } + + // eslint-disable-next-line no-console + console.log(human.join('\n')); + + if (cycle !== maxCycles - 1 && worstError > maxError) { + // eslint-disable-next-line no-console + console.warn( + `cycle=${cycle} error=${worstError} max=${maxError} continuing` + ); + continue; + } + + // eslint-disable-next-line no-console + console.log(`run=${lineNum} info=%j`, result); + break; + } } async #generateConfig( @@ -734,7 +864,7 @@ export class Bootstrap { forcePreloadBundle: this.#options.benchmark, ciMode: 'full', - buildExpiration: Date.now() + durations.MONTH, + buildExpiration: Date.now() + MONTH, storagePath: this.#storagePath, storageProfile: 'mock', serverUrl: url, diff --git a/ts/test-mock/playwright.ts b/ts/test-mock/playwright.ts index 257572c185..6b4bbea410 100644 --- a/ts/test-mock/playwright.ts +++ b/ts/test-mock/playwright.ts @@ -18,6 +18,8 @@ import type { SocketStatuses } from '../textsecure/SocketManager'; export type AppLoadedInfoType = Readonly<{ loadTime: number; + preloadTime: number; + connectTime: number; messagesPerSec: number; }>; diff --git a/ts/util/benchmark/stats.ts b/ts/util/benchmark/stats.ts index 3dd412cecd..8209903183 100644 --- a/ts/util/benchmark/stats.ts +++ b/ts/util/benchmark/stats.ts @@ -38,3 +38,116 @@ export function stats( return result; } + +// t-distribution value for various sample count and p=0.001 +// https://www.itl.nist.gov/div898/handbook/eda/section3/eda3672.htm +const STUDENT_T = [ + 318.309, 22.327, 10.215, 7.173, 5.893, 5.208, 4.785, 4.501, 4.297, 4.144, + 4.025, 3.93, 3.852, 3.787, 3.733, 3.686, 3.646, 3.61, 3.579, 3.552, 3.527, + 3.505, 3.485, 3.467, 3.45, 3.435, 3.421, 3.408, 3.396, 3.385, 3.375, 3.365, + 3.356, 3.348, 3.34, 3.333, 3.326, 3.319, 3.313, 3.307, 3.301, 3.296, 3.291, + 3.286, 3.281, 3.277, 3.273, 3.269, 3.265, 3.261, 3.258, 3.255, 3.251, 3.248, + 3.245, 3.242, 3.239, 3.237, 3.234, 3.232, 3.229, 3.227, 3.225, 3.223, 3.22, + 3.218, 3.216, 3.214, 3.213, 3.211, 3.209, 3.207, 3.206, 3.204, 3.202, 3.201, + 3.199, 3.198, 3.197, 3.195, 3.194, 3.193, 3.191, 3.19, 3.189, 3.188, 3.187, + 3.185, 3.184, 3.183, 3.182, 3.181, 3.18, 3.179, 3.178, 3.177, 3.176, 3.175, + 3.175, 3.174, + + // Infinity + 3.09, +]; + +export type Sample = Readonly<{ + y: number; + x: number; +}>; + +export type Regression = Readonly<{ + yIntercept: number; + slope: number; + confidence: number; + outliers: number; + severeOutliers: number; +}>; + +export function regress(samples: ReadonlyArray): Regression { + // Bin the data by iteration count + const bins = new Map>(); + for (const { x, y } of samples) { + let bin = bins.get(x); + if (bin === undefined) { + bin = []; + bins.set(x, bin); + } + bin.push(y); + } + + let outliers = 0; + let severeOutliers = 0; + + // Within each iteration bin identify the outliers for reporting purposes. + for (const [, ys] of bins) { + ys.sort(); + + const p25 = ys[Math.floor(ys.length * 0.25)] ?? -Infinity; + const p75 = ys[Math.ceil(ys.length * 0.75)] ?? +Infinity; + const iqr = p75 - p25; + + const outlierLow = p25 - iqr * 1.5; + const outlierHigh = p75 + iqr * 1.5; + const badOutlierLow = p25 - iqr * 3; + const badOutlierHigh = p75 + iqr * 3; + + // Tukey's method + for (const d of ys) { + if (d < badOutlierLow || d > badOutlierHigh) { + severeOutliers += 1; + } else if (d < outlierLow || d > outlierHigh) { + outliers += 1; + } + } + } + + if (samples.length < 2) { + throw new Error('Low sample count'); + } + + let meanY = 0; + let meanX = 0; + for (const { y, x } of samples) { + meanY += y; + meanX += x; + } + meanY /= samples.length; + meanX /= samples.length; + + let slopeNum = 0; + let slopeDenom = 0; + for (const { y, x } of samples) { + slopeNum += (y - meanY) * (x - meanX); + slopeDenom += (x - meanX) ** 2; + } + + // Slope + const slope = slopeNum / slopeDenom; + + // Y-Intercept + const yIntercept = meanY - slope * meanX; + + let stdError = 0; + for (const { y, x } of samples) { + stdError += (y - yIntercept - slope * x) ** 2; + } + stdError /= samples.length - 2; + stdError /= slopeDenom; + stdError = Math.sqrt(stdError); + + return { + yIntercept, + slope, + confidence: + STUDENT_T[Math.min(samples.length, STUDENT_T.length - 1)] * stdError, + outliers, + severeOutliers, + }; +}