From a46a4a67b9b8425708d2d6303dfc50999c73a6b3 Mon Sep 17 00:00:00 2001
From: Fedor Indutny <79877362+indutny-signal@users.noreply.github.com>
Date: Tue, 25 Feb 2025 09:36:00 -0800
Subject: [PATCH] More accurate startup/backups benchmarks

---
 ts/test-mock/benchmarks/backup_bench.ts  |  60 +++++-----
 ts/test-mock/benchmarks/fixtures.ts      |   4 +
 ts/test-mock/benchmarks/startup_bench.ts |  52 ++++----
 ts/test-mock/bootstrap.ts                | 146 +++++++++++++++++++++--
 ts/test-mock/playwright.ts               |   2 +
 ts/util/benchmark/stats.ts               | 113 ++++++++++++++++++
 6 files changed, 317 insertions(+), 60 deletions(-)
diff --git a/ts/test-mock/benchmarks/backup_bench.ts b/ts/test-mock/benchmarks/backup_bench.ts
index f58c0892f9..b481de721e 100644
--- a/ts/test-mock/benchmarks/backup_bench.ts
+++ b/ts/test-mock/benchmarks/backup_bench.ts
@@ -1,38 +1,44 @@
 // Copyright 2024 Signal Messenger, LLC
 // SPDX-License-Identifier: AGPL-3.0-only
-/* eslint-disable no-console */
 
-import { Bootstrap } from './fixtures';
+import { Bootstrap, MAX_CYCLES } from './fixtures';
+import { type RegressionSample } from '../bootstrap';
 import { generateBackup } from '../../test-both/helpers/generateBackup';
 
-Bootstrap.benchmark(async (bootstrap: Bootstrap): Promise<void> => {
-  const { phone, server } = bootstrap;
+const INITIAL_MESSAGE_COUNT = 10000;
+const FINAL_MESSAGE_COUNT = 30000;
 
-  const { backupId, stream: backupStream } = generateBackup({
-    aci: phone.device.aci,
-    profileKey: phone.profileKey.serialize(),
-    accountEntropyPool: phone.accountEntropyPool,
-    mediaRootBackupKey: phone.mediaRootBackupKey,
-    conversations: 1000,
-    messages: 60 * 1000,
-  });
+Bootstrap.regressionBenchmark(
+  async ({ bootstrap, value: messageCount }): Promise<RegressionSample> => {
+    const { phone, server } = bootstrap;
 
-  await server.storeBackupOnCdn(backupId, backupStream);
+    const { backupId, stream: backupStream } = generateBackup({
+      aci: phone.device.aci,
+      profileKey: phone.profileKey.serialize(),
+      accountEntropyPool: phone.accountEntropyPool,
+      mediaRootBackupKey: phone.mediaRootBackupKey,
+      conversations: 1000,
+      messages: messageCount,
+    });
 
-  const app = await bootstrap.link();
-  const { duration: importDuration } = await app.waitForBackupImportComplete();
+    await server.storeBackupOnCdn(backupId, backupStream);
 
-  const migrateStart = Date.now();
-  await app.migrateAllMessages();
-  const migrateEnd = Date.now();
+    const app = await bootstrap.link();
+    const { duration: importDuration } =
+      await app.waitForBackupImportComplete();
 
-  const exportStart = Date.now();
-  await app.uploadBackup();
-  const exportEnd = Date.now();
+    const exportStart = Date.now();
+    await app.uploadBackup();
+    const exportEnd = Date.now();
 
-  console.log('run=%d info=%j', 0, {
-    importDuration,
-    exportDuration: exportEnd - exportStart,
-    migrationDuration: migrateEnd - migrateStart,
-  });
-});
+    return {
+      importDuration,
+      exportDuration: exportEnd - exportStart,
+    };
+  },
+  {
+    fromValue: INITIAL_MESSAGE_COUNT,
+    toValue: FINAL_MESSAGE_COUNT,
+    maxCycles: MAX_CYCLES,
+  }
+);
diff --git a/ts/test-mock/benchmarks/fixtures.ts b/ts/test-mock/benchmarks/fixtures.ts
index b64df6e884..dd98b29970 100644
--- a/ts/test-mock/benchmarks/fixtures.ts
+++ b/ts/test-mock/benchmarks/fixtures.ts
@@ -39,6 +39,10 @@ export const BLOCKED_COUNT = process.env.BLOCKED_COUNT
   ? parseInt(process.env.BLOCKED_COUNT, 10)
   : 0;
 
+export const MAX_CYCLES = process.env.MAX_CYCLES
+  ? parseInt(process.env.MAX_CYCLES, 10)
+  : 1;
+
 // Can happen if electron exits prematurely
 process.on('unhandledRejection', reason => {
   console.error('Unhandled rejection:');
diff --git a/ts/test-mock/benchmarks/startup_bench.ts b/ts/test-mock/benchmarks/startup_bench.ts
index 2a1fc02389..6926735c14 100644
--- a/ts/test-mock/benchmarks/startup_bench.ts
+++ b/ts/test-mock/benchmarks/startup_bench.ts
@@ -1,29 +1,28 @@
 // Copyright 2022 Signal Messenger, LLC
 // SPDX-License-Identifier: AGPL-3.0-only
-/* eslint-disable no-await-in-loop, no-console */
 
 import { ReceiptType } from '@signalapp/mock-server';
+import { omit } from 'lodash';
 
-import { debug, Bootstrap, RUN_COUNT } from './fixtures';
-import { stats } from '../../util/benchmark/stats';
+import { debug, Bootstrap, MAX_CYCLES } from './fixtures';
+import { type RegressionSample } from '../bootstrap';
 
-const MESSAGE_BATCH_SIZE = 1000; // messages
+const INITIAL_MESSAGE_COUNT = 1000;
+const FINAL_MESSAGE_COUNT = 5000;
 
 const ENABLE_RECEIPTS = Boolean(process.env.ENABLE_RECEIPTS);
 
-Bootstrap.benchmark(async (bootstrap: Bootstrap): Promise<void> => {
-  await bootstrap.linkAndClose();
+Bootstrap.regressionBenchmark(
+  async ({ bootstrap, value: messageCount }): Promise<RegressionSample> => {
+    await bootstrap.linkAndClose();
 
-  const { server, contacts, phone, desktop } = bootstrap;
+    const { server, contacts, phone, desktop } = bootstrap;
 
-  const messagesPerSec = new Array<number>();
-
-  for (let runId = 0; runId < RUN_COUNT; runId += 1) {
     // Generate messages
     const messagePromises = new Array<Promise<Buffer>>();
     debug('started generating messages');
 
-    for (let i = 0; i < MESSAGE_BATCH_SIZE; i += 1) {
+    for (let i = 0; i < messageCount; i += 1) {
       const contact = contacts[Math.floor(i / 2) % contacts.length];
       const direction = i % 2 ? 'message' : 'reply';
 
@@ -33,7 +32,7 @@ Bootstrap.benchmark(async (bootstrap: Bootstrap): Promise<void> => {
         messagePromises.push(
           contact.encryptText(
             desktop,
-            `Ping from mock server ${i + 1} / ${MESSAGE_BATCH_SIZE}`,
+            `Ping from mock server ${i + 1} / ${messageCount}`,
             {
               timestamp: messageTimestamp,
               sealed: true,
@@ -60,7 +59,7 @@ Bootstrap.benchmark(async (bootstrap: Bootstrap): Promise<void> => {
       messagePromises.push(
         phone.encryptSyncSent(
           desktop,
-          `Pong from mock server ${i + 1} / ${MESSAGE_BATCH_SIZE}`,
+          `Pong from mock server ${i + 1} / ${messageCount}`,
           {
             timestamp: messageTimestamp,
             destinationServiceId: contact.device.aci,
@@ -103,23 +102,26 @@ Bootstrap.benchmark(async (bootstrap: Bootstrap): Promise<void> => {
         );
       };
 
-      const run = async (): Promise<void> => {
+      const run = async () => {
         const app = await bootstrap.startApp();
         const appLoadedInfo = await app.waitUntilLoaded();
 
-        console.log('run=%d info=%j', runId, appLoadedInfo);
-
-        messagesPerSec.push(appLoadedInfo.messagesPerSec);
-
         await app.close();
+
+        return appLoadedInfo;
       };
 
-      await Promise.all([queue(), run()]);
-    }
-  }
+      const [, info] = await Promise.all([queue(), run()]);
 
-  // Compute human-readable statistics
-  if (messagesPerSec.length !== 0) {
-    console.log('stats info=%j', { messagesPerSec: stats(messagesPerSec) });
+      const { loadTime, preloadTime, connectTime } = info;
+      const messagesDuration = loadTime - preloadTime - connectTime;
+
+      return { messagesDuration, metrics: omit(info, 'messagesPerSec') };
+    }
+  },
+  {
+    fromValue: INITIAL_MESSAGE_COUNT,
+    toValue: FINAL_MESSAGE_COUNT,
+    maxCycles: MAX_CYCLES,
   }
-});
+);
diff --git a/ts/test-mock/bootstrap.ts b/ts/test-mock/bootstrap.ts
index 81f0c37aa6..83f16a5186 100644
--- a/ts/test-mock/bootstrap.ts
+++ b/ts/test-mock/bootstrap.ts
@@ -22,8 +22,9 @@ import {
   loadCertificates,
 } from '@signalapp/mock-server';
 import { MAX_READ_KEYS as MAX_STORAGE_READ_KEYS } from '../services/storageConstants';
-import * as durations from '../util/durations';
+import { SECOND, MINUTE, WEEK, MONTH } from '../util/durations';
 import { drop } from '../util/drop';
+import { regress } from '../util/benchmark/stats';
 import type { RendererConfigType } from '../types/RendererConfig';
 import type { MIMEType } from '../types/MIME';
 import { App } from './playwright';
@@ -138,6 +139,28 @@ type BootstrapInternalOptions = BootstrapOptions &
     contactNames: ReadonlyArray<string>;
   }>;
 
+export type RegressionBenchmarkOptions = Readonly<{
+  fromValue: number;
+  toValue: number;
+  iterationCount?: number;
+  maxCycles?: number;
+  maxError?: number;
+  timeout?: number;
+}>;
+
+export type RegressionBenchmarkFnOptions = Readonly<{
+  bootstrap: Bootstrap;
+  iteration: number;
+  value: number;
+}>;
+
+export type RegressionSample = Readonly<{
+  [key: `${string}Duration`]: number;
+
+  // Metrics independent of the regressed value
+  metrics?: Record<string, number>;
+}>;
+
 function sanitizePathComponent(component: string): string {
   return normalizePath(component.replace(/[^a-z]+/gi, '-'));
 }
@@ -187,7 +210,7 @@ export class Bootstrap {
   #privPhone?: PrimaryDevice;
   #privDesktop?: Device;
   #storagePath?: string;
-  #timestamp: number = Date.now() - durations.WEEK;
+  #timestamp: number = Date.now() - WEEK;
   #lastApp?: App;
   readonly #randomId = crypto.randomBytes(8).toString('hex');
 
@@ -284,11 +307,18 @@ export class Bootstrap {
 
   public static benchmark(
     fn: (bootstrap: Bootstrap) => Promise<void>,
-    timeout = 5 * durations.MINUTE
+    timeout = 5 * MINUTE
   ): void {
     drop(Bootstrap.runBenchmark(fn, timeout));
   }
 
+  public static regressionBenchmark(
+    fn: (fnOptions: RegressionBenchmarkFnOptions) => Promise<RegressionSample>,
+    options: RegressionBenchmarkOptions
+  ): void {
+    drop(Bootstrap.runRegressionBenchmark(fn, options));
+  }
+
   public get logsDir(): string {
     assert(
       this.#storagePath !== undefined,
@@ -697,18 +727,19 @@ export class Bootstrap {
     return outDir;
   }
 
-  private static async runBenchmark(
-    fn: (bootstrap: Bootstrap) => Promise<void>,
+  private static async runBenchmark<Result>(
+    fn: (bootstrap: Bootstrap) => Promise<Result>,
     timeout: number
-  ): Promise<void> {
+  ): Promise<Result> {
     const bootstrap = new Bootstrap({
       benchmark: true,
     });
 
     await bootstrap.init();
 
+    let result: Result;
     try {
-      await pTimeout(fn(bootstrap), timeout);
+      result = await pTimeout(fn(bootstrap), timeout);
       if (process.env.FORCE_ARTIFACT_SAVE) {
         await bootstrap.saveLogs();
       }
@@ -718,6 +749,105 @@ export class Bootstrap {
     } finally {
       await bootstrap.teardown();
     }
+
+    return result;
+  }
+
+  private static async runRegressionBenchmark(
+    fn: (fnOptions: RegressionBenchmarkFnOptions) => Promise<RegressionSample>,
+    {
+      iterationCount = 10,
+      maxCycles = 1,
+      maxError = 0.025 /* 2.5% */,
+      fromValue,
+      toValue,
+      timeout = 5 * MINUTE,
+    }: RegressionBenchmarkOptions
+  ): Promise<void> {
+    if (iterationCount <= 1) {
+      throw new Error('Not enough iterations');
+    }
+
+    const samples = new Array<{ value: number; data: RegressionSample }>();
+    let lineNum = 0;
+    for (let cycle = 0; cycle < maxCycles; cycle += 1) {
+      for (let iteration = 0; iteration < iterationCount; iteration += 1) {
+        const progress = (iteration % iterationCount) / (iterationCount - 1);
+        const value = Math.round(
+          fromValue * (1 - progress) + toValue * progress
+        );
+
+        // eslint-disable-next-line no-await-in-loop
+        const data = await Bootstrap.runBenchmark(bootstrap => {
+          return fn({ bootstrap, iteration, value });
+        }, timeout);
+
+        if (data.metrics) {
+          // eslint-disable-next-line no-console
+          console.log(`run=${lineNum} info=%j`, data.metrics);
+          lineNum += 1;
+        }
+
+        samples.push({
+          value,
+          data,
+        });
+
+        // eslint-disable-next-line no-console
+        console.log(
+          'cycle=%d iteration=%d value=%d data=%j',
+          cycle,
+          iteration,
+          value,
+          data
+        );
+      }
+
+      const result: Record<string, number> = Object.create(null);
+      const keys = Object.keys(samples[0].data).filter(
+        (key: string): key is `${string}Duration` => key.endsWith('Duration')
+      );
+      const human = new Array<string>();
+
+      let worstError = 0;
+      for (const key of keys) {
+        const { yIntercept, slope, confidence, outliers, severeOutliers } =
+          regress(samples.map(s => ({ y: s.value, x: s.data[key] })));
+
+        const delay = -yIntercept / slope;
+        const perSecond = slope * SECOND;
+        const error = confidence * SECOND;
+
+        const valueType = key.replace(/Duration$/, '');
+
+        human.push(
+          `cycle=${cycle} ${valueType}PerSecond=` +
+            `${perSecond.toFixed(2)}±${error.toFixed(2)} ` +
+            `outliers=${outliers + severeOutliers} delay=${delay.toFixed(2)}ms`
+        );
+
+        result[`${valueType}PerSec`] = perSecond;
+        result[`${valueType}Delay`] = delay;
+        result[`${valueType}Error`] = error;
+
+        worstError = Math.max(worstError, error / perSecond);
+      }
+
+      // eslint-disable-next-line no-console
+      console.log(human.join('\n'));
+
+      if (cycle !== maxCycles - 1 && worstError > maxError) {
+        // eslint-disable-next-line no-console
+        console.warn(
+          `cycle=${cycle} error=${worstError} max=${maxError} continuing`
+        );
+        continue;
+      }
+
+      // eslint-disable-next-line no-console
+      console.log(`run=${lineNum} info=%j`, result);
+      break;
+    }
   }
 
   async #generateConfig(
@@ -734,7 +864,7 @@ export class Bootstrap {
       forcePreloadBundle: this.#options.benchmark,
       ciMode: 'full',
 
-      buildExpiration: Date.now() + durations.MONTH,
+      buildExpiration: Date.now() + MONTH,
       storagePath: this.#storagePath,
       storageProfile: 'mock',
       serverUrl: url,
diff --git a/ts/test-mock/playwright.ts b/ts/test-mock/playwright.ts
index 257572c185..6b4bbea410 100644
--- a/ts/test-mock/playwright.ts
+++ b/ts/test-mock/playwright.ts
@@ -18,6 +18,8 @@ import type { SocketStatuses } from '../textsecure/SocketManager';
 
 export type AppLoadedInfoType = Readonly<{
   loadTime: number;
+  preloadTime: number;
+  connectTime: number;
   messagesPerSec: number;
 }>;
 
diff --git a/ts/util/benchmark/stats.ts b/ts/util/benchmark/stats.ts
index 3dd412cecd..8209903183 100644
--- a/ts/util/benchmark/stats.ts
+++ b/ts/util/benchmark/stats.ts
@@ -38,3 +38,116 @@ export function stats(
 
   return result;
 }
+
+// t-distribution value for various sample count and p=0.001
+// https://www.itl.nist.gov/div898/handbook/eda/section3/eda3672.htm
+const STUDENT_T = [
+  318.309, 22.327, 10.215, 7.173, 5.893, 5.208, 4.785, 4.501, 4.297, 4.144,
+  4.025, 3.93, 3.852, 3.787, 3.733, 3.686, 3.646, 3.61, 3.579, 3.552, 3.527,
+  3.505, 3.485, 3.467, 3.45, 3.435, 3.421, 3.408, 3.396, 3.385, 3.375, 3.365,
+  3.356, 3.348, 3.34, 3.333, 3.326, 3.319, 3.313, 3.307, 3.301, 3.296, 3.291,
+  3.286, 3.281, 3.277, 3.273, 3.269, 3.265, 3.261, 3.258, 3.255, 3.251, 3.248,
+  3.245, 3.242, 3.239, 3.237, 3.234, 3.232, 3.229, 3.227, 3.225, 3.223, 3.22,
+  3.218, 3.216, 3.214, 3.213, 3.211, 3.209, 3.207, 3.206, 3.204, 3.202, 3.201,
+  3.199, 3.198, 3.197, 3.195, 3.194, 3.193, 3.191, 3.19, 3.189, 3.188, 3.187,
+  3.185, 3.184, 3.183, 3.182, 3.181, 3.18, 3.179, 3.178, 3.177, 3.176, 3.175,
+  3.175, 3.174,
+
+  // Infinity
+  3.09,
+];
+
+export type Sample = Readonly<{
+  y: number;
+  x: number;
+}>;
+
+export type Regression = Readonly<{
+  yIntercept: number;
+  slope: number;
+  confidence: number;
+  outliers: number;
+  severeOutliers: number;
+}>;
+
+export function regress(samples: ReadonlyArray<Sample>): Regression {
+  // Bin the data by iteration count
+  const bins = new Map<number, Array<number>>();
+  for (const { x, y } of samples) {
+    let bin = bins.get(x);
+    if (bin === undefined) {
+      bin = [];
+      bins.set(x, bin);
+    }
+    bin.push(y);
+  }
+
+  let outliers = 0;
+  let severeOutliers = 0;
+
+  // Within each iteration bin identify the outliers for reporting purposes.
+  for (const [, ys] of bins) {
+    ys.sort();
+
+    const p25 = ys[Math.floor(ys.length * 0.25)] ?? -Infinity;
+    const p75 = ys[Math.ceil(ys.length * 0.75)] ?? +Infinity;
+    const iqr = p75 - p25;
+
+    const outlierLow = p25 - iqr * 1.5;
+    const outlierHigh = p75 + iqr * 1.5;
+    const badOutlierLow = p25 - iqr * 3;
+    const badOutlierHigh = p75 + iqr * 3;
+
+    // Tukey's method
+    for (const d of ys) {
+      if (d < badOutlierLow || d > badOutlierHigh) {
+        severeOutliers += 1;
+      } else if (d < outlierLow || d > outlierHigh) {
+        outliers += 1;
+      }
+    }
+  }
+
+  if (samples.length < 2) {
+    throw new Error('Low sample count');
+  }
+
+  let meanY = 0;
+  let meanX = 0;
+  for (const { y, x } of samples) {
+    meanY += y;
+    meanX += x;
+  }
+  meanY /= samples.length;
+  meanX /= samples.length;
+
+  let slopeNum = 0;
+  let slopeDenom = 0;
+  for (const { y, x } of samples) {
+    slopeNum += (y - meanY) * (x - meanX);
+    slopeDenom += (x - meanX) ** 2;
+  }
+
+  // Slope
+  const slope = slopeNum / slopeDenom;
+
+  // Y-Intercept
+  const yIntercept = meanY - slope * meanX;
+
+  let stdError = 0;
+  for (const { y, x } of samples) {
+    stdError += (y - yIntercept - slope * x) ** 2;
+  }
+  stdError /= samples.length - 2;
+  stdError /= slopeDenom;
+  stdError = Math.sqrt(stdError);
+
+  return {
+    yIntercept,
+    slope,
+    confidence:
+      STUDENT_T[Math.min(samples.length, STUDENT_T.length - 1)] * stdError,
+    outliers,
+    severeOutliers,
+  };
+}