From 87704409c3f8318ff19dc4ab129dfe51946c73dd Mon Sep 17 00:00:00 2001 From: trevor-signal <131492920+trevor-signal@users.noreply.github.com> Date: Mon, 8 Sep 2025 14:09:17 -0400 Subject: [PATCH] Improve message attachment parsing --- ts/sql/Server.ts | 84 +++++++----- ts/sql/server/messageAttachments.ts | 88 +++++++++++++ ts/sql/util.ts | 7 +- .../normalizedAttachments_test.ts | 124 ++++++++---------- ts/util/dropNull.ts | 3 + 5 files changed, 204 insertions(+), 102 deletions(-) create mode 100644 ts/sql/server/messageAttachments.ts diff --git a/ts/sql/Server.ts b/ts/sql/Server.ts index 628c35482b..6257503d71 100644 --- a/ts/sql/Server.ts +++ b/ts/sql/Server.ts @@ -56,6 +56,7 @@ import { consoleLogger } from '../util/consoleLogger'; import { dropNull, shallowConvertUndefinedToNull, + type ShallowNullToUndefined, type ShallowUndefinedToNull, } from '../util/dropNull'; import { isNormalNumber } from '../util/isNormalNumber'; @@ -82,7 +83,7 @@ import { sqlFragment, sqlJoin, QueryFragment, - convertOptionalBooleanToNullableInteger, + convertOptionalBooleanToInteger, } from './util'; import { hydrateMessage, @@ -120,7 +121,12 @@ import { callHistoryGroupSchema, } from '../types/CallDisposition'; import { redactGenericText } from '../util/privacy'; -import { parseStrict, parseUnknown, safeParseUnknown } from '../util/schemas'; +import { + parseLoose, + parseStrict, + parseUnknown, + safeParseUnknown, +} from '../util/schemas'; import { SNIPPET_LEFT_PLACEHOLDER, SNIPPET_RIGHT_PLACEHOLDER, @@ -263,7 +269,7 @@ import { import { generateMessageId } from '../util/generateMessageId'; import type { ConversationColorType, CustomColorType } from '../types/Colors'; import { sqlLogger } from './sqlLogger'; -import { APPLICATION_OCTET_STREAM } from '../types/MIME'; +import { permissiveMessageAttachmentSchema } from './server/messageAttachments'; type ConversationRow = Readonly<{ json: string; @@ -2642,7 +2648,7 @@ function saveMessageAttachment({ orderInMessage: number; editHistoryIndex: number | null; }) { - const values: MessageAttachmentDBType = shallowConvertUndefinedToNull({ + const unparsedValues: ShallowNullToUndefined = { messageId, editHistoryIndex: editHistoryIndex ?? ROOT_MESSAGE_ATTACHMENT_EDIT_HISTORY_INDEX, @@ -2651,8 +2657,8 @@ function saveMessageAttachment({ conversationId, sentAt, clientUuid: attachment.clientUuid, - size: attachment.size ?? 0, - contentType: attachment.contentType ?? APPLICATION_OCTET_STREAM, + size: attachment.size, + contentType: attachment.contentType, path: attachment.path, localKey: attachment.localKey, plaintextHash: attachment.plaintextHash, @@ -2666,15 +2672,9 @@ function saveMessageAttachment({ downloadPath: attachment.downloadPath, transitCdnKey: attachment.cdnKey ?? attachment.cdnId, transitCdnNumber: attachment.cdnNumber, - transitCdnUploadTimestamp: isNumber(attachment.uploadTimestamp) - ? attachment.uploadTimestamp - : null, + transitCdnUploadTimestamp: attachment.uploadTimestamp, backupCdnNumber: attachment.backupCdnNumber, - incrementalMac: - // resilience to Uint8Array-stored incrementalMac values - typeof attachment.incrementalMac === 'string' - ? attachment.incrementalMac - : null, + incrementalMac: attachment.incrementalMac, incrementalMacChunkSize: attachment.chunkSize, thumbnailPath: attachment.thumbnail?.path, thumbnailSize: attachment.thumbnail?.size, @@ -2693,33 +2693,57 @@ function saveMessageAttachment({ backupThumbnailVersion: attachment.thumbnailFromBackup?.version, storyTextAttachmentJson: attachment.textAttachment ? objectToJSON(attachment.textAttachment) - : null, + : undefined, localBackupPath: attachment.localBackupPath, flags: attachment.flags, - error: convertOptionalBooleanToNullableInteger(attachment.error), - wasTooBig: convertOptionalBooleanToNullableInteger(attachment.wasTooBig), - backfillError: convertOptionalBooleanToNullableInteger( - attachment.backfillError - ), - isCorrupted: convertOptionalBooleanToNullableInteger( - attachment.isCorrupted - ), + error: convertOptionalBooleanToInteger(attachment.error), + wasTooBig: convertOptionalBooleanToInteger(attachment.wasTooBig), + backfillError: convertOptionalBooleanToInteger(attachment.backfillError), + isCorrupted: convertOptionalBooleanToInteger(attachment.isCorrupted), copiedFromQuotedAttachment: 'copied' in attachment - ? convertOptionalBooleanToNullableInteger(attachment.copied) - : null, + ? convertOptionalBooleanToInteger(attachment.copied) + : undefined, version: attachment.version, - pending: convertOptionalBooleanToNullableInteger(attachment.pending), - }); + pending: convertOptionalBooleanToInteger(attachment.pending), + }; - db.prepare( - ` + try { + const values: MessageAttachmentDBType = + shallowConvertUndefinedToNull(unparsedValues); + + db.prepare( + ` INSERT OR REPLACE INTO message_attachments (${MESSAGE_ATTACHMENT_COLUMNS.join(', ')}) VALUES (${MESSAGE_ATTACHMENT_COLUMNS.map(name => `$${name}`).join(', ')}); ` - ).run(values); + ).run(values); + } catch (e) { + // Attachments used to be stored in JSON and may not have the types we expect. If we + // fail to save one, we parse/transform through a permissive zod schema (i.e. one that + // will convert invalid values to null when possible) + logger.error( + 'Failed to save to message_attachments', + Errors.toLogFormat(e) + ); + const values: MessageAttachmentDBType = parseLoose( + permissiveMessageAttachmentSchema, + unparsedValues + ); + + db.prepare( + ` + INSERT OR REPLACE INTO message_attachments + (${MESSAGE_ATTACHMENT_COLUMNS.join(', ')}) + VALUES + (${MESSAGE_ATTACHMENT_COLUMNS.map(name => `$${name}`).join(', ')}); + ` + ).run(values); + + logger.info('Recovered from invalid message_attachment save'); + } } function _testOnlyRemoveMessageAttachments( diff --git a/ts/sql/server/messageAttachments.ts b/ts/sql/server/messageAttachments.ts new file mode 100644 index 0000000000..60e4343a11 --- /dev/null +++ b/ts/sql/server/messageAttachments.ts @@ -0,0 +1,88 @@ +// Copyright 2025 Signal Messenger, LLC +// SPDX-License-Identifier: AGPL-3.0-only + +import { z } from 'zod'; +import { convertUndefinedToNull } from '../../util/dropNull'; +import { attachmentDownloadTypeSchema } from '../../types/AttachmentDownload'; +import { APPLICATION_OCTET_STREAM } from '../../types/MIME'; +import type { MessageAttachmentDBType } from '../Interface'; + +const permissiveStringOrNull = z + .string() + .optional() + .transform(convertUndefinedToNull) + .catch(null); +const permissiveNumberOrNull = z + .number() + .optional() + .transform(convertUndefinedToNull) + .catch(null); +const permissiveAttachmentVersion = z + .union([z.literal(1), z.literal(2)]) + .optional() + .transform(convertUndefinedToNull) + .catch(null); +const permissiveOptionalBool = z + .union([z.literal(0), z.literal(1)]) + .optional() + .transform(convertUndefinedToNull) + .catch(null); + +// A schema which converts invalid values to null, to handle bad data when +// attachments were stored in JSON +export const permissiveMessageAttachmentSchema = z.object({ + // Fields required to be NOT NULL + messageId: z.string(), + editHistoryIndex: z.number(), + attachmentType: attachmentDownloadTypeSchema, + orderInMessage: z.number(), + conversationId: z.string(), + sentAt: z.number().catch(0), + size: z.number().catch(0), + contentType: z.string().catch(APPLICATION_OCTET_STREAM), + + // Fields allowing NULL + path: permissiveStringOrNull, + clientUuid: permissiveStringOrNull, + localKey: permissiveStringOrNull, + plaintextHash: permissiveStringOrNull, + caption: permissiveStringOrNull, + blurHash: permissiveStringOrNull, + height: permissiveNumberOrNull, + width: permissiveNumberOrNull, + digest: permissiveStringOrNull, + key: permissiveStringOrNull, + fileName: permissiveStringOrNull, + downloadPath: permissiveStringOrNull, + transitCdnKey: permissiveStringOrNull, + transitCdnNumber: permissiveNumberOrNull, + transitCdnUploadTimestamp: permissiveNumberOrNull, + backupCdnNumber: permissiveNumberOrNull, + incrementalMac: permissiveStringOrNull, + incrementalMacChunkSize: permissiveNumberOrNull, + thumbnailPath: permissiveStringOrNull, + thumbnailSize: permissiveNumberOrNull, + thumbnailContentType: permissiveStringOrNull, + thumbnailLocalKey: permissiveStringOrNull, + thumbnailVersion: permissiveAttachmentVersion, + screenshotPath: permissiveStringOrNull, + screenshotSize: permissiveNumberOrNull, + screenshotContentType: permissiveStringOrNull, + screenshotLocalKey: permissiveStringOrNull, + screenshotVersion: permissiveAttachmentVersion, + backupThumbnailPath: permissiveStringOrNull, + backupThumbnailSize: permissiveNumberOrNull, + backupThumbnailContentType: permissiveStringOrNull, + backupThumbnailLocalKey: permissiveStringOrNull, + backupThumbnailVersion: permissiveAttachmentVersion, + storyTextAttachmentJson: permissiveStringOrNull, + localBackupPath: permissiveStringOrNull, + flags: permissiveNumberOrNull, + error: permissiveOptionalBool, + wasTooBig: permissiveOptionalBool, + backfillError: permissiveOptionalBool, + isCorrupted: permissiveOptionalBool, + copiedFromQuotedAttachment: permissiveOptionalBool, + version: permissiveAttachmentVersion, + pending: permissiveOptionalBool, +}) satisfies z.ZodType; diff --git a/ts/sql/util.ts b/ts/sql/util.ts index c9e41dbde1..8dbff8b2e7 100644 --- a/ts/sql/util.ts +++ b/ts/sql/util.ts @@ -3,6 +3,7 @@ /* eslint-disable max-classes-per-file */ import { isNumber, last } from 'lodash'; + import type { ReadableDB, WritableDB } from './Interface'; import type { LoggerType } from '../types/Logging'; @@ -431,14 +432,14 @@ export function convertOptionalIntegerToBoolean( return undefined; } -export function convertOptionalBooleanToNullableInteger( +export function convertOptionalBooleanToInteger( optionalBoolean?: boolean -): 1 | 0 | null { +): 1 | 0 | undefined { if (optionalBoolean === true) { return 1; } if (optionalBoolean === false) { return 0; } - return null; + return undefined; } diff --git a/ts/test-electron/normalizedAttachments_test.ts b/ts/test-electron/normalizedAttachments_test.ts index 50ea144682..41fb35ad2d 100644 --- a/ts/test-electron/normalizedAttachments_test.ts +++ b/ts/test-electron/normalizedAttachments_test.ts @@ -3,7 +3,6 @@ import { assert } from 'chai'; import { v4 as generateGuid } from 'uuid'; -import { omit } from 'lodash'; import * as Bytes from '../Bytes'; import type { @@ -18,7 +17,6 @@ import { IMAGE_JPEG, IMAGE_PNG, LONG_MESSAGE, - type MIMEType, } from '../types/MIME'; import type { MessageAttributesType } from '../model-types'; import { generateAci } from '../types/ServiceId'; @@ -606,89 +604,77 @@ describe('normalizes attachment references', () => { assert(messageFromDB, 'message was saved'); assert.deepEqual(messageFromDB, message); }); - it('handles bad data', async () => { - const attachment: AttachmentType = { + + describe('handles bad data', () => { + const badDataAttachment = { ...composeAttachment(), - size: undefined as unknown as number, - contentType: undefined as unknown as MIMEType, + size: undefined, + contentType: undefined, + width: '100', + isCorrupted: 1, + key: {}, + digest: { '1': 234 }, + randomKey: 'random', uploadTimestamp: { low: 6174, high: 0, unsigned: false, - } as unknown as number, - incrementalMac: Bytes.fromString('incrementalMac') as unknown as string, - }; - const message = composeMessage(Date.now(), { - attachments: [attachment], - }); + }, + incrementalMac: Bytes.fromString('incrementalMac'), + } as unknown as AttachmentType & { randomKey?: string }; - await DataWriter.saveMessage(message, { - forceSave: true, - ourAci: generateAci(), - postSaveUpdates: () => Promise.resolve(), - }); - - const messageFromDB = await DataReader.getMessageById(message.id); - assert(messageFromDB, 'message was saved'); - assert.deepEqual(messageFromDB.attachments?.[0], { - ...attachment, + const cleanedAttachment = { + ...badDataAttachment, size: 0, + width: undefined, + digest: undefined, + key: undefined, + isCorrupted: undefined, contentType: APPLICATION_OCTET_STREAM, uploadTimestamp: undefined, incrementalMac: undefined, - }); - }); + }; + delete cleanedAttachment.randomKey; - it('is resilient when called from saveMessagesIndividually to incorrect data', async () => { - const attachment = { - ...composeAttachment(), - key: {}, - randomKey: 'random', - } as unknown as AttachmentType; + it('is resilient to bad data when saved', async () => { + const message = composeMessage(Date.now(), { + attachments: [badDataAttachment], + }); - const attachments = [attachment]; - const message = composeMessage(Date.now(), { - attachments, - }); - - await DataWriter.saveMessages([message], { - forceSave: true, - ourAci: generateAci(), - postSaveUpdates: () => Promise.resolve(), - _testOnlyAvoidNormalizingAttachments: true, - }); - - await DataWriter.saveMessagesIndividually([message], { - ourAci: generateAci(), - postSaveUpdates: () => Promise.resolve(), - }); - - let messageFromDB = await DataReader.getMessageById(message.id); - assert(messageFromDB, 'message was saved'); - assert.deepEqual(messageFromDB.attachments?.[0], attachment); - - const attachmentWithoutKey = { ...attachment, key: undefined }; - await DataWriter.saveMessagesIndividually( - [ - { - ...message, - attachments: [attachmentWithoutKey], - }, - ], - { + await DataWriter.saveMessage(message, { + forceSave: true, ourAci: generateAci(), postSaveUpdates: () => Promise.resolve(), - } - ); + }); - messageFromDB = await DataReader.getMessageById(message.id); - assert(messageFromDB, 'message was saved'); - assert.deepEqual( - messageFromDB.attachments?.[0], - omit(attachmentWithoutKey, 'randomKey') - ); + const messageFromDB = await DataReader.getMessageById(message.id); + assert(messageFromDB, 'message was saved'); + assert.deepEqual(messageFromDB.attachments?.[0], cleanedAttachment); + }); + + it('is resilient to bad data when saved via saveMessagesIndividually', async () => { + const attachments = [badDataAttachment]; + const message = composeMessage(Date.now(), { + attachments, + }); + + await DataWriter.saveMessages([message], { + forceSave: true, + ourAci: generateAci(), + postSaveUpdates: () => Promise.resolve(), + _testOnlyAvoidNormalizingAttachments: true, + }); + + await DataWriter.saveMessagesIndividually([message], { + ourAci: generateAci(), + postSaveUpdates: () => Promise.resolve(), + }); + + const messageFromDB = await DataReader.getMessageById(message.id); + assert(messageFromDB, 'message was saved'); + assert.deepEqual(messageFromDB.attachments?.[0], cleanedAttachment); + }); }); - it('adds a placeholder attachment when attachments had been deleted', async () => { const message = composeMessage(Date.now(), { attachments: [composeAttachment(), composeAttachment()], diff --git a/ts/util/dropNull.ts b/ts/util/dropNull.ts index 0544e9cc46..4f420c0748 100644 --- a/ts/util/dropNull.ts +++ b/ts/util/dropNull.ts @@ -10,6 +10,9 @@ export type UndefinedToNull = export type ShallowUndefinedToNull = { [P in keyof T]: UndefinedToNull; }; +export type ShallowNullToUndefined = { + [P in keyof T]: NullToUndefined; +}; export function dropNull( value: NonNullable | null | undefined