Improve message attachment parsing

This commit is contained in:
trevor-signal
2025-09-08 14:09:17 -04:00
committed by GitHub
parent c85ad2b867
commit 87704409c3
5 changed files with 204 additions and 102 deletions

View File

@@ -56,6 +56,7 @@ import { consoleLogger } from '../util/consoleLogger';
import {
dropNull,
shallowConvertUndefinedToNull,
type ShallowNullToUndefined,
type ShallowUndefinedToNull,
} from '../util/dropNull';
import { isNormalNumber } from '../util/isNormalNumber';
@@ -82,7 +83,7 @@ import {
sqlFragment,
sqlJoin,
QueryFragment,
convertOptionalBooleanToNullableInteger,
convertOptionalBooleanToInteger,
} from './util';
import {
hydrateMessage,
@@ -120,7 +121,12 @@ import {
callHistoryGroupSchema,
} from '../types/CallDisposition';
import { redactGenericText } from '../util/privacy';
import { parseStrict, parseUnknown, safeParseUnknown } from '../util/schemas';
import {
parseLoose,
parseStrict,
parseUnknown,
safeParseUnknown,
} from '../util/schemas';
import {
SNIPPET_LEFT_PLACEHOLDER,
SNIPPET_RIGHT_PLACEHOLDER,
@@ -263,7 +269,7 @@ import {
import { generateMessageId } from '../util/generateMessageId';
import type { ConversationColorType, CustomColorType } from '../types/Colors';
import { sqlLogger } from './sqlLogger';
import { APPLICATION_OCTET_STREAM } from '../types/MIME';
import { permissiveMessageAttachmentSchema } from './server/messageAttachments';
type ConversationRow = Readonly<{
json: string;
@@ -2642,7 +2648,7 @@ function saveMessageAttachment({
orderInMessage: number;
editHistoryIndex: number | null;
}) {
const values: MessageAttachmentDBType = shallowConvertUndefinedToNull({
const unparsedValues: ShallowNullToUndefined<MessageAttachmentDBType> = {
messageId,
editHistoryIndex:
editHistoryIndex ?? ROOT_MESSAGE_ATTACHMENT_EDIT_HISTORY_INDEX,
@@ -2651,8 +2657,8 @@ function saveMessageAttachment({
conversationId,
sentAt,
clientUuid: attachment.clientUuid,
size: attachment.size ?? 0,
contentType: attachment.contentType ?? APPLICATION_OCTET_STREAM,
size: attachment.size,
contentType: attachment.contentType,
path: attachment.path,
localKey: attachment.localKey,
plaintextHash: attachment.plaintextHash,
@@ -2666,15 +2672,9 @@ function saveMessageAttachment({
downloadPath: attachment.downloadPath,
transitCdnKey: attachment.cdnKey ?? attachment.cdnId,
transitCdnNumber: attachment.cdnNumber,
transitCdnUploadTimestamp: isNumber(attachment.uploadTimestamp)
? attachment.uploadTimestamp
: null,
transitCdnUploadTimestamp: attachment.uploadTimestamp,
backupCdnNumber: attachment.backupCdnNumber,
incrementalMac:
// resilience to Uint8Array-stored incrementalMac values
typeof attachment.incrementalMac === 'string'
? attachment.incrementalMac
: null,
incrementalMac: attachment.incrementalMac,
incrementalMacChunkSize: attachment.chunkSize,
thumbnailPath: attachment.thumbnail?.path,
thumbnailSize: attachment.thumbnail?.size,
@@ -2693,33 +2693,57 @@ function saveMessageAttachment({
backupThumbnailVersion: attachment.thumbnailFromBackup?.version,
storyTextAttachmentJson: attachment.textAttachment
? objectToJSON(attachment.textAttachment)
: null,
: undefined,
localBackupPath: attachment.localBackupPath,
flags: attachment.flags,
error: convertOptionalBooleanToNullableInteger(attachment.error),
wasTooBig: convertOptionalBooleanToNullableInteger(attachment.wasTooBig),
backfillError: convertOptionalBooleanToNullableInteger(
attachment.backfillError
),
isCorrupted: convertOptionalBooleanToNullableInteger(
attachment.isCorrupted
),
error: convertOptionalBooleanToInteger(attachment.error),
wasTooBig: convertOptionalBooleanToInteger(attachment.wasTooBig),
backfillError: convertOptionalBooleanToInteger(attachment.backfillError),
isCorrupted: convertOptionalBooleanToInteger(attachment.isCorrupted),
copiedFromQuotedAttachment:
'copied' in attachment
? convertOptionalBooleanToNullableInteger(attachment.copied)
: null,
? convertOptionalBooleanToInteger(attachment.copied)
: undefined,
version: attachment.version,
pending: convertOptionalBooleanToNullableInteger(attachment.pending),
});
pending: convertOptionalBooleanToInteger(attachment.pending),
};
db.prepare(
`
try {
const values: MessageAttachmentDBType =
shallowConvertUndefinedToNull(unparsedValues);
db.prepare(
`
INSERT OR REPLACE INTO message_attachments
(${MESSAGE_ATTACHMENT_COLUMNS.join(', ')})
VALUES
(${MESSAGE_ATTACHMENT_COLUMNS.map(name => `$${name}`).join(', ')});
`
).run(values);
).run(values);
} catch (e) {
// Attachments used to be stored in JSON and may not have the types we expect. If we
// fail to save one, we parse/transform through a permissive zod schema (i.e. one that
// will convert invalid values to null when possible)
logger.error(
'Failed to save to message_attachments',
Errors.toLogFormat(e)
);
const values: MessageAttachmentDBType = parseLoose(
permissiveMessageAttachmentSchema,
unparsedValues
);
db.prepare(
`
INSERT OR REPLACE INTO message_attachments
(${MESSAGE_ATTACHMENT_COLUMNS.join(', ')})
VALUES
(${MESSAGE_ATTACHMENT_COLUMNS.map(name => `$${name}`).join(', ')});
`
).run(values);
logger.info('Recovered from invalid message_attachment save');
}
}
function _testOnlyRemoveMessageAttachments(

View File

@@ -0,0 +1,88 @@
// Copyright 2025 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import { z } from 'zod';
import { convertUndefinedToNull } from '../../util/dropNull';
import { attachmentDownloadTypeSchema } from '../../types/AttachmentDownload';
import { APPLICATION_OCTET_STREAM } from '../../types/MIME';
import type { MessageAttachmentDBType } from '../Interface';
const permissiveStringOrNull = z
.string()
.optional()
.transform(convertUndefinedToNull)
.catch(null);
const permissiveNumberOrNull = z
.number()
.optional()
.transform(convertUndefinedToNull)
.catch(null);
const permissiveAttachmentVersion = z
.union([z.literal(1), z.literal(2)])
.optional()
.transform(convertUndefinedToNull)
.catch(null);
const permissiveOptionalBool = z
.union([z.literal(0), z.literal(1)])
.optional()
.transform(convertUndefinedToNull)
.catch(null);
// A schema which converts invalid values to null, to handle bad data when
// attachments were stored in JSON
export const permissiveMessageAttachmentSchema = z.object({
// Fields required to be NOT NULL
messageId: z.string(),
editHistoryIndex: z.number(),
attachmentType: attachmentDownloadTypeSchema,
orderInMessage: z.number(),
conversationId: z.string(),
sentAt: z.number().catch(0),
size: z.number().catch(0),
contentType: z.string().catch(APPLICATION_OCTET_STREAM),
// Fields allowing NULL
path: permissiveStringOrNull,
clientUuid: permissiveStringOrNull,
localKey: permissiveStringOrNull,
plaintextHash: permissiveStringOrNull,
caption: permissiveStringOrNull,
blurHash: permissiveStringOrNull,
height: permissiveNumberOrNull,
width: permissiveNumberOrNull,
digest: permissiveStringOrNull,
key: permissiveStringOrNull,
fileName: permissiveStringOrNull,
downloadPath: permissiveStringOrNull,
transitCdnKey: permissiveStringOrNull,
transitCdnNumber: permissiveNumberOrNull,
transitCdnUploadTimestamp: permissiveNumberOrNull,
backupCdnNumber: permissiveNumberOrNull,
incrementalMac: permissiveStringOrNull,
incrementalMacChunkSize: permissiveNumberOrNull,
thumbnailPath: permissiveStringOrNull,
thumbnailSize: permissiveNumberOrNull,
thumbnailContentType: permissiveStringOrNull,
thumbnailLocalKey: permissiveStringOrNull,
thumbnailVersion: permissiveAttachmentVersion,
screenshotPath: permissiveStringOrNull,
screenshotSize: permissiveNumberOrNull,
screenshotContentType: permissiveStringOrNull,
screenshotLocalKey: permissiveStringOrNull,
screenshotVersion: permissiveAttachmentVersion,
backupThumbnailPath: permissiveStringOrNull,
backupThumbnailSize: permissiveNumberOrNull,
backupThumbnailContentType: permissiveStringOrNull,
backupThumbnailLocalKey: permissiveStringOrNull,
backupThumbnailVersion: permissiveAttachmentVersion,
storyTextAttachmentJson: permissiveStringOrNull,
localBackupPath: permissiveStringOrNull,
flags: permissiveNumberOrNull,
error: permissiveOptionalBool,
wasTooBig: permissiveOptionalBool,
backfillError: permissiveOptionalBool,
isCorrupted: permissiveOptionalBool,
copiedFromQuotedAttachment: permissiveOptionalBool,
version: permissiveAttachmentVersion,
pending: permissiveOptionalBool,
}) satisfies z.ZodType<MessageAttachmentDBType, z.ZodTypeDef, unknown>;

View File

@@ -3,6 +3,7 @@
/* eslint-disable max-classes-per-file */
import { isNumber, last } from 'lodash';
import type { ReadableDB, WritableDB } from './Interface';
import type { LoggerType } from '../types/Logging';
@@ -431,14 +432,14 @@ export function convertOptionalIntegerToBoolean(
return undefined;
}
export function convertOptionalBooleanToNullableInteger(
export function convertOptionalBooleanToInteger(
optionalBoolean?: boolean
): 1 | 0 | null {
): 1 | 0 | undefined {
if (optionalBoolean === true) {
return 1;
}
if (optionalBoolean === false) {
return 0;
}
return null;
return undefined;
}

View File

@@ -3,7 +3,6 @@
import { assert } from 'chai';
import { v4 as generateGuid } from 'uuid';
import { omit } from 'lodash';
import * as Bytes from '../Bytes';
import type {
@@ -18,7 +17,6 @@ import {
IMAGE_JPEG,
IMAGE_PNG,
LONG_MESSAGE,
type MIMEType,
} from '../types/MIME';
import type { MessageAttributesType } from '../model-types';
import { generateAci } from '../types/ServiceId';
@@ -606,89 +604,77 @@ describe('normalizes attachment references', () => {
assert(messageFromDB, 'message was saved');
assert.deepEqual(messageFromDB, message);
});
it('handles bad data', async () => {
const attachment: AttachmentType = {
describe('handles bad data', () => {
const badDataAttachment = {
...composeAttachment(),
size: undefined as unknown as number,
contentType: undefined as unknown as MIMEType,
size: undefined,
contentType: undefined,
width: '100',
isCorrupted: 1,
key: {},
digest: { '1': 234 },
randomKey: 'random',
uploadTimestamp: {
low: 6174,
high: 0,
unsigned: false,
} as unknown as number,
incrementalMac: Bytes.fromString('incrementalMac') as unknown as string,
};
const message = composeMessage(Date.now(), {
attachments: [attachment],
});
},
incrementalMac: Bytes.fromString('incrementalMac'),
} as unknown as AttachmentType & { randomKey?: string };
await DataWriter.saveMessage(message, {
forceSave: true,
ourAci: generateAci(),
postSaveUpdates: () => Promise.resolve(),
});
const messageFromDB = await DataReader.getMessageById(message.id);
assert(messageFromDB, 'message was saved');
assert.deepEqual(messageFromDB.attachments?.[0], {
...attachment,
const cleanedAttachment = {
...badDataAttachment,
size: 0,
width: undefined,
digest: undefined,
key: undefined,
isCorrupted: undefined,
contentType: APPLICATION_OCTET_STREAM,
uploadTimestamp: undefined,
incrementalMac: undefined,
});
});
};
delete cleanedAttachment.randomKey;
it('is resilient when called from saveMessagesIndividually to incorrect data', async () => {
const attachment = {
...composeAttachment(),
key: {},
randomKey: 'random',
} as unknown as AttachmentType;
it('is resilient to bad data when saved', async () => {
const message = composeMessage(Date.now(), {
attachments: [badDataAttachment],
});
const attachments = [attachment];
const message = composeMessage(Date.now(), {
attachments,
});
await DataWriter.saveMessages([message], {
forceSave: true,
ourAci: generateAci(),
postSaveUpdates: () => Promise.resolve(),
_testOnlyAvoidNormalizingAttachments: true,
});
await DataWriter.saveMessagesIndividually([message], {
ourAci: generateAci(),
postSaveUpdates: () => Promise.resolve(),
});
let messageFromDB = await DataReader.getMessageById(message.id);
assert(messageFromDB, 'message was saved');
assert.deepEqual(messageFromDB.attachments?.[0], attachment);
const attachmentWithoutKey = { ...attachment, key: undefined };
await DataWriter.saveMessagesIndividually(
[
{
...message,
attachments: [attachmentWithoutKey],
},
],
{
await DataWriter.saveMessage(message, {
forceSave: true,
ourAci: generateAci(),
postSaveUpdates: () => Promise.resolve(),
}
);
});
messageFromDB = await DataReader.getMessageById(message.id);
assert(messageFromDB, 'message was saved');
assert.deepEqual(
messageFromDB.attachments?.[0],
omit(attachmentWithoutKey, 'randomKey')
);
const messageFromDB = await DataReader.getMessageById(message.id);
assert(messageFromDB, 'message was saved');
assert.deepEqual(messageFromDB.attachments?.[0], cleanedAttachment);
});
it('is resilient to bad data when saved via saveMessagesIndividually', async () => {
const attachments = [badDataAttachment];
const message = composeMessage(Date.now(), {
attachments,
});
await DataWriter.saveMessages([message], {
forceSave: true,
ourAci: generateAci(),
postSaveUpdates: () => Promise.resolve(),
_testOnlyAvoidNormalizingAttachments: true,
});
await DataWriter.saveMessagesIndividually([message], {
ourAci: generateAci(),
postSaveUpdates: () => Promise.resolve(),
});
const messageFromDB = await DataReader.getMessageById(message.id);
assert(messageFromDB, 'message was saved');
assert.deepEqual(messageFromDB.attachments?.[0], cleanedAttachment);
});
});
it('adds a placeholder attachment when attachments had been deleted', async () => {
const message = composeMessage(Date.now(), {
attachments: [composeAttachment(), composeAttachment()],

View File

@@ -10,6 +10,9 @@ export type UndefinedToNull<T> =
export type ShallowUndefinedToNull<T extends { [key: string]: unknown }> = {
[P in keyof T]: UndefinedToNull<T[P]>;
};
export type ShallowNullToUndefined<T extends { [key: string]: unknown }> = {
[P in keyof T]: NullToUndefined<T[P]>;
};
export function dropNull<T>(
value: NonNullable<T> | null | undefined