Improve ref counting when deduplicating attachments on disk

Co-authored-by: trevor-signal <131492920+trevor-signal@users.noreply.github.com>
This commit is contained in:
automated-signal
2026-02-13 08:22:45 -06:00
committed by GitHub
parent 6a5182eaa8
commit 95fd8bccb4
11 changed files with 374 additions and 53 deletions

View File

@@ -860,6 +860,7 @@ export async function runDownloadAttachmentJobInner({
const existingAttachmentData = await getExistingAttachmentDataForReuse({
plaintextHash: downloadedAttachment.plaintextHash,
contentType: attachment.contentType,
messageId,
logId,
});

View File

@@ -1444,12 +1444,20 @@ type WritableInterface = {
plaintextHash,
version,
contentType,
messageId,
}: {
plaintextHash: string;
version: number;
contentType: MIMEType;
messageId: string;
}) => ExistingAttachmentData | undefined;
_protectAttachmentPathFromDeletion: (path: string) => void;
_protectAttachmentPathFromDeletion: ({
path,
messageId,
}: {
path: string;
messageId: string;
}) => void;
resetProtectedAttachmentPaths: () => void;
removeAll: () => void;

View File

@@ -2932,12 +2932,19 @@ function getAndProtectExistingAttachmentPath(
plaintextHash,
version,
contentType,
}: { plaintextHash: string; version: number; contentType: string }
messageId,
}: {
plaintextHash: string;
version: number;
contentType: string;
messageId: string;
}
): ExistingAttachmentData | undefined {
if (!isValidPlaintextHash(plaintextHash)) {
logger.error('getAndProtectExistingAttachmentPath: Invalid plaintextHash');
return;
}
if (version < 2) {
logger.error(
'getAndProtectExistingAttachmentPath: Invalid version',
@@ -2985,8 +2992,8 @@ function getAndProtectExistingAttachmentPath(
(${existingData.thumbnailPath}),
(${existingData.screenshotPath})
)
INSERT OR REPLACE INTO attachments_protected_from_deletion(path)
SELECT path
INSERT OR REPLACE INTO attachments_protected_from_deletion(path, messageId)
SELECT path, ${messageId}
FROM existingMessageAttachmentPaths
WHERE path IS NOT NULL;
`;
@@ -2997,11 +3004,13 @@ function getAndProtectExistingAttachmentPath(
function _protectAttachmentPathFromDeletion(
db: WritableDB,
path: string
{ path, messageId }: { path: string; messageId: string }
): void {
const [protectQuery, protectParams] = sql`
INSERT OR REPLACE INTO attachments_protected_from_deletion(path)
VALUES (${path});
INSERT OR REPLACE INTO attachments_protected_from_deletion
(path, messageId)
VALUES
(${path}, ${messageId});
`;
db.prepare(protectQuery).run(protectParams);
}

View File

@@ -0,0 +1,58 @@
// Copyright 2026 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import type { WritableDB } from '../Interface.std.js';
export default function updateToSchemaVersion1660(db: WritableDB): void {
db.exec(`
DROP TABLE attachments_protected_from_deletion;
CREATE TABLE attachments_protected_from_deletion (
path TEXT NOT NULL,
messageId TEXT NOT NULL,
PRIMARY KEY (path, messageId)
) STRICT;
`);
db.exec(`
DROP TRIGGER stop_protecting_attachments_after_update;
CREATE TRIGGER stop_protecting_attachments_after_update
AFTER UPDATE OF path, thumbnailPath, screenshotPath, backupThumbnailPath
ON message_attachments
WHEN
OLD.path IS NOT NEW.path OR
OLD.thumbnailPath IS NOT NEW.thumbnailPath OR
OLD.screenshotPath IS NOT NEW.screenshotPath OR
OLD.backupThumbnailPath IS NOT NEW.backupThumbnailPath
BEGIN
DELETE FROM attachments_protected_from_deletion
WHERE
messageId IS NEW.messageId
AND path IN (
NEW.path,
NEW.thumbnailPath,
NEW.screenshotPath,
NEW.backupThumbnailPath
);
END;
`);
db.exec(`
DROP TRIGGER stop_protecting_attachments_after_insert;
CREATE TRIGGER stop_protecting_attachments_after_insert
AFTER INSERT
ON message_attachments
BEGIN
DELETE FROM attachments_protected_from_deletion
WHERE
messageId IS NEW.messageId
AND path IN (
NEW.path,
NEW.thumbnailPath,
NEW.screenshotPath,
NEW.backupThumbnailPath
);
END;
`);
}

View File

@@ -142,6 +142,7 @@ import updateToSchemaVersion1620 from './1620-sort-bigger-media.std.js';
import updateToSchemaVersion1630 from './1630-message-pin-message-data.std.js';
import updateToSchemaVersion1640 from './1640-key-transparency.std.js';
import updateToSchemaVersion1650 from './1650-protected-attachments.std.js';
import updateToSchemaVersion1660 from './1660-protected-attachments-non-unique.std.js';
import { DataWriter } from '../Server.node.js';
@@ -1644,6 +1645,7 @@ export const SCHEMA_VERSIONS: ReadonlyArray<SchemaUpdateType> = [
{ version: 1630, update: updateToSchemaVersion1630 },
{ version: 1640, update: updateToSchemaVersion1640 },
{ version: 1650, update: updateToSchemaVersion1650 },
{ version: 1660, update: updateToSchemaVersion1660 },
];
export class DBVersionFromFutureError extends Error {

View File

@@ -273,9 +273,10 @@ describe('cleanupOrphanedAttachments', () => {
postSaveUpdates: () => Promise.resolve(),
});
await DataWriter._protectAttachmentPathFromDeletion(
`attachment${attachmentIndex + 1}`
);
await DataWriter._protectAttachmentPathFromDeletion({
path: `attachment${attachmentIndex + 1}`,
messageId: 'messageId',
});
await DataWriter.cleanupOrphanedAttachments({ _block: true });
assert.strictEqual(attachmentIndex, NUM_ATTACHMENT_FILES_IN_MESSAGE);

View File

@@ -302,46 +302,98 @@ describe('deleteMessageAttachments', () => {
});
it('is not safe to delete if the file is protected, even if no references', async () => {
const attachment = {
size: 1,
version: 2,
contentType: IMAGE_JPEG,
plaintextHash: testPlaintextHash(),
await DataWriter._protectAttachmentPathFromDeletion({
path: 'attachment0',
thumbnail: { size: 1, contentType: IMAGE_JPEG, path: 'attachment1' },
screenshot: { size: 1, contentType: IMAGE_JPEG, path: 'attachment2' },
// thumbnailFromBackup is not protected
thumbnailFromBackup: {
size: 1,
contentType: IMAGE_JPEG,
path: 'attachment3',
},
} as const;
const message = { ...composeMessage(), attachments: [attachment] };
messageId: 'messageId',
});
await DataWriter.saveMessage(message, {
ourAci: generateAci(),
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment0'));
assert.isTrue(await DataReader.isAttachmentSafeToDelete('attachment1'));
assert.isTrue(await DataReader.isAttachmentSafeToDelete('attachment2'));
assert.isTrue(await DataReader.isAttachmentSafeToDelete('attachment3'));
assert.isTrue(await DataReader.isAttachmentSafeToDelete('attachment4'));
});
it('properly counts attachment references', async () => {
const attachment1 = {
size: 1200,
contentType: IMAGE_JPEG,
path: 'attachment1',
localKey: testAttachmentLocalKey(),
plaintextHash: testPlaintextHash(),
version: 2,
} as const;
const message1: MessageAttributesType = {
id: generateUuid(),
timestamp: Date.now(),
sent_at: Date.now(),
received_at: Date.now(),
type: 'incoming',
conversationId: 'convoId',
attachments: [attachment1],
};
const message2 = { ...message1, id: generateUuid() };
const message3 = { ...message1, id: generateUuid() };
assert.isTrue(await DataReader.isAttachmentSafeToDelete('attachment1'));
await DataWriter.saveMessage(message1, {
forceSave: true,
ourAci: generateAci(),
postSaveUpdates: () => Promise.resolve(),
});
await DataWriter.getAndProtectExistingAttachmentPath({
plaintextHash: attachment.plaintextHash,
version: 2,
contentType: attachment.contentType,
});
await DataWriter.removeMessageById(message.id, {
cleanupMessages: () => Promise.resolve(),
});
assert.isUndefined(await DataReader.getMessageById(message.id));
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment0'));
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment1'));
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment2'));
assert.isTrue(await DataReader.isAttachmentSafeToDelete('attachment3'));
assert.isTrue(await DataReader.isAttachmentSafeToDelete('attachment4'));
// Protect it twice
await DataWriter.getAndProtectExistingAttachmentPath({
plaintextHash: attachment1.plaintextHash,
version: 2,
contentType: IMAGE_JPEG,
messageId: message2.id,
});
await DataWriter.getAndProtectExistingAttachmentPath({
plaintextHash: attachment1.plaintextHash,
version: 2,
contentType: IMAGE_JPEG,
messageId: message3.id,
});
// Delete the original message
await DataWriter.removeMessageById(message1.id, {
cleanupMessages,
});
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment1'));
// Save message2
await DataWriter.saveMessage(message2, {
forceSave: true,
ourAci: generateAci(),
postSaveUpdates: () => Promise.resolve(),
});
// Delete message2
await DataWriter.removeMessageById(message2.id, {
cleanupMessages,
});
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment1'));
// Save message3
await DataWriter.saveMessage(message3, {
forceSave: true,
ourAci: generateAci(),
postSaveUpdates: () => Promise.resolve(),
});
assert.isFalse(await DataReader.isAttachmentSafeToDelete('attachment1'));
// Delete message3
await DataWriter.removeMessageById(message3.id, {
cleanupMessages,
});
assert.isTrue(await DataReader.isAttachmentSafeToDelete('attachment1'));
});
});
@@ -424,7 +476,6 @@ describe('deleteMessageAttachments', () => {
await writeFiles(NUM_DOWNLOAD_FILES_IN_MESSAGE, 'download');
const message1 = composeMessageWithAllAttachments();
const attachment1: AttachmentType = message1.attachments?.[0];
const message2 = { ...composeMessage() };
assert.strictEqual(attachmentIndex, NUM_ATTACHMENT_FILES_IN_MESSAGE);
assert.strictEqual(downloadIndex, NUM_DOWNLOAD_FILES_IN_MESSAGE);
@@ -443,6 +494,7 @@ describe('deleteMessageAttachments', () => {
plaintextHash: attachment1.plaintextHash,
version: attachment1.version,
contentType: attachment1.contentType,
messageId: 'newmessage',
});
assert.strictEqual(existingAttachment?.path, attachment1.path);
@@ -460,8 +512,6 @@ describe('deleteMessageAttachments', () => {
attachment1.screenshot?.path,
]);
assert.strictEqual(listFiles('download').length, 0);
await DataWriter.removeMessageById(message2.id, { cleanupMessages });
await cleanupAllMessageAttachmentFiles(message2);
});
});
@@ -552,7 +602,10 @@ describe('deleteMessageAttachments', () => {
},
};
await DataWriter._protectAttachmentPathFromDeletion('attachment0');
await DataWriter._protectAttachmentPathFromDeletion({
path: 'attachment0',
messageId: 'messageId',
});
await cleanupAttachmentFiles(attachment);
assert.sameDeepMembers(listFiles('attachment'), [
'attachment0',

View File

@@ -0,0 +1,175 @@
// Copyright 2026 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import { assert } from 'chai';
import type { WritableDB } from '../../sql/Interface.std.js';
import {
createDB,
getTableData,
insertData,
updateToVersion,
} from './helpers.node.js';
describe('SQL/updateToSchemaVersion1660', () => {
let db: WritableDB;
beforeEach(() => {
db = createDB();
updateToVersion(db, 1660);
});
afterEach(() => {
db.close();
});
it('allows same path for different messageIds', () => {
insertData(db, 'attachments_protected_from_deletion', [
{ path: 'shared', messageId: 'msg1' },
{ path: 'shared', messageId: 'msg2' },
]);
assert.deepStrictEqual(
getTableData(db, 'attachments_protected_from_deletion'),
[
{ path: 'shared', messageId: 'msg1' },
{ path: 'shared', messageId: 'msg2' },
]
);
});
it('insert trigger only removes protection for matching messageId', () => {
insertData(db, 'messages', [{ id: 'msg1', conversationId: 'convoId' }]);
insertData(db, 'attachments_protected_from_deletion', [
{ path: 'a', messageId: 'msg1' },
{ path: 'a', messageId: 'msg2' },
]);
insertData(db, 'message_attachments', [
{
messageId: 'msg1',
editHistoryIndex: -1,
orderInMessage: 0,
attachmentType: 'attachment',
receivedAt: 42,
sentAt: 42,
size: 128,
contentType: 'image/png',
conversationId: 'convoId',
path: 'a',
},
]);
assert.deepStrictEqual(
getTableData(db, 'attachments_protected_from_deletion'),
[{ path: 'a', messageId: 'msg2' }]
);
});
it('insert trigger removes protection for all 4 path types', () => {
insertData(db, 'messages', [{ id: 'msg1', conversationId: 'convoId' }]);
insertData(db, 'attachments_protected_from_deletion', [
{ path: 'path', messageId: 'msg1' },
{ path: 'thumbnailPath', messageId: 'msg1' },
{ path: 'screenshotPath', messageId: 'msg1' },
{ path: 'backupThumbnailPath', messageId: 'msg1' },
{ path: 'unrelated', messageId: 'msg1' },
]);
insertData(db, 'message_attachments', [
{
messageId: 'msg1',
editHistoryIndex: -1,
orderInMessage: 0,
attachmentType: 'attachment',
receivedAt: 42,
sentAt: 42,
size: 128,
contentType: 'image/png',
conversationId: 'convoId',
path: 'path',
thumbnailPath: 'thumbnailPath',
screenshotPath: 'screenshotPath',
backupThumbnailPath: 'backupThumbnailPath',
},
]);
assert.deepStrictEqual(
getTableData(db, 'attachments_protected_from_deletion'),
[{ path: 'unrelated', messageId: 'msg1' }]
);
});
it('update trigger only removes protection for matching messageId', () => {
insertData(db, 'messages', [{ id: 'msg1', conversationId: 'convoId' }]);
insertData(db, 'message_attachments', [
{
messageId: 'msg1',
editHistoryIndex: -1,
orderInMessage: 0,
attachmentType: 'attachment',
receivedAt: 42,
sentAt: 42,
size: 128,
contentType: 'image/png',
conversationId: 'convoId',
path: 'old',
},
]);
insertData(db, 'attachments_protected_from_deletion', [
{ path: 'new', messageId: 'msg1' },
{ path: 'new', messageId: 'msg2' },
]);
db.prepare("UPDATE message_attachments SET path='new'").run();
assert.deepStrictEqual(
getTableData(db, 'attachments_protected_from_deletion'),
[{ path: 'new', messageId: 'msg2' }]
);
});
it('update trigger removes protection for all 4 path types', () => {
insertData(db, 'messages', [{ id: 'msg1', conversationId: 'convoId' }]);
insertData(db, 'message_attachments', [
{
messageId: 'msg1',
editHistoryIndex: -1,
orderInMessage: 0,
attachmentType: 'attachment',
receivedAt: 42,
sentAt: 42,
size: 128,
contentType: 'image/png',
conversationId: 'convoId',
path: 'old',
},
]);
insertData(db, 'attachments_protected_from_deletion', [
{ path: 'path', messageId: 'msg1' },
{ path: 'thumbnailPath', messageId: 'msg1' },
{ path: 'screenshotPath', messageId: 'msg1' },
{ path: 'backupThumbnailPath', messageId: 'msg1' },
{ path: 'unrelated', messageId: 'msg1' },
]);
db.prepare(
`UPDATE message_attachments SET
path='path',
thumbnailPath='thumbnailPath',
screenshotPath='screenshotPath',
backupThumbnailPath='backupThumbnailPath'`
).run();
assert.deepStrictEqual(
getTableData(db, 'attachments_protected_from_deletion'),
[{ path: 'unrelated', messageId: 'msg1' }]
);
});
});

View File

@@ -1008,7 +1008,8 @@ async function resolveReferences(packId: string): Promise<void> {
try {
attachments = await pMap(
messageIds,
() => copyStickerToAttachments(packId, stickerId),
messageId =>
copyStickerToAttachments({ packId, stickerId, messageId }),
{ concurrency: 3 }
);
} catch (error) {
@@ -1094,10 +1095,15 @@ export function getSticker(
return pack.stickers[stickerId];
}
export async function copyStickerToAttachments(
packId: string,
stickerId: number
): Promise<AttachmentType> {
export async function copyStickerToAttachments({
packId,
stickerId,
messageId,
}: {
packId: string;
stickerId: number;
messageId: string;
}): Promise<AttachmentType> {
const sticker = getSticker(packId, stickerId);
if (!sticker) {
throw new Error(
@@ -1132,6 +1138,7 @@ export async function copyStickerToAttachments(
const existingAttachmentData = await getExistingAttachmentDataForReuse({
plaintextHash,
contentType,
messageId,
logId: 'copyStickerToAttachments',
});

View File

@@ -30,10 +30,12 @@ type AttachmentDataToBeReused = WithRequiredProperties<
export async function getExistingAttachmentDataForReuse({
plaintextHash,
contentType,
messageId,
logId,
}: {
plaintextHash: string;
contentType: MIMEType;
messageId: string;
logId?: string;
}): Promise<AttachmentDataToBeReused | null> {
const existingAttachmentData =
@@ -41,6 +43,7 @@ export async function getExistingAttachmentDataForReuse({
plaintextHash,
version: CURRENT_ATTACHMENT_VERSION,
contentType,
messageId,
});
if (!existingAttachmentData) {

View File

@@ -343,7 +343,11 @@ export async function queueAttachmentDownloads(
log.info(`${logId}: Copying sticker from installed pack`);
copiedSticker = true;
const data = await copyStickerToAttachments(packId, stickerId);
const data = await copyStickerToAttachments({
packId,
stickerId,
messageId,
});
// Refresh sticker attachment since we had to await above
const freshSticker = message.get('sticker');
strictAssert(freshSticker != null, 'Sticker is gone while copying');