Improve digest backfill migration.

This commit is contained in:
Greyson Parrelli
2024-09-06 16:15:08 -04:00
committed by Cody Henthorne
parent 1e8626647e
commit c4bcb7dc93
4 changed files with 15 additions and 59 deletions

View File

@@ -6,9 +6,8 @@
package org.thoughtcrime.securesms.jobs
import org.signal.core.util.Base64
import org.signal.core.util.StreamUtil
import org.signal.core.util.copyTo
import org.signal.core.util.logging.Log
import org.signal.core.util.readLength
import org.signal.core.util.stream.NullOutputStream
import org.signal.core.util.withinTransaction
import org.thoughtcrime.securesms.attachments.AttachmentId
@@ -21,8 +20,8 @@ import org.whispersystems.signalservice.internal.crypto.PaddingInputStream
import java.io.IOException
/**
* For attachments that were created before we saved IV's, this will generate an IV and update the corresponding digest.
* This is important for backupsV2, where we need to know an attachments digest in advance.
* This goes through all attachments with pre-existing data and recalcuates their digests.
* This is important for backupsV2, where we need to know an attachment's digest in advance.
*
* This job needs to be careful to (1) minimize time in the transaction, and (2) never write partial results to disk, i.e. only write the full (key/iv/digest)
* tuple together all at once (partial writes could poison the db, preventing us from retrying properly in the event of a crash or transient error).
@@ -43,7 +42,6 @@ class BackfillDigestJob private constructor(
.setQueue("BackfillDigestJob")
.setMaxAttempts(3)
.setLifespan(Parameters.IMMORTAL)
.setPriority(Parameters.PRIORITY_LOW)
.build()
)
@@ -66,31 +64,6 @@ class BackfillDigestJob private constructor(
return Result.success()
}
if (attachment.remoteKey != null && attachment.remoteIv != null && attachment.remoteDigest != null) {
Log.w(TAG, "$attachmentId already has all required components! Skipping.")
return Result.success()
}
// There was a bug where we were accidentally saving the padded size for the attachment as the actual size. This corrects that.
// However, we're in a transaction, and reading a file is expensive in general, so we only do this if the length is unset or set to the padded size.
// Given that the padding algorithm targets padding <= 5%, and most attachments are a couple hundred kb, this should greatly limit the false positive rate
// to something like 1 in 10,000ish.
val fileLength = if (attachment.size == 0L || attachment.size == PaddingInputStream.getPaddedSize(attachment.size)) {
try {
SignalDatabase.attachments.getAttachmentStream(attachmentId, offset = 0).use { it.readLength() }
} catch (e: IOException) {
Log.w(TAG, "Could not open a stream for $attachmentId while calculating the length. Assuming that the file no longer exists. Skipping.", e)
return Result.success()
}
} else {
attachment.size
}
if (fileLength != attachment.size) {
Log.w(TAG, "$attachmentId had a saved size of ${attachment.size} but the actual size is $fileLength. Will update.")
SignalDatabase.attachments.updateSize(attachmentId, fileLength)
}
val stream = try {
SignalDatabase.attachments.getAttachmentStream(attachmentId, offset = 0)
} catch (e: IOException) {
@@ -99,14 +72,14 @@ class BackfillDigestJob private constructor(
}
// In order to match the exact digest calculation, we need to use the same padding that we would use when uploading the attachment.
Triple(attachment.remoteKey?.let { Base64.decode(it) }, attachment.remoteIv, PaddingInputStream(stream, fileLength))
Triple(attachment.remoteKey?.let { Base64.decode(it) }, attachment.remoteIv, PaddingInputStream(stream, attachment.size))
}
val key = originalKey ?: Util.getSecretBytes(64)
val iv = originalIv ?: Util.getSecretBytes(16)
val cipherOutputStream = AttachmentCipherOutputStream(key, iv, NullOutputStream)
StreamUtil.copy(decryptingStream, cipherOutputStream)
decryptingStream.copyTo(cipherOutputStream)
val digest = cipherOutputStream.transmittedDigest