Fix some issues with BackupMediaSnapshots.

This commit is contained in:
Greyson Parrelli
2025-09-09 15:10:14 -04:00
parent da6ad2b629
commit a05d5ff5e6
6 changed files with 228 additions and 74 deletions

View File

@@ -30,6 +30,8 @@ import com.bumptech.glide.Glide
import com.fasterxml.jackson.annotation.JsonProperty
import kotlinx.parcelize.IgnoredOnParcel
import kotlinx.parcelize.Parcelize
import okio.ByteString
import okio.ByteString.Companion.toByteString
import org.json.JSONArray
import org.json.JSONException
import org.signal.core.util.Base64
@@ -46,6 +48,7 @@ import org.signal.core.util.groupBy
import org.signal.core.util.isNull
import org.signal.core.util.logging.Log
import org.signal.core.util.readToList
import org.signal.core.util.readToSet
import org.signal.core.util.readToSingleInt
import org.signal.core.util.readToSingleLong
import org.signal.core.util.readToSingleObject
@@ -95,6 +98,7 @@ import org.thoughtcrime.securesms.dependencies.AppDependencies
import org.thoughtcrime.securesms.jobs.AttachmentDownloadJob
import org.thoughtcrime.securesms.jobs.AttachmentUploadJob
import org.thoughtcrime.securesms.jobs.GenerateAudioWaveFormJob
import org.thoughtcrime.securesms.keyvalue.SignalStore
import org.thoughtcrime.securesms.mms.DecryptableUri
import org.thoughtcrime.securesms.mms.MediaStream
import org.thoughtcrime.securesms.mms.MmsException
@@ -111,6 +115,8 @@ import org.thoughtcrime.securesms.util.StorageUtil
import org.thoughtcrime.securesms.util.Util
import org.thoughtcrime.securesms.video.EncryptedMediaDataSource
import org.whispersystems.signalservice.api.attachment.AttachmentUploadResult
import org.whispersystems.signalservice.api.backup.MediaId
import org.whispersystems.signalservice.api.backup.MediaName
import org.whispersystems.signalservice.api.crypto.AttachmentCipherStreamUtil
import org.whispersystems.signalservice.api.util.UuidUtil
import org.whispersystems.signalservice.internal.crypto.PaddingInputStream
@@ -422,10 +428,10 @@ class AttachmentTable(
}
/**
* Returns a cursor (with just the plaintextHash+remoteKey+archive_cdn) for all attachments that are slated to be included in the current archive upload.
* Returns a cursor (with just the plaintextHash+remoteKey+archive_cdn) for all full-size attachments that are slated to be included in the current archive upload.
* Used for snapshotting data in [BackupMediaSnapshotTable].
*/
fun getAttachmentsThatWillBeIncludedInArchive(): Cursor {
fun getFullSizeAttachmentsThatWillBeIncludedInArchive(): Cursor {
return readableDatabase
.select(DATA_HASH_END, REMOTE_KEY, ARCHIVE_CDN, QUOTE, CONTENT_TYPE)
.from("$TABLE_NAME LEFT JOIN ${MessageTable.TABLE_NAME} ON $TABLE_NAME.$MESSAGE_ID = ${MessageTable.TABLE_NAME}.${MessageTable.ID}")
@@ -433,6 +439,25 @@ class AttachmentTable(
.run()
}
/**
 * Queries the attachments whose thumbnails are slated to be included in the current archive upload.
 * Used for snapshotting data in [BackupMediaSnapshotTable].
 *
 * On top of the conditions produced by [buildAttachmentsThatNeedUploadQuery] (called with a filter that
 * excludes rows whose thumbnail transfer state is [ArchiveTransferState.PERMANENT_FAILURE]), rows must be
 * non-quote attachments with an `image/` or `video/` content type other than `image/svg+xml`.
 *
 * @return A cursor over the columns [DATA_HASH_END], [REMOTE_KEY], [ARCHIVE_CDN], [QUOTE] and [CONTENT_TYPE].
 */
fun getThumbnailAttachmentsThatWillBeIncludedInArchive(): Cursor {
  val thumbnailStateFilter = "$ARCHIVE_THUMBNAIL_TRANSFER_STATE != ${ArchiveTransferState.PERMANENT_FAILURE.value}"
  val attachmentsWithMessages = "$TABLE_NAME LEFT JOIN ${MessageTable.TABLE_NAME} ON $TABLE_NAME.$MESSAGE_ID = ${MessageTable.TABLE_NAME}.${MessageTable.ID}"

  // Thumbnails only exist for non-quote images and videos; SVGs are explicitly excluded.
  val thumbnailEligibility =
    """
    ${buildAttachmentsThatNeedUploadQuery(transferStateFilter = thumbnailStateFilter)} AND
    $QUOTE = 0 AND
    ($CONTENT_TYPE LIKE 'image/%' OR $CONTENT_TYPE LIKE 'video/%') AND
    $CONTENT_TYPE != 'image/svg+xml'
    """

  val selection = readableDatabase.select(DATA_HASH_END, REMOTE_KEY, ARCHIVE_CDN, QUOTE, CONTENT_TYPE)
  return selection
    .from(attachmentsWithMessages)
    .where(thumbnailEligibility)
    .run()
}
fun hasData(attachmentId: AttachmentId): Boolean {
return readableDatabase
.exists(TABLE_NAME)
@@ -1074,6 +1099,7 @@ class AttachmentTable(
* Should be the same or subset of that returned by [getAttachmentsThatNeedArchiveUpload].
*/
fun getPendingArchiveUploadBytes(): Long {
val archiveTransferStateFilter = "$ARCHIVE_TRANSFER_STATE NOT IN (${ArchiveTransferState.FINISHED.value}, ${ArchiveTransferState.PERMANENT_FAILURE.value})"
return readableDatabase
.rawQuery(
"""
@@ -1081,50 +1107,13 @@ class AttachmentTable(
FROM (
SELECT DISTINCT $DATA_HASH_END, $REMOTE_KEY, $DATA_SIZE
FROM $TABLE_NAME LEFT JOIN ${MessageTable.TABLE_NAME} ON $TABLE_NAME.$MESSAGE_ID = ${MessageTable.TABLE_NAME}.${MessageTable.ID}
WHERE
$DATA_FILE NOT NULL AND
$DATA_HASH_END NOT NULL AND
$REMOTE_KEY NOT NULL AND
$ARCHIVE_TRANSFER_STATE NOT IN (${ArchiveTransferState.FINISHED.value}, ${ArchiveTransferState.PERMANENT_FAILURE.value}) AND
$CONTENT_TYPE != '${MediaUtil.LONG_TEXT}' AND
(${MessageTable.STORY_TYPE} = 0 OR ${MessageTable.STORY_TYPE} IS NULL) AND
(${MessageTable.EXPIRES_IN} = 0 OR ${MessageTable.EXPIRES_IN} > ${ChatItemArchiveExporter.EXPIRATION_CUTOFF.inWholeMilliseconds}) AND
${MessageTable.TABLE_NAME}.${MessageTable.VIEW_ONCE} = 0
WHERE ${buildAttachmentsThatNeedUploadQuery(archiveTransferStateFilter)}
)
""".trimIndent()
)
.readToSingleLong()
}
/**
 * Debug helper that returns the attachment rows that are not fully uploaded to the archive CDN.
 *
 * The inner query selects the distinct ([DATA_HASH_END], [REMOTE_KEY], [DATA_SIZE]) tuples of attachments that
 * have a data file, a plaintext hash and a remote key, whose archive transfer state is neither FINISHED nor
 * PERMANENT_FAILURE, that are not long-text attachments, and whose message is not a story and either does not
 * expire or expires later than [ChatItemArchiveExporter.EXPIRATION_CUTOFF].
 *
 * Note that the outer join matches on [DATA_HASH_END] only, so every attachment row sharing a matching
 * plaintext hash is returned.
 */
fun debugGetPendingArchiveUploadAttachments(): List<DatabaseAttachment> {
return readableDatabase
.rawQuery(
"""
SELECT *
FROM $TABLE_NAME as t
JOIN (
SELECT DISTINCT $DATA_HASH_END, $REMOTE_KEY, $DATA_SIZE
FROM $TABLE_NAME LEFT JOIN ${MessageTable.TABLE_NAME} ON $TABLE_NAME.$MESSAGE_ID = ${MessageTable.TABLE_NAME}.${MessageTable.ID}
WHERE
$DATA_FILE NOT NULL AND
$DATA_HASH_END NOT NULL AND
$REMOTE_KEY NOT NULL AND
$ARCHIVE_TRANSFER_STATE NOT IN (${ArchiveTransferState.FINISHED.value}, ${ArchiveTransferState.PERMANENT_FAILURE.value}) AND
$CONTENT_TYPE != '${MediaUtil.LONG_TEXT}' AND
(${MessageTable.STORY_TYPE} = 0 OR ${MessageTable.STORY_TYPE} IS NULL) AND
(${MessageTable.EXPIRES_IN} = 0 OR ${MessageTable.EXPIRES_IN} > ${ChatItemArchiveExporter.EXPIRATION_CUTOFF.inWholeMilliseconds})
) as filtered
ON t.$DATA_HASH_END = filtered.$DATA_HASH_END
""".trimIndent()
)
.readToList {
it.readAttachment()
}
}
/**
* Clears out the incrementalMac for the specified [attachmentId], as well as any other attachments that share the same ([remoteKey], [plaintextHash]) pair (if present).
*/
@@ -3186,9 +3175,41 @@ class AttachmentTable(
}
}
/**
 * Finds the attachments whose full-size or thumbnail media ID is contained in [mediaIds].
 *
 * Important: This is an expensive query that can involve iterating over every row in the table, deriving two
 * media IDs per row. Only call this for debug stuff!
 *
 * @param mediaIds The media IDs to look for.
 * @param limit The maximum number of results to return. Scanning stops as soon as this many matches have been
 *   collected. A non-positive limit yields an empty list.
 * @return Pairs of (attachment, isThumbnail), where the boolean is `true` when the match was made on the
 *   attachment's thumbnail media ID and `false` when it was made on the full-size media ID.
 */
fun debugGetAttachmentsForMediaIds(mediaIds: Set<MediaId>, limit: Int): List<Pair<DatabaseAttachment, Boolean>> {
  if (mediaIds.isEmpty() || limit <= 0) {
    return emptyList()
  }

  val byteStringMediaIds: Set<ByteString> = mediaIds.mapTo(mutableSetOf()) { it.value.toByteString() }

  // Loop-invariant, so read it once instead of once per row.
  val mediaRootBackupKey = SignalStore.backup.mediaRootBackupKey

  val found = mutableListOf<Pair<DatabaseAttachment, Boolean>>()

  readableDatabase
    .select(*PROJECTION)
    .from(TABLE_NAME)
    .where("$REMOTE_KEY NOT NULL AND $DATA_HASH_END NOT NULL")
    .run()
    .use { cursor ->
      // An explicit loop lets us actually stop once the limit is hit. A `return@forEach` inside a forEach lambda
      // only skips to the next row, which would keep scanning (and collecting) past the limit.
      while (found.size < limit && cursor.moveToNext()) {
        val remoteKey = Base64.decode(cursor.requireNonNullString(REMOTE_KEY))
        val plaintextHash = Base64.decode(cursor.requireNonNullString(DATA_HASH_END))

        val mediaId = MediaName.fromPlaintextHashAndRemoteKey(plaintextHash, remoteKey).toMediaId(mediaRootBackupKey).value.toByteString()
        val mediaIdThumbnail = MediaName.fromPlaintextHashAndRemoteKeyForThumbnail(plaintextHash, remoteKey).toMediaId(mediaRootBackupKey).value.toByteString()

        if (mediaId in byteStringMediaIds) {
          found.add(getAttachment(cursor) to false)
        }

        // Re-check the limit so a row that matches on both IDs cannot push us one past it.
        if (found.size < limit && mediaIdThumbnail in byteStringMediaIds) {
          found.add(getAttachment(cursor) to true)
        }
      }
    }

  return found
}
fun debugGetAttachmentStats(): DebugAttachmentStats {
val totalAttachmentRows = readableDatabase.count().from(TABLE_NAME).run().readToSingleLong(0)
val totalEligibleForUploadRows = getAttachmentsThatWillBeIncludedInArchive().count
val totalEligibleForUploadRows = getFullSizeAttachmentsThatWillBeIncludedInArchive().count
val totalUniqueDataFiles = readableDatabase.select("COUNT(DISTINCT $DATA_FILE)").from(TABLE_NAME).run().readToSingleLong(0)
val totalUniqueMediaNames = readableDatabase.query("SELECT COUNT(*) FROM (SELECT DISTINCT $DATA_HASH_END, $REMOTE_KEY FROM $TABLE_NAME WHERE $DATA_HASH_END NOT NULL AND $REMOTE_KEY NOT NULL)").readToSingleLong(0)
@@ -3273,6 +3294,26 @@ class AttachmentTable(
)
}
/**
 * Looks up debug info for every attachment row whose plaintext hash ([DATA_HASH_END]) matches one of the
 * provided snapshot entries.
 *
 * Rows are matched on plaintext hash only. If the same plaintext hash is present in [hashes] as both a
 * full-size entry and a thumbnail entry, a separate result is produced for each, so neither one is
 * misreported.
 *
 * @param hashes The snapshot entries to look up. An empty collection yields an empty set without hitting the database.
 * @return One [DebugArchiveMediaInfo] per (matching attachment row, distinct isThumbnail value for that hash).
 */
fun getDebugMediaInfoForEntries(hashes: Collection<BackupMediaSnapshotTable.MediaEntry>): Set<DebugArchiveMediaInfo> {
  if (hashes.isEmpty()) {
    return emptySet()
  }

  // A full-size entry and its thumbnail share the same plaintext hash, so a hash may map to both flags.
  val thumbnailFlagsByHash = mutableMapOf<String, MutableSet<Boolean>>()
  for (entry in hashes) {
    thumbnailFlagsByHash.getOrPut(Base64.encodeWithPadding(entry.plaintextHash)) { mutableSetOf() } += entry.isThumbnail
  }

  val query = SqlUtil.buildFastCollectionQuery(DATA_HASH_END, thumbnailFlagsByHash.keys)

  return readableDatabase
    .select(ID, MESSAGE_ID, CONTENT_TYPE, DATA_HASH_END)
    .from(TABLE_NAME)
    .where(query.where, query.whereArgs)
    .run()
    .readToList { cursor ->
      val attachmentId = AttachmentId(cursor.requireLong(ID))
      val messageId = cursor.requireLong(MESSAGE_ID)
      val contentType = cursor.requireString(CONTENT_TYPE)

      // Every returned row should have a hash we asked for; fall back to no results rather than crashing if not.
      val thumbnailFlags: Set<Boolean> = cursor.requireString(DATA_HASH_END)?.let { thumbnailFlagsByHash[it] }.orEmpty()

      thumbnailFlags.map { isThumbnail ->
        DebugArchiveMediaInfo(
          attachmentId = attachmentId,
          messageId = messageId,
          contentType = contentType,
          isThumbnail = isThumbnail
        )
      }
    }
    .flatten()
    .toSet()
}
fun debugAttachmentStatsForBackupProto(): BackupDebugInfo.AttachmentDetails {
val archiveStateCounts = ArchiveTransferState
.entries.associateWith {
@@ -3676,4 +3717,11 @@ class AttachmentTable(
/**
 * Counters describing the outcome of a remote-key creation pass.
 * NOTE(review): the exact meaning of each counter is defined by the code that populates it — confirm there.
 *
 * @property totalCount The first counter; not used by [unexpectedKeyCreation].
 * @property notQuoteOrSickerDupeNotFoundCount Counter that, when positive, marks the result as unexpected.
 * @property notQuoteOrSickerDupeFoundCount Counter that, when positive, marks the result as unexpected.
 */
data class CreateRemoteKeyResult(
  val totalCount: Int,
  val notQuoteOrSickerDupeNotFoundCount: Int,
  val notQuoteOrSickerDupeFoundCount: Int
) {
  /** `true` when at least one of the two `notQuoteOrSicker*` counters is greater than zero. */
  val unexpectedKeyCreation: Boolean = maxOf(notQuoteOrSickerDupeFoundCount, notQuoteOrSickerDupeNotFoundCount) > 0
}
/**
 * Debug-only description of an attachment row that was matched against a media snapshot entry.
 *
 * @property attachmentId ID of the attachment row.
 * @property messageId ID of the message the attachment row points at.
 * @property contentType Content type stored on the attachment row, if any.
 * @property isThumbnail Whether the snapshot entry that matched this row was a thumbnail entry.
 */
class DebugArchiveMediaInfo(
  val attachmentId: AttachmentId,
  val messageId: Long,
  val contentType: String?,
  val isThumbnail: Boolean
)
}

View File

@@ -24,7 +24,6 @@ import org.signal.core.util.toInt
import org.signal.core.util.update
import org.signal.core.util.withinTransaction
import org.thoughtcrime.securesms.backup.v2.ArchivedMediaObject
import org.thoughtcrime.securesms.util.MediaUtil
/**
* When we delete attachments locally, we can't immediately delete them from the archive CDN. This is because there is still a backup that exists that
@@ -129,26 +128,29 @@ class BackupMediaSnapshotTable(context: Context, database: SignalDatabase) : Dat
}
/**
* Writes the set of media items that are slated to be referenced in the next backup, updating their pending sync time.
* Writes the set of full-size media items that are slated to be referenced in the next backup, updating their pending sync time.
* Inserts one full-size entry per object; thumbnail entries are written separately by [writeThumbnailPendingMediaObjects].
*/
fun writePendingMediaObjects(mediaObjects: Sequence<ArchiveMediaItem>) {
fun writeFullSizePendingMediaObjects(mediaObjects: Sequence<ArchiveMediaItem>) {
mediaObjects
.chunked(SqlUtil.MAX_QUERY_ARGS)
.forEach { chunk ->
// Full attachment
writePendingMediaObjectsChunk(
chunk
.filterNot { MediaUtil.isViewOnceType(it.contentType) || MediaUtil.isLongTextType(it.contentType) }
.map { MediaEntry(it.mediaId, it.cdn, it.plaintextHash, it.remoteKey, isThumbnail = false) }
chunk.map { MediaEntry(it.mediaId, it.cdn, it.plaintextHash, it.remoteKey, isThumbnail = false) }
)
}
}
// Thumbnail
/**
* Writes the set of thumbnail media items that are slated to be referenced in the next backup, updating their pending sync time.
* Inserts one thumbnail entry per object; full-size entries are written separately by [writeFullSizePendingMediaObjects].
*/
fun writeThumbnailPendingMediaObjects(mediaObjects: Sequence<ArchiveMediaItem>) {
mediaObjects
.chunked(SqlUtil.MAX_QUERY_ARGS)
.forEach { chunk ->
writePendingMediaObjectsChunk(
chunk
.filterNot { it.quote }
.filter { MediaUtil.isImageOrVideoType(it.contentType) }
.map { MediaEntry(it.thumbnailMediaId, it.cdn, it.plaintextHash, it.remoteKey, isThumbnail = true) }
chunk.map { MediaEntry(it.thumbnailMediaId, it.cdn, it.plaintextHash, it.remoteKey, isThumbnail = true) }
)
}
}
@@ -237,6 +239,32 @@ class BackupMediaSnapshotTable(context: Context, database: SignalDatabase) : Dat
return objects.filterNot { foundObjects.contains(it.mediaId) }.toSet()
}
/**
 * Given a list of media objects, returns the [MediaEntry] rows with a matching [MEDIA_ID] whose
 * [SNAPSHOT_VERSION] equals [MAX_VERSION].
 *
 * @param objects The archived media objects to look up. An empty list yields an empty set without hitting the database.
 * @return The matching entries, accumulated across however many queries the lookup is split into.
 */
fun getMediaEntriesForObjects(objects: List<ArchivedMediaObject>): Set<MediaEntry> {
  if (objects.isEmpty()) {
    return emptySet()
  }

  val queries: List<SqlUtil.Query> = SqlUtil.buildCollectionQuery(
    column = MEDIA_ID,
    values = objects.map { it.mediaId },
    collectionOperator = SqlUtil.CollectionOperator.IN,
    prefix = "$SNAPSHOT_VERSION = $MAX_VERSION AND "
  )

  // Query only this table. Joining the attachment table without an ON clause produces a Cartesian product:
  // every match is repeated once per attachment row, and nothing is returned if the attachment table is empty.
  // NOTE(review): assumes every selected/filtered column lives on this table — confirm against the schema.
  return queries.flatMapTo(mutableSetOf()) { query ->
    readableDatabase
      .select(MEDIA_ID, CDN, PLAINTEXT_HASH, REMOTE_KEY, IS_THUMBNAIL)
      .from(TABLE_NAME)
      .where(query.where, query.whereArgs)
      .run()
      .readToList { MediaEntry.fromCursor(it) }
  }
}
/**
* Given a list of media objects, find the ones that are present in the most recent snapshot, but have a different CDN than the one passed in.
* This will ignore thumbnails, as the results are intended to be used to update CDNs, which we do not track for thumbnails.