Update BackupMediaSnapshot to be based on attachments in backup frames.

This commit is contained in:
Greyson Parrelli
2025-09-19 13:40:28 -04:00
committed by Jeffrey Starke
parent f39ad24cc1
commit c5753b96ff
8 changed files with 343 additions and 186 deletions

View File

@@ -431,34 +431,27 @@ class AttachmentTable(
}
/**
* Returns a cursor (with just the plaintextHash+remoteKey+archive_cdn) for all full-size attachments that are slated to be included in the current archive upload.
* Used for snapshotting data in [BackupMediaSnapshotTable].
* Returns a copy of the given set of entries with any permanently-failed thumbnails removed.
*/
fun getFullSizeAttachmentsThatWillBeIncludedInArchive(): Cursor {
return readableDatabase
.select(DATA_HASH_END, REMOTE_KEY, ARCHIVE_CDN, QUOTE, CONTENT_TYPE)
.from("$TABLE_NAME LEFT JOIN ${MessageTable.TABLE_NAME} ON $TABLE_NAME.$MESSAGE_ID = ${MessageTable.TABLE_NAME}.${MessageTable.ID}")
.where(buildAttachmentsThatNeedUploadQuery(transferStateFilter = "$ARCHIVE_TRANSFER_STATE != ${ArchiveTransferState.PERMANENT_FAILURE.value}"))
.run()
}
fun filterPermanentlyFailedThumbnails(entries: Set<BackupMediaSnapshotTable.MediaEntry>): Set<BackupMediaSnapshotTable.MediaEntry> {
val entriesByMediaName: MutableMap<String, BackupMediaSnapshotTable.MediaEntry> = entries
.associateBy { MediaName.fromPlaintextHashAndRemoteKeyForThumbnail(it.plaintextHash, it.remoteKey).name }
.toMutableMap()
/**
* Returns a cursor (with just the plaintextHash+remoteKey+archive_cdn) for all thumbnail attachments that are slated to be included in the current archive upload.
* Used for snapshotting data in [BackupMediaSnapshotTable].
*/
fun getThumbnailAttachmentsThatWillBeIncludedInArchive(): Cursor {
return readableDatabase
.select(DATA_HASH_END, REMOTE_KEY, ARCHIVE_CDN, QUOTE, CONTENT_TYPE)
.from("$TABLE_NAME LEFT JOIN ${MessageTable.TABLE_NAME} ON $TABLE_NAME.$MESSAGE_ID = ${MessageTable.TABLE_NAME}.${MessageTable.ID}")
.where(
"""
${buildAttachmentsThatNeedUploadQuery(transferStateFilter = "$ARCHIVE_THUMBNAIL_TRANSFER_STATE != ${ArchiveTransferState.PERMANENT_FAILURE.value}")} AND
$QUOTE = 0 AND
($CONTENT_TYPE LIKE 'image/%' OR $CONTENT_TYPE LIKE 'video/%') AND
$CONTENT_TYPE != 'image/svg+xml'
"""
)
readableDatabase
.select(DATA_HASH_END, REMOTE_KEY)
.from(TABLE_NAME)
.where("$DATA_HASH_END NOT NULL AND $REMOTE_KEY NOT NULL AND $ARCHIVE_THUMBNAIL_TRANSFER_STATE = ${ArchiveTransferState.PERMANENT_FAILURE.value}")
.run()
.forEach { cursor ->
val hashEnd = cursor.requireNonNullString(DATA_HASH_END)
val remoteKey = cursor.requireNonNullString(REMOTE_KEY)
val thumbnailMediaName = MediaName.fromPlaintextHashAndRemoteKeyForThumbnail(Base64.decode(hashEnd), Base64.decode(remoteKey)).name
entriesByMediaName.remove(thumbnailMediaName)
}
return entriesByMediaName.values.toSet()
}
fun hasData(attachmentId: AttachmentId): Boolean {
@@ -566,6 +559,25 @@ class AttachmentTable(
.flatten()
}
/**
 * Looks up a single attachment row by its plaintext hash and remote key and maps it to a [LocalArchivableAttachment].
 *
 * If several rows share the same hash/key pair, the one with the highest [ID] is used.
 *
 * @param plaintextHash Value compared against [DATA_HASH_END]. Presumably the Base64-encoded form, since the stored column is Base64-decoded below -- confirm with callers.
 * @param remoteKey Value compared against [REMOTE_KEY]. Presumably Base64-encoded as well -- confirm with callers.
 * @return The matching attachment, or null when the query yields no row.
 */
fun getLocalArchivableAttachment(plaintextHash: String, remoteKey: String): LocalArchivableAttachment? {
  return readableDatabase
    .select(*PROJECTION)
    .from(TABLE_NAME)
    // Both placeholders must be bound; an unbound `?` is treated as NULL by SQLite, which never compares equal to anything.
    .where("$DATA_HASH_END = ? AND $REMOTE_KEY = ?", plaintextHash, remoteKey)
    .orderBy("$ID DESC")
    .limit(1)
    .run()
    .readToSingleObject {
      LocalArchivableAttachment(
        file = File(it.requireNonNullString(DATA_FILE)),
        random = it.requireNonNullBlob(DATA_RANDOM),
        size = it.requireLong(DATA_SIZE),
        remoteKey = Base64.decode(it.requireNonNullString(REMOTE_KEY)),
        plaintextHash = Base64.decode(it.requireNonNullString(DATA_HASH_END))
      )
    }
}
fun getLocalArchivableAttachments(): List<LocalArchivableAttachment> {
return readableDatabase
.select(*PROJECTION)
@@ -3214,7 +3226,7 @@ class AttachmentTable(
.select(*PROJECTION)
.from(TABLE_NAME)
.where("$REMOTE_KEY NOT NULL AND $DATA_HASH_END NOT NULL")
.groupBy(DATA_HASH_END)
.groupBy("$DATA_HASH_END, $REMOTE_KEY")
.run()
.forEach { cursor ->
val remoteKey = Base64.decode(cursor.requireNonNullString(REMOTE_KEY))
@@ -3239,7 +3251,6 @@ class AttachmentTable(
fun debugGetAttachmentStats(): DebugAttachmentStats {
val totalAttachmentRows = readableDatabase.count().from(TABLE_NAME).run().readToSingleLong(0)
val totalEligibleForUploadRows = getFullSizeAttachmentsThatWillBeIncludedInArchive().count
val totalUniqueDataFiles = readableDatabase.select("COUNT(DISTINCT $DATA_FILE)").from(TABLE_NAME).run().readToSingleLong(0)
val totalUniqueMediaNames = readableDatabase.query("SELECT COUNT(*) FROM (SELECT DISTINCT $DATA_HASH_END, $REMOTE_KEY FROM $TABLE_NAME WHERE $DATA_HASH_END NOT NULL AND $REMOTE_KEY NOT NULL)").readToSingleLong(0)
@@ -3309,15 +3320,19 @@ class AttachmentTable(
val uploadedThumbnailCount = archiveStatusMediaNameThumbnailCounts.getOrDefault(ArchiveTransferState.FINISHED, 0L)
val uploadedThumbnailBytes = uploadedThumbnailCount * RemoteConfig.backupMaxThumbnailFileSize.inWholeBytes
val lastSnapshotFullSizeCount = SignalDatabase.backupMediaSnapshots.debugGetFullSizeAttachmentCountForMostRecentSnapshot()
val lastSnapshotThumbnailCount = SignalDatabase.backupMediaSnapshots.debugGetThumbnailAttachmentCountForMostRecentSnapshot()
return DebugAttachmentStats(
totalAttachmentRows = totalAttachmentRows,
totalEligibleForUploadRows = totalEligibleForUploadRows.toLong(),
totalUniqueMediaNamesEligibleForUpload = totalUniqueMediaNamesEligibleForUpload,
totalUniqueDataFiles = totalUniqueDataFiles,
totalUniqueMediaNames = totalUniqueMediaNames,
archiveStatusMediaNameCounts = archiveStatusMediaNameCounts,
mediaNamesWithThumbnailsCount = uniqueEligibleMediaNamesWithThumbnailsCount,
archiveStatusMediaNameThumbnailCounts = archiveStatusMediaNameThumbnailCounts,
lastSnapshotFullSizeCount = lastSnapshotFullSizeCount.toLong(),
lastSnapshotThumbnailCount = lastSnapshotThumbnailCount.toLong(),
pendingAttachmentUploadBytes = pendingAttachmentUploadBytes,
uploadedAttachmentBytes = uploadedAttachmentBytes,
uploadedThumbnailBytes = uploadedThumbnailBytes
@@ -3727,13 +3742,14 @@ class AttachmentTable(
data class DebugAttachmentStats(
val totalAttachmentRows: Long = 0L,
val totalEligibleForUploadRows: Long = 0L,
val totalUniqueMediaNamesEligibleForUpload: Long = 0L,
val totalUniqueDataFiles: Long = 0L,
val totalUniqueMediaNames: Long = 0L,
val archiveStatusMediaNameCounts: Map<ArchiveTransferState, Long> = emptyMap(),
val mediaNamesWithThumbnailsCount: Long = 0L,
val archiveStatusMediaNameThumbnailCounts: Map<ArchiveTransferState, Long> = emptyMap(),
val lastSnapshotFullSizeCount: Long = 0L,
val lastSnapshotThumbnailCount: Long = 0L,
val pendingAttachmentUploadBytes: Long = 0L,
val uploadedAttachmentBytes: Long = 0L,
val uploadedThumbnailBytes: Long = 0L
@@ -3747,12 +3763,13 @@ class AttachmentTable(
fun prettyString(): String {
return buildString {
appendLine("Total attachment rows: $totalAttachmentRows")
appendLine("Total eligible for upload rows: $totalEligibleForUploadRows")
appendLine("Total unique media names eligible for upload: $totalUniqueMediaNamesEligibleForUpload")
appendLine("Total unique data files: $totalUniqueDataFiles")
appendLine("Total unique media names: $totalUniqueMediaNames")
appendLine("Media names with thumbnails count: $mediaNamesWithThumbnailsCount")
appendLine("Pending attachment upload bytes: $pendingAttachmentUploadBytes")
appendLine("Last snapshot full-size count: $lastSnapshotFullSizeCount")
appendLine("Last snapshot thumbnail count : $lastSnapshotThumbnailCount")
appendLine("Uploaded attachment bytes: $uploadedAttachmentBytes")
appendLine("Uploaded thumbnail bytes: $uploadedThumbnailBytes")
appendLine("Total upload count: $totalUploadCount")
@@ -3776,10 +3793,11 @@ class AttachmentTable(
fun shortPrettyString(): String {
return buildString {
appendLine("Total eligible for upload rows: $totalEligibleForUploadRows")
appendLine("Total unique media names eligible for upload: $totalUniqueMediaNamesEligibleForUpload")
appendLine("Total unique data files: $totalUniqueDataFiles")
appendLine("Total unique media names: $totalUniqueMediaNames")
appendLine("Last snapshot full-size count: $lastSnapshotFullSizeCount")
appendLine("Last snapshot thumbnail count : $lastSnapshotThumbnailCount")
appendLine("Pending attachment upload bytes: $pendingAttachmentUploadBytes")
if (archiveStatusMediaNameCounts.isNotEmpty()) {

View File

@@ -10,10 +10,12 @@ import android.database.Cursor
import androidx.annotation.VisibleForTesting
import androidx.core.content.contentValuesOf
import org.signal.core.util.SqlUtil
import org.signal.core.util.count
import org.signal.core.util.delete
import org.signal.core.util.forEach
import org.signal.core.util.readToList
import org.signal.core.util.readToSet
import org.signal.core.util.readToSingleInt
import org.signal.core.util.readToSingleLong
import org.signal.core.util.requireBoolean
import org.signal.core.util.requireInt
@@ -129,33 +131,43 @@ class BackupMediaSnapshotTable(context: Context, database: SignalDatabase) : Dat
}
/**
* Writes the set of full-size media items that are slated to be referenced in the next backup, updating their pending sync time.
* Writes a set of [MediaEntry] that are slated to be referenced in the next backup, updating their pending sync time.
*/
fun writeFullSizePendingMediaObjects(mediaObjects: Sequence<ArchiveMediaItem>) {
mediaObjects
.chunked(SqlUtil.MAX_QUERY_ARGS)
.forEach { chunk ->
writePendingMediaObjectsChunk(
chunk.map { MediaEntry(it.mediaId, it.cdn, it.plaintextHash, it.remoteKey, isThumbnail = false) }
)
}
fun writePendingMediaEntries(entries: Collection<MediaEntry>) {
// Nothing to write; return before building any insert statement.
if (entries.isEmpty()) {
return
}
// One ContentValues per entry. Every row is written as pending (IS_PENDING = 1) with SNAPSHOT_VERSION = UNKNOWN_VERSION.
val values = entries.map {
contentValuesOf(
MEDIA_ID to it.mediaId,
CDN to it.cdn,
PLAINTEXT_HASH to it.plaintextHash,
REMOTE_KEY to it.remoteKey,
IS_THUMBNAIL to it.isThumbnail.toInt(),
SNAPSHOT_VERSION to UNKNOWN_VERSION,
IS_PENDING to 1
)
}
// buildBulkInsert may yield several queries (presumably split to respect SQLite's bind-argument limit -- confirm in SqlUtil); each is executed in turn.
SqlUtil.buildBulkInsert(TABLE_NAME, arrayOf(MEDIA_ID, CDN, PLAINTEXT_HASH, REMOTE_KEY, IS_THUMBNAIL, SNAPSHOT_VERSION, IS_PENDING), values).forEach { query ->
writableDatabase.execSQL(
// The generated INSERT is extended with an upsert clause keyed on MEDIA_ID. On conflict the existing row is refreshed from the
// incoming values; SNAPSHOT_VERSION is not in the update list, so a pre-existing row keeps its current version.
query.where +
"""
ON CONFLICT($MEDIA_ID) DO UPDATE SET
$CDN = excluded.$CDN,
$PLAINTEXT_HASH = excluded.$PLAINTEXT_HASH,
$REMOTE_KEY = excluded.$REMOTE_KEY,
$IS_THUMBNAIL = excluded.$IS_THUMBNAIL,
$IS_PENDING = excluded.$IS_PENDING
""",
query.whereArgs
)
}
}
/**
* Writes the set of thumbnail media items that are slated to be referenced in the next backup, updating their pending sync time.
*/
fun writeThumbnailPendingMediaObjects(mediaObjects: Sequence<ArchiveMediaItem>) {
mediaObjects
.chunked(SqlUtil.MAX_QUERY_ARGS)
.forEach { chunk ->
writePendingMediaObjectsChunk(
chunk.map { MediaEntry(it.thumbnailMediaId, it.cdn, it.plaintextHash, it.remoteKey, isThumbnail = true) }
)
}
}
/**
* Commits all pending entries (written via [writePendingMediaObjects]) to have a concrete [SNAPSHOT_VERSION]. The version will be 1 higher than the previous
* Commits all pending entries (written via [writePendingMediaEntries]) to have a concrete [SNAPSHOT_VERSION]. The version will be 1 higher than the previous
* snapshot version.
*/
fun commitPendingRows() {
@@ -326,37 +338,22 @@ class BackupMediaSnapshotTable(context: Context, database: SignalDatabase) : Dat
.run()
}
private fun writePendingMediaObjectsChunk(chunk: List<MediaEntry>) {
if (chunk.isEmpty()) {
return
}
/**
 * Debug helper that counts the rows with [IS_THUMBNAIL] = 0 whose [SNAPSHOT_VERSION] equals [MAX_VERSION]
 * (presumably the most recently committed snapshot version -- confirm against the constant's definition).
 */
fun debugGetFullSizeAttachmentCountForMostRecentSnapshot(): Int {
  val fullSizeInLatestSnapshot = "$IS_THUMBNAIL = 0 AND $SNAPSHOT_VERSION = $MAX_VERSION"

  val countCursor = readableDatabase
    .count()
    .from(TABLE_NAME)
    .where(fullSizeInLatestSnapshot)
    .run()

  return countCursor.readToSingleInt()
}
val values = chunk.map {
contentValuesOf(
MEDIA_ID to it.mediaId,
CDN to it.cdn,
PLAINTEXT_HASH to it.plaintextHash,
REMOTE_KEY to it.remoteKey,
IS_THUMBNAIL to it.isThumbnail.toInt(),
SNAPSHOT_VERSION to UNKNOWN_VERSION,
IS_PENDING to 1
)
}
val query = SqlUtil.buildSingleBulkInsert(TABLE_NAME, arrayOf(MEDIA_ID, CDN, PLAINTEXT_HASH, REMOTE_KEY, IS_THUMBNAIL, SNAPSHOT_VERSION, IS_PENDING), values)
writableDatabase.execSQL(
query.where +
"""
ON CONFLICT($MEDIA_ID) DO UPDATE SET
$CDN = excluded.$CDN,
$PLAINTEXT_HASH = excluded.$PLAINTEXT_HASH,
$REMOTE_KEY = excluded.$REMOTE_KEY,
$IS_THUMBNAIL = excluded.$IS_THUMBNAIL,
$IS_PENDING = excluded.$IS_PENDING
""",
query.whereArgs
)
/**
 * Debug helper that counts the rows with a non-zero [IS_THUMBNAIL] whose [SNAPSHOT_VERSION] equals [MAX_VERSION]
 * (presumably the most recently committed snapshot version -- confirm against the constant's definition).
 */
fun debugGetThumbnailAttachmentCountForMostRecentSnapshot(): Int {
  val thumbnailsInLatestSnapshot = "$IS_THUMBNAIL != 0 AND $SNAPSHOT_VERSION = $MAX_VERSION"

  val countCursor = readableDatabase
    .count()
    .from(TABLE_NAME)
    .where(thumbnailsInLatestSnapshot)
    .run()

  return countCursor.readToSingleInt()
}
class ArchiveMediaItem(