From 91d3fa8ad5d2c95688a973eb18d769983b40b8dc Mon Sep 17 00:00:00 2001 From: Cody Henthorne Date: Fri, 29 May 2026 10:16:10 -0400 Subject: [PATCH] Fix bugs around stalled media restore and add recovery logic. --- .../backup/v2/ArchiveRestoreProgress.kt | 19 +++++ .../banners/ArchiveRestoreStatusBanner.kt | 2 + .../securesms/database/AttachmentTable.kt | 18 +++++ .../securesms/jobs/BackupRestoreMediaJob.kt | 5 ++ .../jobs/CheckRestoreMediaLeftJob.kt | 69 +++++++++++++++---- .../jobs/RestoreLocalAttachmentJob.kt | 7 +- 6 files changed, 105 insertions(+), 15 deletions(-) diff --git a/app/src/main/java/org/thoughtcrime/securesms/backup/v2/ArchiveRestoreProgress.kt b/app/src/main/java/org/thoughtcrime/securesms/backup/v2/ArchiveRestoreProgress.kt index 73358e074d..faf8b30c0e 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/backup/v2/ArchiveRestoreProgress.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/backup/v2/ArchiveRestoreProgress.kt @@ -31,6 +31,9 @@ import org.thoughtcrime.securesms.jobmanager.impl.BatteryNotLowConstraint import org.thoughtcrime.securesms.jobmanager.impl.DiskSpaceNotLowConstraint import org.thoughtcrime.securesms.jobmanager.impl.NetworkConstraint import org.thoughtcrime.securesms.jobmanager.impl.WifiConstraint +import org.thoughtcrime.securesms.jobs.CheckRestoreMediaLeftJob +import org.thoughtcrime.securesms.jobs.RestoreAttachmentJob +import org.thoughtcrime.securesms.jobs.RestoreLocalAttachmentJob import org.thoughtcrime.securesms.keyvalue.SignalStore import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicInteger @@ -157,6 +160,22 @@ object ArchiveRestoreProgress { update() } + /** + * Self-heal hook for restores that appear active (banner showing, media still remaining) but have no jobs left actually working on them. + */ + fun checkForStalledRestore() { + SignalExecutors.BOUNDED.execute { + val stalled = SignalStore.backup.restoreState.isMediaRestoreOperation && + SignalDatabase.attachments.getRemainingRestorableAttachmentSize() > 0L && + AppDependencies.jobManager.areFactoriesEmpty(setOf(RestoreAttachmentJob.KEY, RestoreLocalAttachmentJob.KEY, CheckRestoreMediaLeftJob.KEY)) + + if (stalled) { + Log.w(TAG, "Detected a stalled media restore with no active jobs. Enqueueing a check job to recover.") + CheckRestoreMediaLeftJob.enqueueStalledRecoveryCheck() + } + } + } + fun clearLocalRestoreDirectoryError() { SignalStore.backup.localRestoreDirectoryError = false update() diff --git a/app/src/main/java/org/thoughtcrime/securesms/banner/banners/ArchiveRestoreStatusBanner.kt b/app/src/main/java/org/thoughtcrime/securesms/banner/banners/ArchiveRestoreStatusBanner.kt index 97cbe70217..bfbb142f8a 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/banner/banners/ArchiveRestoreStatusBanner.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/banner/banners/ArchiveRestoreStatusBanner.kt @@ -10,6 +10,7 @@ import androidx.compose.runtime.Composable import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.filter +import kotlinx.coroutines.flow.onStart import org.thoughtcrime.securesms.backup.v2.ArchiveRestoreProgress import org.thoughtcrime.securesms.backup.v2.ArchiveRestoreProgressState import org.thoughtcrime.securesms.backup.v2.ArchiveRestoreProgressState.RestoreStatus @@ -25,6 +26,7 @@ class ArchiveRestoreStatusBanner(private val listener: RestoreProgressBannerList override val dataFlow: Flow by lazy { ArchiveRestoreProgress .stateFlow + .onStart { ArchiveRestoreProgress.checkForStalledRestore() } .filter { it.restoreStatus != RestoreStatus.NONE && (it.restoreState.isMediaRestoreOperation || it.restoreStatus == RestoreStatus.FINISHED) } diff --git a/app/src/main/java/org/thoughtcrime/securesms/database/AttachmentTable.kt b/app/src/main/java/org/thoughtcrime/securesms/database/AttachmentTable.kt index 1cb6b846de..c56ca293b1 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/database/AttachmentTable.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/database/AttachmentTable.kt @@ -1665,6 +1665,24 @@ class AttachmentTable( } } + /** + * Marks any restorable attachment whose [MESSAGE_ID] does not point to an existing message as failed. + * @return the number of rows updated + */ + fun markRestorableAttachmentsWithoutMessageAsFailed(): Int { + return writableDatabase + .update(TABLE_NAME) + .values(TRANSFER_STATE to TRANSFER_PROGRESS_FAILED) + .where( + """ + ($TRANSFER_STATE = $TRANSFER_NEEDS_RESTORE OR $TRANSFER_STATE = $TRANSFER_RESTORE_IN_PROGRESS) AND + $MESSAGE_ID > 0 AND + $MESSAGE_ID NOT IN (SELECT ${MessageTable.ID} FROM ${MessageTable.TABLE_NAME}) + """ + ) + .run() + } + fun setRestoreTransferState(attachmentId: AttachmentId, state: Int) { setRestoreTransferState(listOf(attachmentId), state) } diff --git a/app/src/main/java/org/thoughtcrime/securesms/jobs/BackupRestoreMediaJob.kt b/app/src/main/java/org/thoughtcrime/securesms/jobs/BackupRestoreMediaJob.kt index 844125bc86..0d51044c81 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/jobs/BackupRestoreMediaJob.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/jobs/BackupRestoreMediaJob.kt @@ -61,6 +61,11 @@ class BackupRestoreMediaJob private constructor(parameters: Parameters) : BaseJo val batchSize = 500 val restoreTime = System.currentTimeMillis() + val orphanedCount = SignalDatabase.attachments.markRestorableAttachmentsWithoutMessageAsFailed() + if (orphanedCount > 0) { + Log.w(TAG, "$orphanedCount orphaned restorable attachments marked failed") + } + do { val restoreThumbnailJobs: MutableList = mutableListOf() val restoreFullAttachmentJobs: MutableList = mutableListOf() diff --git a/app/src/main/java/org/thoughtcrime/securesms/jobs/CheckRestoreMediaLeftJob.kt b/app/src/main/java/org/thoughtcrime/securesms/jobs/CheckRestoreMediaLeftJob.kt index 75bf2e48d6..13bfae8bb2 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/jobs/CheckRestoreMediaLeftJob.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/jobs/CheckRestoreMediaLeftJob.kt @@ -26,6 +26,12 @@ class CheckRestoreMediaLeftJob private constructor(parameters: Parameters) : Job const val KEY = "CheckRestoreMediaLeftJob" private val TAG = Log.tag(CheckRestoreMediaLeftJob::class) + private val FINALIZE_LOCK = Any() + private const val STALLED_RECOVERY_QUEUE = "CheckRestoreMediaLeftJob::StalledRecovery" + + fun enqueueStalledRecoveryCheck() { + AppDependencies.jobManager.add(CheckRestoreMediaLeftJob(STALLED_RECOVERY_QUEUE)) + } } constructor(queue: String) : this( @@ -46,28 +52,63 @@ class CheckRestoreMediaLeftJob private constructor(parameters: Parameters) : Job if (remainingAttachmentSize == 0L) { Log.d(TAG, "Media restore complete: there are no remaining restorable attachments.") - ArchiveRestoreProgress.allMediaRestored() - BackupMediaRestoreService.stop(context) - - if (SignalStore.backup.deletionState == DeletionState.AWAITING_MEDIA_DOWNLOAD) { - SignalStore.backup.deletionState = DeletionState.MEDIA_DOWNLOAD_FINISHED - } - - if (!SignalStore.backup.backsUpMedia) { - SignalDatabase.attachments.markQuotesThatNeedReconstruction() - AppDependencies.jobManager.add(QuoteThumbnailReconstructionJob()) - } + onMediaRestoreComplete() } else if (runAttempt == 0) { Log.w(TAG, "Still have remaining data to restore, will retry before checking job queues, queue: ${parameters.queue} estimated remaining: $remainingAttachmentSize") - return Result.retry(15.seconds.inWholeMilliseconds) + return Result.retry(30.seconds.inWholeMilliseconds) } else { - Log.w(TAG, "Max retries reached, queue: ${parameters.queue} estimated remaining: $remainingAttachmentSize") - // todo [local-backup] inspect jobs/queues and raise some alarm/abort? + handleRemainingAfterMaxAttempts(remainingAttachmentSize) } return Result.success() } + /** + * Reached the retry limit while attachments still appear restorable. Figure out whether there is anything left actively working on the + * restore before deciding the restore is finished. + */ + private fun handleRemainingAfterMaxAttempts(remainingAttachmentSize: Long) { + synchronized(FINALIZE_LOCK) { + val otherCheckJobs = AppDependencies.jobManager.find { it.factoryKey == KEY && it.id != id } + if (otherCheckJobs.isNotEmpty()) { + Log.w(TAG, "Max retries reached but ${otherCheckJobs.size} other check job(s) remain, deferring to them. queue: ${parameters.queue} estimated remaining: $remainingAttachmentSize") + return + } + + val orphanedCount = SignalDatabase.attachments.markRestorableAttachmentsWithoutMessageAsFailed() + if (orphanedCount > 0) { + Log.w(TAG, "$orphanedCount orphaned restorable attachments marked failed") + } + val restoreQueues = AppDependencies.jobManager + .find { it.factoryKey == RestoreAttachmentJob.KEY || it.factoryKey == RestoreLocalAttachmentJob.KEY } + .mapNotNull { it.queueKey } + .toSet() + + if (restoreQueues.isNotEmpty()) { + Log.w(TAG, "Max retries reached but restore jobs remain in ${restoreQueues.size} queue(s), re-enqueueing check jobs. estimated remaining: $remainingAttachmentSize") + AppDependencies.jobManager.addAll(restoreQueues.map { CheckRestoreMediaLeftJob(it) }) + return + } + + Log.w(TAG, "Max retries reached and no restore jobs remain, treating restore as complete. estimated remaining: $remainingAttachmentSize") + onMediaRestoreComplete() + } + } + + private fun onMediaRestoreComplete() { + ArchiveRestoreProgress.allMediaRestored() + BackupMediaRestoreService.stop(context) + + if (SignalStore.backup.deletionState == DeletionState.AWAITING_MEDIA_DOWNLOAD) { + SignalStore.backup.deletionState = DeletionState.MEDIA_DOWNLOAD_FINISHED + } + + if (!SignalStore.backup.backsUpMedia) { + SignalDatabase.attachments.markQuotesThatNeedReconstruction() + AppDependencies.jobManager.add(QuoteThumbnailReconstructionJob()) + } + } + override fun onFailure() = Unit class Factory : Job.Factory { diff --git a/app/src/main/java/org/thoughtcrime/securesms/jobs/RestoreLocalAttachmentJob.kt b/app/src/main/java/org/thoughtcrime/securesms/jobs/RestoreLocalAttachmentJob.kt index cc87c449db..727d4a28cf 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/jobs/RestoreLocalAttachmentJob.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/jobs/RestoreLocalAttachmentJob.kt @@ -46,6 +46,11 @@ class RestoreLocalAttachmentJob private constructor( fun enqueueRestoreLocalAttachmentsJobs(mediaNameToFileInfo: Map) { val jobManager = AppDependencies.jobManager + val orphanedCount = SignalDatabase.attachments.markRestorableAttachmentsWithoutMessageAsFailed() + if (orphanedCount > 0) { + Log.w(TAG, "Failed $orphanedCount orphaned restorable attachment(s) with no backing message before enqueueing restores.") + } + do { val possibleRestorableAttachments: List = SignalDatabase.attachments.getRestorableLocalAttachments(500) val notRestorableAttachments = ArrayList(possibleRestorableAttachments.size) @@ -72,7 +77,7 @@ class RestoreLocalAttachmentJob private constructor( // Intentionally enqueues one at a time for safer attachment transfer state management Log.d(TAG, "Adding ${restoreAttachmentJobs.size} restore local attachment jobs") restoreAttachmentJobs.forEach { jobManager.add(it) } - } while (restoreAttachmentJobs.isNotEmpty()) + } while (possibleRestorableAttachments.isNotEmpty()) ArchiveRestoreProgress.onRestoringMedia()