Mitigate flaky network by retrying link+sync api calls.

This commit is contained in:
Cody Henthorne
2025-01-09 11:18:33 -05:00
committed by Greyson Parrelli
parent b983a56dd2
commit ef71410eaf
2 changed files with 52 additions and 26 deletions
@@ -273,7 +273,7 @@ object LinkDeviceRepository {
stopwatch.split("validate-backup") stopwatch.split("validate-backup")
Log.d(TAG, "[createAndUploadArchive] Fetching an upload form...") Log.d(TAG, "[createAndUploadArchive] Fetching an upload form...")
val uploadForm = when (val result = SignalNetwork.attachments.getAttachmentV4UploadForm()) { val uploadForm = when (val result = NetworkResult.withRetry { SignalNetwork.attachments.getAttachmentV4UploadForm() }) {
is NetworkResult.Success -> result.result.logD(TAG, "[createAndUploadArchive] Successfully retrieved upload form.") is NetworkResult.Success -> result.result.logD(TAG, "[createAndUploadArchive] Successfully retrieved upload form.")
is NetworkResult.ApplicationError -> throw result.throwable is NetworkResult.ApplicationError -> throw result.throwable
is NetworkResult.NetworkError -> return LinkUploadArchiveResult.NetworkError(result.exception).logW(TAG, "[createAndUploadArchive] Network error when fetching form.", result.exception) is NetworkResult.NetworkError -> return LinkUploadArchiveResult.NetworkError(result.exception).logW(TAG, "[createAndUploadArchive] Network error when fetching form.", result.exception)
@@ -289,12 +289,14 @@ object LinkDeviceRepository {
stopwatch.split("upload-backup") stopwatch.split("upload-backup")
Log.d(TAG, "[createAndUploadArchive] Setting the transfer archive...") Log.d(TAG, "[createAndUploadArchive] Setting the transfer archive...")
val transferSetResult = SignalNetwork.linkDevice.setTransferArchive( val transferSetResult = NetworkResult.withRetry {
destinationDeviceId = deviceId, SignalNetwork.linkDevice.setTransferArchive(
destinationDeviceCreated = deviceCreatedAt, destinationDeviceId = deviceId,
cdn = uploadForm.cdn, destinationDeviceCreated = deviceCreatedAt,
cdnKey = uploadForm.key cdn = uploadForm.cdn,
) cdnKey = uploadForm.key
)
}
when (transferSetResult) { when (transferSetResult) {
is NetworkResult.Success -> Log.i(TAG, "[createAndUploadArchive] Successfully set transfer archive.") is NetworkResult.Success -> Log.i(TAG, "[createAndUploadArchive] Successfully set transfer archive.")
@@ -317,39 +319,32 @@ object LinkDeviceRepository {
* Handles uploading the archive for [createAndUploadArchive]. Handles resumable uploads and making multiple upload attempts. * Handles uploading the archive for [createAndUploadArchive]. Handles resumable uploads and making multiple upload attempts.
*/ */
private fun uploadArchive(backupFile: File, uploadForm: AttachmentUploadForm): NetworkResult<Unit> { private fun uploadArchive(backupFile: File, uploadForm: AttachmentUploadForm): NetworkResult<Unit> {
val resumableUploadUrl = when (val result = SignalNetwork.attachments.getResumableUploadUrl(uploadForm)) { val resumableUploadUrl = when (val result = NetworkResult.withRetry { SignalNetwork.attachments.getResumableUploadUrl(uploadForm) }) {
is NetworkResult.Success -> result.result is NetworkResult.Success -> result.result
is NetworkResult.NetworkError -> return result.map { Unit }.logW(TAG, "Network error when fetching upload URL.", result.exception) is NetworkResult.NetworkError -> return result.map { Unit }.logW(TAG, "Network error when fetching upload URL.", result.exception)
is NetworkResult.StatusCodeError -> return result.map { Unit }.logW(TAG, "Status code error when fetching upload URL.", result.exception) is NetworkResult.StatusCodeError -> return result.map { Unit }.logW(TAG, "Status code error when fetching upload URL.", result.exception)
is NetworkResult.ApplicationError -> throw result.throwable is NetworkResult.ApplicationError -> throw result.throwable
} }
val maxRetries = 5 val uploadResult = NetworkResult.withRetry(
var attemptCount = 0 logAttempt = { attempt, maxAttempts -> Log.i(TAG, "Starting upload attempt ${attempt + 1}/$maxAttempts") }
) {
while (attemptCount < maxRetries) { FileInputStream(backupFile).use {
Log.i(TAG, "Starting upload attempt ${attemptCount + 1}/$maxRetries")
val uploadResult = FileInputStream(backupFile).use {
SignalNetwork.attachments.uploadPreEncryptedFileToAttachmentV4( SignalNetwork.attachments.uploadPreEncryptedFileToAttachmentV4(
uploadForm = uploadForm, uploadForm = uploadForm,
resumableUploadUrl = resumableUploadUrl, resumableUploadUrl = resumableUploadUrl,
inputStream = backupFile.inputStream(), inputStream = it,
inputStreamLength = backupFile.length() inputStreamLength = backupFile.length()
) )
} }
when (uploadResult) {
is NetworkResult.Success -> return uploadResult
is NetworkResult.NetworkError -> Log.w(TAG, "Hit network error while uploading. May retry.", uploadResult.exception)
is NetworkResult.StatusCodeError -> return uploadResult.logW(TAG, "Status code error when uploading archive.", uploadResult.exception)
is NetworkResult.ApplicationError -> throw uploadResult.throwable
}
attemptCount++
} }
Log.w(TAG, "Hit the max retry count of $maxRetries. Failing.") return when (uploadResult) {
return NetworkResult.NetworkError(IOException("Hit max retries!")) is NetworkResult.Success -> uploadResult
is NetworkResult.NetworkError -> uploadResult.logW(TAG, "Network error while uploading.", uploadResult.exception)
is NetworkResult.StatusCodeError -> uploadResult.logW(TAG, "Status code error when uploading archive.", uploadResult.exception)
is NetworkResult.ApplicationError -> throw uploadResult.throwable
}
} }
/** /**
@@ -96,6 +96,36 @@ sealed class NetworkResult<T>(
} catch (e: Throwable) { } catch (e: Throwable) {
ApplicationError(e) ApplicationError(e)
} }
/**
 * Runs [operation] to perform a network call, repeating it while [shouldRetry] approves a retry and
 * attempts remain.
 *
 * Each attempt calls [logAttempt], then [operation], then [shouldRetry] on the produced result. The
 * first result for which [shouldRetry] returns false is returned immediately. If every attempt is
 * deemed retryable, the result of the final attempt is returned as-is. Attempts run back-to-back;
 * this function inserts no delay between them. Anything thrown by the supplied lambdas propagates
 * to the caller.
 *
 * @param maxAttempts Max attempts to try the network operation, must be 1 or more, default is 5
 * @param shouldRetry Predicate to determine if network operation should be retried, default is any [NetworkError] result is retried
 * @param logAttempt Invoked before each call to [operation] with the zero-based attempt index and [maxAttempts], default is noop
 * @param operation Network operation that can be called repeatedly for each attempt
 * @return The first result that [shouldRetry] rejects, or the result of the last attempt
 * @throws IllegalArgumentException if [maxAttempts] is less than 1
 */
fun <T : Any?> withRetry(
  maxAttempts: Int = 5,
  shouldRetry: (NetworkResult<T>) -> Boolean = { it is NetworkError },
  logAttempt: (attempt: Int, maxAttempts: Int) -> Unit = { _, _ -> },
  operation: () -> NetworkResult<T>
): NetworkResult<T> {
  require(maxAttempts > 0) { "maxAttempts must be at least 1, but was $maxAttempts" }

  var attempt = 0
  while (true) {
    logAttempt(attempt, maxAttempts)
    val result = operation()
    attempt++

    // shouldRetry is evaluated first so the predicate still sees every result, including the last one.
    if (!shouldRetry(result) || attempt >= maxAttempts) {
      return result
    }
  }
}
} }
/** Indicates the request was successful */ /** Indicates the request was successful */
@@ -160,6 +190,7 @@ sealed class NetworkResult<T>(
ApplicationError<R>(e).runOnStatusCodeError(statusCodeErrorActions) ApplicationError<R>(e).runOnStatusCodeError(statusCodeErrorActions)
} }
} }
is NetworkError -> NetworkError<R>(exception).runOnStatusCodeError(statusCodeErrorActions) is NetworkError -> NetworkError<R>(exception).runOnStatusCodeError(statusCodeErrorActions)
is ApplicationError -> ApplicationError<R>(throwable).runOnStatusCodeError(statusCodeErrorActions) is ApplicationError -> ApplicationError<R>(throwable).runOnStatusCodeError(statusCodeErrorActions)
is StatusCodeError -> StatusCodeError<R>(code, stringBody, binaryBody, exception).runOnStatusCodeError(statusCodeErrorActions) is StatusCodeError -> StatusCodeError<R>(code, stringBody, binaryBody, exception).runOnStatusCodeError(statusCodeErrorActions)