Add a crawler for backup usage metrics

This commit is contained in:
Ravi Khadiwala
2024-05-07 13:50:28 -05:00
committed by ravi-signal
parent 101ecf342f
commit 7d95926f02
6 changed files with 252 additions and 6 deletions

View File

@@ -452,6 +452,17 @@ public class BackupManager {
});
}
/**
 * Stream the stored attributes of every backup record in the backups table.
 * <p>
 * This is a pass-through to the backups database layer; no additional filtering or transformation is applied here.
 *
 * @param segments  Number of segments to read in parallel from the underlying backup database
 * @param scheduler Scheduler for running downstream operations
 * @return Flux of {@link StoredBackupAttributes}, one for each backup record in the backups table
 */
public Flux<StoredBackupAttributes> listBackupAttributes(final int segments, final Scheduler scheduler) {
  final Flux<StoredBackupAttributes> storedAttributes = backupsDb.listBackupAttributes(segments, scheduler);
  return storedAttributes;
}
/**
* List all backups whose media or messages refresh timestamp are older than the provided purgeTime
*

View File

@@ -441,6 +441,37 @@ public class BackupsDb {
}
}
/**
 * Scan the backups table and emit the stored attributes of each backup record.
 * <p>
 * The table is read with a segmented scan: one scan request is issued per segment index in {@code [0, segments)},
 * each requesting strongly consistent reads and projecting only the attributes needed to populate a
 * {@link StoredBackupAttributes}. Items that do not carry the backup-id hash key are skipped.
 *
 * @param segments  Total number of scan segments; must be at least 1
 * @param scheduler Scheduler on which the per-segment scans are run
 * @return Flux of {@link StoredBackupAttributes}, one per scanned item that has a backup-id hash
 * @throws IllegalArgumentException if {@code segments} is less than 1
 */
Flux<StoredBackupAttributes> listBackupAttributes(final int segments, final Scheduler scheduler) {
  if (segments < 1) {
    throw new IllegalArgumentException("Total number of segments must be positive");
  }

  // The placeholder names and projection are identical for every segment, so build them once up front
  final Map<String, String> attributeNames = Map.of(
      "#backupIdHash", KEY_BACKUP_ID_HASH,
      "#refresh", ATTR_LAST_REFRESH,
      "#mediaRefresh", ATTR_LAST_MEDIA_REFRESH,
      "#bytesUsed", ATTR_MEDIA_BYTES_USED,
      "#numObjects", ATTR_MEDIA_COUNT);
  final String projection = "#backupIdHash, #refresh, #mediaRefresh, #bytesUsed, #numObjects";

  return Flux.range(0, segments)
      .parallel()
      .runOn(scheduler)
      .flatMap(segmentIndex -> {
        final ScanRequest segmentScan = ScanRequest.builder()
            .tableName(backupTableName)
            .consistentRead(true)
            .segment(segmentIndex)
            .totalSegments(segments)
            .expressionAttributeNames(attributeNames)
            .projectionExpression(projection)
            .build();
        return dynamoClient.scanPaginator(segmentScan).items();
      })
      .sequential()
      .filter(record -> record.containsKey(KEY_BACKUP_ID_HASH))
      .map(record -> {
        // Each numeric attribute is read with 0 passed as the fallback argument; timestamps are epoch seconds
        final Instant lastRefresh = Instant.ofEpochSecond(AttributeValues.getLong(record, ATTR_LAST_REFRESH, 0L));
        final Instant lastMediaRefresh =
            Instant.ofEpochSecond(AttributeValues.getLong(record, ATTR_LAST_MEDIA_REFRESH, 0L));
        final long bytesUsed = AttributeValues.getLong(record, ATTR_MEDIA_BYTES_USED, 0L);
        final long numObjects = AttributeValues.getLong(record, ATTR_MEDIA_COUNT, 0L);
        return new StoredBackupAttributes(lastRefresh, lastMediaRefresh, bytesUsed, numObjects);
      });
}
Flux<ExpiredBackup> getExpiredBackups(final int segments, final Scheduler scheduler, final Instant purgeTime) {
if (segments < 1) {
throw new IllegalArgumentException("Total number of segments must be positive");

View File

@@ -0,0 +1,19 @@
/*
* Copyright 2024 Signal Messenger, LLC
* SPDX-License-Identifier: AGPL-3.0-only
*/
package org.whispersystems.textsecuregcm.backup;
import java.time.Instant;
/**
 * Snapshot of the attributes persisted in the backups table for one backup id.
 *
 * @param lastRefresh      The last time the record was updated with a messages or media tier credential
 * @param lastMediaRefresh The last time the record was updated with a media tier credential
 * @param bytesUsed        The number of media bytes used by the backup
 * @param numObjects       The number of media objects used by the backup
 */
public record StoredBackupAttributes(
    Instant lastRefresh,
    Instant lastMediaRefresh,
    long bytesUsed,
    long numObjects) {

}