diff --git a/app/src/main/java/org/thoughtcrime/securesms/backup/v2/stream/EncryptedBackupWriter.kt b/app/src/main/java/org/thoughtcrime/securesms/backup/v2/stream/EncryptedBackupWriter.kt
index c8d2ea4de9..97828c6f9e 100644
--- a/app/src/main/java/org/thoughtcrime/securesms/backup/v2/stream/EncryptedBackupWriter.kt
+++ b/app/src/main/java/org/thoughtcrime/securesms/backup/v2/stream/EncryptedBackupWriter.kt
@@ -13,7 +13,6 @@ import org.whispersystems.signalservice.api.backup.BackupKey
 import org.whispersystems.signalservice.api.push.ServiceId.ACI
 import java.io.IOException
 import java.io.OutputStream
-import java.util.zip.GZIPOutputStream
 import javax.crypto.Cipher
 import javax.crypto.CipherOutputStream
 import javax.crypto.Mac
@@ -33,7 +32,7 @@ class EncryptedBackupWriter(
   private val append: (ByteArray) -> Unit
 ) : BackupExportWriter {
 
-  private val mainStream: GZIPOutputStream
+  private val mainStream: PaddedGzipOutputStream
   private val macStream: MacOutputStream
 
   init {
@@ -48,13 +47,9 @@ class EncryptedBackupWriter(
     }
 
     macStream = MacOutputStream(outputStream, mac)
+    val cipherStream = CipherOutputStream(macStream, cipher)
 
-    mainStream = GZIPOutputStream(
-      CipherOutputStream(
-        macStream,
-        cipher
-      )
-    )
+    mainStream = PaddedGzipOutputStream(cipherStream)
   }
 
   override fun write(header: BackupInfo) {
diff --git a/app/src/main/java/org/thoughtcrime/securesms/backup/v2/stream/PaddedGzipOutputStream.kt b/app/src/main/java/org/thoughtcrime/securesms/backup/v2/stream/PaddedGzipOutputStream.kt
new file mode 100644
index 0000000000..791adc036a
--- /dev/null
+++ b/app/src/main/java/org/thoughtcrime/securesms/backup/v2/stream/PaddedGzipOutputStream.kt
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2024 Signal Messenger, LLC
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+package org.thoughtcrime.securesms.backup.v2.stream
+
+import org.whispersystems.signalservice.internal.crypto.PaddingInputStream
+import java.io.FilterOutputStream
+import java.io.OutputStream
+import java.util.zip.GZIPOutputStream
+
+/**
+ * GZIPs everything written to it into the provided [outputStream], and also adds padding to the end of the stream using the same algorithm as [PaddingInputStream].
+ * We do this to fit files into a smaller number of size buckets to avoid fingerprinting. And it turns out that bolting on zeros to the end of a GZIP stream is
+ * fine, because GZIP is smart enough to ignore it. This means readers of this data don't have to do anything special.
+ */
+class PaddedGzipOutputStream private constructor(private val outputStream: SizeObservingOutputStream) : GZIPOutputStream(outputStream) {
+
+  constructor(outputStream: OutputStream) : this(SizeObservingOutputStream(outputStream))
+
+  /** Set once the padding has been written, so a repeated [finish] (for example the one performed by [close]) cannot pad the stream a second time. */
+  private var paddingWritten = false
+
+  /**
+   * Finishes the GZIP stream and then appends zeros until the total number of bytes written reaches [PaddingInputStream.getPaddedSize].
+   * Calling this more than once is safe: the padding is only written the first time.
+   */
+  override fun finish() {
+    super.finish()
+
+    if (paddingWritten) {
+      return
+    }
+    paddingWritten = true
+
+    val totalLength = outputStream.size
+    var remaining: Long = PaddingInputStream.getPaddedSize(totalLength) - totalLength
+
+    // Write the zeros in bounded chunks so that a large backup neither allocates all of its padding at once nor narrows the Long length to an Int.
+    val zeros = ByteArray(PADDING_CHUNK_SIZE)
+    while (remaining > 0) {
+      val count = minOf(remaining, zeros.size.toLong()).toInt()
+      outputStream.write(zeros, 0, count)
+      remaining -= count
+    }
+  }
+
+  /**
+   * We need to know the size of the *compressed* stream to know how much padding to add at the end.
+   */
+  private class SizeObservingOutputStream(val wrapped: OutputStream) : FilterOutputStream(wrapped) {
+
+    var size: Long = 0L
+      private set
+
+    override fun write(b: Int) {
+      wrapped.write(b)
+      size++
+    }
+
+    override fun write(b: ByteArray) {
+      wrapped.write(b)
+      size += b.size
+    }
+
+    override fun write(b: ByteArray, off: Int, len: Int) {
+      wrapped.write(b, off, len)
+      size += len
+    }
+  }
+
+  private companion object {
+    /** Size of the reusable zero buffer used when writing padding. */
+    const val PADDING_CHUNK_SIZE = 8 * 1024
+  }
+}
diff --git a/app/src/test/java/org/thoughtcrime/securesms/backup/v2/stream/EncryptedBackupReaderWriterTest.kt b/app/src/test/java/org/thoughtcrime/securesms/backup/v2/stream/EncryptedBackupReaderWriterTest.kt
index 12198e7e43..7692ea5e39 100644
--- a/app/src/test/java/org/thoughtcrime/securesms/backup/v2/stream/EncryptedBackupReaderWriterTest.kt
+++ b/app/src/test/java/org/thoughtcrime/securesms/backup/v2/stream/EncryptedBackupReaderWriterTest.kt
@@ -7,6 +7,7 @@ package org.thoughtcrime.securesms.backup.v2.stream
 
 import org.junit.Assert.assertEquals
 import org.junit.Test
+import org.signal.core.util.Hex
 import org.thoughtcrime.securesms.backup.v2.proto.AccountData
 import org.thoughtcrime.securesms.backup.v2.proto.BackupInfo
 import org.thoughtcrime.securesms.backup.v2.proto.Frame
@@ -27,13 +28,14 @@ class EncryptedBackupReaderWriterTest {
     val frameCount = 10_000
 
     EncryptedBackupWriter(key, aci, outputStream, append = { outputStream.write(it) }).use { writer ->
-      writer.write(BackupInfo(1, 1000L))
+      writer.write(BackupInfo(version = 1, backupTimeMs = 1000L))
+
       for (i in 0 until frameCount) {
         writer.write(Frame(account = AccountData(username = "username-$i")))
       }
     }
 
     val ciphertext: ByteArray = outputStream.toByteArray()
 
     val frames: List<Frame> = EncryptedBackupReader(key, aci, ciphertext.size.toLong()) { ciphertext.inputStream() }.use { reader ->
       assertEquals(reader.backupInfo?.version, 1L)
@@ -47,4 +49,28 @@ class EncryptedBackupReaderWriterTest {
       assertEquals("username-$i", frames[i].account?.username)
     }
   }
+
+  @Test
+  fun `padding limits number of sizes`() {
+    val key = BackupKey(Util.getSecretBytes(32))
+    val aci = ACI.from(UUID.randomUUID())
+
+    val sizes = (1..10)
+      .map { frameCount ->
+        val outputStream = ByteArrayOutputStream()
+
+        EncryptedBackupWriter(key, aci, outputStream, append = { outputStream.write(it) }).use { writer ->
+          writer.write(BackupInfo(version = 1, backupTimeMs = 1000L))
+
+          for (i in 0 until frameCount) {
+            writer.write(Frame(account = AccountData(username = Hex.toStringCondensed(Util.getSecretBytes(32)))))
+          }
+        }
+
+        outputStream.toByteArray().size
+      }
+      .toSet()
+
+    assertEquals(1, sizes.size)
+  }
 }