Inline long text attachments into backup files.

This commit is contained in:
Greyson Parrelli
2025-07-17 16:50:22 -04:00
committed by GitHub
parent 6553c416f8
commit a9455b95ac
21 changed files with 158 additions and 32 deletions

View File

@@ -170,6 +170,22 @@ object ExportOddities {
return log(sentTimestamp, "Invalid e164 in sessions switchover event. Exporting an empty event.")
}
/** Builds the oddity log line for a long text attachment that had not been downloaded at export time. */
fun undownloadedLongTextAttachment(sentTimestamp: Long): String =
  log(
    sentTimestamp = sentTimestamp,
    message = "Long text attachment was not yet downloaded. Falling back to the known body with an attachment pointer."
  )
/** Builds the oddity log line for a long text attachment whose contents could not be read. */
fun unreadableLongTextAttachment(sentTimestamp: Long): String =
  log(
    sentTimestamp = sentTimestamp,
    message = "Long text attachment was unreadable. Falling back to the known body with an attachment pointer."
  )
/** Builds the oddity log line for a long text attachment whose stream could not be opened. */
fun unopenableLongTextAttachment(sentTimestamp: Long): String =
  log(
    sentTimestamp = sentTimestamp,
    message = "Long text attachment failed to open. Falling back to the known body with an attachment pointer."
  )
/**
 * Builds the oddity log line for a body that had to be trimmed during export.
 *
 * @param length The value interpolated into the message as a byte count.
 */
fun bodyGreaterThanMaxLength(sentTimestamp: Long, length: Int): String =
  log(
    sentTimestamp = sentTimestamp,
    message = "The body length was greater than the max allowed ($length bytes). Trimming to fit."
  )
/** Formats an oddity message, prefixing it with the `[ODDITY]` tag and the sent timestamp of the affected message. */
private fun log(sentTimestamp: Long, message: String): String = "[ODDITY][$sentTimestamp] $message"

View File

@@ -13,13 +13,17 @@ import org.signal.core.util.Base64
import org.signal.core.util.EventTimer
import org.signal.core.util.Hex
import org.signal.core.util.ParallelEventTimer
import org.signal.core.util.StringUtil
import org.signal.core.util.concurrent.SignalExecutors
import org.signal.core.util.emptyIfNull
import org.signal.core.util.isNotNullOrBlank
import org.signal.core.util.kibiBytes
import org.signal.core.util.logging.Log
import org.signal.core.util.logging.logW
import org.signal.core.util.nullIfBlank
import org.signal.core.util.nullIfEmpty
import org.signal.core.util.orNull
import org.signal.core.util.readFully
import org.signal.core.util.requireBlob
import org.signal.core.util.requireBoolean
import org.signal.core.util.requireInt
@@ -82,8 +86,10 @@ import org.thoughtcrime.securesms.database.model.databaseprotos.MessageExtras
import org.thoughtcrime.securesms.database.model.databaseprotos.ProfileChangeDetails
import org.thoughtcrime.securesms.database.model.databaseprotos.SessionSwitchoverEvent
import org.thoughtcrime.securesms.database.model.databaseprotos.ThreadMergeEvent
import org.thoughtcrime.securesms.dependencies.AppDependencies
import org.thoughtcrime.securesms.keyvalue.SignalStore
import org.thoughtcrime.securesms.linkpreview.LinkPreview
import org.thoughtcrime.securesms.mms.PartAuthority
import org.thoughtcrime.securesms.mms.QuoteModel
import org.thoughtcrime.securesms.payments.FailureReason
import org.thoughtcrime.securesms.payments.State
@@ -105,6 +111,8 @@ import org.thoughtcrime.securesms.backup.v2.proto.BodyRange as BackupBodyRange
import org.thoughtcrime.securesms.backup.v2.proto.GiftBadge as BackupGiftBadge
private val TAG = Log.tag(ChatItemArchiveExporter::class.java)
// Limit passed to StringUtil.trimToFit (as its max byte length) for long text that is read out of an attachment and inlined as the body.
private val MAX_INLINED_BODY_SIZE = 128.kibiBytes.bytes.toInt()
// Limit passed to StringUtil.trimToFit for the body when the long text attachment is still exported as a pointer alongside it.
private val MAX_INLINED_BODY_SIZE_WITH_LONG_ATTACHMENT_POINTER = 2.kibiBytes.bytes.toInt()
/**
* An iterator for chat items with a clever performance twist: rather than do the extra queries one at a time (for reactions,
@@ -361,11 +369,6 @@ class ChatItemArchiveExporter(
else -> {
val attachments = extraData.attachmentsById[record.id]
if (attachments?.isNotEmpty() == true && attachments.any { it.contentType == MediaUtil.LONG_TEXT } && record.body.isNullOrBlank()) {
Log.w(TAG, ExportSkips.invalidLongTextChatItem(record.dateSent))
continue
}
val sticker = attachments?.firstOrNull { dbAttachment -> dbAttachment.isSticker }
if (sticker?.stickerLocator != null) {
@@ -963,6 +966,8 @@ private fun BackupMessageRecord.toRemoteDirectStoryReplyMessage(mediaArchiveEnab
val isReaction = MessageTypes.isStoryReaction(this.type)
val (bodyText, longTextAttachment) = this.getBodyText(attachments)
return DirectStoryReplyMessage(
emoji = if (isReaction) {
this.body
@@ -972,10 +977,10 @@ private fun BackupMessageRecord.toRemoteDirectStoryReplyMessage(mediaArchiveEnab
textReply = if (!isReaction) {
DirectStoryReplyMessage.TextReply(
text = Text(
body = this.body,
body = bodyText,
bodyRanges = this.bodyRanges?.toRemoteBodyRanges(this.dateSent) ?: emptyList()
),
longText = attachments?.firstOrNull { it.contentType == MediaUtil.LONG_TEXT }?.toRemoteFilePointer(mediaArchiveEnabled)
longText = longTextAttachment?.toRemoteFilePointer(mediaArchiveEnabled)
)
} else {
null
@@ -985,23 +990,25 @@ private fun BackupMessageRecord.toRemoteDirectStoryReplyMessage(mediaArchiveEnab
}
private fun BackupMessageRecord.toRemoteStandardMessage(exportState: ExportState, mediaArchiveEnabled: Boolean, reactionRecords: List<ReactionRecord>?, mentions: List<Mention>?, attachments: List<DatabaseAttachment>?): StandardMessage {
val text = body.nullIfBlank()?.let {
val linkPreviews = this.toRemoteLinkPreviews(attachments)
val linkPreviewAttachments = linkPreviews.mapNotNull { it.thumbnail.orElse(null) }.toSet()
val quotedAttachments = attachments?.filter { it.quote } ?: emptyList()
val messageAttachments = attachments
?.filterNot { it.quote }
?.filterNot { linkPreviewAttachments.contains(it) }
?.filterNot { MediaUtil.isLongTextType(it.contentType) }
?: emptyList()
val hasVoiceNote = messageAttachments.any { it.voiceNote }
val (bodyText, longTextAttachment) = this.getBodyText(attachments)
val text = bodyText.nullIfBlank()?.let {
Text(
body = it,
bodyRanges = (this.bodyRanges?.toRemoteBodyRanges(this.dateSent) ?: emptyList()) + (mentions?.toRemoteBodyRanges(exportState) ?: emptyList())
)
}
val linkPreviews = this.toRemoteLinkPreviews(attachments)
val linkPreviewAttachments = linkPreviews.mapNotNull { it.thumbnail.orElse(null) }.toSet()
val quotedAttachments = attachments?.filter { it.quote } ?: emptyList()
val longTextAttachment = attachments?.firstOrNull { it.contentType == "text/x-signal-plain" }
val messageAttachments = attachments
?.filterNot { it.quote }
?.filterNot { linkPreviewAttachments.contains(it) }
?.filterNot { it == longTextAttachment }
?: emptyList()
val hasVoiceNote = messageAttachments.any { it.voiceNote }
return StandardMessage(
quote = this.toRemoteQuote(exportState, mediaArchiveEnabled, quotedAttachments),
text = text.takeUnless { hasVoiceNote },
@@ -1012,6 +1019,40 @@ private fun BackupMessageRecord.toRemoteStandardMessage(exportState: ExportState
)
}
/**
 * Retrieves the body text to export, reading it from a long text attachment when one is present and readable.
 *
 * - No long text attachment: the record's body is returned as-is, with no attachment.
 * - The attachment has no uri, is not fully transferred, cannot be opened, or cannot be read: the record's body is
 *   trimmed to [MAX_INLINED_BODY_SIZE_WITH_LONG_ATTACHMENT_POINTER] and returned together with the attachment.
 * - The attachment is readable: its contents are decoded as UTF-8, trimmed to [MAX_INLINED_BODY_SIZE], and returned
 *   with no attachment.
 *
 * @param attachments The attachments belonging to this message, if any.
 * @return The body text, plus an optional [DatabaseAttachment] that, if present, indicates that you should set it as
 * the value for [StandardMessage.longText].
 */
private fun BackupMessageRecord.getBodyText(attachments: List<DatabaseAttachment>?): Pair<String, DatabaseAttachment?> {
  val longTextAttachment = attachments?.firstOrNull { it.contentType == "text/x-signal-plain" }
    ?: return this.body.emptyIfNull() to null

  // Every path that keeps the attachment pointer must export the same capped body, so build it in one place.
  fun bodyWithPointer(): Pair<String, DatabaseAttachment?> {
    return StringUtil.trimToFit(this.body.emptyIfNull(), MAX_INLINED_BODY_SIZE_WITH_LONG_ATTACHMENT_POINTER) to longTextAttachment
  }

  // Capture the uri in a local so the null check smart-casts it and no `!!` is needed below.
  val uri = longTextAttachment.uri
  if (uri == null || longTextAttachment.transferState != AttachmentTable.TRANSFER_PROGRESS_DONE) {
    Log.w(TAG, ExportOddities.undownloadedLongTextAttachment(this.dateSent))
    return bodyWithPointer()
  }

  val longTextBytes = try {
    PartAuthority.getAttachmentStream(AppDependencies.application, uri)?.readFully()
  } catch (e: IOException) {
    Log.w(TAG, ExportOddities.unreadableLongTextAttachment(this.dateSent))
    return bodyWithPointer()
  }

  if (longTextBytes == null) {
    Log.w(TAG, ExportOddities.unopenableLongTextAttachment(this.dateSent))
    return bodyWithPointer()
  }

  val longText = longTextBytes.toString(Charsets.UTF_8)
  val trimmed = StringUtil.trimToFit(longText, MAX_INLINED_BODY_SIZE)
  if (trimmed.length != longText.length) {
    // The oddity message reports bytes, so pass the raw byte count rather than the UTF-16 length of the decoded string.
    Log.w(TAG, ExportOddities.bodyGreaterThanMaxLength(this.dateSent, longTextBytes.size))
  }

  return trimmed to null
}
private fun BackupMessageRecord.toRemoteQuote(exportState: ExportState, mediaArchiveEnabled: Boolean, attachments: List<DatabaseAttachment>? = null): Quote? {
if (this.quoteTargetSentTimestamp == MessageTable.QUOTE_NOT_PRESENT_ID || this.quoteAuthor <= 0 || exportState.groupRecipientIds.contains(this.quoteAuthor)) {
return null

View File

@@ -10,6 +10,7 @@ import androidx.core.content.contentValuesOf
import org.signal.core.util.Base64
import org.signal.core.util.Hex
import org.signal.core.util.SqlUtil
import org.signal.core.util.asList
import org.signal.core.util.forEach
import org.signal.core.util.logging.Log
import org.signal.core.util.orNull
@@ -40,6 +41,7 @@ import org.thoughtcrime.securesms.backup.v2.proto.Sticker
import org.thoughtcrime.securesms.backup.v2.proto.ViewOnceMessage
import org.thoughtcrime.securesms.backup.v2.util.toLocalAttachment
import org.thoughtcrime.securesms.contactshare.Contact
import org.thoughtcrime.securesms.database.AttachmentTable
import org.thoughtcrime.securesms.database.CallTable
import org.thoughtcrime.securesms.database.GroupReceiptTable
import org.thoughtcrime.securesms.database.MessageTable
@@ -63,6 +65,7 @@ import org.thoughtcrime.securesms.database.model.databaseprotos.PaymentTombstone
import org.thoughtcrime.securesms.database.model.databaseprotos.ProfileChangeDetails
import org.thoughtcrime.securesms.database.model.databaseprotos.SessionSwitchoverEvent
import org.thoughtcrime.securesms.database.model.databaseprotos.ThreadMergeEvent
import org.thoughtcrime.securesms.dependencies.AppDependencies
import org.thoughtcrime.securesms.mms.QuoteModel
import org.thoughtcrime.securesms.payments.CryptoValueUtil
import org.thoughtcrime.securesms.payments.Direction
@@ -74,6 +77,7 @@ import org.thoughtcrime.securesms.recipients.RecipientId
import org.thoughtcrime.securesms.stickers.StickerLocator
import org.thoughtcrime.securesms.util.JsonUtils
import org.thoughtcrime.securesms.util.MediaUtil
import org.thoughtcrime.securesms.util.MessageUtil
import org.whispersystems.signalservice.api.payments.Money
import org.whispersystems.signalservice.api.push.ServiceId
import org.whispersystems.signalservice.api.util.UuidUtil
@@ -371,14 +375,17 @@ class ChatItemArchiveImporter(
}
if (this.directStoryReplyMessage != null) {
val longTextAttachment: Attachment? = this.directStoryReplyMessage.textReply?.longText?.toLocalAttachment(
importState = importState,
contentType = "text/x-signal-plain"
)
val (trimmedBodyText, longTextAttachment) = this.directStoryReplyMessage.parseBodyText(importState)
if (trimmedBodyText != null) {
contentValues.put(MessageTable.BODY, trimmedBodyText)
}
if (longTextAttachment != null) {
followUps += { messageRowId ->
SignalDatabase.attachments.insertAttachmentsForMessage(messageRowId, listOf(longTextAttachment), emptyList())
val ids = SignalDatabase.attachments.insertAttachmentsForMessage(messageRowId, listOf(longTextAttachment), emptyList())
ids.values.firstOrNull()?.let { attachmentId ->
SignalDatabase.attachments.setTransferState(messageRowId, attachmentId, AttachmentTable.TRANSFER_PROGRESS_DONE)
}
}
}
}
@@ -396,23 +403,29 @@ class ChatItemArchiveImporter(
attachment.toLocalAttachment()
}
val longTextAttachments: List<Attachment> = this.standardMessage.longText?.toLocalAttachment(
importState = importState,
contentType = "text/x-signal-plain"
)?.let { listOf(it) } ?: emptyList()
val (trimmedBodyText, longTextAttachment) = this.standardMessage.parseBodyText(importState)
if (trimmedBodyText != null) {
contentValues.put(MessageTable.BODY, trimmedBodyText)
}
val quoteAttachments: List<Attachment> = this.standardMessage.quote?.toLocalAttachments() ?: emptyList()
val hasAttachments = attachments.isNotEmpty() || linkPreviewAttachments.isNotEmpty() || quoteAttachments.isNotEmpty() || longTextAttachments.isNotEmpty()
val hasAttachments = attachments.isNotEmpty() || linkPreviewAttachments.isNotEmpty() || quoteAttachments.isNotEmpty() || longTextAttachment != null
if (hasAttachments || linkPreviews.isNotEmpty()) {
followUps += { messageRowId ->
val attachmentMap = if (hasAttachments) {
SignalDatabase.attachments.insertAttachmentsForMessage(messageRowId, attachments + linkPreviewAttachments + longTextAttachments, quoteAttachments)
SignalDatabase.attachments.insertAttachmentsForMessage(messageRowId, attachments + linkPreviewAttachments + longTextAttachment.asList(), quoteAttachments)
} else {
emptyMap()
}
if (longTextAttachment != null) {
attachmentMap[longTextAttachment]?.let { attachmentId ->
SignalDatabase.attachments.setTransferState(messageRowId, attachmentId, AttachmentTable.TRANSFER_PROGRESS_DONE)
}
}
if (linkPreviews.isNotEmpty()) {
db.update(MessageTable.TABLE_NAME)
.values(MessageTable.LINK_PREVIEWS to SignalDatabase.messages.getSerializedLinkPreviews(attachmentMap, linkPreviews))
@@ -453,6 +466,54 @@ class ChatItemArchiveImporter(
return MessageInsert(contentValues, followUp)
}
/**
 * Works out how the text of an imported [StandardMessage] should be stored. Text arriving in [StandardMessage.text] can be too long for a database
 * column, in which case it has to be split off into a separate attachment.
 *
 * @return A pair of (replacement body, long text attachment). A non-null String means the stored body should be replaced with that trimmed string.
 * A non-null attachment means it should be stored along with the message, as it contains the long text.
 */
private fun StandardMessage.parseBodyText(importState: ImportState): Pair<String?, Attachment?> {
  // An explicit long text pointer in the frame takes precedence; the body is left untouched in that case.
  val longTextPointer = this.longText
  if (longTextPointer != null) {
    return null to longTextPointer.toLocalAttachment(importState, contentType = "text/x-signal-plain")
  }

  val fullBody = this.text?.body ?: return null to null

  val split = MessageUtil.getSplitMessage(AppDependencies.application, fullBody)
  return if (split.textSlide.isPresent) {
    split.body to split.textSlide.get().asAttachment()
  } else {
    null to null
  }
}
/**
 * Works out how the text of an imported [DirectStoryReplyMessage] should be stored. Text arriving in [DirectStoryReplyMessage.textReply] can be too
 * long for a database column, in which case it has to be split off into a separate attachment.
 *
 * @return A pair of (replacement body, long text attachment). A non-null String means the stored body should be replaced with that trimmed string.
 * A non-null attachment means it should be stored along with the message, as it contains the long text.
 */
private fun DirectStoryReplyMessage.parseBodyText(importState: ImportState): Pair<String?, Attachment?> {
  val reply = this.textReply

  // An explicit long text pointer in the frame takes precedence; the body is left untouched in that case.
  val longTextPointer = reply?.longText
  if (longTextPointer != null) {
    return null to longTextPointer.toLocalAttachment(importState, contentType = "text/x-signal-plain")
  }

  val replyText = reply?.text ?: return null to null

  val split = MessageUtil.getSplitMessage(AppDependencies.application, replyText.body)
  return if (split.textSlide.isPresent) {
    split.body to split.textSlide.get().asAttachment()
  } else {
    null to null
  }
}
private fun ChatItem.toMessageContentValues(fromRecipientId: RecipientId, chatRecipientId: RecipientId, threadId: Long): ContentValues {
val contentValues = ContentValues()

View File

@@ -22,3 +22,10 @@ fun <E> List<E>.swap(i: Int, j: Int): List<E> {
Collections.swap(mutableCopy, i, j)
return mutableCopy.toList()
}
/**
 * Wraps the receiver in a single-element list, or returns an empty list if the receiver is null.
 */
fun <E> E?.asList(): List<E> = when (this) {
  null -> emptyList()
  else -> listOf(this)
}

View File

@@ -1,6 +1,7 @@
package org.signal.core.util
import android.text.SpannableStringBuilder
import okio.utf8Size
import java.io.ByteArrayOutputStream
import java.io.IOException
import java.nio.charset.StandardCharsets
@@ -27,7 +28,7 @@ object StringUtil {
return ""
}
if (name.toByteArray(StandardCharsets.UTF_8).size <= maxByteLength) {
if (name.utf8Size() <= maxByteLength) {
return name
}

View File

@@ -12,7 +12,7 @@ import org.junit.Test
class StringExtensionsTest {
@Test
fun `splitByByteLength fuzzing`() {
fun `splitByByteLength - fuzzing`() {
val characterSet = "日月木山川田水火金土空海花風雨雪星森犬猫鳥魚虫人子女男友学校車電話本書時分先生愛夢楽音話語映画新古長短高低東西南北春夏秋冬雨雲星夜朝昼電気手足目耳口心頭体家国町村道橋山川本店仕事時間会話思考知識感情自動車飛行機船馬牛羊豚鶏鳥猫犬虎龍"
for (stringSize in 2100..2500) {
@@ -28,7 +28,7 @@ class StringExtensionsTest {
}
@Test
fun idk() {
fun `splitByByteLength - long string`() {
val myString = """
すべての人間は生まれながらにして自由であり、尊厳と権利において平等である。彼らは理性と良心を授けられており、互いに兄弟愛の精神をもって行動しなければならない。