Inline long text attachments into backup files.

This commit is contained in:
Greyson Parrelli
2025-07-17 16:50:22 -04:00
committed by GitHub
parent 6553c416f8
commit a9455b95ac
21 changed files with 158 additions and 32 deletions

View File

@@ -170,6 +170,22 @@ object ExportOddities {
return log(sentTimestamp, "Invalid e164 in sessions switchover event. Exporting an empty event.") return log(sentTimestamp, "Invalid e164 in sessions switchover event. Exporting an empty event.")
} }
/**
 * Produces the oddity log line for a long text attachment that had not been downloaded yet.
 *
 * @param sentTimestamp Sent timestamp forwarded to [log] to identify the message.
 * @return The formatted string returned by [log].
 */
fun undownloadedLongTextAttachment(sentTimestamp: Long): String =
  log(sentTimestamp, "Long text attachment was not yet downloaded. Falling back to the known body with an attachment pointer.")
/**
 * Produces the oddity log line for a long text attachment whose contents could not be read.
 *
 * @param sentTimestamp Sent timestamp forwarded to [log] to identify the message.
 * @return The formatted string returned by [log].
 */
fun unreadableLongTextAttachment(sentTimestamp: Long): String =
  log(sentTimestamp, "Long text attachment was unreadable. Falling back to the known body with an attachment pointer.")
/**
 * Produces the oddity log line for a long text attachment that could not be opened.
 *
 * @param sentTimestamp Sent timestamp forwarded to [log] to identify the message.
 * @return The formatted string returned by [log].
 */
fun unopenableLongTextAttachment(sentTimestamp: Long): String =
  log(sentTimestamp, "Long text attachment failed to open. Falling back to the known body with an attachment pointer.")
/**
 * Produces the oddity log line for a body that exceeded the maximum length and is being trimmed.
 *
 * @param sentTimestamp Sent timestamp forwarded to [log] to identify the message.
 * @param length Value interpolated into the message, where it is labelled as a byte count.
 * @return The formatted string returned by [log].
 */
fun bodyGreaterThanMaxLength(sentTimestamp: Long, length: Int): String =
  log(sentTimestamp, "The body length was greater than the max allowed ($length bytes). Trimming to fit.")
private fun log(sentTimestamp: Long, message: String): String { private fun log(sentTimestamp: Long, message: String): String {
return "[ODDITY][$sentTimestamp] $message" return "[ODDITY][$sentTimestamp] $message"
} }

View File

@@ -13,13 +13,17 @@ import org.signal.core.util.Base64
import org.signal.core.util.EventTimer import org.signal.core.util.EventTimer
import org.signal.core.util.Hex import org.signal.core.util.Hex
import org.signal.core.util.ParallelEventTimer import org.signal.core.util.ParallelEventTimer
import org.signal.core.util.StringUtil
import org.signal.core.util.concurrent.SignalExecutors import org.signal.core.util.concurrent.SignalExecutors
import org.signal.core.util.emptyIfNull
import org.signal.core.util.isNotNullOrBlank import org.signal.core.util.isNotNullOrBlank
import org.signal.core.util.kibiBytes
import org.signal.core.util.logging.Log import org.signal.core.util.logging.Log
import org.signal.core.util.logging.logW import org.signal.core.util.logging.logW
import org.signal.core.util.nullIfBlank import org.signal.core.util.nullIfBlank
import org.signal.core.util.nullIfEmpty import org.signal.core.util.nullIfEmpty
import org.signal.core.util.orNull import org.signal.core.util.orNull
import org.signal.core.util.readFully
import org.signal.core.util.requireBlob import org.signal.core.util.requireBlob
import org.signal.core.util.requireBoolean import org.signal.core.util.requireBoolean
import org.signal.core.util.requireInt import org.signal.core.util.requireInt
@@ -82,8 +86,10 @@ import org.thoughtcrime.securesms.database.model.databaseprotos.MessageExtras
import org.thoughtcrime.securesms.database.model.databaseprotos.ProfileChangeDetails import org.thoughtcrime.securesms.database.model.databaseprotos.ProfileChangeDetails
import org.thoughtcrime.securesms.database.model.databaseprotos.SessionSwitchoverEvent import org.thoughtcrime.securesms.database.model.databaseprotos.SessionSwitchoverEvent
import org.thoughtcrime.securesms.database.model.databaseprotos.ThreadMergeEvent import org.thoughtcrime.securesms.database.model.databaseprotos.ThreadMergeEvent
import org.thoughtcrime.securesms.dependencies.AppDependencies
import org.thoughtcrime.securesms.keyvalue.SignalStore import org.thoughtcrime.securesms.keyvalue.SignalStore
import org.thoughtcrime.securesms.linkpreview.LinkPreview import org.thoughtcrime.securesms.linkpreview.LinkPreview
import org.thoughtcrime.securesms.mms.PartAuthority
import org.thoughtcrime.securesms.mms.QuoteModel import org.thoughtcrime.securesms.mms.QuoteModel
import org.thoughtcrime.securesms.payments.FailureReason import org.thoughtcrime.securesms.payments.FailureReason
import org.thoughtcrime.securesms.payments.State import org.thoughtcrime.securesms.payments.State
@@ -105,6 +111,8 @@ import org.thoughtcrime.securesms.backup.v2.proto.BodyRange as BackupBodyRange
import org.thoughtcrime.securesms.backup.v2.proto.GiftBadge as BackupGiftBadge import org.thoughtcrime.securesms.backup.v2.proto.GiftBadge as BackupGiftBadge
private val TAG = Log.tag(ChatItemArchiveExporter::class.java) private val TAG = Log.tag(ChatItemArchiveExporter::class.java)
/** Limit (128 KiB, as an Int byte count) handed to StringUtil.trimToFit when the contents of a long text attachment are inlined as the exported body. */
private val MAX_INLINED_BODY_SIZE = 128.kibiBytes.bytes.toInt()
/** Limit (2 KiB, as an Int byte count) handed to StringUtil.trimToFit for the stored body when the long text attachment is still exported alongside it. */
private val MAX_INLINED_BODY_SIZE_WITH_LONG_ATTACHMENT_POINTER = 2.kibiBytes.bytes.toInt()
/** /**
* An iterator for chat items with a clever performance twist: rather than do the extra queries one at a time (for reactions, * An iterator for chat items with a clever performance twist: rather than do the extra queries one at a time (for reactions,
@@ -361,11 +369,6 @@ class ChatItemArchiveExporter(
else -> { else -> {
val attachments = extraData.attachmentsById[record.id] val attachments = extraData.attachmentsById[record.id]
if (attachments?.isNotEmpty() == true && attachments.any { it.contentType == MediaUtil.LONG_TEXT } && record.body.isNullOrBlank()) {
Log.w(TAG, ExportSkips.invalidLongTextChatItem(record.dateSent))
continue
}
val sticker = attachments?.firstOrNull { dbAttachment -> dbAttachment.isSticker } val sticker = attachments?.firstOrNull { dbAttachment -> dbAttachment.isSticker }
if (sticker?.stickerLocator != null) { if (sticker?.stickerLocator != null) {
@@ -963,6 +966,8 @@ private fun BackupMessageRecord.toRemoteDirectStoryReplyMessage(mediaArchiveEnab
val isReaction = MessageTypes.isStoryReaction(this.type) val isReaction = MessageTypes.isStoryReaction(this.type)
val (bodyText, longTextAttachment) = this.getBodyText(attachments)
return DirectStoryReplyMessage( return DirectStoryReplyMessage(
emoji = if (isReaction) { emoji = if (isReaction) {
this.body this.body
@@ -972,10 +977,10 @@ private fun BackupMessageRecord.toRemoteDirectStoryReplyMessage(mediaArchiveEnab
textReply = if (!isReaction) { textReply = if (!isReaction) {
DirectStoryReplyMessage.TextReply( DirectStoryReplyMessage.TextReply(
text = Text( text = Text(
body = this.body, body = bodyText,
bodyRanges = this.bodyRanges?.toRemoteBodyRanges(this.dateSent) ?: emptyList() bodyRanges = this.bodyRanges?.toRemoteBodyRanges(this.dateSent) ?: emptyList()
), ),
longText = attachments?.firstOrNull { it.contentType == MediaUtil.LONG_TEXT }?.toRemoteFilePointer(mediaArchiveEnabled) longText = longTextAttachment?.toRemoteFilePointer(mediaArchiveEnabled)
) )
} else { } else {
null null
@@ -985,23 +990,25 @@ private fun BackupMessageRecord.toRemoteDirectStoryReplyMessage(mediaArchiveEnab
} }
private fun BackupMessageRecord.toRemoteStandardMessage(exportState: ExportState, mediaArchiveEnabled: Boolean, reactionRecords: List<ReactionRecord>?, mentions: List<Mention>?, attachments: List<DatabaseAttachment>?): StandardMessage { private fun BackupMessageRecord.toRemoteStandardMessage(exportState: ExportState, mediaArchiveEnabled: Boolean, reactionRecords: List<ReactionRecord>?, mentions: List<Mention>?, attachments: List<DatabaseAttachment>?): StandardMessage {
val text = body.nullIfBlank()?.let { val linkPreviews = this.toRemoteLinkPreviews(attachments)
val linkPreviewAttachments = linkPreviews.mapNotNull { it.thumbnail.orElse(null) }.toSet()
val quotedAttachments = attachments?.filter { it.quote } ?: emptyList()
val messageAttachments = attachments
?.filterNot { it.quote }
?.filterNot { linkPreviewAttachments.contains(it) }
?.filterNot { MediaUtil.isLongTextType(it.contentType) }
?: emptyList()
val hasVoiceNote = messageAttachments.any { it.voiceNote }
val (bodyText, longTextAttachment) = this.getBodyText(attachments)
val text = bodyText.nullIfBlank()?.let {
Text( Text(
body = it, body = it,
bodyRanges = (this.bodyRanges?.toRemoteBodyRanges(this.dateSent) ?: emptyList()) + (mentions?.toRemoteBodyRanges(exportState) ?: emptyList()) bodyRanges = (this.bodyRanges?.toRemoteBodyRanges(this.dateSent) ?: emptyList()) + (mentions?.toRemoteBodyRanges(exportState) ?: emptyList())
) )
} }
val linkPreviews = this.toRemoteLinkPreviews(attachments)
val linkPreviewAttachments = linkPreviews.mapNotNull { it.thumbnail.orElse(null) }.toSet()
val quotedAttachments = attachments?.filter { it.quote } ?: emptyList()
val longTextAttachment = attachments?.firstOrNull { it.contentType == "text/x-signal-plain" }
val messageAttachments = attachments
?.filterNot { it.quote }
?.filterNot { linkPreviewAttachments.contains(it) }
?.filterNot { it == longTextAttachment }
?: emptyList()
val hasVoiceNote = messageAttachments.any { it.voiceNote }
return StandardMessage( return StandardMessage(
quote = this.toRemoteQuote(exportState, mediaArchiveEnabled, quotedAttachments), quote = this.toRemoteQuote(exportState, mediaArchiveEnabled, quotedAttachments),
text = text.takeUnless { hasVoiceNote }, text = text.takeUnless { hasVoiceNote },
@@ -1012,6 +1019,40 @@ private fun BackupMessageRecord.toRemoteStandardMessage(exportState: ExportState
) )
} }
/**
 * Retrieves the body text to export, reading it from a long text attachment when one is present and readable.
 *
 * Outcomes:
 * - No long text attachment: the stored body (or an empty string) and no attachment.
 * - The attachment has no URI, has not finished transferring, or cannot be opened or read: the stored body trimmed to
 *   [MAX_INLINED_BODY_SIZE_WITH_LONG_ATTACHMENT_POINTER], together with the attachment.
 * - The attachment was read: its contents trimmed to [MAX_INLINED_BODY_SIZE], and no attachment.
 *
 * @param attachments All attachments known for this message, or null if there are none.
 * @return The body text, plus an optional [DatabaseAttachment] that, if present, indicates that you should set it as the value
 *   for [StandardMessage.longText].
 */
private fun BackupMessageRecord.getBodyText(attachments: List<DatabaseAttachment>?): Pair<String, DatabaseAttachment?> {
  val longTextAttachment = attachments?.firstOrNull { it.contentType == "text/x-signal-plain" }
  if (longTextAttachment == null) {
    return this.body.emptyIfNull() to null
  }

  // Capture the URI once so the null check below covers the later read without needing a `!!`.
  val longTextUri = longTextAttachment.uri
  if (longTextUri == null || longTextAttachment.transferState != AttachmentTable.TRANSFER_PROGRESS_DONE) {
    return StringUtil.trimToFit(this.body.emptyIfNull(), MAX_INLINED_BODY_SIZE_WITH_LONG_ATTACHMENT_POINTER) to longTextAttachment
  }

  val longText = try {
    PartAuthority.getAttachmentStream(AppDependencies.application, longTextUri)?.readFully()?.toString(Charsets.UTF_8)
  } catch (e: IOException) {
    Log.w(TAG, ExportOddities.unreadableLongTextAttachment(this.dateSent))
    // Same fallback as the other paths that keep the attachment pointer: the body is trimmed to the smaller limit.
    return StringUtil.trimToFit(this.body.emptyIfNull(), MAX_INLINED_BODY_SIZE_WITH_LONG_ATTACHMENT_POINTER) to longTextAttachment
  }

  if (longText == null) {
    Log.w(TAG, ExportOddities.unopenableLongTextAttachment(this.dateSent))
    return StringUtil.trimToFit(this.body.emptyIfNull(), MAX_INLINED_BODY_SIZE_WITH_LONG_ATTACHMENT_POINTER) to longTextAttachment
  }

  val trimmed = StringUtil.trimToFit(longText, MAX_INLINED_BODY_SIZE)
  if (trimmed.length != longText.length) {
    // The oddity message labels this value as bytes, so report the UTF-8 encoded size rather than the UTF-16 char count.
    Log.w(TAG, ExportOddities.bodyGreaterThanMaxLength(this.dateSent, longText.toByteArray(Charsets.UTF_8).size))
  }

  return trimmed to null
}
private fun BackupMessageRecord.toRemoteQuote(exportState: ExportState, mediaArchiveEnabled: Boolean, attachments: List<DatabaseAttachment>? = null): Quote? { private fun BackupMessageRecord.toRemoteQuote(exportState: ExportState, mediaArchiveEnabled: Boolean, attachments: List<DatabaseAttachment>? = null): Quote? {
if (this.quoteTargetSentTimestamp == MessageTable.QUOTE_NOT_PRESENT_ID || this.quoteAuthor <= 0 || exportState.groupRecipientIds.contains(this.quoteAuthor)) { if (this.quoteTargetSentTimestamp == MessageTable.QUOTE_NOT_PRESENT_ID || this.quoteAuthor <= 0 || exportState.groupRecipientIds.contains(this.quoteAuthor)) {
return null return null

View File

@@ -10,6 +10,7 @@ import androidx.core.content.contentValuesOf
import org.signal.core.util.Base64 import org.signal.core.util.Base64
import org.signal.core.util.Hex import org.signal.core.util.Hex
import org.signal.core.util.SqlUtil import org.signal.core.util.SqlUtil
import org.signal.core.util.asList
import org.signal.core.util.forEach import org.signal.core.util.forEach
import org.signal.core.util.logging.Log import org.signal.core.util.logging.Log
import org.signal.core.util.orNull import org.signal.core.util.orNull
@@ -40,6 +41,7 @@ import org.thoughtcrime.securesms.backup.v2.proto.Sticker
import org.thoughtcrime.securesms.backup.v2.proto.ViewOnceMessage import org.thoughtcrime.securesms.backup.v2.proto.ViewOnceMessage
import org.thoughtcrime.securesms.backup.v2.util.toLocalAttachment import org.thoughtcrime.securesms.backup.v2.util.toLocalAttachment
import org.thoughtcrime.securesms.contactshare.Contact import org.thoughtcrime.securesms.contactshare.Contact
import org.thoughtcrime.securesms.database.AttachmentTable
import org.thoughtcrime.securesms.database.CallTable import org.thoughtcrime.securesms.database.CallTable
import org.thoughtcrime.securesms.database.GroupReceiptTable import org.thoughtcrime.securesms.database.GroupReceiptTable
import org.thoughtcrime.securesms.database.MessageTable import org.thoughtcrime.securesms.database.MessageTable
@@ -63,6 +65,7 @@ import org.thoughtcrime.securesms.database.model.databaseprotos.PaymentTombstone
import org.thoughtcrime.securesms.database.model.databaseprotos.ProfileChangeDetails import org.thoughtcrime.securesms.database.model.databaseprotos.ProfileChangeDetails
import org.thoughtcrime.securesms.database.model.databaseprotos.SessionSwitchoverEvent import org.thoughtcrime.securesms.database.model.databaseprotos.SessionSwitchoverEvent
import org.thoughtcrime.securesms.database.model.databaseprotos.ThreadMergeEvent import org.thoughtcrime.securesms.database.model.databaseprotos.ThreadMergeEvent
import org.thoughtcrime.securesms.dependencies.AppDependencies
import org.thoughtcrime.securesms.mms.QuoteModel import org.thoughtcrime.securesms.mms.QuoteModel
import org.thoughtcrime.securesms.payments.CryptoValueUtil import org.thoughtcrime.securesms.payments.CryptoValueUtil
import org.thoughtcrime.securesms.payments.Direction import org.thoughtcrime.securesms.payments.Direction
@@ -74,6 +77,7 @@ import org.thoughtcrime.securesms.recipients.RecipientId
import org.thoughtcrime.securesms.stickers.StickerLocator import org.thoughtcrime.securesms.stickers.StickerLocator
import org.thoughtcrime.securesms.util.JsonUtils import org.thoughtcrime.securesms.util.JsonUtils
import org.thoughtcrime.securesms.util.MediaUtil import org.thoughtcrime.securesms.util.MediaUtil
import org.thoughtcrime.securesms.util.MessageUtil
import org.whispersystems.signalservice.api.payments.Money import org.whispersystems.signalservice.api.payments.Money
import org.whispersystems.signalservice.api.push.ServiceId import org.whispersystems.signalservice.api.push.ServiceId
import org.whispersystems.signalservice.api.util.UuidUtil import org.whispersystems.signalservice.api.util.UuidUtil
@@ -371,14 +375,17 @@ class ChatItemArchiveImporter(
} }
if (this.directStoryReplyMessage != null) { if (this.directStoryReplyMessage != null) {
val longTextAttachment: Attachment? = this.directStoryReplyMessage.textReply?.longText?.toLocalAttachment( val (trimmedBodyText, longTextAttachment) = this.directStoryReplyMessage.parseBodyText(importState)
importState = importState, if (trimmedBodyText != null) {
contentType = "text/x-signal-plain" contentValues.put(MessageTable.BODY, trimmedBodyText)
) }
if (longTextAttachment != null) { if (longTextAttachment != null) {
followUps += { messageRowId -> followUps += { messageRowId ->
SignalDatabase.attachments.insertAttachmentsForMessage(messageRowId, listOf(longTextAttachment), emptyList()) val ids = SignalDatabase.attachments.insertAttachmentsForMessage(messageRowId, listOf(longTextAttachment), emptyList())
ids.values.firstOrNull()?.let { attachmentId ->
SignalDatabase.attachments.setTransferState(messageRowId, attachmentId, AttachmentTable.TRANSFER_PROGRESS_DONE)
}
} }
} }
} }
@@ -396,23 +403,29 @@ class ChatItemArchiveImporter(
attachment.toLocalAttachment() attachment.toLocalAttachment()
} }
val longTextAttachments: List<Attachment> = this.standardMessage.longText?.toLocalAttachment( val (trimmedBodyText, longTextAttachment) = this.standardMessage.parseBodyText(importState)
importState = importState, if (trimmedBodyText != null) {
contentType = "text/x-signal-plain" contentValues.put(MessageTable.BODY, trimmedBodyText)
)?.let { listOf(it) } ?: emptyList() }
val quoteAttachments: List<Attachment> = this.standardMessage.quote?.toLocalAttachments() ?: emptyList() val quoteAttachments: List<Attachment> = this.standardMessage.quote?.toLocalAttachments() ?: emptyList()
val hasAttachments = attachments.isNotEmpty() || linkPreviewAttachments.isNotEmpty() || quoteAttachments.isNotEmpty() || longTextAttachments.isNotEmpty() val hasAttachments = attachments.isNotEmpty() || linkPreviewAttachments.isNotEmpty() || quoteAttachments.isNotEmpty() || longTextAttachment != null
if (hasAttachments || linkPreviews.isNotEmpty()) { if (hasAttachments || linkPreviews.isNotEmpty()) {
followUps += { messageRowId -> followUps += { messageRowId ->
val attachmentMap = if (hasAttachments) { val attachmentMap = if (hasAttachments) {
SignalDatabase.attachments.insertAttachmentsForMessage(messageRowId, attachments + linkPreviewAttachments + longTextAttachments, quoteAttachments) SignalDatabase.attachments.insertAttachmentsForMessage(messageRowId, attachments + linkPreviewAttachments + longTextAttachment.asList(), quoteAttachments)
} else { } else {
emptyMap() emptyMap()
} }
if (longTextAttachment != null) {
attachmentMap[longTextAttachment]?.let { attachmentId ->
SignalDatabase.attachments.setTransferState(messageRowId, attachmentId, AttachmentTable.TRANSFER_PROGRESS_DONE)
}
}
if (linkPreviews.isNotEmpty()) { if (linkPreviews.isNotEmpty()) {
db.update(MessageTable.TABLE_NAME) db.update(MessageTable.TABLE_NAME)
.values(MessageTable.LINK_PREVIEWS to SignalDatabase.messages.getSerializedLinkPreviews(attachmentMap, linkPreviews)) .values(MessageTable.LINK_PREVIEWS to SignalDatabase.messages.getSerializedLinkPreviews(attachmentMap, linkPreviews))
@@ -453,6 +466,54 @@ class ChatItemArchiveImporter(
return MessageInsert(contentValues, followUp) return MessageInsert(contentValues, followUp)
} }
/**
 * Works out how the text of a [StandardMessage] should be stored locally. A body read from [StandardMessage.text] may be too long for a database
 * column, in which case it has to be split into a shortened body plus a separate long text attachment.
 *
 * @return A pair of (replacement body, long text attachment). A non-null string means the currently-stored body should be replaced with that
 *         trimmed value. A non-null attachment holds the long text and should be stored along with the message.
 */
private fun StandardMessage.parseBodyText(importState: ImportState): Pair<String?, Attachment?> {
  val remoteLongText = this.longText
  val inlineBody = this.text?.body

  return when {
    // The frame already carries a long text pointer, so the stored body stays as-is.
    remoteLongText != null -> {
      null to remoteLongText.toLocalAttachment(importState, contentType = "text/x-signal-plain")
    }
    inlineBody == null -> {
      null to null
    }
    else -> {
      val split = MessageUtil.getSplitMessage(AppDependencies.application, inlineBody)
      if (split.textSlide.isPresent) {
        split.body to split.textSlide.get().asAttachment()
      } else {
        null to null
      }
    }
  }
}
/**
 * Works out how the text of a [DirectStoryReplyMessage] should be stored locally. A body read from [DirectStoryReplyMessage.textReply] may be too
 * long for a database column, in which case it has to be split into a shortened body plus a separate long text attachment.
 *
 * @return A pair of (replacement body, long text attachment). A non-null string means the currently-stored body should be replaced with that
 *         trimmed value. A non-null attachment holds the long text and should be stored along with the message.
 */
private fun DirectStoryReplyMessage.parseBodyText(importState: ImportState): Pair<String?, Attachment?> {
  val reply = this.textReply

  // The frame already carries a long text pointer, so the stored body stays as-is.
  val remoteLongText = reply?.longText
  if (remoteLongText != null) {
    return null to remoteLongText.toLocalAttachment(importState, contentType = "text/x-signal-plain")
  }

  val replyText = reply?.text ?: return (null to null)

  val split = MessageUtil.getSplitMessage(AppDependencies.application, replyText.body)
  return if (split.textSlide.isPresent) {
    split.body to split.textSlide.get().asAttachment()
  } else {
    null to null
  }
}
private fun ChatItem.toMessageContentValues(fromRecipientId: RecipientId, chatRecipientId: RecipientId, threadId: Long): ContentValues { private fun ChatItem.toMessageContentValues(fromRecipientId: RecipientId, chatRecipientId: RecipientId, threadId: Long): ContentValues {
val contentValues = ContentValues() val contentValues = ContentValues()

View File

@@ -22,3 +22,10 @@ fun <E> List<E>.swap(i: Int, j: Int): List<E> {
Collections.swap(mutableCopy, i, j) Collections.swap(mutableCopy, i, j)
return mutableCopy.toList() return mutableCopy.toList()
} }
/**
 * Wraps the receiver in a single-element list, or yields an empty list if the receiver is null.
 */
fun <E> E?.asList(): List<E> = listOfNotNull(this)

View File

@@ -1,6 +1,7 @@
package org.signal.core.util package org.signal.core.util
import android.text.SpannableStringBuilder import android.text.SpannableStringBuilder
import okio.utf8Size
import java.io.ByteArrayOutputStream import java.io.ByteArrayOutputStream
import java.io.IOException import java.io.IOException
import java.nio.charset.StandardCharsets import java.nio.charset.StandardCharsets
@@ -27,7 +28,7 @@ object StringUtil {
return "" return ""
} }
if (name.toByteArray(StandardCharsets.UTF_8).size <= maxByteLength) { if (name.utf8Size() <= maxByteLength) {
return name return name
} }

View File

@@ -12,7 +12,7 @@ import org.junit.Test
class StringExtensionsTest { class StringExtensionsTest {
@Test @Test
fun `splitByByteLength fuzzing`() { fun `splitByByteLength - fuzzing`() {
val characterSet = "日月木山川田水火金土空海花風雨雪星森犬猫鳥魚虫人子女男友学校車電話本書時分先生愛夢楽音話語映画新古長短高低東西南北春夏秋冬雨雲星夜朝昼電気手足目耳口心頭体家国町村道橋山川本店仕事時間会話思考知識感情自動車飛行機船馬牛羊豚鶏鳥猫犬虎龍" val characterSet = "日月木山川田水火金土空海花風雨雪星森犬猫鳥魚虫人子女男友学校車電話本書時分先生愛夢楽音話語映画新古長短高低東西南北春夏秋冬雨雲星夜朝昼電気手足目耳口心頭体家国町村道橋山川本店仕事時間会話思考知識感情自動車飛行機船馬牛羊豚鶏鳥猫犬虎龍"
for (stringSize in 2100..2500) { for (stringSize in 2100..2500) {
@@ -28,7 +28,7 @@ class StringExtensionsTest {
} }
@Test @Test
fun idk() { fun `splitByByteLength - long string`() {
val myString = """ val myString = """
すべての人間は生まれながらにして自由であり、尊厳と権利において平等である。彼らは理性と良心を授けられており、互いに兄弟愛の精神をもって行動しなければならない。 すべての人間は生まれながらにして自由であり、尊厳と権利において平等である。彼らは理性と良心を授けられており、互いに兄弟愛の精神をもって行動しなければならない。