Adjust domain scrubbing.

This commit is contained in:
Cody Henthorne
2025-07-15 12:14:07 -04:00
committed by Jeffrey Starke
parent 1a1e1ea631
commit 9180917b7c
2 changed files with 34 additions and 8 deletions

View File

@@ -7,6 +7,7 @@ package org.signal.core.util.logging
import org.signal.core.util.CryptoUtil import org.signal.core.util.CryptoUtil
import org.signal.core.util.Hex import org.signal.core.util.Hex
import org.signal.core.util.isNotNullOrBlank
import java.util.regex.Matcher import java.util.regex.Matcher
import java.util.regex.Pattern import java.util.regex.Pattern
@@ -59,9 +60,9 @@ object Scrubber {
private val IPV6_PATTERN = Pattern.compile("([0-9a-fA-F]{0,4}:){3,7}([0-9a-fA-F]){0,4}") private val IPV6_PATTERN = Pattern.compile("([0-9a-fA-F]{0,4}:){3,7}([0-9a-fA-F]){0,4}")
private const val IPV6_CENSOR = "...ipv6..." private const val IPV6_CENSOR = "...ipv6..."
/** The domain name except for TLD will be censored. */ /** The domain name and path except for TLD will be censored. */
private val DOMAIN_PATTERN = Pattern.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)", Pattern.CASE_INSENSITIVE) private val URL_PATTERN = Pattern.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)(/[/a-z0-9\\-_.~:@?&=#%+\\[\\]!$()*,;]*)?", Pattern.CASE_INSENSITIVE)
private const val DOMAIN_CENSOR = "***." private const val URL_CENSOR = "***"
private val TOP_100_TLDS: Set<String> = setOf( private val TOP_100_TLDS: Set<String> = setOf(
"com", "net", "org", "jp", "de", "uk", "fr", "br", "it", "ru", "es", "me", "gov", "pl", "ca", "au", "cn", "co", "in", "com", "net", "org", "jp", "de", "uk", "fr", "br", "it", "ru", "es", "me", "gov", "pl", "ca", "au", "cn", "co", "in",
"nl", "edu", "info", "eu", "ch", "id", "at", "kr", "cz", "mx", "be", "tv", "se", "tr", "tw", "al", "ua", "ir", "vn", "nl", "edu", "info", "eu", "ch", "id", "at", "kr", "cz", "mx", "be", "tv", "se", "tr", "tw", "al", "ua", "ir", "vn",
@@ -95,7 +96,7 @@ object Scrubber {
.scrubGroupsV2() .scrubGroupsV2()
.scrubPnis() .scrubPnis()
.scrubUuids() .scrubUuids()
.scrubDomains() .scrubUrls()
.scrubIpv4() .scrubIpv4()
.scrubIpv6() .scrubIpv6()
.scrubCallLinkKeys() .scrubCallLinkKeys()
@@ -177,13 +178,26 @@ object Scrubber {
} }
} }
private fun CharSequence.scrubDomains(): CharSequence { private fun CharSequence.scrubUrls(): CharSequence {
return scrub(this, DOMAIN_PATTERN) { matcher, output -> return scrub(this, URL_PATTERN) { matcher, output ->
val match: String = matcher.group(0)!! val match: String = matcher.group(0)!!
if (matcher.groupCount() == 2 && TOP_100_TLDS.contains(matcher.group(2)!!.lowercase()) && !match.endsWith("signal.org") && !match.endsWith("debuglogs.org")) {
if (
(matcher.groupCount() == 2 || matcher.groupCount() == 3) &&
TOP_100_TLDS.contains(matcher.group(2)!!.lowercase()) &&
!(matcher.group(1).endsWith("signal.") && matcher.group(2) == "org" && !match.contains("cdn")) &&
!(matcher.group(1).endsWith("debuglogs.") && matcher.group(2) == "org")
) {
output output
.append(DOMAIN_CENSOR) .append(URL_CENSOR)
.append(".")
.append(matcher.group(2)) .append(matcher.group(2))
.run {
if (matcher.groupCount() == 3 && matcher.group(3).isNotNullOrBlank()) {
append("/")
append(URL_CENSOR)
}
}
} else { } else {
output.append(match) output.append(match)
} }

View File

@@ -250,6 +250,18 @@ class ScrubberTest(private val input: String, private val expected: String) {
"Recipient::123", "Recipient::123",
"Recipient::123" "Recipient::123"
), ),
arrayOf(
"url with text before https://example.com/v1/endpoint;asdf123%20$[]?asdf&asdf#asdf and stuff afterwards",
"url with text before https://***.com/*** and stuff afterwards"
),
arrayOf(
"https://signal.org/v1/endpoint",
"https://signal.org/v1/endpoint"
),
arrayOf(
"https://cdn3.signal.org/v1/endpoint",
"https://***.org/***"
),
arrayOf( arrayOf(
"https://debuglogs.org/android/7.47.2/2b5ccf4e3e58e44f12b3c92cfd5b526a2432f1dd0f81c8f89dededb176f1122d", "https://debuglogs.org/android/7.47.2/2b5ccf4e3e58e44f12b3c92cfd5b526a2432f1dd0f81c8f89dededb176f1122d",
"https://debuglogs.org/android/7.47.2/2b5ccf4e3e58e44f12b3c92cfd5b526a2432f1dd0f81c8f89dededb176f1122d" "https://debuglogs.org/android/7.47.2/2b5ccf4e3e58e44f12b3c92cfd5b526a2432f1dd0f81c8f89dededb176f1122d"