mirror of
https://github.com/signalapp/Signal-Android.git
synced 2025-12-20 02:58:45 +00:00
Adjust domain scrubbing.
This commit is contained in:
committed by
Jeffrey Starke
parent
1a1e1ea631
commit
9180917b7c
@@ -7,6 +7,7 @@ package org.signal.core.util.logging
|
||||
|
||||
import org.signal.core.util.CryptoUtil
|
||||
import org.signal.core.util.Hex
|
||||
import org.signal.core.util.isNotNullOrBlank
|
||||
import java.util.regex.Matcher
|
||||
import java.util.regex.Pattern
|
||||
|
||||
@@ -59,9 +60,9 @@ object Scrubber {
|
||||
private val IPV6_PATTERN = Pattern.compile("([0-9a-fA-F]{0,4}:){3,7}([0-9a-fA-F]){0,4}")
|
||||
private const val IPV6_CENSOR = "...ipv6..."
|
||||
|
||||
/** The domain name except for TLD will be censored. */
|
||||
private val DOMAIN_PATTERN = Pattern.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)", Pattern.CASE_INSENSITIVE)
|
||||
private const val DOMAIN_CENSOR = "***."
|
||||
/** The domain name and path except for TLD will be censored. */
|
||||
private val URL_PATTERN = Pattern.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)(/[/a-z0-9\\-_.~:@?&=#%+\\[\\]!$()*,;]*)?", Pattern.CASE_INSENSITIVE)
|
||||
private const val URL_CENSOR = "***"
|
||||
private val TOP_100_TLDS: Set<String> = setOf(
|
||||
"com", "net", "org", "jp", "de", "uk", "fr", "br", "it", "ru", "es", "me", "gov", "pl", "ca", "au", "cn", "co", "in",
|
||||
"nl", "edu", "info", "eu", "ch", "id", "at", "kr", "cz", "mx", "be", "tv", "se", "tr", "tw", "al", "ua", "ir", "vn",
|
||||
@@ -95,7 +96,7 @@ object Scrubber {
|
||||
.scrubGroupsV2()
|
||||
.scrubPnis()
|
||||
.scrubUuids()
|
||||
.scrubDomains()
|
||||
.scrubUrls()
|
||||
.scrubIpv4()
|
||||
.scrubIpv6()
|
||||
.scrubCallLinkKeys()
|
||||
@@ -177,13 +178,26 @@ object Scrubber {
|
||||
}
|
||||
}
|
||||
|
||||
private fun CharSequence.scrubDomains(): CharSequence {
|
||||
return scrub(this, DOMAIN_PATTERN) { matcher, output ->
|
||||
private fun CharSequence.scrubUrls(): CharSequence {
|
||||
return scrub(this, URL_PATTERN) { matcher, output ->
|
||||
val match: String = matcher.group(0)!!
|
||||
if (matcher.groupCount() == 2 && TOP_100_TLDS.contains(matcher.group(2)!!.lowercase()) && !match.endsWith("signal.org") && !match.endsWith("debuglogs.org")) {
|
||||
|
||||
if (
|
||||
(matcher.groupCount() == 2 || matcher.groupCount() == 3) &&
|
||||
TOP_100_TLDS.contains(matcher.group(2)!!.lowercase()) &&
|
||||
!(matcher.group(1).endsWith("signal.") && matcher.group(2) == "org" && !match.contains("cdn")) &&
|
||||
!(matcher.group(1).endsWith("debuglogs.") && matcher.group(2) == "org")
|
||||
) {
|
||||
output
|
||||
.append(DOMAIN_CENSOR)
|
||||
.append(URL_CENSOR)
|
||||
.append(".")
|
||||
.append(matcher.group(2))
|
||||
.run {
|
||||
if (matcher.groupCount() == 3 && matcher.group(3).isNotNullOrBlank()) {
|
||||
append("/")
|
||||
append(URL_CENSOR)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
output.append(match)
|
||||
}
|
||||
|
||||
@@ -250,6 +250,18 @@ class ScrubberTest(private val input: String, private val expected: String) {
|
||||
"Recipient::123",
|
||||
"Recipient::123"
|
||||
),
|
||||
arrayOf(
|
||||
"url with text before https://example.com/v1/endpoint;asdf123%20$[]?asdf&asdf#asdf and stuff afterwards",
|
||||
"url with text before https://***.com/*** and stuff afterwards"
|
||||
),
|
||||
arrayOf(
|
||||
"https://signal.org/v1/endpoint",
|
||||
"https://signal.org/v1/endpoint"
|
||||
),
|
||||
arrayOf(
|
||||
"https://cdn3.signal.org/v1/endpoint",
|
||||
"https://***.org/***"
|
||||
),
|
||||
arrayOf(
|
||||
"https://debuglogs.org/android/7.47.2/2b5ccf4e3e58e44f12b3c92cfd5b526a2432f1dd0f81c8f89dededb176f1122d",
|
||||
"https://debuglogs.org/android/7.47.2/2b5ccf4e3e58e44f12b3c92cfd5b526a2432f1dd0f81c8f89dededb176f1122d"
|
||||
|
||||
Reference in New Issue
Block a user