mirror of
https://github.com/signalapp/Signal-Android.git
synced 2025-12-20 11:08:31 +00:00
Adjust domain scrubbing.
This commit is contained in:
committed by
Jeffrey Starke
parent
1a1e1ea631
commit
9180917b7c
@@ -7,6 +7,7 @@ package org.signal.core.util.logging
|
|||||||
|
|
||||||
import org.signal.core.util.CryptoUtil
|
import org.signal.core.util.CryptoUtil
|
||||||
import org.signal.core.util.Hex
|
import org.signal.core.util.Hex
|
||||||
|
import org.signal.core.util.isNotNullOrBlank
|
||||||
import java.util.regex.Matcher
|
import java.util.regex.Matcher
|
||||||
import java.util.regex.Pattern
|
import java.util.regex.Pattern
|
||||||
|
|
||||||
@@ -59,9 +60,9 @@ object Scrubber {
|
|||||||
private val IPV6_PATTERN = Pattern.compile("([0-9a-fA-F]{0,4}:){3,7}([0-9a-fA-F]){0,4}")
|
private val IPV6_PATTERN = Pattern.compile("([0-9a-fA-F]{0,4}:){3,7}([0-9a-fA-F]){0,4}")
|
||||||
private const val IPV6_CENSOR = "...ipv6..."
|
private const val IPV6_CENSOR = "...ipv6..."
|
||||||
|
|
||||||
/** The domain name except for TLD will be censored. */
|
/** The domain name (except for the TLD) and any path that follows it will be censored. */
|
||||||
private val DOMAIN_PATTERN = Pattern.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)", Pattern.CASE_INSENSITIVE)
|
private val URL_PATTERN = Pattern.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)(/[/a-z0-9\\-_.~:@?&=#%+\\[\\]!$()*,;]*)?", Pattern.CASE_INSENSITIVE)
|
||||||
private const val DOMAIN_CENSOR = "***."
|
private const val URL_CENSOR = "***"
|
||||||
private val TOP_100_TLDS: Set<String> = setOf(
|
private val TOP_100_TLDS: Set<String> = setOf(
|
||||||
"com", "net", "org", "jp", "de", "uk", "fr", "br", "it", "ru", "es", "me", "gov", "pl", "ca", "au", "cn", "co", "in",
|
"com", "net", "org", "jp", "de", "uk", "fr", "br", "it", "ru", "es", "me", "gov", "pl", "ca", "au", "cn", "co", "in",
|
||||||
"nl", "edu", "info", "eu", "ch", "id", "at", "kr", "cz", "mx", "be", "tv", "se", "tr", "tw", "al", "ua", "ir", "vn",
|
"nl", "edu", "info", "eu", "ch", "id", "at", "kr", "cz", "mx", "be", "tv", "se", "tr", "tw", "al", "ua", "ir", "vn",
|
||||||
@@ -95,7 +96,7 @@ object Scrubber {
|
|||||||
.scrubGroupsV2()
|
.scrubGroupsV2()
|
||||||
.scrubPnis()
|
.scrubPnis()
|
||||||
.scrubUuids()
|
.scrubUuids()
|
||||||
.scrubDomains()
|
.scrubUrls()
|
||||||
.scrubIpv4()
|
.scrubIpv4()
|
||||||
.scrubIpv6()
|
.scrubIpv6()
|
||||||
.scrubCallLinkKeys()
|
.scrubCallLinkKeys()
|
||||||
@@ -177,13 +178,26 @@ object Scrubber {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun CharSequence.scrubDomains(): CharSequence {
|
private fun CharSequence.scrubUrls(): CharSequence {
|
||||||
return scrub(this, DOMAIN_PATTERN) { matcher, output ->
|
return scrub(this, URL_PATTERN) { matcher, output ->
|
||||||
val match: String = matcher.group(0)!!
|
val match: String = matcher.group(0)!!
|
||||||
if (matcher.groupCount() == 2 && TOP_100_TLDS.contains(matcher.group(2)!!.lowercase()) && !match.endsWith("signal.org") && !match.endsWith("debuglogs.org")) {
|
|
||||||
|
if (
|
||||||
|
(matcher.groupCount() == 2 || matcher.groupCount() == 3) &&
|
||||||
|
TOP_100_TLDS.contains(matcher.group(2)!!.lowercase()) &&
|
||||||
|
!(matcher.group(1).endsWith("signal.") && matcher.group(2) == "org" && !match.contains("cdn")) &&
|
||||||
|
!(matcher.group(1).endsWith("debuglogs.") && matcher.group(2) == "org")
|
||||||
|
) {
|
||||||
output
|
output
|
||||||
.append(DOMAIN_CENSOR)
|
.append(URL_CENSOR)
|
||||||
|
.append(".")
|
||||||
.append(matcher.group(2))
|
.append(matcher.group(2))
|
||||||
|
.run {
|
||||||
|
if (matcher.groupCount() == 3 && matcher.group(3).isNotNullOrBlank()) {
|
||||||
|
append("/")
|
||||||
|
append(URL_CENSOR)
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
output.append(match)
|
output.append(match)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -250,6 +250,18 @@ class ScrubberTest(private val input: String, private val expected: String) {
|
|||||||
"Recipient::123",
|
"Recipient::123",
|
||||||
"Recipient::123"
|
"Recipient::123"
|
||||||
),
|
),
|
||||||
|
arrayOf(
|
||||||
|
"url with text before https://example.com/v1/endpoint;asdf123%20$[]?asdf&asdf#asdf and stuff afterwards",
|
||||||
|
"url with text before https://***.com/*** and stuff afterwards"
|
||||||
|
),
|
||||||
|
arrayOf(
|
||||||
|
"https://signal.org/v1/endpoint",
|
||||||
|
"https://signal.org/v1/endpoint"
|
||||||
|
),
|
||||||
|
arrayOf(
|
||||||
|
"https://cdn3.signal.org/v1/endpoint",
|
||||||
|
"https://***.org/***"
|
||||||
|
),
|
||||||
arrayOf(
|
arrayOf(
|
||||||
"https://debuglogs.org/android/7.47.2/2b5ccf4e3e58e44f12b3c92cfd5b526a2432f1dd0f81c8f89dededb176f1122d",
|
"https://debuglogs.org/android/7.47.2/2b5ccf4e3e58e44f12b3c92cfd5b526a2432f1dd0f81c8f89dededb176f1122d",
|
||||||
"https://debuglogs.org/android/7.47.2/2b5ccf4e3e58e44f12b3c92cfd5b526a2432f1dd0f81c8f89dededb176f1122d"
|
"https://debuglogs.org/android/7.47.2/2b5ccf4e3e58e44f12b3c92cfd5b526a2432f1dd0f81c8f89dededb176f1122d"
|
||||||
|
|||||||
Reference in New Issue
Block a user