mirror of
https://github.com/signalapp/Signal-Android.git
synced 2026-02-25 20:23:19 +00:00
Convert Scrubber to Kotlin.
This commit is contained in:
@@ -1,228 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2014 Open Whisper Systems
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package org.signal.core.util.logging;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Scrub data for possibly sensitive information.
|
||||
*/
|
||||
public final class Scrubber {
|
||||
|
||||
private Scrubber() {
|
||||
}
|
||||
|
||||
/**
|
||||
* The middle group will be censored.
|
||||
* Supposedly, the shortest international phone numbers in use contain seven digits.
|
||||
* Handles URL encoded +, %2B
|
||||
*/
|
||||
private static final Pattern E164_PATTERN = Pattern.compile("(\\+|%2B)(\\d{5,13})(\\d{2})");
|
||||
private static final String E164_CENSOR = "*************";
|
||||
|
||||
/**
|
||||
* The second group will be censored.
|
||||
*/
|
||||
private static final Pattern CRUDE_EMAIL_PATTERN = Pattern.compile("\\b([^\\s/])([^\\s/]*@[^\\s]+)");
|
||||
private static final String EMAIL_CENSOR = "...@...";
|
||||
|
||||
/**
|
||||
* The middle group will be censored.
|
||||
*/
|
||||
private static final Pattern GROUP_ID_V1_PATTERN = Pattern.compile("(__)(textsecure_group__![^\\s]+)([^\\s]{2})");
|
||||
private static final String GROUP_ID_V1_CENSOR = "...group...";
|
||||
|
||||
/**
|
||||
* The middle group will be censored.
|
||||
*/
|
||||
private static final Pattern GROUP_ID_V2_PATTERN = Pattern.compile("(__)(signal_group__v2__![^\\s]+)([^\\s]{2})");
|
||||
private static final String GROUP_ID_V2_CENSOR = "...group_v2...";
|
||||
|
||||
/**
|
||||
* The middle group will be censored.
|
||||
*/
|
||||
private static final Pattern UUID_PATTERN = Pattern.compile("(JOB::)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{9})([0-9a-f]{3})", Pattern.CASE_INSENSITIVE);
|
||||
private static final String UUID_CENSOR = "********-****-****-****-*********";
|
||||
|
||||
/**
|
||||
* The entire string is censored.
|
||||
*/
|
||||
private static final Pattern IPV4_PATTERN = Pattern.compile("\\b" +
|
||||
"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." +
|
||||
"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." +
|
||||
"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." +
|
||||
"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)" +
|
||||
"\\b");
|
||||
private static final String IPV4_CENSOR = "...ipv4...";
|
||||
|
||||
private static final Pattern IPV6_PATTERN = Pattern.compile("([0-9a-fA-F]{0,4}:){3,7}([0-9a-fA-F]){0,4}");
|
||||
private static final String IPV6_CENSOR = "...ipv6...";
|
||||
|
||||
/**
|
||||
* The domain name except for TLD will be censored.
|
||||
*/
|
||||
private static final Pattern DOMAIN_PATTERN = Pattern.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)", Pattern.CASE_INSENSITIVE);
|
||||
private static final String DOMAIN_CENSOR = "***.";
|
||||
private static final Set<String> TOP_100_TLDS = new HashSet<>(Arrays.asList("com", "net", "org", "jp", "de", "uk", "fr", "br", "it", "ru", "es", "me", "gov", "pl", "ca", "au", "cn", "co", "in",
|
||||
"nl", "edu", "info", "eu", "ch", "id", "at", "kr", "cz", "mx", "be", "tv", "se", "tr", "tw", "al", "ua", "ir", "vn",
|
||||
"cl", "sk", "ly", "cc", "to", "no", "fi", "us", "pt", "dk", "ar", "hu", "tk", "gr", "il", "news", "ro", "my", "biz",
|
||||
"ie", "za", "nz", "sg", "ee", "th", "io", "xyz", "pe", "bg", "hk", "lt", "link", "ph", "club", "si", "site",
|
||||
"mobi", "by", "cat", "wiki", "la", "ga", "xxx", "cf", "hr", "ng", "jobs", "online", "kz", "ug", "gq", "ae", "is",
|
||||
"lv", "pro", "fm", "tips", "ms", "sa", "app"));
|
||||
|
||||
/**
|
||||
* Base16 Call Link Key Pattern
|
||||
*/
|
||||
private static final Pattern CALL_LINK_PATTERN = Pattern.compile("([bBcCdDfFgGhHkKmMnNpPqQrRsStTxXzZ]{4})(-[bBcCdDfFgGhHkKmMnNpPqQrRsStTxXzZ]{4}){7}");
|
||||
private static final String CALL_LINK_CENSOR_SUFFIX = "-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX";
|
||||
|
||||
public static CharSequence scrub(@NonNull CharSequence in) {
|
||||
|
||||
in = scrubE164(in);
|
||||
in = scrubEmail(in);
|
||||
in = scrubGroupsV1(in);
|
||||
in = scrubGroupsV2(in);
|
||||
in = scrubUuids(in);
|
||||
in = scrubDomains(in);
|
||||
in = scrubIpv4(in);
|
||||
in = scrubIpv6(in);
|
||||
in = scrubCallLinkKeys(in);
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
private static CharSequence scrubE164(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
E164_PATTERN,
|
||||
(matcher, output) -> output.append(matcher.group(1))
|
||||
.append(E164_CENSOR, 0, matcher.group(2).length())
|
||||
.append(matcher.group(3)));
|
||||
}
|
||||
|
||||
private static CharSequence scrubEmail(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
CRUDE_EMAIL_PATTERN,
|
||||
(matcher, output) -> output.append(matcher.group(1))
|
||||
.append(EMAIL_CENSOR));
|
||||
}
|
||||
|
||||
private static CharSequence scrubGroupsV1(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
GROUP_ID_V1_PATTERN,
|
||||
(matcher, output) -> output.append(matcher.group(1))
|
||||
.append(GROUP_ID_V1_CENSOR)
|
||||
.append(matcher.group(3)));
|
||||
}
|
||||
|
||||
private static CharSequence scrubGroupsV2(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
GROUP_ID_V2_PATTERN,
|
||||
(matcher, output) -> output.append(matcher.group(1))
|
||||
.append(GROUP_ID_V2_CENSOR)
|
||||
.append(matcher.group(3)));
|
||||
}
|
||||
|
||||
private static CharSequence scrubUuids(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
UUID_PATTERN,
|
||||
(matcher, output) -> {
|
||||
if (matcher.group(1) != null && !matcher.group(1).isEmpty()) {
|
||||
output.append(matcher.group(1))
|
||||
.append(matcher.group(2))
|
||||
.append(matcher.group(3));
|
||||
} else {
|
||||
output.append(UUID_CENSOR)
|
||||
.append(matcher.group(3));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private static CharSequence scrubDomains(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
DOMAIN_PATTERN,
|
||||
(matcher, output) -> {
|
||||
String match = matcher.group(0);
|
||||
if (matcher.groupCount() == 2 &&
|
||||
TOP_100_TLDS.contains(matcher.group(2).toLowerCase(Locale.US)) &&
|
||||
!match.endsWith("signal.org")) {
|
||||
output.append(DOMAIN_CENSOR)
|
||||
.append(matcher.group(2));
|
||||
} else {
|
||||
output.append(match);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private static CharSequence scrubIpv4(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
IPV4_PATTERN,
|
||||
(matcher, output) -> output.append(IPV4_CENSOR));
|
||||
}
|
||||
|
||||
private static CharSequence scrubIpv6(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
IPV6_PATTERN,
|
||||
(matcher, output) -> output.append(IPV6_CENSOR));
|
||||
}
|
||||
|
||||
private static CharSequence scrubCallLinkKeys(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
CALL_LINK_PATTERN,
|
||||
((matcher, output) -> {
|
||||
String match = matcher.group(1);
|
||||
output.append(match);
|
||||
output.append(CALL_LINK_CENSOR_SUFFIX);
|
||||
}));
|
||||
}
|
||||
|
||||
|
||||
private static CharSequence scrub(@NonNull CharSequence in, @NonNull Pattern pattern, @NonNull ProcessMatch processMatch) {
|
||||
final StringBuilder output = new StringBuilder(in.length());
|
||||
final Matcher matcher = pattern.matcher(in);
|
||||
|
||||
int lastEndingPos = 0;
|
||||
|
||||
while (matcher.find()) {
|
||||
output.append(in, lastEndingPos, matcher.start());
|
||||
|
||||
processMatch.scrubMatch(matcher, output);
|
||||
|
||||
lastEndingPos = matcher.end();
|
||||
}
|
||||
|
||||
if (lastEndingPos == 0) {
|
||||
// there were no matches, save copying all the data
|
||||
return in;
|
||||
} else {
|
||||
output.append(in, lastEndingPos, in.length());
|
||||
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
||||
private interface ProcessMatch {
|
||||
void scrubMatch(@NonNull Matcher matcher, @NonNull StringBuilder output);
|
||||
}
|
||||
}
|
||||
200
core-util/src/main/java/org/signal/core/util/logging/Scrubber.kt
Normal file
200
core-util/src/main/java/org/signal/core/util/logging/Scrubber.kt
Normal file
@@ -0,0 +1,200 @@
|
||||
/*
|
||||
* Copyright (C) 2014 Open Whisper Systems
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.signal.core.util.logging
|
||||
|
||||
import java.util.regex.Matcher
|
||||
import java.util.regex.Pattern
|
||||
|
||||
/** Given a [Matcher] positioned on a match, update the [StringBuilder] with the scrubbed output you want for that match. */
private typealias MatchProcessor = (Matcher, StringBuilder) -> Unit

/**
 * Scrub data for possibly sensitive information.
 *
 * [scrub] runs the input through a fixed sequence of regex-based passes (phone numbers, emails, group ids,
 * UUIDs, domains, IPv4, IPv6, call link keys). Each pass replaces the sensitive portion of every match with
 * a fixed censor string and leaves the rest of the text untouched.
 */
object Scrubber {
  /**
   * Group 1 is the leading `+` (or its URL-encoded form `%2B`), group 2 is the part that gets censored, and
   * group 3 is the last two digits, which are kept.
   * Supposedly, the shortest international phone numbers in use contain seven digits.
   */
  private val E164_PATTERN = Pattern.compile("(\\+|%2B)(\\d{5,13})(\\d{2})")

  /** Source of asterisks for phone numbers; only as many as the censored group is long are used. */
  private const val E164_CENSOR = "*************"

  /** Group 1 is the first character of the address and is kept; group 2 (everything after it) is censored. */
  private val CRUDE_EMAIL_PATTERN = Pattern.compile("\\b([^\\s/])([^\\s/]*@[^\\s]+)")
  private const val EMAIL_CENSOR = "...@..."

  /** The middle group will be censored; the leading `__` and the trailing two characters are kept. */
  private val GROUP_ID_V1_PATTERN = Pattern.compile("(__)(textsecure_group__![^\\s]+)([^\\s]{2})")
  private const val GROUP_ID_V1_CENSOR = "...group..."

  /** The middle group will be censored; the leading `__` and the trailing two characters are kept. */
  private val GROUP_ID_V2_PATTERN = Pattern.compile("(__)(signal_group__v2__![^\\s]+)([^\\s]{2})")
  private const val GROUP_ID_V2_CENSOR = "...group_v2..."

  /**
   * Group 1 is an optional `JOB::` prefix, group 2 is the UUID minus its last three hex digits, and group 3
   * is those last three digits. Group 2 is censored unless the `JOB::` prefix is present, in which case the
   * whole match is left as-is.
   */
  private val UUID_PATTERN = Pattern.compile("(JOB::)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{9})([0-9a-f]{3})", Pattern.CASE_INSENSITIVE)
  private const val UUID_CENSOR = "********-****-****-****-*********"

  /**
   * The entire string is censored. Note: left as concatenated strings because Kotlin string literals leave trailing newlines, and removing them breaks
   * syntax highlighting.
   */
  private val IPV4_PATTERN = Pattern.compile(
    "\\b" +
      "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." +
      "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." +
      "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." +
      "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)" +
      "\\b"
  )
  private const val IPV4_CENSOR = "...ipv4..."

  /** The entire string is censored. */
  private val IPV6_PATTERN = Pattern.compile("([0-9a-fA-F]{0,4}:){3,7}([0-9a-fA-F]){0,4}")
  private const val IPV6_CENSOR = "...ipv6..."

  /** The domain name except for TLD will be censored. Group 2 is the TLD. */
  private val DOMAIN_PATTERN = Pattern.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)", Pattern.CASE_INSENSITIVE)
  private const val DOMAIN_CENSOR = "***."

  /** Domains at or below this name are never censored. */
  private const val SIGNAL_DOMAIN = "signal.org"

  /** Only matches whose final label is one of these (compared in lower case) are treated as domains. */
  private val TOP_100_TLDS: Set<String> = setOf(
    "com", "net", "org", "jp", "de", "uk", "fr", "br", "it", "ru", "es", "me", "gov", "pl", "ca", "au", "cn", "co", "in",
    "nl", "edu", "info", "eu", "ch", "id", "at", "kr", "cz", "mx", "be", "tv", "se", "tr", "tw", "al", "ua", "ir", "vn",
    "cl", "sk", "ly", "cc", "to", "no", "fi", "us", "pt", "dk", "ar", "hu", "tk", "gr", "il", "news", "ro", "my", "biz",
    "ie", "za", "nz", "sg", "ee", "th", "io", "xyz", "pe", "bg", "hk", "lt", "link", "ph", "club", "si", "site",
    "mobi", "by", "cat", "wiki", "la", "ga", "xxx", "cf", "hr", "ng", "jobs", "online", "kz", "ug", "gq", "ae", "is",
    "lv", "pro", "fm", "tips", "ms", "sa", "app"
  )

  /**
   * Base16 Call Link Key Pattern. Group 1 is the first four-character chunk, which is kept; the remaining
   * seven chunks are censored.
   */
  private val CALL_LINK_PATTERN = Pattern.compile("([bBcCdDfFgGhHkKmMnNpPqQrRsStTxXzZ]{4})(-[bBcCdDfFgGhHkKmMnNpPqQrRsStTxXzZ]{4}){7}")
  private const val CALL_LINK_CENSOR_SUFFIX = "-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX"

  /**
   * Runs every scrubbing pass over [input], in a fixed order.
   *
   * @return The scrubbed text. If no pass matched anything, this is the same instance that was passed in.
   */
  @JvmStatic
  fun scrub(input: CharSequence): CharSequence {
    return input
      .scrubE164()
      .scrubEmail()
      .scrubGroupsV1()
      .scrubGroupsV2()
      .scrubUuids()
      .scrubDomains()
      .scrubIpv4()
      .scrubIpv6()
      .scrubCallLinkKeys()
  }

  /** Keeps the plus sign and the last two digits, replacing every other digit with an asterisk. */
  private fun CharSequence.scrubE164(): CharSequence {
    return scrub(this, E164_PATTERN) { matcher, output ->
      output
        .append(matcher.group(1))
        .append(E164_CENSOR, 0, matcher.group(2)!!.length)
        .append(matcher.group(3))
    }
  }

  /** Keeps the first character of the address and replaces the rest with [EMAIL_CENSOR]. */
  private fun CharSequence.scrubEmail(): CharSequence {
    return scrub(this, CRUDE_EMAIL_PATTERN) { matcher, output ->
      output
        .append(matcher.group(1))
        .append(EMAIL_CENSOR)
    }
  }

  /** Replaces the body of a V1 group id, keeping the `__` prefix and the last two characters. */
  private fun CharSequence.scrubGroupsV1(): CharSequence {
    return scrub(this, GROUP_ID_V1_PATTERN) { matcher, output ->
      output
        .append(matcher.group(1))
        .append(GROUP_ID_V1_CENSOR)
        .append(matcher.group(3))
    }
  }

  /** Replaces the body of a V2 group id, keeping the `__` prefix and the last two characters. */
  private fun CharSequence.scrubGroupsV2(): CharSequence {
    return scrub(this, GROUP_ID_V2_PATTERN) { matcher, output ->
      output
        .append(matcher.group(1))
        .append(GROUP_ID_V2_CENSOR)
        .append(matcher.group(3))
    }
  }

  /** Censors all but the last three hex digits of a UUID, unless it carries a `JOB::` prefix. */
  private fun CharSequence.scrubUuids(): CharSequence {
    return scrub(this, UUID_PATTERN) { matcher, output ->
      if (!matcher.group(1).isNullOrEmpty()) {
        // Prefixed with JOB:: -- emit the match unchanged (groups 1-3 are contiguous and span it).
        output.append(matcher.group())
      } else {
        output
          .append(UUID_CENSOR)
          .append(matcher.group(3))
      }
    }
  }

  /**
   * Replaces everything before the TLD with [DOMAIN_CENSOR] when the TLD is in [TOP_100_TLDS], except for
   * signal.org and its subdomains, which are left intact.
   */
  private fun CharSequence.scrubDomains(): CharSequence {
    return scrub(this, DOMAIN_PATTERN) { matcher, output ->
      val match: String = matcher.group(0)!!
      val tld: String = matcher.group(2)!!

      if (TOP_100_TLDS.contains(tld.lowercase()) && !isSignalDomain(match)) {
        output
          .append(DOMAIN_CENSOR)
          .append(tld)
      } else {
        output.append(match)
      }
    }
  }

  /**
   * Whether [domain] is exactly signal.org or one of its subdomains. The check is anchored on a label
   * boundary so that unrelated hosts that merely end in the same characters (e.g. notsignal.org) are still
   * censored. The comparison is case-sensitive.
   */
  private fun isSignalDomain(domain: String): Boolean {
    return domain == SIGNAL_DOMAIN || domain.endsWith(".$SIGNAL_DOMAIN")
  }

  /** Replaces every IPv4 address with [IPV4_CENSOR]. */
  private fun CharSequence.scrubIpv4(): CharSequence {
    return scrub(this, IPV4_PATTERN) { _, output -> output.append(IPV4_CENSOR) }
  }

  /** Replaces every match of [IPV6_PATTERN] with [IPV6_CENSOR]. */
  private fun CharSequence.scrubIpv6(): CharSequence {
    return scrub(this, IPV6_PATTERN) { _, output -> output.append(IPV6_CENSOR) }
  }

  /** Keeps the first chunk of a call link key and replaces the other seven with `XXXX`. */
  private fun CharSequence.scrubCallLinkKeys(): CharSequence {
    return scrub(this, CALL_LINK_PATTERN) { matcher, output ->
      output
        .append(matcher.group(1))
        .append(CALL_LINK_CENSOR_SUFFIX)
    }
  }

  /**
   * Copies [input] to a new buffer, letting [processMatch] write the replacement for every match of
   * [pattern]. Text between matches is copied verbatim.
   *
   * @return [input] itself when there are no matches, otherwise the rewritten text.
   */
  private fun scrub(input: CharSequence, pattern: Pattern, processMatch: MatchProcessor): CharSequence {
    val matcher: Matcher = pattern.matcher(input)

    if (!matcher.find()) {
      // there were no matches, save copying all the data
      return input
    }

    val output = StringBuilder(input.length)
    var lastEndingPos = 0

    do {
      output.append(input, lastEndingPos, matcher.start())
      processMatch(matcher, output)
      lastEndingPos = matcher.end()
    } while (matcher.find())

    output.append(input, lastEndingPos, input.length)
    return output
  }
}
|
||||
Reference in New Issue
Block a user