diff --git a/core-util/src/main/java/org/signal/core/util/logging/Scrubber.java b/core-util/src/main/java/org/signal/core/util/logging/Scrubber.java deleted file mode 100644 index 54079ad912..0000000000 --- a/core-util/src/main/java/org/signal/core/util/logging/Scrubber.java +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (C) 2014 Open Whisper Systems - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -package org.signal.core.util.logging; - -import androidx.annotation.NonNull; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.Locale; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Scrub data for possibly sensitive information. - */ -public final class Scrubber { - - private Scrubber() { - } - - /** - * The middle group will be censored. - * Supposedly, the shortest international phone numbers in use contain seven digits. - * Handles URL encoded +, %2B - */ - private static final Pattern E164_PATTERN = Pattern.compile("(\\+|%2B)(\\d{5,13})(\\d{2})"); - private static final String E164_CENSOR = "*************"; - - /** - * The second group will be censored. - */ - private static final Pattern CRUDE_EMAIL_PATTERN = Pattern.compile("\\b([^\\s/])([^\\s/]*@[^\\s]+)"); - private static final String EMAIL_CENSOR = "...@..."; - - /** - * The middle group will be censored. 
- */ - private static final Pattern GROUP_ID_V1_PATTERN = Pattern.compile("(__)(textsecure_group__![^\\s]+)([^\\s]{2})"); - private static final String GROUP_ID_V1_CENSOR = "...group..."; - - /** - * The middle group will be censored. - */ - private static final Pattern GROUP_ID_V2_PATTERN = Pattern.compile("(__)(signal_group__v2__![^\\s]+)([^\\s]{2})"); - private static final String GROUP_ID_V2_CENSOR = "...group_v2..."; - - /** - * The middle group will be censored. - */ - private static final Pattern UUID_PATTERN = Pattern.compile("(JOB::)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{9})([0-9a-f]{3})", Pattern.CASE_INSENSITIVE); - private static final String UUID_CENSOR = "********-****-****-****-*********"; - - /** - * The entire string is censored. - */ - private static final Pattern IPV4_PATTERN = Pattern.compile("\\b" + - "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." + - "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." + - "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." + - "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)" + - "\\b"); - private static final String IPV4_CENSOR = "...ipv4..."; - - private static final Pattern IPV6_PATTERN = Pattern.compile("([0-9a-fA-F]{0,4}:){3,7}([0-9a-fA-F]){0,4}"); - private static final String IPV6_CENSOR = "...ipv6..."; - - /** - * The domain name except for TLD will be censored. 
- */ - private static final Pattern DOMAIN_PATTERN = Pattern.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)", Pattern.CASE_INSENSITIVE); - private static final String DOMAIN_CENSOR = "***."; - private static final Set TOP_100_TLDS = new HashSet<>(Arrays.asList("com", "net", "org", "jp", "de", "uk", "fr", "br", "it", "ru", "es", "me", "gov", "pl", "ca", "au", "cn", "co", "in", - "nl", "edu", "info", "eu", "ch", "id", "at", "kr", "cz", "mx", "be", "tv", "se", "tr", "tw", "al", "ua", "ir", "vn", - "cl", "sk", "ly", "cc", "to", "no", "fi", "us", "pt", "dk", "ar", "hu", "tk", "gr", "il", "news", "ro", "my", "biz", - "ie", "za", "nz", "sg", "ee", "th", "io", "xyz", "pe", "bg", "hk", "lt", "link", "ph", "club", "si", "site", - "mobi", "by", "cat", "wiki", "la", "ga", "xxx", "cf", "hr", "ng", "jobs", "online", "kz", "ug", "gq", "ae", "is", - "lv", "pro", "fm", "tips", "ms", "sa", "app")); - - /** - * Base16 Call Link Key Pattern - */ - private static final Pattern CALL_LINK_PATTERN = Pattern.compile("([bBcCdDfFgGhHkKmMnNpPqQrRsStTxXzZ]{4})(-[bBcCdDfFgGhHkKmMnNpPqQrRsStTxXzZ]{4}){7}"); - private static final String CALL_LINK_CENSOR_SUFFIX = "-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX"; - - public static CharSequence scrub(@NonNull CharSequence in) { - - in = scrubE164(in); - in = scrubEmail(in); - in = scrubGroupsV1(in); - in = scrubGroupsV2(in); - in = scrubUuids(in); - in = scrubDomains(in); - in = scrubIpv4(in); - in = scrubIpv6(in); - in = scrubCallLinkKeys(in); - - return in; - } - - private static CharSequence scrubE164(@NonNull CharSequence in) { - return scrub(in, - E164_PATTERN, - (matcher, output) -> output.append(matcher.group(1)) - .append(E164_CENSOR, 0, matcher.group(2).length()) - .append(matcher.group(3))); - } - - private static CharSequence scrubEmail(@NonNull CharSequence in) { - return scrub(in, - CRUDE_EMAIL_PATTERN, - (matcher, output) -> output.append(matcher.group(1)) - .append(EMAIL_CENSOR)); - } - - private static CharSequence 
scrubGroupsV1(@NonNull CharSequence in) { - return scrub(in, - GROUP_ID_V1_PATTERN, - (matcher, output) -> output.append(matcher.group(1)) - .append(GROUP_ID_V1_CENSOR) - .append(matcher.group(3))); - } - - private static CharSequence scrubGroupsV2(@NonNull CharSequence in) { - return scrub(in, - GROUP_ID_V2_PATTERN, - (matcher, output) -> output.append(matcher.group(1)) - .append(GROUP_ID_V2_CENSOR) - .append(matcher.group(3))); - } - - private static CharSequence scrubUuids(@NonNull CharSequence in) { - return scrub(in, - UUID_PATTERN, - (matcher, output) -> { - if (matcher.group(1) != null && !matcher.group(1).isEmpty()) { - output.append(matcher.group(1)) - .append(matcher.group(2)) - .append(matcher.group(3)); - } else { - output.append(UUID_CENSOR) - .append(matcher.group(3)); - } - }); - } - - private static CharSequence scrubDomains(@NonNull CharSequence in) { - return scrub(in, - DOMAIN_PATTERN, - (matcher, output) -> { - String match = matcher.group(0); - if (matcher.groupCount() == 2 && - TOP_100_TLDS.contains(matcher.group(2).toLowerCase(Locale.US)) && - !match.endsWith("signal.org")) { - output.append(DOMAIN_CENSOR) - .append(matcher.group(2)); - } else { - output.append(match); - } - }); - } - - private static CharSequence scrubIpv4(@NonNull CharSequence in) { - return scrub(in, - IPV4_PATTERN, - (matcher, output) -> output.append(IPV4_CENSOR)); - } - - private static CharSequence scrubIpv6(@NonNull CharSequence in) { - return scrub(in, - IPV6_PATTERN, - (matcher, output) -> output.append(IPV6_CENSOR)); - } - - private static CharSequence scrubCallLinkKeys(@NonNull CharSequence in) { - return scrub(in, - CALL_LINK_PATTERN, - ((matcher, output) -> { - String match = matcher.group(1); - output.append(match); - output.append(CALL_LINK_CENSOR_SUFFIX); - })); - } - - - private static CharSequence scrub(@NonNull CharSequence in, @NonNull Pattern pattern, @NonNull ProcessMatch processMatch) { - final StringBuilder output = new StringBuilder(in.length()); - 
final Matcher matcher = pattern.matcher(in); - - int lastEndingPos = 0; - - while (matcher.find()) { - output.append(in, lastEndingPos, matcher.start()); - - processMatch.scrubMatch(matcher, output); - - lastEndingPos = matcher.end(); - } - - if (lastEndingPos == 0) { - // there were no matches, save copying all the data - return in; - } else { - output.append(in, lastEndingPos, in.length()); - - return output; - } - } - - private interface ProcessMatch { - void scrubMatch(@NonNull Matcher matcher, @NonNull StringBuilder output); - } -} diff --git a/core-util/src/main/java/org/signal/core/util/logging/Scrubber.kt b/core-util/src/main/java/org/signal/core/util/logging/Scrubber.kt new file mode 100644 index 0000000000..c408135e56 --- /dev/null +++ b/core-util/src/main/java/org/signal/core/util/logging/Scrubber.kt @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2014 Open Whisper Systems + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package org.signal.core.util.logging + +import java.util.regex.Matcher +import java.util.regex.Pattern + +/** Given a [Matcher], update the [StringBuilder] with the scrubbed output you want for a given match. */ +private typealias MatchProcessor = (Matcher, StringBuilder) -> Unit + +/** + * Scrub data for possibly sensitive information. + */ +object Scrubber { + /** + * The middle group will be censored. 
+ * Supposedly, the shortest international phone numbers in use contain seven digits. + * Handles URL encoded +, %2B + */ + private val E164_PATTERN = Pattern.compile("(\\+|%2B)(\\d{5,13})(\\d{2})") + private const val E164_CENSOR = "*************" + + /** The second group will be censored.*/ + private val CRUDE_EMAIL_PATTERN = Pattern.compile("\\b([^\\s/])([^\\s/]*@[^\\s]+)") + private const val EMAIL_CENSOR = "...@..." + + /** The middle group will be censored. */ + private val GROUP_ID_V1_PATTERN = Pattern.compile("(__)(textsecure_group__![^\\s]+)([^\\s]{2})") + private const val GROUP_ID_V1_CENSOR = "...group..." + + /** The middle group will be censored. */ + private val GROUP_ID_V2_PATTERN = Pattern.compile("(__)(signal_group__v2__![^\\s]+)([^\\s]{2})") + private const val GROUP_ID_V2_CENSOR = "...group_v2..." + + /** The middle group will be censored. */ + private val UUID_PATTERN = Pattern.compile("(JOB::)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{9})([0-9a-f]{3})", Pattern.CASE_INSENSITIVE) + private const val UUID_CENSOR = "********-****-****-****-*********" + + /** + * The entire string is censored. Note: left as concatenated strings because kotlin string literals leave trailing newlines, and removing them breaks + * syntax highlighting. + */ + private val IPV4_PATTERN = Pattern.compile( + "\\b" + + "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." + + "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." + + "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." + + "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)" + + "\\b" + ) + private const val IPV4_CENSOR = "...ipv4..." + + /** The entire string is censored. */ + private val IPV6_PATTERN = Pattern.compile("([0-9a-fA-F]{0,4}:){3,7}([0-9a-fA-F]){0,4}") + private const val IPV6_CENSOR = "...ipv6..." + + /** The domain name except for TLD will be censored. 
*/ + private val DOMAIN_PATTERN = Pattern.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)", Pattern.CASE_INSENSITIVE) + private const val DOMAIN_CENSOR = "***." + private val TOP_100_TLDS: Set = setOf( + "com", "net", "org", "jp", "de", "uk", "fr", "br", "it", "ru", "es", "me", "gov", "pl", "ca", "au", "cn", "co", "in", + "nl", "edu", "info", "eu", "ch", "id", "at", "kr", "cz", "mx", "be", "tv", "se", "tr", "tw", "al", "ua", "ir", "vn", + "cl", "sk", "ly", "cc", "to", "no", "fi", "us", "pt", "dk", "ar", "hu", "tk", "gr", "il", "news", "ro", "my", "biz", + "ie", "za", "nz", "sg", "ee", "th", "io", "xyz", "pe", "bg", "hk", "lt", "link", "ph", "club", "si", "site", + "mobi", "by", "cat", "wiki", "la", "ga", "xxx", "cf", "hr", "ng", "jobs", "online", "kz", "ug", "gq", "ae", "is", + "lv", "pro", "fm", "tips", "ms", "sa", "app" + ) + + /** Base16 Call Link Key Pattern */ + private val CALL_LINK_PATTERN = Pattern.compile("([bBcCdDfFgGhHkKmMnNpPqQrRsStTxXzZ]{4})(-[bBcCdDfFgGhHkKmMnNpPqQrRsStTxXzZ]{4}){7}") + private const val CALL_LINK_CENSOR_SUFFIX = "-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX" + + @JvmStatic + fun scrub(input: CharSequence): CharSequence { + return input + .scrubE164() + .scrubEmail() + .scrubGroupsV1() + .scrubGroupsV2() + .scrubUuids() + .scrubDomains() + .scrubIpv4() + .scrubIpv6() + .scrubCallLinkKeys() + } + + private fun CharSequence.scrubE164(): CharSequence { + return scrub(this, E164_PATTERN) { matcher, output -> + output + .append(matcher.group(1)) + .append(E164_CENSOR, 0, matcher.group(2)!!.length) + .append(matcher.group(3)) + } + } + + private fun CharSequence.scrubEmail(): CharSequence { + return scrub(this, CRUDE_EMAIL_PATTERN) { matcher, output -> + output + .append(matcher.group(1)) + .append(EMAIL_CENSOR) + } + } + + private fun CharSequence.scrubGroupsV1(): CharSequence { + return scrub(this, GROUP_ID_V1_PATTERN) { matcher, output -> + output + .append(matcher.group(1)) + .append(GROUP_ID_V1_CENSOR) + .append(matcher.group(3)) + } 
+ } + + private fun CharSequence.scrubGroupsV2(): CharSequence { + return scrub(this, GROUP_ID_V2_PATTERN) { matcher, output -> + output + .append(matcher.group(1)) + .append(GROUP_ID_V2_CENSOR) + .append(matcher.group(3)) + } + } + + private fun CharSequence.scrubUuids(): CharSequence { + return scrub(this, UUID_PATTERN) { matcher, output -> + if (matcher.group(1) != null && matcher.group(1)!!.isNotEmpty()) { + output + .append(matcher.group(1)) + .append(matcher.group(2)) + .append(matcher.group(3)) + } else { + output + .append(UUID_CENSOR) + .append(matcher.group(3)) + } + } + } + + private fun CharSequence.scrubDomains(): CharSequence { + return scrub(this, DOMAIN_PATTERN) { matcher, output -> + val match: String = matcher.group(0)!! + if (matcher.groupCount() == 2 && TOP_100_TLDS.contains(matcher.group(2)!!.lowercase()) && !match.endsWith("signal.org")) { + output + .append(DOMAIN_CENSOR) + .append(matcher.group(2)) + } else { + output.append(match) + } + } + } + + private fun CharSequence.scrubIpv4(): CharSequence { + return scrub(this, IPV4_PATTERN) { _, output -> output.append(IPV4_CENSOR) } + } + + private fun CharSequence.scrubIpv6(): CharSequence { + return scrub(this, IPV6_PATTERN) { _, output -> output.append(IPV6_CENSOR) } + } + + private fun CharSequence.scrubCallLinkKeys(): CharSequence { + return scrub(this, CALL_LINK_PATTERN) { matcher, output -> + val match = matcher.group(1) + output + .append(match) + .append(CALL_LINK_CENSOR_SUFFIX) + } + } + + private fun scrub(input: CharSequence, pattern: Pattern, processMatch: MatchProcessor): CharSequence { + val output = StringBuilder(input.length) + val matcher: Matcher = pattern.matcher(input) + var lastEndingPos = 0 + + while (matcher.find()) { + output.append(input, lastEndingPos, matcher.start()) + processMatch(matcher, output) + lastEndingPos = matcher.end() + } + + return if (lastEndingPos == 0) { + // there were no matches, save copying all the data + input + } else { + output.append(input, 
lastEndingPos, input.length) + output + } + } +} diff --git a/core-util/src/test/java/org/signal/core/util/logging/ScrubberTest.java b/core-util/src/test/java/org/signal/core/util/logging/ScrubberTest.java deleted file mode 100644 index 3839d81caf..0000000000 --- a/core-util/src/test/java/org/signal/core/util/logging/ScrubberTest.java +++ /dev/null @@ -1,174 +0,0 @@ -package org.signal.core.util.logging; - -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.util.Arrays; -import java.util.Collection; - -import static org.junit.Assert.assertEquals; - -@RunWith(Parameterized.class) -public final class ScrubberTest { - - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][]{ - - { "An E164 number +15551234567", - "An E164 number +*********67" }, - - { "A UK number +447700900000", - "A UK number +**********00" }, - - { "An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B447700900099", - "An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B**********99" }, - - { "Multiple numbers +447700900001 +447700900002", - "Multiple numbers +**********01 +**********02" }, - - { "One less than shortest number +155556", - "One less than shortest number +155556" }, - - { "Shortest number +1555567", - "Shortest number +*****67" }, - - { "Longest number +155556789012345", - "Longest number +*************45" }, - - { "One more than longest number +1234567890123456", - "One more than longest number +*************456" }, - - { "abc@def.com", - "a...@..." }, - - { "An email abc@def.com", - "An email a...@..." }, - - { "A short email a@def.com", - "A short email a...@..." }, - - { "A email with multiple parts before the @ d.c+b.a@mulitpart.domain.com and a multipart domain", - "A email with multiple parts before the @ d...@... 
and a multipart domain" }, - - { "An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/abc@signal.org", - "An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/a...@..." }, - - { "An email and a number abc@def.com +155556789012345", - "An email and a number a...@... +*************45" }, - - { "__textsecure_group__!000102030405060708090a0b0c0d0e0f", - "__...group...0f" }, - - { "A group id __textsecure_group__!000102030405060708090a0b0c0d0e1a surrounded with text", - "A group id __...group...1a surrounded with text" }, - - { "__signal_group__v2__!0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", - "__...group_v2...ef" }, - - { "A group v2 id __signal_group__v2__!23456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef01 surrounded with text", - "A group v2 id __...group_v2...01 surrounded with text" }, - - { "a37cb654-c9e0-4c1e-93df-3d11ca3c97f4", - "********-****-****-****-*********7f4" }, - - { "A UUID a37cb654-c9e0-4c1e-93df-3d11ca3c97f4 surrounded with text", - "A UUID ********-****-****-****-*********7f4 surrounded with text" }, - - { "JOB::a37cb654-c9e0-4c1e-93df-3d11ca3c97f4", - "JOB::a37cb654-c9e0-4c1e-93df-3d11ca3c97f4" }, - - { "All patterns in a row __textsecure_group__!abcdefg1234567890 +1234567890123456 abc@def.com a37cb654-c9e0-4c1e-93df-3d11ca3c97f4 nl.motorsport.com 192.168.1.1 with text after", - "All patterns in a row __...group...90 +*************456 a...@... ********-****-****-****-*********7f4 ***.com ...ipv4... with text after" - }, - - { "java.net.UnknownServiceException: CLEARTEXT communication to nl.motorsport.com not permitted by network security policy", - "java.net.UnknownServiceException: CLEARTEXT communication to ***.com not permitted by network security policy" - }, - - { "nl.motorsport.com:443", - "***.com:443" - }, - - { "Failed to resolve chat.signal.org using . Continuing.", - "Failed to resolve chat.signal.org using . Continuing." 
- }, - - { " Caused by: java.io.IOException: unexpected end of stream on Connection{storage.signal.org:443, proxy=DIRECT hostAddress=storage.signal.org/142.251.32.211:443 cipherSuite=TLS_AES_128_GCM_SHA256 protocol=http/1.1}", - " Caused by: java.io.IOException: unexpected end of stream on Connection{storage.signal.org:443, proxy=DIRECT hostAddress=storage.signal.org/...ipv4...:443 cipherSuite=TLS_AES_128_GCM_SHA256 protocol=http/1.1}" - }, - - { "192.168.1.1", - "...ipv4..." - }, - - { "255.255.255.255", - "...ipv4..." - }, - - { "Text before 255.255.255.255 text after", - "Text before ...ipv4... text after" - }, - - { "Not an ipv4 3.141", - "Not an ipv4 3.141" - }, - - { "A Call Link Root Key BCDF-FGHK-MNPQ-RSTX-ZRQH-BCDF-FGHM-STXZ", - "A Call Link Root Key BCDF-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX" - }, - - { "Not a Call Link Root Key (Invalid Characters) BCAF-FGHK-MNPQ-RSTX-ZRQH-BCDF-FGHM-STXZ", - "Not a Call Link Root Key (Invalid Characters) BCAF-FGHK-MNPQ-RSTX-ZRQH-BCDF-FGHM-STXZ" - }, - - { "Not a Call Link Root Key (Missing Quartet) BCAF-FGHK-MNPQ-RSTX-ZRQH-BCDF-STXZ", - "Not a Call Link Root Key (Missing Quartet) BCAF-FGHK-MNPQ-RSTX-ZRQH-BCDF-STXZ" - }, - - { - "2345:0425:2CA1:0000:0000:0567:5673:23b5", - "...ipv6..." - }, - - { "2345:425:2CA1:0000:0000:567:5673:23b5", - "...ipv6..." }, - - { "2345:0425:2CA1:0:0:0567:5673:23b5", - "...ipv6..." }, - - { "2345:0425:2CA1::0567:5673:23b5", - "...ipv6..." }, - - { "FF01:0:0:0:0:0:0:1", - "...ipv6..." }, - - { "2001:db8::a3", - "...ipv6..." }, - - { "text before 2345:0425:2CA1:0000:0000:0567:5673:23b5 text after", - "text before ...ipv6... 
text after" }, - - { "Recipient::1", - "Recipient::1" }, - - { "Recipient::123", - "Recipient::123" }, - - }); - } - - private final String input; - private final String expected; - - public ScrubberTest(String input, String expected) { - this.input = input; - this.expected = expected; - } - - @Test - public void scrub() { - assertEquals(expected, Scrubber.scrub(input).toString()); - } -} diff --git a/core-util/src/test/java/org/signal/core/util/logging/ScrubberTest.kt b/core-util/src/test/java/org/signal/core/util/logging/ScrubberTest.kt new file mode 100644 index 0000000000..8d91f3af20 --- /dev/null +++ b/core-util/src/test/java/org/signal/core/util/logging/ScrubberTest.kt @@ -0,0 +1,191 @@ +package org.signal.core.util.logging + +import org.junit.Assert +import org.junit.Test +import org.junit.runner.RunWith +import org.junit.runners.Parameterized + +@RunWith(Parameterized::class) +class ScrubberTest(private val input: String, private val expected: String) { + @Test + fun scrub() { + Assert.assertEquals(expected, Scrubber.scrub(input).toString()) + } + + companion object { + @JvmStatic + @Parameterized.Parameters + fun data(): Iterable> { + return listOf( + arrayOf( + "An E164 number +15551234567", + "An E164 number +*********67" + ), + arrayOf( + "A UK number +447700900000", + "A UK number +**********00" + ), + arrayOf( + "An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B447700900099", + "An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B**********99" + ), + arrayOf( + "Multiple numbers +447700900001 +447700900002", + "Multiple numbers +**********01 +**********02" + ), + arrayOf( + "One less than shortest number +155556", + "One less than shortest number +155556" + ), + arrayOf( + "Shortest number +1555567", + "Shortest number +*****67" + ), + arrayOf( + "Longest number +155556789012345", + "Longest number +*************45" + ), + arrayOf( + "One more than longest number +1234567890123456", + 
"One more than longest number +*************456" + ), + arrayOf( + "abc@def.com", + "a...@..." + ), + arrayOf( + "An email abc@def.com", + "An email a...@..." + ), + arrayOf( + "A short email a@def.com", + "A short email a...@..." + ), + arrayOf( + "A email with multiple parts before the @ d.c+b.a@mulitpart.domain.com and a multipart domain", + "A email with multiple parts before the @ d...@... and a multipart domain" + ), + arrayOf( + "An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/abc@signal.org", + "An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/a...@..." + ), + arrayOf( + "An email and a number abc@def.com +155556789012345", + "An email and a number a...@... +*************45" + ), + arrayOf( + "__textsecure_group__!000102030405060708090a0b0c0d0e0f", + "__...group...0f" + ), + arrayOf( + "A group id __textsecure_group__!000102030405060708090a0b0c0d0e1a surrounded with text", + "A group id __...group...1a surrounded with text" + ), + arrayOf( + "__signal_group__v2__!0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + "__...group_v2...ef" + ), + arrayOf( + "A group v2 id __signal_group__v2__!23456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef01 surrounded with text", + "A group v2 id __...group_v2...01 surrounded with text" + ), + arrayOf( + "a37cb654-c9e0-4c1e-93df-3d11ca3c97f4", + "********-****-****-****-*********7f4" + ), + arrayOf( + "A UUID a37cb654-c9e0-4c1e-93df-3d11ca3c97f4 surrounded with text", + "A UUID ********-****-****-****-*********7f4 surrounded with text" + ), + arrayOf( + "JOB::a37cb654-c9e0-4c1e-93df-3d11ca3c97f4", + "JOB::a37cb654-c9e0-4c1e-93df-3d11ca3c97f4" + ), + arrayOf( + "All patterns in a row __textsecure_group__!abcdefg1234567890 +1234567890123456 abc@def.com a37cb654-c9e0-4c1e-93df-3d11ca3c97f4 nl.motorsport.com 192.168.1.1 with text after", + "All patterns in a row __...group...90 +*************456 a...@... 
********-****-****-****-*********7f4 ***.com ...ipv4... with text after" + ), + arrayOf( + "java.net.UnknownServiceException: CLEARTEXT communication to nl.motorsport.com not permitted by network security policy", + "java.net.UnknownServiceException: CLEARTEXT communication to ***.com not permitted by network security policy" + ), + arrayOf( + "nl.motorsport.com:443", + "***.com:443" + ), + arrayOf( + "Failed to resolve chat.signal.org using . Continuing.", + "Failed to resolve chat.signal.org using . Continuing." + ), + arrayOf( + " Caused by: java.io.IOException: unexpected end of stream on Connection{storage.signal.org:443, proxy=DIRECT hostAddress=storage.signal.org/142.251.32.211:443 cipherSuite=TLS_AES_128_GCM_SHA256 protocol=http/1.1}", + " Caused by: java.io.IOException: unexpected end of stream on Connection{storage.signal.org:443, proxy=DIRECT hostAddress=storage.signal.org/...ipv4...:443 cipherSuite=TLS_AES_128_GCM_SHA256 protocol=http/1.1}" + ), + arrayOf( + "192.168.1.1", + "...ipv4..." + ), + arrayOf( + "255.255.255.255", + "...ipv4..." + ), + arrayOf( + "Text before 255.255.255.255 text after", + "Text before ...ipv4... text after" + ), + arrayOf( + "Not an ipv4 3.141", + "Not an ipv4 3.141" + ), + arrayOf( + "A Call Link Root Key BCDF-FGHK-MNPQ-RSTX-ZRQH-BCDF-FGHM-STXZ", + "A Call Link Root Key BCDF-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX" + ), + arrayOf( + "Not a Call Link Root Key (Invalid Characters) BCAF-FGHK-MNPQ-RSTX-ZRQH-BCDF-FGHM-STXZ", + "Not a Call Link Root Key (Invalid Characters) BCAF-FGHK-MNPQ-RSTX-ZRQH-BCDF-FGHM-STXZ" + ), + arrayOf( + "Not a Call Link Root Key (Missing Quartet) BCAF-FGHK-MNPQ-RSTX-ZRQH-BCDF-STXZ", + "Not a Call Link Root Key (Missing Quartet) BCAF-FGHK-MNPQ-RSTX-ZRQH-BCDF-STXZ" + ), + arrayOf( + "2345:0425:2CA1:0000:0000:0567:5673:23b5", + "...ipv6..." + ), + arrayOf( + "2345:425:2CA1:0000:0000:567:5673:23b5", + "...ipv6..." + ), + arrayOf( + "2345:0425:2CA1:0:0:0567:5673:23b5", + "...ipv6..." 
+ ), + arrayOf( + "2345:0425:2CA1::0567:5673:23b5", + "...ipv6..." + ), + arrayOf( + "FF01:0:0:0:0:0:0:1", + "...ipv6..." + ), + arrayOf( + "2001:db8::a3", + "...ipv6..." + ), + arrayOf( + "text before 2345:0425:2CA1:0000:0000:0567:5673:23b5 text after", + "text before ...ipv6... text after" + ), + arrayOf( + "Recipient::1", + "Recipient::1" + ), + arrayOf( + "Recipient::123", + "Recipient::123" + ) + ) + } + } +}