Fix an issue where the charset in the link preview of some pages was not identified correctly.

2025-12-24 13:08:46 +00:00 · 2023-07-08 16:14:13 +03:00
parent 5ca025544e
commit 23ef8c78bd
1 changed files with 22 additions and 1 deletions
--- a/app/src/main/java/org/thoughtcrime/securesms/util/OkHttpUtil.java
+++ b/app/src/main/java/org/thoughtcrime/securesms/util/OkHttpUtil.java
@@ -1,6 +1,7 @@
 package org.thoughtcrime.securesms.util;

 import androidx.annotation.NonNull;
+import androidx.core.text.HtmlCompat;

 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@@ -8,12 +9,16 @@ import java.io.InputStream;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;

 import okhttp3.MediaType;
 import okhttp3.ResponseBody;

 public final class OkHttpUtil {

+  /**
+   * Finds a {@code charset=} declaration (for example inside an HTML meta tag) and captures the
+   * declared charset name in group 1, with optional single or double quotes around it.
+   *
+   * The match is case-insensitive so that {@code CHARSET=} is recognised as well. The name may
+   * contain '_' so that names such as {@code Shift_JIS} are captured whole instead of being cut
+   * at the underscore. A backslash is deliberately not part of the name: otherwise an escaped
+   * quote directly after the name would be pulled into the capture and the lookup would fail.
+   */
+  private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=[\"']?([a-zA-Z0-9_\\-]+)[\"']?", Pattern.CASE_INSENSITIVE);
+
  private OkHttpUtil() {}

  public static byte[] readAsBytes(@NonNull InputStream bodyStream, long sizeLimit) throws IOException {
@@ -41,8 +46,24 @@ public final class OkHttpUtil {

    byte[]    data        = readAsBytes(body.byteStream(), sizeLimit);
    MediaType contentType = body.contentType();
-    Charset   charset     = contentType != null ? contentType.charset(StandardCharsets.UTF_8) : StandardCharsets.UTF_8;
+    Charset   charset     = contentType != null ? contentType.charset(null) : null;
+
+    charset = charset == null ? getHtmlCharset(new String(data)) : charset;

    return new String(data, Objects.requireNonNull(charset));
  }
+
+  /**
+   * Looks for the first {@code charset=} declaration in the given text and resolves the declared
+   * name to a {@link Charset}.
+   *
+   * @param html text to scan; it is matched against {@code CHARSET_PATTERN}
+   * @return the declared charset, or UTF-8 when nothing matches or the captured name cannot be
+   *         resolved
+   */
+  private static @NonNull Charset getHtmlCharset(String html) {
+    Matcher charsetMatcher = CHARSET_PATTERN.matcher(html);
+    if (!charsetMatcher.find()) {
+      return StandardCharsets.UTF_8;
+    }
+
+    try {
+      // Charset.forName never returns null; it throws for illegal or unsupported names.
+      return Charset.forName(fromDoubleEncoded(charsetMatcher.group(1)));
+    } catch (Exception ignored) {
+      // Best effort: any failure while decoding or resolving the name falls back to UTF-8.
+      return StandardCharsets.UTF_8;
+    }
+  }
+
+  private static @NonNull String fromDoubleEncoded(@NonNull String html) {
+    return HtmlCompat.fromHtml(HtmlCompat.fromHtml(html, 0).toString(), 0).toString();
+  }
 }