mirror of
https://github.com/signalapp/Signal-Android.git
synced 2026-05-08 09:18:39 +01:00
Fix an issue where the charset in the link preview of some pages was not identified correctly.
This commit is contained in:
committed by
Clark Chen
parent
5ca025544e
commit
23ef8c78bd
@@ -1,6 +1,7 @@
|
|||||||
package org.thoughtcrime.securesms.util;
|
package org.thoughtcrime.securesms.util;
|
||||||
|
|
||||||
import androidx.annotation.NonNull;
|
import androidx.annotation.NonNull;
|
||||||
|
import androidx.core.text.HtmlCompat;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@@ -8,12 +9,16 @@ import java.io.InputStream;
|
|||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import okhttp3.MediaType;
|
import okhttp3.MediaType;
|
||||||
import okhttp3.ResponseBody;
|
import okhttp3.ResponseBody;
|
||||||
|
|
||||||
public final class OkHttpUtil {
|
public final class OkHttpUtil {
|
||||||
|
|
||||||
|
/**
 * Locates a {@code charset=<name>} declaration (optionally quoted with {@code "} or {@code '})
 * inside HTML text; capture group 1 holds the charset name.
 *
 * The name class covers every character the {@link java.nio.charset.Charset} naming rules allow
 * (letters, digits, {@code -}, {@code +}, {@code .}, {@code :}, {@code _}) so that names such as
 * {@code Shift_JIS} are captured whole instead of being cut at the underscore. Matching is
 * case-insensitive because markup may spell the attribute as {@code CHARSET=} or {@code Charset=}.
 */
private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=[\"']?([a-zA-Z0-9_.:+\\-]+)[\"']?", Pattern.CASE_INSENSITIVE);
|
||||||
|
|
||||||
private OkHttpUtil() {}
|
private OkHttpUtil() {}
|
||||||
|
|
||||||
public static byte[] readAsBytes(@NonNull InputStream bodyStream, long sizeLimit) throws IOException {
|
public static byte[] readAsBytes(@NonNull InputStream bodyStream, long sizeLimit) throws IOException {
|
||||||
@@ -41,8 +46,24 @@ public final class OkHttpUtil {
|
|||||||
|
|
||||||
byte[] data = readAsBytes(body.byteStream(), sizeLimit);
|
byte[] data = readAsBytes(body.byteStream(), sizeLimit);
|
||||||
MediaType contentType = body.contentType();
|
MediaType contentType = body.contentType();
|
||||||
Charset charset = contentType != null ? contentType.charset(StandardCharsets.UTF_8) : StandardCharsets.UTF_8;
|
Charset charset = contentType != null ? contentType.charset(null) : null;
|
||||||
|
|
||||||
|
charset = charset == null ? getHtmlCharset(new String(data)) : charset;
|
||||||
|
|
||||||
return new String(data, Objects.requireNonNull(charset));
|
return new String(data, Objects.requireNonNull(charset));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static @NonNull Charset getHtmlCharset(String html) {
|
||||||
|
Matcher charsetMatcher = CHARSET_PATTERN.matcher(html);
|
||||||
|
if (charsetMatcher.find() && charsetMatcher.groupCount() > 0) {
|
||||||
|
try {
|
||||||
|
return Objects.requireNonNull(Charset.forName(fromDoubleEncoded(charsetMatcher.group(1))));
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
}
|
||||||
|
return StandardCharsets.UTF_8;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static @NonNull String fromDoubleEncoded(@NonNull String html) {
|
||||||
|
return HtmlCompat.fromHtml(HtmlCompat.fromHtml(html, 0).toString(), 0).toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user