Open up link previews to work with all sites.

This commit is contained in:
Greyson Parrelli
2020-08-12 10:41:52 -04:00
parent d569419e13
commit 6e6105af05
18 changed files with 377 additions and 200 deletions

View File

@@ -1,38 +0,0 @@
package org.thoughtcrime.securesms.linkpreview;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
/**
 * Domain allow-lists consulted when deciding whether a link preview may be generated.
 *
 * <p>Both sets are exposed as unmodifiable views so that no caller can widen or shrink the
 * allow-lists at runtime; attempting to mutate them throws
 * {@link UnsupportedOperationException}.
 */
public class LinkPreviewDomains {

  /** Domain associated with sticker links. */
  public static final String STICKERS = "signal.org";

  /** Exact host names for which a link preview may be requested. */
  public static final Set<String> LINKS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
      "youtube.com",
      "www.youtube.com",
      "m.youtube.com",
      "youtu.be",
      "reddit.com",
      "www.reddit.com",
      "m.reddit.com",
      "imgur.com",
      "www.imgur.com",
      "m.imgur.com",
      "instagram.com",
      "www.instagram.com",
      "m.instagram.com",
      "pinterest.com",
      "www.pinterest.com",
      "pin.it"
  )));

  /** Domains from which preview images may be downloaded. */
  public static final Set<String> IMAGES = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
      "ytimg.com",
      "cdninstagram.com",
      "fbcdn.net",
      "redd.it",
      "imgur.com",
      "pinimg.com",
      "giphy.com"
  )));
}

View File

@@ -2,31 +2,33 @@ package org.thoughtcrime.securesms.linkpreview;
import android.content.Context;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.net.Uri;
import androidx.annotation.NonNull;
import android.text.Html;
import android.text.TextUtils;
import com.bumptech.glide.load.engine.DiskCacheStrategy;
import com.bumptech.glide.request.FutureTarget;
import org.thoughtcrime.securesms.attachments.Attachment;
import org.thoughtcrime.securesms.attachments.UriAttachment;
import org.thoughtcrime.securesms.database.AttachmentDatabase;
import org.thoughtcrime.securesms.dependencies.ApplicationDependencies;
import org.thoughtcrime.securesms.giph.model.ChunkedImageUrl;
import org.thoughtcrime.securesms.linkpreview.LinkPreviewUtil.OpenGraph;
import org.thoughtcrime.securesms.logging.Log;
import org.thoughtcrime.securesms.mms.GlideApp;
import org.thoughtcrime.securesms.net.CallRequestController;
import org.thoughtcrime.securesms.net.CompositeRequestController;
import org.thoughtcrime.securesms.net.ContentProxySafetyInterceptor;
import org.thoughtcrime.securesms.net.ContentProxySelector;
import org.thoughtcrime.securesms.net.RequestController;
import org.thoughtcrime.securesms.net.UserAgentInterceptor;
import org.thoughtcrime.securesms.providers.BlobProvider;
import org.thoughtcrime.securesms.stickers.StickerRemoteUri;
import org.thoughtcrime.securesms.stickers.StickerUrl;
import org.thoughtcrime.securesms.util.ByteUnit;
import org.thoughtcrime.securesms.util.Hex;
import org.thoughtcrime.securesms.util.MediaUtil;
import org.thoughtcrime.securesms.util.OkHttpUtil;
import org.thoughtcrime.securesms.util.concurrent.SignalExecutors;
import org.whispersystems.libsignal.InvalidMessageException;
import org.whispersystems.libsignal.util.Pair;
@@ -34,10 +36,11 @@ import org.whispersystems.libsignal.util.guava.Optional;
import org.whispersystems.signalservice.api.SignalServiceMessageReceiver;
import org.whispersystems.signalservice.api.messages.SignalServiceStickerManifest;
import org.whispersystems.signalservice.api.messages.SignalServiceStickerManifest.StickerInfo;
import org.whispersystems.signalservice.api.util.OptionalUtil;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.concurrent.CancellationException;
import java.io.InputStream;
import java.util.concurrent.ExecutionException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -54,20 +57,22 @@ public class LinkPreviewRepository {
private static final CacheControl NO_CACHE = new CacheControl.Builder().noCache().build();
private static final long FAILSAFE_MAX_TEXT_SIZE = ByteUnit.MEGABYTES.toBytes(2);
private static final long FAILSAFE_MAX_IMAGE_SIZE = ByteUnit.MEGABYTES.toBytes(2);
private final OkHttpClient client;
/**
 * Creates the repository together with the HTTP client it uses for its requests.
 *
 * <p>The client is configured with a {@link ContentProxySelector}, a
 * {@link ContentProxySafetyInterceptor} as a network interceptor, no response cache, and a
 * {@link UserAgentInterceptor} constructed with the value "WhatsApp".
 */
public LinkPreviewRepository() {
  OkHttpClient.Builder builder = new OkHttpClient.Builder();

  builder.proxySelector(new ContentProxySelector());
  builder.addNetworkInterceptor(new ContentProxySafetyInterceptor());
  builder.cache(null);
  builder.addInterceptor(new UserAgentInterceptor("WhatsApp"));

  this.client = builder.build();
}
RequestController getLinkPreview(@NonNull Context context, @NonNull String url, @NonNull Callback<Optional<LinkPreview>> callback) {
CompositeRequestController compositeController = new CompositeRequestController();
if (!LinkPreviewUtil.isWhitelistedLinkUrl(url)) {
if (!LinkPreviewUtil.isValidPreviewUrl(url)) {
Log.w(TAG, "Tried to get a link preview for a non-whitelisted domain.");
callback.onComplete(Optional.absent());
return compositeController;
@@ -89,7 +94,7 @@ public class LinkPreviewRepository {
return;
}
RequestController imageController = fetchThumbnail(context, metadata.getImageUrl().get(), attachment -> {
RequestController imageController = fetchThumbnail(metadata.getImageUrl().get(), attachment -> {
if (!metadata.getTitle().isPresent() && !attachment.isPresent()) {
callback.onComplete(Optional.absent());
} else {
@@ -127,11 +132,12 @@ public class LinkPreviewRepository {
return;
}
String body = response.body().string();
Optional<String> title = getProperty(body, "title");
Optional<String> imageUrl = getProperty(body, "image");
String body = OkHttpUtil.readAsString(response.body(), FAILSAFE_MAX_TEXT_SIZE);
OpenGraph openGraph = LinkPreviewUtil.parseOpenGraphFields(body);
Optional<String> title = openGraph.getTitle();
Optional<String> imageUrl = openGraph.getImageUrl();
if (imageUrl.isPresent() && !LinkPreviewUtil.isWhitelistedMediaUrl(imageUrl.get())) {
if (imageUrl.isPresent() && !LinkPreviewUtil.isValidPreviewUrl(imageUrl.get())) {
Log.i(TAG, "Image URL was invalid or for a non-whitelisted domain. Skipping.");
imageUrl = Optional.absent();
}
@@ -143,20 +149,23 @@ public class LinkPreviewRepository {
return new CallRequestController(call);
}
private @NonNull RequestController fetchThumbnail(@NonNull Context context, @NonNull String imageUrl, @NonNull Callback<Optional<Attachment>> callback) {
FutureTarget<Bitmap> bitmapFuture = GlideApp.with(context).asBitmap()
.load(new ChunkedImageUrl(imageUrl))
.skipMemoryCache(true)
.diskCacheStrategy(DiskCacheStrategy.NONE)
.centerInside()
.submit(1024, 1024);
RequestController controller = () -> bitmapFuture.cancel(false);
private @NonNull RequestController fetchThumbnail(@NonNull String imageUrl, @NonNull Callback<Optional<Attachment>> callback) {
Call call = client.newCall(new Request.Builder().url(imageUrl).build());
CallRequestController controller = new CallRequestController(call);
SignalExecutors.UNBOUNDED.execute(() -> {
try {
Bitmap bitmap = bitmapFuture.get();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
Response response = call.execute();
if (!response.isSuccessful() || response.body() == null) {
return;
}
InputStream bodyStream = response.body().byteStream();
controller.setStream(bodyStream);
byte[] data = OkHttpUtil.readAsBytes(bodyStream, FAILSAFE_MAX_IMAGE_SIZE);
Bitmap bitmap = BitmapFactory.decodeByteArray(data, 0, data.length);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
bitmap.compress(Bitmap.CompressFormat.JPEG, 80, baos);
@@ -181,27 +190,14 @@ public class LinkPreviewRepository {
null));
callback.onComplete(thumbnail);
} catch (CancellationException | ExecutionException | InterruptedException e) {
} catch (IOException e) {
Log.w(TAG, "Exception during link preview image retrieval.", e);
controller.cancel();
callback.onComplete(Optional.absent());
} finally {
bitmapFuture.cancel(false);
}
});
return () -> bitmapFuture.cancel(true);
}
private @NonNull Optional<String> getProperty(@NonNull String searchText, @NonNull String property) {
Pattern pattern = Pattern.compile("<\\s*meta\\s+property\\s*=\\s*\"\\s*og:" + property + "\\s*\"\\s+[^>]*content\\s*=\\s*\"(.*?)\"[^>]*/?\\s*>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
Matcher matcher = pattern.matcher(searchText);
if (matcher.find()) {
String text = Html.fromHtml(matcher.group(1)).toString();
return TextUtils.isEmpty(text) ? Optional.absent() : Optional.of(text);
}
return Optional.absent();
return controller;
}
private RequestController fetchStickerPackLinkPreview(@NonNull Context context,

View File

@@ -2,6 +2,9 @@ package org.thoughtcrime.securesms.linkpreview;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import androidx.annotation.VisibleForTesting;
import android.text.Html;
import android.text.SpannableString;
import android.text.TextUtils;
import android.text.style.URLSpan;
@@ -10,9 +13,14 @@ import android.text.util.Linkify;
import com.annimon.stream.Stream;
import org.thoughtcrime.securesms.stickers.StickerUrl;
import org.thoughtcrime.securesms.util.Util;
import org.whispersystems.libsignal.util.guava.Optional;
import org.whispersystems.signalservice.api.util.OptionalUtil;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -20,9 +28,14 @@ import okhttp3.HttpUrl;
public final class LinkPreviewUtil {
private static final Pattern DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$");
private static final Pattern ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$");
private static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$");
private static final Pattern DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$");
private static final Pattern ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$");
private static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$");
// Matches an entire <meta ...> tag that carries a double-quoted property="og:..." attribute.
// Group 1 is the property name following the "og:" prefix. Matching is case-sensitive.
private static final Pattern OPEN_GRAPH_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*og:([^\"]+)\"[^>]*/?\\s*>");
// Finds a double-quoted content="..." attribute; group 1 is the raw (still encoded) value.
private static final Pattern OPEN_GRAPH_CONTENT_PATTERN = Pattern.compile("content\\s*=\\s*\"([^\"]*)\"");
// Matches <title ...>text</title>; group 1 is the text between the tags.
// NOTE(review): compiled without DOTALL, so '.' does not cross line terminators and a title
// spanning several lines will not match; (.*) is greedy, so it extends to the last </title>
// on the same line.
private static final Pattern TITLE_PATTERN = Pattern.compile("<\\s*title[^>]*>(.*)<\\s*/title[^>]*>");
// Matches a <link ...> tag whose double-quoted rel value contains "icon".
// NOTE(review): the ".*" on either side of "icon" is greedy and not limited to the rel value,
// so on a single line the match may run past the intended tag -- confirm this is acceptable.
private static final Pattern FAVICON_PATTERN = Pattern.compile("<\\s*link[^>]*rel\\s*=\\s*\".*icon.*\"[^>]*>");
// Finds a double-quoted href="..." attribute; group 1 is the attribute value.
private static final Pattern FAVICON_HREF_PATTERN = Pattern.compile("href\\s*=\\s*\"([^\"]*)\"");
/**
* @return All whitelisted URLs in the source text.
@@ -37,14 +50,14 @@ public final class LinkPreviewUtil {
return Stream.of(spannable.getSpans(0, spannable.length(), URLSpan.class))
.map(span -> new Link(span.getURL(), spannable.getSpanStart(span)))
.filter(link -> isWhitelistedLinkUrl(link.getUrl()))
.filter(link -> isValidPreviewUrl(link.getUrl()))
.toList();
}
/**
* @return True if the host is present in the link whitelist.
*/
public static boolean isWhitelistedLinkUrl(@Nullable String linkUrl) {
public static boolean isValidPreviewUrl(@Nullable String linkUrl) {
if (linkUrl == null) return false;
if (StickerUrl.isValidShareLink(linkUrl)) return true;
@@ -52,24 +65,9 @@ public final class LinkPreviewUtil {
return url != null &&
!TextUtils.isEmpty(url.scheme()) &&
"https".equals(url.scheme()) &&
LinkPreviewDomains.LINKS.contains(url.host()) &&
isLegalUrl(linkUrl);
}
/**
 * Decides whether a media URL may be fetched for a link preview.
 *
 * @param mediaUrl candidate URL; {@code null} is rejected
 * @return true only when the URL parses, uses the https scheme, has a top private domain that
 *         is contained in {@link LinkPreviewDomains#IMAGES}, and passes
 *         {@link #isLegalUrl(String)}
 */
public static boolean isWhitelistedMediaUrl(@Nullable String mediaUrl) {
  if (mediaUrl == null) {
    return false;
  }

  HttpUrl parsed = HttpUrl.parse(mediaUrl);
  if (parsed == null) {
    return false;
  }

  // Only https media is accepted.
  if (TextUtils.isEmpty(parsed.scheme()) || !"https".equals(parsed.scheme())) {
    return false;
  }

  if (!LinkPreviewDomains.IMAGES.contains(parsed.topPrivateDomain())) {
    return false;
  }

  return isLegalUrl(mediaUrl);
}
public static boolean isLegalUrl(@NonNull String url) {
Matcher matcher = DOMAIN_PATTERN.matcher(url);
@@ -83,4 +81,78 @@ public final class LinkPreviewUtil {
return false;
}
}
/**
 * Parses Open Graph fields out of an HTML document, decoding values with
 * {@link Html#fromHtml(String)}.
 *
 * @param html the page markup, may be {@code null}
 * @return the parsed fields; never {@code null}
 */
public static @NonNull OpenGraph parseOpenGraphFields(@Nullable String html) {
  HtmlDecoder androidDecoder = encoded -> Html.fromHtml(encoded).toString();
  return parseOpenGraphFields(html, androidDecoder);
}
/**
 * Parses Open Graph fields, the HTML title and a favicon URL out of an HTML document.
 *
 * <p>Every {@code <meta property="og:...">} tag with a {@code content} attribute contributes an
 * entry keyed by the property name (without the {@code og:} prefix); when a property occurs
 * more than once the last occurrence wins. The content values and the {@code <title>} text are
 * passed through {@code htmlDecoder} so that HTML entities do not leak into the preview.
 *
 * @param html        the page markup; when {@code null} an empty result is returned
 * @param htmlDecoder converts HTML-encoded text into plain text
 * @return the parsed fields; never {@code null}
 */
@VisibleForTesting
static @NonNull OpenGraph parseOpenGraphFields(@Nullable String html, @NonNull HtmlDecoder htmlDecoder) {
  if (html == null) {
    return new OpenGraph(Collections.emptyMap(), null, null);
  }

  Map<String, String> openGraphTags    = new HashMap<>();
  Matcher             openGraphMatcher = OPEN_GRAPH_TAG_PATTERN.matcher(html);

  while (openGraphMatcher.find()) {
    String tag      = openGraphMatcher.group();
    String property = openGraphMatcher.group(1);

    if (property != null) {
      // The content attribute is searched for inside the matched tag only.
      Matcher contentMatcher = OPEN_GRAPH_CONTENT_PATTERN.matcher(tag);
      if (contentMatcher.find()) {
        openGraphTags.put(property, htmlDecoder.fromEncoded(contentMatcher.group(1)));
      }
    }
  }

  String htmlTitle  = "";
  String faviconUrl = "";

  Matcher titleMatcher = TITLE_PATTERN.matcher(html);
  if (titleMatcher.find()) {
    // Decode the title just like the og: values; otherwise entities such as "&amp;" would be
    // shown verbatim when the title is used as the fallback.
    htmlTitle = htmlDecoder.fromEncoded(titleMatcher.group(1));
  }

  Matcher faviconMatcher = FAVICON_PATTERN.matcher(html);
  if (faviconMatcher.find()) {
    Matcher faviconHrefMatcher = FAVICON_HREF_PATTERN.matcher(faviconMatcher.group());
    if (faviconHrefMatcher.find()) {
      faviconUrl = faviconHrefMatcher.group(1);
    }
  }

  return new OpenGraph(openGraphTags, htmlTitle, faviconUrl);
}
/**
 * Result of parsing a page: the collected {@code og:} properties plus the HTML title and the
 * favicon URL, which act as alternatives for the title and image respectively.
 */
public static final class OpenGraph {

  private static final String KEY_TITLE     = "title";
  private static final String KEY_IMAGE_URL = "image";

  private final Map<String, String> values;
  private final @Nullable String    htmlTitle;
  private final @Nullable String    faviconUrl;

  /**
   * @param values     Open Graph properties keyed by name without the {@code og:} prefix
   * @param htmlTitle  text of the page's title element, if any
   * @param faviconUrl URL of the page's favicon, if any
   */
  public OpenGraph(@NonNull Map<String, String> values, @Nullable String htmlTitle, @Nullable String faviconUrl) {
    this.values     = values;
    this.htmlTitle  = htmlTitle;
    this.faviconUrl = faviconUrl;
  }

  /**
   * @return the title chosen by {@code Util.getFirstNonEmpty} from the "title" property and the
   *         HTML title (in that order), or absent when the chosen value is empty
   */
  public @NonNull Optional<String> getTitle() {
    String openGraphTitle = values.get(KEY_TITLE);
    String chosen         = Util.getFirstNonEmpty(openGraphTitle, htmlTitle);

    return OptionalUtil.absentIfEmpty(chosen);
  }

  /**
   * @return the image URL chosen by {@code Util.getFirstNonEmpty} from the "image" property and
   *         the favicon URL (in that order), or absent when the chosen value is empty
   */
  public @NonNull Optional<String> getImageUrl() {
    String openGraphImage = values.get(KEY_IMAGE_URL);
    String chosen         = Util.getFirstNonEmpty(openGraphImage, faviconUrl);

    return OptionalUtil.absentIfEmpty(chosen);
  }
}
/**
 * Strategy for turning HTML-encoded text into plain text. It is passed to the two-argument
 * {@code parseOpenGraphFields} overload, which is annotated {@code @VisibleForTesting}.
 */
@FunctionalInterface
public interface HtmlDecoder {

  /**
   * @param html HTML-encoded text
   * @return the decoded text
   */
  @NonNull String fromEncoded(@NonNull String html);
}
}