diff --git a/app/src/main/java/org/thoughtcrime/securesms/database/LocalMetricsDatabase.kt b/app/src/main/java/org/thoughtcrime/securesms/database/LocalMetricsDatabase.kt index 79b93b7f14..437771e594 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/database/LocalMetricsDatabase.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/database/LocalMetricsDatabase.kt @@ -153,6 +153,16 @@ class LocalMetricsDatabase private constructor( writableDatabase.delete(TABLE_NAME, null, null) } + fun getOldestMetricTime(eventName: String): Long { + readableDatabase.rawQuery("SELECT $CREATED_AT FROM $TABLE_NAME WHERE $EVENT_NAME = ? ORDER BY $CREATED_AT ASC", SqlUtil.buildArgs(eventName)).use { cursor -> + return if (cursor.moveToFirst()) { + cursor.getLong(0) + } else { + 0 + } + } + } + fun getMetrics(): List { val db = readableDatabase @@ -213,7 +223,7 @@ class LocalMetricsDatabase private constructor( } } - private fun eventPercent(eventName: String, percent: Int): Long { + fun eventPercent(eventName: String, percent: Int): Long { return percentile(EventTotals.VIEW_NAME, "$EVENT_NAME = '$eventName'", percent) } diff --git a/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmFetchForegroundService.kt b/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmFetchForegroundService.kt index 319dfb9f17..5f3bcf3860 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmFetchForegroundService.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmFetchForegroundService.kt @@ -53,7 +53,12 @@ class FcmFetchForegroundService : Service() { private var foregroundServiceState: State = State.STOPPED - fun startServiceIfNecessary(context: Context) { + /** + * Attempts to start the foreground service if it isn't already running. + * + * @return false if we failed to start the foreground service + */ + fun startServiceIfNecessary(context: Context): Boolean { synchronized(this) { when (foregroundServiceState) { State.STOPPING -> foregroundServiceState = State.RESTARTING @@ -64,11 +69,13 @@ class FcmFetchForegroundService : Service() { } catch (e: IllegalStateException) { Log.e(TAG, "Failed to start foreground service", e) State.STOPPED + return false } } else -> Log.i(TAG, "Already started foreground service") } } + return true } fun stopServiceIfNecessary(context: Context) { diff --git a/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmFetchManager.kt b/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmFetchManager.kt index 7129da1c44..30c4fa2676 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmFetchManager.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmFetchManager.kt @@ -51,22 +51,21 @@ object FcmFetchManager { @Volatile private var highPriority = false - /** - * @return True if a service was successfully started, otherwise false. - */ @JvmStatic fun startBackgroundService(context: Context) { Log.i(TAG, "Starting in the background.") context.startService(Intent(context, FcmFetchBackgroundService::class.java)) + SignalLocalMetrics.FcmServiceStartSuccess.onFcmStarted() } - /** - * @return True if a service was successfully started, otherwise false. - */ @JvmStatic fun startForegroundService(context: Context) { Log.i(TAG, "Starting in the foreground.") - FcmFetchForegroundService.startServiceIfNecessary(context) + if (FcmFetchForegroundService.startServiceIfNecessary(context)) { + SignalLocalMetrics.FcmServiceStartSuccess.onFcmStarted() + } else { + SignalLocalMetrics.FcmServiceStartFailure.onFcmFailedToStart() + } } private fun postMayHaveMessagesNotification(context: Context) { diff --git a/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmReceiveService.java b/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmReceiveService.java index 60a28d9f48..6fd49950b9 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmReceiveService.java +++ b/app/src/main/java/org/thoughtcrime/securesms/gcm/FcmReceiveService.java @@ -16,6 +16,7 @@ import org.thoughtcrime.securesms.jobs.SubmitRateLimitPushChallengeJob; import org.thoughtcrime.securesms.keyvalue.SignalStore; import org.thoughtcrime.securesms.registration.PushChallengeRequest; import org.thoughtcrime.securesms.util.NetworkUtil; +import org.thoughtcrime.securesms.util.SignalLocalMetrics; import java.util.Locale; @@ -85,6 +86,7 @@ public class FcmReceiveService extends FirebaseMessagingService { } } catch (Exception e) { Log.w(TAG, "Failed to start service.", e); + SignalLocalMetrics.FcmServiceStartFailure.onFcmFailedToStart(); } FcmFetchManager.enqueueFetch(context, highPriority); diff --git a/app/src/main/java/org/thoughtcrime/securesms/messages/IncomingMessageObserver.kt b/app/src/main/java/org/thoughtcrime/securesms/messages/IncomingMessageObserver.kt index 3dbd0cc6b1..002d54b452 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/messages/IncomingMessageObserver.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/messages/IncomingMessageObserver.kt @@ -298,6 +298,8 @@ class IncomingMessageObserver(private val context: Application) { val localReceiveMetric = SignalLocalMetrics.MessageReceive.start() val result = MessageDecryptor.decrypt(context, bufferedProtocolStore, envelope, serverDeliveredTimestamp) localReceiveMetric.onEnvelopeDecrypted() + + SignalLocalMetrics.MessageLatency.onMessageReceived(envelope.serverTimestamp, serverDeliveredTimestamp) when (result) { is MessageDecryptor.Result.Success -> { val job = PushProcessMessageJobV2.processOrDefer(messageContentProcessor, result, localReceiveMetric) diff --git a/app/src/main/java/org/thoughtcrime/securesms/notifications/SlowNotificationHeuristics.kt b/app/src/main/java/org/thoughtcrime/securesms/notifications/SlowNotificationHeuristics.kt new file mode 100644 index 0000000000..062d181575 --- /dev/null +++ b/app/src/main/java/org/thoughtcrime/securesms/notifications/SlowNotificationHeuristics.kt @@ -0,0 +1,112 @@ +/* + * Copyright 2023 Signal Messenger, LLC + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package org.thoughtcrime.securesms.notifications + +import org.signal.core.util.logging.Log +import org.thoughtcrime.securesms.database.LocalMetricsDatabase +import org.thoughtcrime.securesms.dependencies.ApplicationDependencies +import org.thoughtcrime.securesms.util.SignalLocalMetrics + +/** + * Heuristic for estimating if a user has been experiencing issues with delayed notifications. + * + * This uses local metrics based off of message latency, failed service starts, and queue drain + * timeouts. + * + * This will enable us to attempt to improve notifications for users who are experiencing these issues. + */ +object SlowNotificationHeuristics { + + private val TAG = Log.tag(SlowNotificationHeuristics::class.java) + + fun isHavingDelayedNotifications(configuration: Configuration): Boolean { + val db = LocalMetricsDatabase.getInstance(ApplicationDependencies.getApplication()) + + val metrics = db.getMetrics() + + val failedServiceStarts = hasRepeatedFailedServiceStarts(metrics, configuration.minimumEventAgeMs, configuration.minimumServiceEventCount, configuration.serviceStartFailurePercentage) + val failedQueueDrains = isFailingToDrainQueue(metrics, configuration.minimumEventAgeMs, configuration.weeklyFailedQueueDrains) + val longMessageLatency = hasLongMessageLatency(metrics, configuration.minimumEventAgeMs, configuration.messageLatencyPercentage, configuration.minimumMessageLatencyEvents, configuration.messageLatencyThreshold) + + if (failedServiceStarts || failedQueueDrains || longMessageLatency) { + Log.w(TAG, "User seems to be having delayed notifications: failed-service-starts=$failedServiceStarts failedQueueDrains=$failedQueueDrains longMessageLatency=$longMessageLatency") + return true + } + return false + } + + private fun hasRepeatedFailedServiceStarts(metrics: List, minimumEventAgeMs: Long, minimumEventCount: Int, failurePercentage: Float): Boolean { + if (!haveEnoughData(SignalLocalMetrics.FcmServiceStartSuccess.NAME, minimumEventAgeMs) && !haveEnoughData(SignalLocalMetrics.FcmServiceStartFailure.NAME, minimumEventAgeMs)) { + Log.d(TAG, "insufficient data for service starts") + return false + } + + val successes = metrics.filter { it.name == SignalLocalMetrics.FcmServiceStartSuccess.NAME } + val failures = metrics.filter { it.name == SignalLocalMetrics.FcmServiceStartFailure.NAME } + + if ((successes.size + failures.size) < minimumEventCount) { + Log.d(TAG, "insufficient service start events") + return false + } + + if (failures.size / (failures.size + successes.size) >= failurePercentage) { + Log.w(TAG, "User often unable start FCM service. ${failures.size} failed : ${successes.size} successful") + return true + } + return false + } + + private fun isFailingToDrainQueue(metrics: List, minimumEventAgeMs: Long, failureThreshold: Int): Boolean { + if (!haveEnoughData(SignalLocalMetrics.PushWebsocketFetch.SUCCESS_EVENT, minimumEventAgeMs) && !haveEnoughData(SignalLocalMetrics.PushWebsocketFetch.TIMEOUT_EVENT, minimumEventAgeMs)) { + Log.d(TAG, "insufficient data for failed queue drains") + return false + } + val failures = metrics.filter { it.name == SignalLocalMetrics.PushWebsocketFetch.TIMEOUT_EVENT } + if (failures.size < failureThreshold) { + return false + } + Log.w(TAG, "User has repeatedly failed to drain queue ${failures.size} events") + return true + } + + private fun hasLongMessageLatency(metrics: List, minimumEventAgeMs: Long, percentage: Int, messageThreshold: Int, durationThreshold: Long): Boolean { + if (!haveEnoughData(SignalLocalMetrics.MessageLatency.NAME, minimumEventAgeMs)) { + Log.d(TAG, "insufficient data for message latency") + return false + } + val eventCount = metrics.count { it.name == SignalLocalMetrics.MessageLatency.NAME } + if (eventCount < messageThreshold) { + Log.d(TAG, "not enough messages for message latency") + return false + } + val db = LocalMetricsDatabase.getInstance(ApplicationDependencies.getApplication()) + val averageLatency = db.eventPercent(SignalLocalMetrics.MessageLatency.NAME, percentage.coerceAtMost(100).coerceAtLeast(0)) + + val longMessageLatency = averageLatency > durationThreshold + if (longMessageLatency) { + Log.w(TAG, "User has high average message latency of $averageLatency ms over $eventCount events") + } + return longMessageLatency + } + + private fun haveEnoughData(eventName: String, minimumEventAgeMs: Long): Boolean { + val db = LocalMetricsDatabase.getInstance(ApplicationDependencies.getApplication()) + + val oldestEvent = db.getOldestMetricTime(eventName) + + return !(oldestEvent == 0L || oldestEvent > System.currentTimeMillis() - minimumEventAgeMs) + } +} + +data class Configuration( + val minimumEventAgeMs: Long, + val minimumServiceEventCount: Int, + val serviceStartFailurePercentage: Float, + val weeklyFailedQueueDrains: Int, + val minimumMessageLatencyEvents: Int, + val messageLatencyThreshold: Long, + val messageLatencyPercentage: Int +) diff --git a/app/src/main/java/org/thoughtcrime/securesms/util/LocalMetrics.kt b/app/src/main/java/org/thoughtcrime/securesms/util/LocalMetrics.kt index edff293fef..e8aa12119e 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/util/LocalMetrics.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/util/LocalMetrics.kt @@ -87,14 +87,15 @@ object LocalMetrics { * * If an event with the provided ID does not exist, this is effectively a no-op. */ - fun splitWithDuration(id: String, split: String, duration: Long) { - val time = System.currentTimeMillis() + @JvmOverloads + fun splitWithDuration(id: String, split: String, duration: Long, timeunit: TimeUnit = TimeUnit.MILLISECONDS) { + val time = SystemClock.elapsedRealtimeNanos() executor.execute { val lastTime: Long? = lastSplitTimeById[id] val splitDoesNotExist: Boolean = eventsById[id]?.splits?.none { it.name == split } ?: true if (lastTime != null && splitDoesNotExist) { - eventsById[id]?.splits?.add(LocalMetricsSplit(split, duration)) + eventsById[id]?.splits?.add(LocalMetricsSplit(split, TimeUnit.NANOSECONDS.convert(duration, timeunit))) lastSplitTimeById[id] = time } } diff --git a/app/src/main/java/org/thoughtcrime/securesms/util/SignalLocalMetrics.java b/app/src/main/java/org/thoughtcrime/securesms/util/SignalLocalMetrics.java index 49f1267eec..c30a59ff93 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/util/SignalLocalMetrics.java +++ b/app/src/main/java/org/thoughtcrime/securesms/util/SignalLocalMetrics.java @@ -203,9 +203,51 @@ public final class SignalLocalMetrics { } } + public static final class MessageLatency { + public static final String NAME = "message-latency"; + + private static final String SPLIT_LATENCY = "latency"; + + public static void onMessageReceived(long serverReceiveTimestamp, long serverDeliverTimestamp) { + long latency = serverDeliverTimestamp - serverReceiveTimestamp; + + String id = NAME + System.currentTimeMillis(); + LocalMetrics.getInstance().start(id, NAME); + LocalMetrics.getInstance().splitWithDuration(id, SPLIT_LATENCY, latency); + LocalMetrics.getInstance().end(id); + } + } + + public static final class FcmServiceStartFailure { + public static final String NAME = "fcm-service-start-failure"; + + private static final String SPLIT_EVENT = "event"; + + public static void onFcmFailedToStart() { + String id = NAME + System.currentTimeMillis(); + LocalMetrics.getInstance().start(id, NAME); + LocalMetrics.getInstance().splitWithDuration(id, SPLIT_EVENT, 1); + LocalMetrics.getInstance().end(id); + } + + } + + public static final class FcmServiceStartSuccess { + public static final String NAME = "fcm-service-start-success"; + + private static final String SPLIT_EVENT = "event"; + + public static void onFcmStarted() { + String id = NAME + System.currentTimeMillis(); + LocalMetrics.getInstance().start(id, NAME); + LocalMetrics.getInstance().splitWithDuration(id, SPLIT_EVENT, 1); + LocalMetrics.getInstance().end(id); + } + + } public static final class PushWebsocketFetch { - private static final String SUCCESS_EVENT = "push-websocket-fetch"; - private static final String TIMEOUT_EVENT = "timed-out-fetch"; + public static final String SUCCESS_EVENT = "push-websocket-fetch"; + public static final String TIMEOUT_EVENT = "timed-out-fetch"; private static final String SPLIT_BATCH_PROCESSED = "batches-processed"; private static final String SPLIT_PROCESS_TIME = "fetch-time";