mirror of
https://github.com/signalapp/Signal-Server
synced 2026-02-27 07:43:19 +00:00
Add retry after exceptions during a cluster topology change event callback
This commit is contained in:
@@ -515,8 +515,8 @@ public class WhisperServerService extends Application<WhisperServerConfiguration
|
||||
StoredVerificationCodeManager pendingAccountsManager = new StoredVerificationCodeManager(pendingAccounts);
|
||||
StoredVerificationCodeManager pendingDevicesManager = new StoredVerificationCodeManager(pendingDevices);
|
||||
ProfilesManager profilesManager = new ProfilesManager(profiles, cacheCluster);
|
||||
MessagesCache messagesCache = new MessagesCache(messagesCluster, messagesCluster, Clock.systemUTC(),
|
||||
keyspaceNotificationDispatchExecutor, messageDeliveryScheduler, messageDeletionAsyncExecutor);
|
||||
MessagesCache messagesCache = new MessagesCache(messagesCluster, messagesCluster,
|
||||
keyspaceNotificationDispatchExecutor, messageDeliveryScheduler, messageDeletionAsyncExecutor, clock);
|
||||
PushLatencyManager pushLatencyManager = new PushLatencyManager(metricsCluster, dynamicConfigurationManager);
|
||||
ReportMessageManager reportMessageManager = new ReportMessageManager(reportMessageDynamoDb, rateLimitersCluster,
|
||||
config.getReportMessageConfiguration().getCounterTtl());
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2013-2020 Signal Messenger, LLC
|
||||
* Copyright 2013 Signal Messenger, LLC
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
@@ -18,7 +18,6 @@ import io.lettuce.core.RedisFuture;
|
||||
import io.lettuce.core.ScriptOutputType;
|
||||
import io.lettuce.core.cluster.SlotHash;
|
||||
import io.lettuce.core.cluster.api.sync.RedisAdvancedClusterCommands;
|
||||
import io.lettuce.core.cluster.event.ClusterTopologyChangedEvent;
|
||||
import io.lettuce.core.cluster.models.partitions.RedisClusterNode;
|
||||
import io.lettuce.core.cluster.pubsub.RedisClusterPubSubAdapter;
|
||||
import java.io.IOException;
|
||||
@@ -86,8 +85,10 @@ public class ClientPresenceManager extends RedisClusterPubSubAdapter<String, Str
|
||||
final ExecutorService keyspaceNotificationExecutorService) throws IOException {
|
||||
this.presenceCluster = presenceCluster;
|
||||
this.pubSubConnection = this.presenceCluster.createPubSubConnection();
|
||||
this.clearPresenceScript = ClusterLuaScript.fromResource(presenceCluster, "lua/clear_presence.lua", ScriptOutputType.INTEGER);
|
||||
this.renewPresenceScript = ClusterLuaScript.fromResource(presenceCluster, "lua/renew_presence.lua", ScriptOutputType.VALUE);
|
||||
this.clearPresenceScript = ClusterLuaScript.fromResource(presenceCluster, "lua/clear_presence.lua",
|
||||
ScriptOutputType.INTEGER);
|
||||
this.renewPresenceScript = ClusterLuaScript.fromResource(presenceCluster, "lua/renew_presence.lua",
|
||||
ScriptOutputType.VALUE);
|
||||
this.scheduledExecutorService = scheduledExecutorService;
|
||||
this.keyspaceNotificationExecutorService = keyspaceNotificationExecutorService;
|
||||
|
||||
@@ -112,9 +113,6 @@ public class ClientPresenceManager extends RedisClusterPubSubAdapter<String, Str
|
||||
public void start() {
|
||||
pubSubConnection.usePubSubConnection(connection -> {
|
||||
connection.addListener(this);
|
||||
connection.getResources().eventBus().get()
|
||||
.filter(event -> event instanceof ClusterTopologyChangedEvent)
|
||||
.subscribe(event -> resubscribeAll());
|
||||
|
||||
final String presenceChannel = getManagerPresenceChannel(managerId);
|
||||
final int slot = SlotHash.getSlot(presenceChannel);
|
||||
@@ -124,6 +122,8 @@ public class ClientPresenceManager extends RedisClusterPubSubAdapter<String, Str
|
||||
.subscribe(presenceChannel);
|
||||
});
|
||||
|
||||
pubSubConnection.subscribeToClusterTopologyChangedEvents(this::resubscribeAll);
|
||||
|
||||
presenceCluster.useCluster(connection -> connection.sync().sadd(MANAGER_SET_KEY, managerId));
|
||||
|
||||
pruneMissingPeersFuture = scheduledExecutorService.scheduleWithFixedDelay(() -> {
|
||||
|
||||
@@ -10,28 +10,41 @@ import static org.whispersystems.textsecuregcm.metrics.MetricsUtil.name;
|
||||
import io.github.resilience4j.circuitbreaker.CircuitBreaker;
|
||||
import io.github.resilience4j.retry.Retry;
|
||||
import io.lettuce.core.RedisException;
|
||||
import io.lettuce.core.cluster.event.ClusterTopologyChangedEvent;
|
||||
import io.lettuce.core.cluster.pubsub.StatefulRedisClusterPubSubConnection;
|
||||
import io.micrometer.core.instrument.Metrics;
|
||||
import io.micrometer.core.instrument.Timer;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.whispersystems.textsecuregcm.util.CircuitBreakerUtil;
|
||||
import reactor.core.scheduler.Scheduler;
|
||||
|
||||
public class FaultTolerantPubSubConnection<K, V> {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(FaultTolerantPubSubConnection.class);
|
||||
|
||||
|
||||
private final String name;
|
||||
private final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection;
|
||||
|
||||
private final CircuitBreaker circuitBreaker;
|
||||
private final Retry retry;
|
||||
private final Retry resubscribeRetry;
|
||||
private final Scheduler topologyChangedEventScheduler;
|
||||
|
||||
private final Timer executeTimer;
|
||||
|
||||
public FaultTolerantPubSubConnection(final String name,
|
||||
final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection, final CircuitBreaker circuitBreaker,
|
||||
final Retry retry) {
|
||||
final Retry retry, final Retry resubscribeRetry, final Scheduler topologyChangedEventScheduler) {
|
||||
this.name = name;
|
||||
this.pubSubConnection = pubSubConnection;
|
||||
this.circuitBreaker = circuitBreaker;
|
||||
this.retry = retry;
|
||||
this.resubscribeRetry = resubscribeRetry;
|
||||
this.topologyChangedEventScheduler = topologyChangedEventScheduler;
|
||||
|
||||
this.pubSubConnection.setNodeMessagePropagation(true);
|
||||
|
||||
@@ -58,11 +71,32 @@ public class FaultTolerantPubSubConnection<K, V> {
|
||||
return circuitBreaker.executeCheckedSupplier(
|
||||
() -> retry.executeCallable(() -> executeTimer.record(() -> function.apply(pubSubConnection))));
|
||||
} catch (final Throwable t) {
|
||||
if (t instanceof RedisException) {
|
||||
throw (RedisException) t;
|
||||
} else {
|
||||
throw new RedisException(t);
|
||||
}
|
||||
if (t instanceof RedisException) {
|
||||
throw (RedisException) t;
|
||||
} else {
|
||||
throw new RedisException(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void subscribeToClusterTopologyChangedEvents(final Runnable eventHandler) {
|
||||
|
||||
usePubSubConnection(connection -> connection.getResources().eventBus().get()
|
||||
.filter(event -> event instanceof ClusterTopologyChangedEvent)
|
||||
.subscribeOn(topologyChangedEventScheduler)
|
||||
.subscribe(event -> {
|
||||
logger.info("Got topology change event for {}, resubscribing all keyspace notifications", name);
|
||||
|
||||
resubscribeRetry.executeRunnable(() -> {
|
||||
try {
|
||||
eventHandler.run();
|
||||
} catch (final RuntimeException e) {
|
||||
logger.warn("Resubscribe for {} failed", name, e);
|
||||
throw e;
|
||||
}
|
||||
});
|
||||
}));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -7,9 +7,11 @@ package org.whispersystems.textsecuregcm.redis;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import io.github.resilience4j.circuitbreaker.CircuitBreaker;
|
||||
import io.github.resilience4j.core.IntervalFunction;
|
||||
import io.github.resilience4j.reactor.circuitbreaker.operator.CircuitBreakerOperator;
|
||||
import io.github.resilience4j.reactor.retry.RetryOperator;
|
||||
import io.github.resilience4j.retry.Retry;
|
||||
import io.github.resilience4j.retry.RetryConfig;
|
||||
import io.lettuce.core.ClientOptions.DisconnectedBehavior;
|
||||
import io.lettuce.core.RedisCommandTimeoutException;
|
||||
import io.lettuce.core.RedisException;
|
||||
@@ -31,6 +33,7 @@ import org.whispersystems.textsecuregcm.configuration.RedisClusterConfiguration;
|
||||
import org.whispersystems.textsecuregcm.configuration.RetryConfiguration;
|
||||
import org.whispersystems.textsecuregcm.util.CircuitBreakerUtil;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.scheduler.Schedulers;
|
||||
|
||||
/**
|
||||
* A fault-tolerant access manager for a Redis cluster. A fault-tolerant Redis cluster provides managed,
|
||||
@@ -38,51 +41,61 @@ import reactor.core.publisher.Flux;
|
||||
*/
|
||||
public class FaultTolerantRedisCluster {
|
||||
|
||||
private final String name;
|
||||
private final String name;
|
||||
|
||||
private final RedisClusterClient clusterClient;
|
||||
private final RedisClusterClient clusterClient;
|
||||
|
||||
private final StatefulRedisClusterConnection<String, String> stringConnection;
|
||||
private final StatefulRedisClusterConnection<byte[], byte[]> binaryConnection;
|
||||
private final StatefulRedisClusterConnection<String, String> stringConnection;
|
||||
private final StatefulRedisClusterConnection<byte[], byte[]> binaryConnection;
|
||||
|
||||
private final List<StatefulRedisClusterPubSubConnection<?, ?>> pubSubConnections = new ArrayList<>();
|
||||
private final List<StatefulRedisClusterPubSubConnection<?, ?>> pubSubConnections = new ArrayList<>();
|
||||
|
||||
private final CircuitBreaker circuitBreaker;
|
||||
private final Retry retry;
|
||||
private final CircuitBreaker circuitBreaker;
|
||||
private final Retry retry;
|
||||
private final Retry topologyChangedEventRetry;
|
||||
|
||||
public FaultTolerantRedisCluster(final String name, final RedisClusterConfiguration clusterConfiguration, final ClientResources clientResources) {
|
||||
this(name,
|
||||
RedisClusterClient.create(clientResources, clusterConfiguration.getConfigurationUri()),
|
||||
clusterConfiguration.getTimeout(),
|
||||
clusterConfiguration.getCircuitBreakerConfiguration(),
|
||||
clusterConfiguration.getRetryConfiguration());
|
||||
}
|
||||
public FaultTolerantRedisCluster(final String name, final RedisClusterConfiguration clusterConfiguration,
|
||||
final ClientResources clientResources) {
|
||||
this(name,
|
||||
RedisClusterClient.create(clientResources, clusterConfiguration.getConfigurationUri()),
|
||||
clusterConfiguration.getTimeout(),
|
||||
clusterConfiguration.getCircuitBreakerConfiguration(),
|
||||
clusterConfiguration.getRetryConfiguration());
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
FaultTolerantRedisCluster(final String name, final RedisClusterClient clusterClient, final Duration commandTimeout, final CircuitBreakerConfiguration circuitBreakerConfiguration, final RetryConfiguration retryConfiguration) {
|
||||
this.name = name;
|
||||
@VisibleForTesting
|
||||
FaultTolerantRedisCluster(final String name, final RedisClusterClient clusterClient, final Duration commandTimeout,
|
||||
final CircuitBreakerConfiguration circuitBreakerConfiguration, final RetryConfiguration retryConfiguration) {
|
||||
this.name = name;
|
||||
|
||||
this.clusterClient = clusterClient;
|
||||
this.clusterClient.setDefaultTimeout(commandTimeout);
|
||||
this.clusterClient.setOptions(ClusterClientOptions.builder()
|
||||
.disconnectedBehavior(DisconnectedBehavior.REJECT_COMMANDS)
|
||||
.validateClusterNodeMembership(false)
|
||||
.topologyRefreshOptions(ClusterTopologyRefreshOptions.builder()
|
||||
.enableAllAdaptiveRefreshTriggers()
|
||||
.build())
|
||||
.publishOnScheduler(true)
|
||||
.build());
|
||||
this.clusterClient = clusterClient;
|
||||
this.clusterClient.setDefaultTimeout(commandTimeout);
|
||||
this.clusterClient.setOptions(ClusterClientOptions.builder()
|
||||
.disconnectedBehavior(DisconnectedBehavior.REJECT_COMMANDS)
|
||||
.validateClusterNodeMembership(false)
|
||||
.topologyRefreshOptions(ClusterTopologyRefreshOptions.builder()
|
||||
.enableAllAdaptiveRefreshTriggers()
|
||||
.build())
|
||||
.publishOnScheduler(true)
|
||||
.build());
|
||||
|
||||
this.stringConnection = clusterClient.connect();
|
||||
this.binaryConnection = clusterClient.connect(ByteArrayCodec.INSTANCE);
|
||||
this.stringConnection = clusterClient.connect();
|
||||
this.binaryConnection = clusterClient.connect(ByteArrayCodec.INSTANCE);
|
||||
|
||||
this.circuitBreaker = CircuitBreaker.of(name + "-breaker", circuitBreakerConfiguration.toCircuitBreakerConfig());
|
||||
this.retry = Retry.of(name + "-retry", retryConfiguration.toRetryConfigBuilder()
|
||||
.retryOnException(exception -> exception instanceof RedisCommandTimeoutException).build());
|
||||
this.circuitBreaker = CircuitBreaker.of(name + "-breaker", circuitBreakerConfiguration.toCircuitBreakerConfig());
|
||||
this.retry = Retry.of(name + "-retry", retryConfiguration.toRetryConfigBuilder()
|
||||
.retryOnException(exception -> exception instanceof RedisCommandTimeoutException).build());
|
||||
final RetryConfig topologyChangedEventRetryConfig = RetryConfig.custom()
|
||||
.maxAttempts(Integer.MAX_VALUE)
|
||||
.intervalFunction(
|
||||
IntervalFunction.ofExponentialRandomBackoff(Duration.ofSeconds(1), 1.5, Duration.ofSeconds(30)))
|
||||
.build();
|
||||
|
||||
this.topologyChangedEventRetry = Retry.of(name + "-topologyChangedRetry", topologyChangedEventRetryConfig);
|
||||
|
||||
CircuitBreakerUtil.registerMetrics(circuitBreaker, FaultTolerantRedisCluster.class);
|
||||
CircuitBreakerUtil.registerMetrics(retry, FaultTolerantRedisCluster.class);
|
||||
}
|
||||
}
|
||||
|
||||
void shutdown() {
|
||||
stringConnection.close();
|
||||
@@ -158,6 +171,7 @@ public class FaultTolerantRedisCluster {
|
||||
final StatefulRedisClusterPubSubConnection<String, String> pubSubConnection = clusterClient.connectPubSub();
|
||||
pubSubConnections.add(pubSubConnection);
|
||||
|
||||
return new FaultTolerantPubSubConnection<>(name, pubSubConnection, circuitBreaker, retry);
|
||||
return new FaultTolerantPubSubConnection<>(name, pubSubConnection, circuitBreaker, retry, topologyChangedEventRetry,
|
||||
Schedulers.newSingle(name + "-redisPubSubEvents"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,6 @@ import io.lettuce.core.ScoredValue;
|
||||
import io.lettuce.core.ScriptOutputType;
|
||||
import io.lettuce.core.ZAddArgs;
|
||||
import io.lettuce.core.cluster.SlotHash;
|
||||
import io.lettuce.core.cluster.event.ClusterTopologyChangedEvent;
|
||||
import io.lettuce.core.cluster.models.partitions.RedisClusterNode;
|
||||
import io.lettuce.core.cluster.pubsub.RedisClusterPubSubAdapter;
|
||||
import io.micrometer.core.instrument.Counter;
|
||||
@@ -104,8 +103,8 @@ public class MessagesCache extends RedisClusterPubSubAdapter<String, String> imp
|
||||
private static final Logger logger = LoggerFactory.getLogger(MessagesCache.class);
|
||||
|
||||
public MessagesCache(final FaultTolerantRedisCluster insertCluster, final FaultTolerantRedisCluster readDeleteCluster,
|
||||
final Clock clock, final ExecutorService notificationExecutorService, final Scheduler messageDeliveryScheduler,
|
||||
final ExecutorService messageDeletionExecutorService) throws IOException {
|
||||
final ExecutorService notificationExecutorService, final Scheduler messageDeliveryScheduler,
|
||||
final ExecutorService messageDeletionExecutorService, final Clock clock) throws IOException {
|
||||
|
||||
this.readDeleteCluster = readDeleteCluster;
|
||||
this.pubSubConnection = readDeleteCluster.createPubSubConnection();
|
||||
@@ -128,12 +127,8 @@ public class MessagesCache extends RedisClusterPubSubAdapter<String, String> imp
|
||||
|
||||
@Override
|
||||
public void start() {
|
||||
pubSubConnection.usePubSubConnection(connection -> {
|
||||
connection.addListener(this);
|
||||
connection.getResources().eventBus().get()
|
||||
.filter(event -> event instanceof ClusterTopologyChangedEvent)
|
||||
.subscribe(event -> resubscribeAll());
|
||||
});
|
||||
pubSubConnection.usePubSubConnection(connection -> connection.addListener(this));
|
||||
pubSubConnection.subscribeToClusterTopologyChangedEvents(this::resubscribeAll);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -142,7 +137,6 @@ public class MessagesCache extends RedisClusterPubSubAdapter<String, String> imp
|
||||
}
|
||||
|
||||
private void resubscribeAll() {
|
||||
logger.info("Got topology change event, resubscribing all keyspace notifications");
|
||||
|
||||
final Set<String> queueNames;
|
||||
|
||||
|
||||
@@ -201,7 +201,7 @@ public class AssignUsernameCommand extends EnvironmentCommand<WhisperServerConfi
|
||||
ClientPresenceManager clientPresenceManager = new ClientPresenceManager(clientPresenceCluster,
|
||||
Executors.newSingleThreadScheduledExecutor(), keyspaceNotificationDispatchExecutor);
|
||||
MessagesCache messagesCache = new MessagesCache(messageInsertCacheCluster, messageReadDeleteCluster,
|
||||
Clock.systemUTC(), keyspaceNotificationDispatchExecutor, messageDeliveryScheduler, messageDeletionExecutor);
|
||||
keyspaceNotificationDispatchExecutor, messageDeliveryScheduler, messageDeletionExecutor, Clock.systemUTC());
|
||||
DirectoryQueue directoryQueue = new DirectoryQueue(
|
||||
configuration.getDirectoryConfiguration().getSqsConfiguration());
|
||||
ProfilesManager profilesManager = new ProfilesManager(profiles, cacheCluster);
|
||||
|
||||
@@ -180,7 +180,7 @@ record CommandDependencies(
|
||||
ClientPresenceManager clientPresenceManager = new ClientPresenceManager(clientPresenceCluster,
|
||||
Executors.newSingleThreadScheduledExecutor(), keyspaceNotificationDispatchExecutor);
|
||||
MessagesCache messagesCache = new MessagesCache(messageInsertCacheCluster, messageReadDeleteCluster,
|
||||
Clock.systemUTC(), keyspaceNotificationDispatchExecutor, messageDeliveryScheduler, messageDeletionExecutor);
|
||||
keyspaceNotificationDispatchExecutor, messageDeliveryScheduler, messageDeletionExecutor, Clock.systemUTC());
|
||||
DirectoryQueue directoryQueue = new DirectoryQueue(
|
||||
configuration.getDirectoryConfiguration().getSqsConfiguration());
|
||||
ProfilesManager profilesManager = new ProfilesManager(profiles, cacheCluster);
|
||||
|
||||
Reference in New Issue
Block a user