Add per-shard Redis circuit breakers

This commit is contained in:
Chris Eager
2024-04-12 11:22:41 -05:00
committed by GitHub
parent 05a92494bb
commit 2dc707d86e
22 changed files with 1677 additions and 313 deletions

View File

@@ -176,8 +176,10 @@ import org.whispersystems.textsecuregcm.push.ProvisioningManager;
import org.whispersystems.textsecuregcm.push.PushLatencyManager;
import org.whispersystems.textsecuregcm.push.PushNotificationManager;
import org.whispersystems.textsecuregcm.push.ReceiptSender;
import org.whispersystems.textsecuregcm.redis.ClusterFaultTolerantRedisCluster;
import org.whispersystems.textsecuregcm.redis.ConnectionEventLogger;
import org.whispersystems.textsecuregcm.redis.FaultTolerantRedisCluster;
import org.whispersystems.textsecuregcm.redis.ShardFaultTolerantRedisCluster;
import org.whispersystems.textsecuregcm.registration.RegistrationServiceClient;
import org.whispersystems.textsecuregcm.s3.PolicySigner;
import org.whispersystems.textsecuregcm.s3.PostPolicyGenerator;
@@ -412,18 +414,25 @@ public class WhisperServerService extends Application<WhisperServerConfiguration
final VerificationSessions verificationSessions = new VerificationSessions(dynamoDbAsyncClient,
config.getDynamoDbTables().getVerificationSessions().getTableName(), clock);
final ClientResources redisClientResources = ClientResources.builder()
.commandLatencyRecorder(new MicrometerCommandLatencyRecorder(Metrics.globalRegistry, MicrometerOptions.builder().build()))
.build();
final ClientResources.Builder redisClientResourcesBuilder = ClientResources.builder()
.commandLatencyRecorder(
new MicrometerCommandLatencyRecorder(Metrics.globalRegistry, MicrometerOptions.builder().build()));
final ClientResources redisClientResources = redisClientResourcesBuilder.build();
ConnectionEventLogger.logConnectionEvents(redisClientResources);
FaultTolerantRedisCluster cacheCluster = new FaultTolerantRedisCluster("main_cache_cluster", config.getCacheClusterConfiguration(), redisClientResources);
FaultTolerantRedisCluster messagesCluster = new FaultTolerantRedisCluster("messages_cluster", config.getMessageCacheConfiguration().getRedisClusterConfiguration(), redisClientResources);
FaultTolerantRedisCluster clientPresenceCluster = new FaultTolerantRedisCluster("client_presence_cluster", config.getClientPresenceClusterConfiguration(), redisClientResources);
FaultTolerantRedisCluster metricsCluster = new FaultTolerantRedisCluster("metrics_cluster", config.getMetricsClusterConfiguration(), redisClientResources);
FaultTolerantRedisCluster pushSchedulerCluster = new FaultTolerantRedisCluster("push_scheduler", config.getPushSchedulerCluster(), redisClientResources);
FaultTolerantRedisCluster rateLimitersCluster = new FaultTolerantRedisCluster("rate_limiters", config.getRateLimitersCluster(), redisClientResources);
FaultTolerantRedisCluster cacheCluster = new ClusterFaultTolerantRedisCluster("main_cache_cluster",
config.getCacheClusterConfiguration(), redisClientResources);
FaultTolerantRedisCluster messagesCluster = new ClusterFaultTolerantRedisCluster("messages_cluster",
config.getMessageCacheConfiguration().getRedisClusterConfiguration(), redisClientResources);
FaultTolerantRedisCluster clientPresenceCluster = new ClusterFaultTolerantRedisCluster("client_presence_cluster",
config.getClientPresenceClusterConfiguration(), redisClientResources);
FaultTolerantRedisCluster metricsCluster = new ShardFaultTolerantRedisCluster("metrics",
config.getMetricsClusterConfiguration(), redisClientResourcesBuilder);
FaultTolerantRedisCluster pushSchedulerCluster = new ClusterFaultTolerantRedisCluster("push_scheduler",
config.getPushSchedulerCluster(), redisClientResources);
FaultTolerantRedisCluster rateLimitersCluster = new ClusterFaultTolerantRedisCluster("rate_limiters",
config.getRateLimitersCluster(), redisClientResources);
final BlockingQueue<Runnable> keyspaceNotificationDispatchQueue = new ArrayBlockingQueue<>(100_000);
Metrics.gaugeCollectionSize(name(getClass(), "keyspaceNotificationDispatchQueueSize"), Collections.emptyList(),

View File

@@ -41,8 +41,7 @@ public class CircuitBreakerConfiguration {
@JsonProperty
@NotNull
@Min(1)
private long waitDurationInOpenStateInSeconds = 10;
private Duration waitDurationInOpenState = Duration.ofSeconds(10);
@JsonProperty
private List<String> ignoredExceptions = Collections.emptyList();
@@ -64,8 +63,8 @@ public class CircuitBreakerConfiguration {
return slidingWindowMinimumNumberOfCalls;
}
public long getWaitDurationInOpenStateInSeconds() {
return waitDurationInOpenStateInSeconds;
public Duration getWaitDurationInOpenState() {
return waitDurationInOpenState;
}
public List<Class<?>> getIgnoredExceptions() {
@@ -101,8 +100,8 @@ public class CircuitBreakerConfiguration {
}
@VisibleForTesting
public void setWaitDurationInOpenStateInSeconds(int seconds) {
this.waitDurationInOpenStateInSeconds = seconds;
public void setWaitDurationInOpenState(Duration duration) {
this.waitDurationInOpenState = duration;
}
@VisibleForTesting
@@ -115,7 +114,7 @@ public class CircuitBreakerConfiguration {
.failureRateThreshold(getFailureRateThreshold())
.ignoreExceptions(getIgnoredExceptions().toArray(new Class[0]))
.permittedNumberOfCallsInHalfOpenState(getPermittedNumberOfCallsInHalfOpenState())
.waitDurationInOpenState(Duration.ofSeconds(getWaitDurationInOpenStateInSeconds()))
.waitDurationInOpenState(getWaitDurationInOpenState())
.slidingWindow(getSlidingWindowSize(), getSlidingWindowMinimumNumberOfCalls(),
CircuitBreakerConfig.SlidingWindowType.COUNT_BASED)
.build();

View File

@@ -24,6 +24,7 @@ import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.IntStream;
import io.micrometer.core.instrument.Tags;
import org.glassfish.jersey.SslConfigurator;
import org.whispersystems.textsecuregcm.configuration.CircuitBreakerConfiguration;
import org.whispersystems.textsecuregcm.configuration.RetryConfiguration;
@@ -56,7 +57,7 @@ public class FaultTolerantHttpClient {
this.defaultRequestTimeout = defaultRequestTimeout;
this.breaker = CircuitBreaker.of(name + "-breaker", circuitBreakerConfiguration.toCircuitBreakerConfig());
CircuitBreakerUtil.registerMetrics(breaker, FaultTolerantHttpClient.class);
CircuitBreakerUtil.registerMetrics(breaker, FaultTolerantHttpClient.class, Tags.empty());
if (retryConfiguration != null) {
if (this.retryExecutor == null) {

View File

@@ -78,7 +78,7 @@ public class ProvisioningManager extends RedisPubSubAdapter<byte[], byte[]> impl
this.circuitBreaker = CircuitBreaker.of("pubsub-breaker", circuitBreakerConfiguration.toCircuitBreakerConfig());
CircuitBreakerUtil.registerMetrics(circuitBreaker, ProvisioningManager.class);
CircuitBreakerUtil.registerMetrics(circuitBreaker, ProvisioningManager.class, Tags.empty());
Metrics.gaugeMapSize(ACTIVE_LISTENERS_GAUGE_NAME, Tags.empty(), listenersByProvisioningAddress);
}

View File

@@ -0,0 +1,106 @@
/*
* Copyright 2013-2020 Signal Messenger, LLC
* SPDX-License-Identifier: AGPL-3.0-only
*/
package org.whispersystems.textsecuregcm.redis;
import static org.whispersystems.textsecuregcm.metrics.MetricsUtil.name;
import io.github.resilience4j.circuitbreaker.CircuitBreaker;
import io.github.resilience4j.retry.Retry;
import io.lettuce.core.RedisException;
import io.lettuce.core.cluster.event.ClusterTopologyChangedEvent;
import io.lettuce.core.cluster.pubsub.StatefulRedisClusterPubSubConnection;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Tags;
import io.micrometer.core.instrument.Timer;
import java.util.function.Consumer;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.whispersystems.textsecuregcm.util.CircuitBreakerUtil;
import reactor.core.scheduler.Scheduler;
public class ClusterFaultTolerantPubSubConnection<K, V> implements FaultTolerantPubSubConnection<K, V> {
private static final Logger logger = LoggerFactory.getLogger(ClusterFaultTolerantPubSubConnection.class);
private final String name;
private final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection;
private final CircuitBreaker circuitBreaker;
private final Retry retry;
private final Retry resubscribeRetry;
private final Scheduler topologyChangedEventScheduler;
private final Timer executeTimer;
public ClusterFaultTolerantPubSubConnection(final String name,
final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection, final CircuitBreaker circuitBreaker,
final Retry retry, final Retry resubscribeRetry, final Scheduler topologyChangedEventScheduler) {
this.name = name;
this.pubSubConnection = pubSubConnection;
this.circuitBreaker = circuitBreaker;
this.retry = retry;
this.resubscribeRetry = resubscribeRetry;
this.topologyChangedEventScheduler = topologyChangedEventScheduler;
this.pubSubConnection.setNodeMessagePropagation(true);
this.executeTimer = Metrics.timer(name(getClass(), "execute"), "clusterName", name + "-pubsub");
CircuitBreakerUtil.registerMetrics(circuitBreaker, ClusterFaultTolerantPubSubConnection.class, Tags.empty());
}
@Override
public void usePubSubConnection(final Consumer<StatefulRedisClusterPubSubConnection<K, V>> consumer) {
try {
circuitBreaker.executeCheckedRunnable(
() -> retry.executeRunnable(() -> executeTimer.record(() -> consumer.accept(pubSubConnection))));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
@Override
public <T> T withPubSubConnection(final Function<StatefulRedisClusterPubSubConnection<K, V>, T> function) {
try {
return circuitBreaker.executeCheckedSupplier(
() -> retry.executeCallable(() -> executeTimer.record(() -> function.apply(pubSubConnection))));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
@Override
public void subscribeToClusterTopologyChangedEvents(final Runnable eventHandler) {
usePubSubConnection(connection -> connection.getResources().eventBus().get()
.filter(event -> event instanceof ClusterTopologyChangedEvent)
.subscribeOn(topologyChangedEventScheduler)
.subscribe(event -> {
logger.info("Got topology change event for {}, resubscribing all keyspace notifications", name);
resubscribeRetry.executeRunnable(() -> {
try {
eventHandler.run();
} catch (final RuntimeException e) {
logger.warn("Resubscribe for {} failed", name, e);
throw e;
}
});
}));
}
}

View File

@@ -0,0 +1,196 @@
/*
* Copyright 2013-2020 Signal Messenger, LLC
* SPDX-License-Identifier: AGPL-3.0-only
*/
package org.whispersystems.textsecuregcm.redis;
import com.google.common.annotations.VisibleForTesting;
import io.github.resilience4j.circuitbreaker.CircuitBreaker;
import io.github.resilience4j.core.IntervalFunction;
import io.github.resilience4j.reactor.circuitbreaker.operator.CircuitBreakerOperator;
import io.github.resilience4j.reactor.retry.RetryOperator;
import io.github.resilience4j.retry.Retry;
import io.github.resilience4j.retry.RetryConfig;
import io.lettuce.core.ClientOptions.DisconnectedBehavior;
import io.lettuce.core.RedisCommandTimeoutException;
import io.lettuce.core.RedisException;
import io.lettuce.core.TimeoutOptions;
import io.lettuce.core.cluster.ClusterClientOptions;
import io.lettuce.core.cluster.ClusterTopologyRefreshOptions;
import io.lettuce.core.cluster.RedisClusterClient;
import io.lettuce.core.cluster.api.StatefulRedisClusterConnection;
import io.lettuce.core.cluster.pubsub.StatefulRedisClusterPubSubConnection;
import io.lettuce.core.codec.ByteArrayCodec;
import io.lettuce.core.resource.ClientResources;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
import io.micrometer.core.instrument.Tags;
import org.reactivestreams.Publisher;
import org.whispersystems.textsecuregcm.configuration.CircuitBreakerConfiguration;
import org.whispersystems.textsecuregcm.configuration.RedisClusterConfiguration;
import org.whispersystems.textsecuregcm.configuration.RetryConfiguration;
import org.whispersystems.textsecuregcm.util.CircuitBreakerUtil;
import reactor.core.publisher.Flux;
import reactor.core.scheduler.Schedulers;
/**
* A fault-tolerant access manager for a Redis cluster. A single circuit breaker protects all cluster
* calls.
*/
public class ClusterFaultTolerantRedisCluster implements FaultTolerantRedisCluster {
private final String name;
private final RedisClusterClient clusterClient;
private final StatefulRedisClusterConnection<String, String> stringConnection;
private final StatefulRedisClusterConnection<byte[], byte[]> binaryConnection;
private final List<StatefulRedisClusterPubSubConnection<?, ?>> pubSubConnections = new ArrayList<>();
private final CircuitBreaker circuitBreaker;
private final Retry retry;
private final Retry topologyChangedEventRetry;
public ClusterFaultTolerantRedisCluster(final String name, final RedisClusterConfiguration clusterConfiguration,
final ClientResources clientResources) {
this(name,
RedisClusterClient.create(clientResources,
RedisUriUtil.createRedisUriWithTimeout(clusterConfiguration.getConfigurationUri(),
clusterConfiguration.getTimeout())),
clusterConfiguration.getTimeout(),
clusterConfiguration.getCircuitBreakerConfiguration(),
clusterConfiguration.getRetryConfiguration());
}
@VisibleForTesting
ClusterFaultTolerantRedisCluster(final String name, final RedisClusterClient clusterClient,
final Duration commandTimeout,
final CircuitBreakerConfiguration circuitBreakerConfiguration, final RetryConfiguration retryConfiguration) {
this.name = name;
this.clusterClient = clusterClient;
this.clusterClient.setOptions(ClusterClientOptions.builder()
.disconnectedBehavior(DisconnectedBehavior.REJECT_COMMANDS)
.validateClusterNodeMembership(false)
.topologyRefreshOptions(ClusterTopologyRefreshOptions.builder()
.enableAllAdaptiveRefreshTriggers()
.build())
// for asynchronous commands
.timeoutOptions(TimeoutOptions.builder()
.fixedTimeout(commandTimeout)
.build())
.publishOnScheduler(true)
.build());
this.stringConnection = clusterClient.connect();
this.binaryConnection = clusterClient.connect(ByteArrayCodec.INSTANCE);
this.circuitBreaker = CircuitBreaker.of(name + "-breaker", circuitBreakerConfiguration.toCircuitBreakerConfig());
this.retry = Retry.of(name + "-retry", retryConfiguration.toRetryConfigBuilder()
.retryOnException(exception -> exception instanceof RedisCommandTimeoutException).build());
final RetryConfig topologyChangedEventRetryConfig = RetryConfig.custom()
.maxAttempts(Integer.MAX_VALUE)
.intervalFunction(
IntervalFunction.ofExponentialRandomBackoff(Duration.ofSeconds(1), 1.5, Duration.ofSeconds(30)))
.build();
this.topologyChangedEventRetry = Retry.of(name + "-topologyChangedRetry", topologyChangedEventRetryConfig);
CircuitBreakerUtil.registerMetrics(circuitBreaker, FaultTolerantRedisCluster.class, Tags.empty());
CircuitBreakerUtil.registerMetrics(retry, FaultTolerantRedisCluster.class);
}
@Override
public void shutdown() {
stringConnection.close();
binaryConnection.close();
for (final StatefulRedisClusterPubSubConnection<?, ?> pubSubConnection : pubSubConnections) {
pubSubConnection.close();
}
clusterClient.shutdown();
}
@Override
public String getName() {
return name;
}
@Override
public void useCluster(final Consumer<StatefulRedisClusterConnection<String, String>> consumer) {
useConnection(stringConnection, consumer);
}
@Override
public <T> T withCluster(final Function<StatefulRedisClusterConnection<String, String>, T> function) {
return withConnection(stringConnection, function);
}
@Override
public void useBinaryCluster(final Consumer<StatefulRedisClusterConnection<byte[], byte[]>> consumer) {
useConnection(binaryConnection, consumer);
}
@Override
public <T> T withBinaryCluster(final Function<StatefulRedisClusterConnection<byte[], byte[]>, T> function) {
return withConnection(binaryConnection, function);
}
@Override
public <T> Publisher<T> withBinaryClusterReactive(
final Function<StatefulRedisClusterConnection<byte[], byte[]>, Publisher<T>> function) {
return withConnectionReactive(binaryConnection, function);
}
@Override
public <K, V> void useConnection(final StatefulRedisClusterConnection<K, V> connection,
final Consumer<StatefulRedisClusterConnection<K, V>> consumer) {
try {
circuitBreaker.executeCheckedRunnable(() -> retry.executeRunnable(() -> consumer.accept(connection)));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
@Override
public <T, K, V> T withConnection(final StatefulRedisClusterConnection<K, V> connection,
final Function<StatefulRedisClusterConnection<K, V>, T> function) {
try {
return circuitBreaker.executeCheckedSupplier(() -> retry.executeCallable(() -> function.apply(connection)));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
@Override
public <T, K, V> Publisher<T> withConnectionReactive(final StatefulRedisClusterConnection<K, V> connection,
final Function<StatefulRedisClusterConnection<K, V>, Publisher<T>> function) {
return Flux.from(function.apply(connection))
.transformDeferred(RetryOperator.of(retry))
.transformDeferred(CircuitBreakerOperator.of(circuitBreaker));
}
public FaultTolerantPubSubConnection<String, String> createPubSubConnection() {
final StatefulRedisClusterPubSubConnection<String, String> pubSubConnection = clusterClient.connectPubSub();
pubSubConnections.add(pubSubConnection);
return new ClusterFaultTolerantPubSubConnection<>(name, pubSubConnection, circuitBreaker, retry,
topologyChangedEventRetry,
Schedulers.newSingle(name + "-redisPubSubEvents", true));
}
}

View File

@@ -1,102 +1,19 @@
/*
* Copyright 2013-2020 Signal Messenger, LLC
* Copyright 2024 Signal Messenger, LLC
* SPDX-License-Identifier: AGPL-3.0-only
*/
package org.whispersystems.textsecuregcm.redis;
import static org.whispersystems.textsecuregcm.metrics.MetricsUtil.name;
import io.github.resilience4j.circuitbreaker.CircuitBreaker;
import io.github.resilience4j.retry.Retry;
import io.lettuce.core.RedisException;
import io.lettuce.core.cluster.event.ClusterTopologyChangedEvent;
import io.lettuce.core.cluster.pubsub.StatefulRedisClusterPubSubConnection;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Timer;
import java.util.function.Consumer;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.whispersystems.textsecuregcm.util.CircuitBreakerUtil;
import reactor.core.scheduler.Scheduler;
public class FaultTolerantPubSubConnection<K, V> {
public interface FaultTolerantPubSubConnection<K, V> {
private static final Logger logger = LoggerFactory.getLogger(FaultTolerantPubSubConnection.class);
void usePubSubConnection(Consumer<StatefulRedisClusterPubSubConnection<K, V>> consumer);
<T> T withPubSubConnection(Function<StatefulRedisClusterPubSubConnection<K, V>, T> function);
private final String name;
private final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection;
private final CircuitBreaker circuitBreaker;
private final Retry retry;
private final Retry resubscribeRetry;
private final Scheduler topologyChangedEventScheduler;
private final Timer executeTimer;
public FaultTolerantPubSubConnection(final String name,
final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection, final CircuitBreaker circuitBreaker,
final Retry retry, final Retry resubscribeRetry, final Scheduler topologyChangedEventScheduler) {
this.name = name;
this.pubSubConnection = pubSubConnection;
this.circuitBreaker = circuitBreaker;
this.retry = retry;
this.resubscribeRetry = resubscribeRetry;
this.topologyChangedEventScheduler = topologyChangedEventScheduler;
this.pubSubConnection.setNodeMessagePropagation(true);
this.executeTimer = Metrics.timer(name(getClass(), "execute"), "clusterName", name + "-pubsub");
CircuitBreakerUtil.registerMetrics(circuitBreaker, FaultTolerantPubSubConnection.class);
}
public void usePubSubConnection(final Consumer<StatefulRedisClusterPubSubConnection<K, V>> consumer) {
try {
circuitBreaker.executeCheckedRunnable(
() -> retry.executeRunnable(() -> executeTimer.record(() -> consumer.accept(pubSubConnection))));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
public <T> T withPubSubConnection(final Function<StatefulRedisClusterPubSubConnection<K, V>, T> function) {
try {
return circuitBreaker.executeCheckedSupplier(
() -> retry.executeCallable(() -> executeTimer.record(() -> function.apply(pubSubConnection))));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
public void subscribeToClusterTopologyChangedEvents(final Runnable eventHandler) {
usePubSubConnection(connection -> connection.getResources().eventBus().get()
.filter(event -> event instanceof ClusterTopologyChangedEvent)
.subscribeOn(topologyChangedEventScheduler)
.subscribe(event -> {
logger.info("Got topology change event for {}, resubscribing all keyspace notifications", name);
resubscribeRetry.executeRunnable(() -> {
try {
eventHandler.run();
} catch (final RuntimeException e) {
logger.warn("Resubscribe for {} failed", name, e);
throw e;
}
});
}));
}
void subscribeToClusterTopologyChangedEvents(Runnable eventHandler);
}

View File

@@ -1,183 +1,40 @@
/*
* Copyright 2013-2020 Signal Messenger, LLC
* Copyright 2024 Signal Messenger, LLC
* SPDX-License-Identifier: AGPL-3.0-only
*/
package org.whispersystems.textsecuregcm.redis;
import com.google.common.annotations.VisibleForTesting;
import io.github.resilience4j.circuitbreaker.CircuitBreaker;
import io.github.resilience4j.core.IntervalFunction;
import io.github.resilience4j.reactor.circuitbreaker.operator.CircuitBreakerOperator;
import io.github.resilience4j.reactor.retry.RetryOperator;
import io.github.resilience4j.retry.Retry;
import io.github.resilience4j.retry.RetryConfig;
import io.lettuce.core.ClientOptions.DisconnectedBehavior;
import io.lettuce.core.RedisCommandTimeoutException;
import io.lettuce.core.RedisException;
import io.lettuce.core.TimeoutOptions;
import io.lettuce.core.cluster.ClusterClientOptions;
import io.lettuce.core.cluster.ClusterTopologyRefreshOptions;
import io.lettuce.core.cluster.RedisClusterClient;
import io.lettuce.core.cluster.api.StatefulRedisClusterConnection;
import io.lettuce.core.cluster.pubsub.StatefulRedisClusterPubSubConnection;
import io.lettuce.core.codec.ByteArrayCodec;
import io.lettuce.core.resource.ClientResources;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
import org.reactivestreams.Publisher;
import org.whispersystems.textsecuregcm.configuration.CircuitBreakerConfiguration;
import org.whispersystems.textsecuregcm.configuration.RedisClusterConfiguration;
import org.whispersystems.textsecuregcm.configuration.RetryConfiguration;
import org.whispersystems.textsecuregcm.util.CircuitBreakerUtil;
import reactor.core.publisher.Flux;
import reactor.core.scheduler.Schedulers;
/**
* A fault-tolerant access manager for a Redis cluster. A fault-tolerant Redis cluster provides managed,
* circuit-breaker-protected access to connections.
*/
public class FaultTolerantRedisCluster {
public interface FaultTolerantRedisCluster {
private final String name;
void shutdown();
private final RedisClusterClient clusterClient;
String getName();
private final StatefulRedisClusterConnection<String, String> stringConnection;
private final StatefulRedisClusterConnection<byte[], byte[]> binaryConnection;
void useCluster(Consumer<StatefulRedisClusterConnection<String, String>> consumer);
private final List<StatefulRedisClusterPubSubConnection<?, ?>> pubSubConnections = new ArrayList<>();
<T> T withCluster(Function<StatefulRedisClusterConnection<String, String>, T> function);
private final CircuitBreaker circuitBreaker;
private final Retry retry;
private final Retry topologyChangedEventRetry;
void useBinaryCluster(Consumer<StatefulRedisClusterConnection<byte[], byte[]>> consumer);
public FaultTolerantRedisCluster(final String name, final RedisClusterConfiguration clusterConfiguration,
final ClientResources clientResources) {
this(name,
RedisClusterClient.create(clientResources,
RedisUriUtil.createRedisUriWithTimeout(clusterConfiguration.getConfigurationUri(),
clusterConfiguration.getTimeout())),
clusterConfiguration.getTimeout(),
clusterConfiguration.getCircuitBreakerConfiguration(),
clusterConfiguration.getRetryConfiguration());
}
<T> T withBinaryCluster(Function<StatefulRedisClusterConnection<byte[], byte[]>, T> function);
@VisibleForTesting
FaultTolerantRedisCluster(final String name, final RedisClusterClient clusterClient, final Duration commandTimeout,
final CircuitBreakerConfiguration circuitBreakerConfiguration, final RetryConfiguration retryConfiguration) {
this.name = name;
<T> Publisher<T> withBinaryClusterReactive(
Function<StatefulRedisClusterConnection<byte[], byte[]>, Publisher<T>> function);
this.clusterClient = clusterClient;
this.clusterClient.setOptions(ClusterClientOptions.builder()
.disconnectedBehavior(DisconnectedBehavior.REJECT_COMMANDS)
.validateClusterNodeMembership(false)
.topologyRefreshOptions(ClusterTopologyRefreshOptions.builder()
.enableAllAdaptiveRefreshTriggers()
.build())
// for asynchronous commands
.timeoutOptions(TimeoutOptions.builder()
.fixedTimeout(commandTimeout)
.build())
.publishOnScheduler(true)
.build());
<K, V> void useConnection(StatefulRedisClusterConnection<K, V> connection,
Consumer<StatefulRedisClusterConnection<K, V>> consumer);
this.stringConnection = clusterClient.connect();
this.binaryConnection = clusterClient.connect(ByteArrayCodec.INSTANCE);
<T, K, V> T withConnection(StatefulRedisClusterConnection<K, V> connection,
Function<StatefulRedisClusterConnection<K, V>, T> function);
this.circuitBreaker = CircuitBreaker.of(name + "-breaker", circuitBreakerConfiguration.toCircuitBreakerConfig());
this.retry = Retry.of(name + "-retry", retryConfiguration.toRetryConfigBuilder()
.retryOnException(exception -> exception instanceof RedisCommandTimeoutException).build());
final RetryConfig topologyChangedEventRetryConfig = RetryConfig.custom()
.maxAttempts(Integer.MAX_VALUE)
.intervalFunction(
IntervalFunction.ofExponentialRandomBackoff(Duration.ofSeconds(1), 1.5, Duration.ofSeconds(30)))
.build();
<T, K, V> Publisher<T> withConnectionReactive(StatefulRedisClusterConnection<K, V> connection,
Function<StatefulRedisClusterConnection<K, V>, Publisher<T>> function);
this.topologyChangedEventRetry = Retry.of(name + "-topologyChangedRetry", topologyChangedEventRetryConfig);
CircuitBreakerUtil.registerMetrics(circuitBreaker, FaultTolerantRedisCluster.class);
CircuitBreakerUtil.registerMetrics(retry, FaultTolerantRedisCluster.class);
}
void shutdown() {
stringConnection.close();
binaryConnection.close();
for (final StatefulRedisClusterPubSubConnection<?, ?> pubSubConnection : pubSubConnections) {
pubSubConnection.close();
}
clusterClient.shutdown();
}
public String getName() {
return name;
}
public void useCluster(final Consumer<StatefulRedisClusterConnection<String, String>> consumer) {
useConnection(stringConnection, consumer);
}
public <T> T withCluster(final Function<StatefulRedisClusterConnection<String, String>, T> function) {
return withConnection(stringConnection, function);
}
public void useBinaryCluster(final Consumer<StatefulRedisClusterConnection<byte[], byte[]>> consumer) {
useConnection(binaryConnection, consumer);
}
public <T> T withBinaryCluster(final Function<StatefulRedisClusterConnection<byte[], byte[]>, T> function) {
return withConnection(binaryConnection, function);
}
public <T> Publisher<T> withBinaryClusterReactive(
final Function<StatefulRedisClusterConnection<byte[], byte[]>, Publisher<T>> function) {
return withConnectionReactive(binaryConnection, function);
}
private <K, V> void useConnection(final StatefulRedisClusterConnection<K, V> connection,
final Consumer<StatefulRedisClusterConnection<K, V>> consumer) {
try {
circuitBreaker.executeCheckedRunnable(() -> retry.executeRunnable(() -> consumer.accept(connection)));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
private <T, K, V> T withConnection(final StatefulRedisClusterConnection<K, V> connection,
final Function<StatefulRedisClusterConnection<K, V>, T> function) {
try {
return circuitBreaker.executeCheckedSupplier(() -> retry.executeCallable(() -> function.apply(connection)));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
private <T, K, V> Publisher<T> withConnectionReactive(final StatefulRedisClusterConnection<K, V> connection,
final Function<StatefulRedisClusterConnection<K, V>, Publisher<T>> function) {
return Flux.from(function.apply(connection))
.transformDeferred(RetryOperator.of(retry))
.transformDeferred(CircuitBreakerOperator.of(circuitBreaker));
}
public FaultTolerantPubSubConnection<String, String> createPubSubConnection() {
final StatefulRedisClusterPubSubConnection<String, String> pubSubConnection = clusterClient.connectPubSub();
pubSubConnections.add(pubSubConnection);
return new FaultTolerantPubSubConnection<>(name, pubSubConnection, circuitBreaker, retry, topologyChangedEventRetry,
Schedulers.newSingle(name + "-redisPubSubEvents", true));
}
FaultTolerantPubSubConnection<String, String> createPubSubConnection();
}

View File

@@ -0,0 +1,136 @@
/*
* Copyright 2024 Signal Messenger, LLC
* SPDX-License-Identifier: AGPL-3.0-only
*/
package org.whispersystems.textsecuregcm.redis;
import com.google.common.annotations.VisibleForTesting;
import io.github.resilience4j.circuitbreaker.CallNotPermittedException;
import io.github.resilience4j.circuitbreaker.CircuitBreaker;
import io.github.resilience4j.circuitbreaker.CircuitBreakerConfig;
import io.lettuce.core.protocol.CommandHandler;
import io.lettuce.core.protocol.CompleteableCommand;
import io.lettuce.core.protocol.RedisCommand;
import io.lettuce.core.resource.NettyCustomizer;
import io.micrometer.core.instrument.Tags;
import io.netty.channel.Channel;
import io.netty.channel.ChannelDuplexHandler;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelPromise;
import java.net.SocketAddress;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.whispersystems.textsecuregcm.util.CircuitBreakerUtil;
public class LettuceShardCircuitBreaker implements NettyCustomizer {
private final String clusterName;
private final CircuitBreakerConfig circuitBreakerConfig;
public LettuceShardCircuitBreaker(final String clusterName, final CircuitBreakerConfig circuitBreakerConfig) {
this.clusterName = clusterName;
this.circuitBreakerConfig = circuitBreakerConfig;
}
@Override
public void afterChannelInitialized(final Channel channel) {
final ChannelCircuitBreakerHandler channelCircuitBreakerHandler = new ChannelCircuitBreakerHandler(clusterName,
circuitBreakerConfig);
final String commandHandlerName = StreamSupport.stream(channel.pipeline().spliterator(), false)
.filter(entry -> entry.getValue() instanceof CommandHandler)
.map(Map.Entry::getKey)
.findFirst()
.orElseThrow();
channel.pipeline().addBefore(commandHandlerName, null, channelCircuitBreakerHandler);
}
static final class ChannelCircuitBreakerHandler extends ChannelDuplexHandler {
private static final Logger logger = LoggerFactory.getLogger(ChannelCircuitBreakerHandler.class);
private static final String SHARD_TAG_NAME = "shard";
private static final String CLUSTER_TAG_NAME = "cluster";
private final String clusterName;
private final CircuitBreakerConfig circuitBreakerConfig;
@VisibleForTesting
CircuitBreaker breaker;
public ChannelCircuitBreakerHandler(final String name, CircuitBreakerConfig circuitBreakerConfig) {
this.clusterName = name;
this.circuitBreakerConfig = circuitBreakerConfig;
}
@Override
public void connect(final ChannelHandlerContext ctx, final SocketAddress remoteAddress,
final SocketAddress localAddress, final ChannelPromise promise) throws Exception {
super.connect(ctx, remoteAddress, localAddress, promise);
// Unfortunately, the Channel's remote address is null until connect() is called, so we have to wait to initialize
// the breaker with the remotes name.
// There is a Channel attribute, io.lettuce.core.ConnectionBuilder.REDIS_URI, but this does not always
// match remote address, as it is inherited from the Bootstrap attributes and not updated for the Channel connection
// In some cases, like the default connection, the remote address includes the DNS hostname, which we want to exclude.
final String shardAddress = StringUtils.substringAfter(remoteAddress.toString(), "/");
breaker = CircuitBreaker.of("%s/%s-breaker".formatted(clusterName, shardAddress), circuitBreakerConfig);
CircuitBreakerUtil.registerMetrics(breaker, getClass(),
Tags.of(CLUSTER_TAG_NAME, clusterName, SHARD_TAG_NAME, shardAddress));
}
@Override
public void write(final ChannelHandlerContext ctx, final Object msg, final ChannelPromise promise)
throws Exception {
logger.trace("Breaker state is {}", breaker.getState());
// Note: io.lettuce.core.protocol.CommandHandler also supports batches (List/Collection<RedisCommand>),
// but we do not use that feature, so we can just check for single commands
//
// There are two types of RedisCommands that are not CompleteableCommand:
// - io.lettuce.core.protocol.Command
// - io.lettuce.core.protocol.PristineFallbackCommand
//
// The former always get wrapped by one of the other command types, and the latter is only used in an edge case
// to consume responses.
if (msg instanceof RedisCommand<?, ?, ?> rc && rc instanceof CompleteableCommand<?> command) {
try {
breaker.acquirePermission();
// state can change in acquirePermission()
logger.trace("Breaker is permitted: {}", breaker.getState());
final long startNanos = System.nanoTime();
command.onComplete((ignored, throwable) -> {
final long durationNanos = System.nanoTime() - startNanos;
if (throwable != null) {
breaker.onError(durationNanos, TimeUnit.NANOSECONDS, throwable);
logger.debug("Command completed with error", throwable);
} else {
breaker.onSuccess(durationNanos, TimeUnit.NANOSECONDS);
}
});
} catch (final CallNotPermittedException e) {
rc.completeExceptionally(e);
promise.tryFailure(e);
return;
}
} else {
logger.warn("Unexpected msg type: {}", msg.getClass());
}
super.write(ctx, msg, promise);
}
}
}

View File

@@ -0,0 +1,97 @@
/*
* Copyright 2013-2020 Signal Messenger, LLC
* SPDX-License-Identifier: AGPL-3.0-only
*/
package org.whispersystems.textsecuregcm.redis;
import static org.whispersystems.textsecuregcm.metrics.MetricsUtil.name;
import io.github.resilience4j.retry.Retry;
import io.lettuce.core.RedisException;
import io.lettuce.core.cluster.event.ClusterTopologyChangedEvent;
import io.lettuce.core.cluster.pubsub.StatefulRedisClusterPubSubConnection;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Timer;
import java.util.function.Consumer;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import reactor.core.scheduler.Scheduler;
public class ShardFaultTolerantPubSubConnection<K, V> implements FaultTolerantPubSubConnection<K, V> {
private static final Logger logger = LoggerFactory.getLogger(ShardFaultTolerantPubSubConnection.class);
private final String name;
private final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection;
private final Retry retry;
private final Retry resubscribeRetry;
private final Scheduler topologyChangedEventScheduler;
private final Timer executeTimer;
public ShardFaultTolerantPubSubConnection(final String name,
final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection,
final Retry retry, final Retry resubscribeRetry, final Scheduler topologyChangedEventScheduler) {
this.name = name;
this.pubSubConnection = pubSubConnection;
this.retry = retry;
this.resubscribeRetry = resubscribeRetry;
this.topologyChangedEventScheduler = topologyChangedEventScheduler;
this.pubSubConnection.setNodeMessagePropagation(true);
this.executeTimer = Metrics.timer(name(getClass(), "execute"), "clusterName", name + "-pubsub");
}
@Override
public void usePubSubConnection(final Consumer<StatefulRedisClusterPubSubConnection<K, V>> consumer) {
try {
retry.executeRunnable(() -> executeTimer.record(() -> consumer.accept(pubSubConnection)));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
@Override
public <T> T withPubSubConnection(final Function<StatefulRedisClusterPubSubConnection<K, V>, T> function) {
try {
return retry.executeCallable(() -> executeTimer.record(() -> function.apply(pubSubConnection)));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
@Override
public void subscribeToClusterTopologyChangedEvents(final Runnable eventHandler) {
usePubSubConnection(connection -> connection.getResources().eventBus().get()
.filter(event -> event instanceof ClusterTopologyChangedEvent)
.subscribeOn(topologyChangedEventScheduler)
.subscribe(event -> {
logger.info("Got topology change event for {}, resubscribing all keyspace notifications", name);
resubscribeRetry.executeRunnable(() -> {
try {
eventHandler.run();
} catch (final RuntimeException e) {
logger.warn("Resubscribe for {} failed", name, e);
throw e;
}
});
}));
}
}

View File

@@ -0,0 +1,197 @@
/*
* Copyright 2024 Signal Messenger, LLC
* SPDX-License-Identifier: AGPL-3.0-only
*/
package org.whispersystems.textsecuregcm.redis;
import io.github.resilience4j.core.IntervalFunction;
import io.github.resilience4j.reactor.retry.RetryOperator;
import io.github.resilience4j.retry.Retry;
import io.github.resilience4j.retry.RetryConfig;
import io.lettuce.core.ClientOptions;
import io.lettuce.core.RedisCommandTimeoutException;
import io.lettuce.core.RedisException;
import io.lettuce.core.RedisURI;
import io.lettuce.core.TimeoutOptions;
import io.lettuce.core.cluster.ClusterClientOptions;
import io.lettuce.core.cluster.ClusterTopologyRefreshOptions;
import io.lettuce.core.cluster.RedisClusterClient;
import io.lettuce.core.cluster.api.StatefulRedisClusterConnection;
import io.lettuce.core.cluster.pubsub.StatefulRedisClusterPubSubConnection;
import io.lettuce.core.codec.ByteArrayCodec;
import io.lettuce.core.resource.ClientResources;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
import org.reactivestreams.Publisher;
import org.whispersystems.textsecuregcm.configuration.CircuitBreakerConfiguration;
import org.whispersystems.textsecuregcm.configuration.RedisClusterConfiguration;
import org.whispersystems.textsecuregcm.configuration.RetryConfiguration;
import org.whispersystems.textsecuregcm.util.CircuitBreakerUtil;
import reactor.core.publisher.Flux;
import reactor.core.scheduler.Schedulers;
/**
* A fault-tolerant access manager for a Redis cluster. Each shard in the cluster has a dedicated circuit breaker.
*
* @see LettuceShardCircuitBreaker
*/
public class ShardFaultTolerantRedisCluster implements FaultTolerantRedisCluster {
private final String name;
private final RedisClusterClient clusterClient;
private final StatefulRedisClusterConnection<String, String> stringConnection;
private final StatefulRedisClusterConnection<byte[], byte[]> binaryConnection;
private final List<StatefulRedisClusterPubSubConnection<?, ?>> pubSubConnections = new ArrayList<>();
private final Retry retry;
private final Retry topologyChangedEventRetry;
public ShardFaultTolerantRedisCluster(final String name, final RedisClusterConfiguration clusterConfiguration,
final ClientResources.Builder clientResourcesBuilder) {
this(name, clientResourcesBuilder,
Collections.singleton(RedisUriUtil.createRedisUriWithTimeout(clusterConfiguration.getConfigurationUri(),
clusterConfiguration.getTimeout())),
clusterConfiguration.getTimeout(),
clusterConfiguration.getCircuitBreakerConfiguration(),
clusterConfiguration.getRetryConfiguration());
}
ShardFaultTolerantRedisCluster(String name, final ClientResources.Builder clientResourcesBuilder,
Iterable<RedisURI> redisUris, Duration commandTimeout, CircuitBreakerConfiguration circuitBreakerConfig,
RetryConfiguration retryConfiguration) {
this.name = name;
this.clusterClient = RedisClusterClient.create(
clientResourcesBuilder.nettyCustomizer(
new LettuceShardCircuitBreaker(name, circuitBreakerConfig.toCircuitBreakerConfig())).
build(),
redisUris);
this.clusterClient.setOptions(ClusterClientOptions.builder()
.disconnectedBehavior(ClientOptions.DisconnectedBehavior.REJECT_COMMANDS)
.validateClusterNodeMembership(false)
.topologyRefreshOptions(ClusterTopologyRefreshOptions.builder()
.enableAllAdaptiveRefreshTriggers()
.build())
// for asynchronous commands
.timeoutOptions(TimeoutOptions.builder()
.fixedTimeout(commandTimeout)
.build())
.publishOnScheduler(true)
.build());
this.stringConnection = clusterClient.connect();
this.binaryConnection = clusterClient.connect(ByteArrayCodec.INSTANCE);
this.retry = Retry.of(name + "-retry", retryConfiguration.toRetryConfigBuilder()
.retryOnException(exception -> exception instanceof RedisCommandTimeoutException).build());
final RetryConfig topologyChangedEventRetryConfig = RetryConfig.custom()
.maxAttempts(Integer.MAX_VALUE)
.intervalFunction(
IntervalFunction.ofExponentialRandomBackoff(Duration.ofSeconds(1), 1.5, Duration.ofSeconds(30)))
.build();
this.topologyChangedEventRetry = Retry.of(name + "-topologyChangedRetry", topologyChangedEventRetryConfig);
CircuitBreakerUtil.registerMetrics(retry, ShardFaultTolerantRedisCluster.class);
}
@Override
public void shutdown() {
stringConnection.close();
binaryConnection.close();
for (final StatefulRedisClusterPubSubConnection<?, ?> pubSubConnection : pubSubConnections) {
pubSubConnection.close();
}
clusterClient.shutdown();
}
public String getName() {
return name;
}
@Override
public void useCluster(final Consumer<StatefulRedisClusterConnection<String, String>> consumer) {
useConnection(stringConnection, consumer);
}
@Override
public <T> T withCluster(final Function<StatefulRedisClusterConnection<String, String>, T> function) {
return withConnection(stringConnection, function);
}
@Override
public void useBinaryCluster(final Consumer<StatefulRedisClusterConnection<byte[], byte[]>> consumer) {
useConnection(binaryConnection, consumer);
}
@Override
public <T> T withBinaryCluster(final Function<StatefulRedisClusterConnection<byte[], byte[]>, T> function) {
return withConnection(binaryConnection, function);
}
@Override
public <T> Publisher<T> withBinaryClusterReactive(
final Function<StatefulRedisClusterConnection<byte[], byte[]>, Publisher<T>> function) {
return withConnectionReactive(binaryConnection, function);
}
@Override
public <K, V> void useConnection(final StatefulRedisClusterConnection<K, V> connection,
final Consumer<StatefulRedisClusterConnection<K, V>> consumer) {
try {
retry.executeRunnable(() -> consumer.accept(connection));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
@Override
public <T, K, V> T withConnection(final StatefulRedisClusterConnection<K, V> connection,
final Function<StatefulRedisClusterConnection<K, V>, T> function) {
try {
return retry.executeCallable(() -> function.apply(connection));
} catch (final Throwable t) {
if (t instanceof RedisException) {
throw (RedisException) t;
} else {
throw new RedisException(t);
}
}
}
@Override
public <T, K, V> Publisher<T> withConnectionReactive(final StatefulRedisClusterConnection<K, V> connection,
final Function<StatefulRedisClusterConnection<K, V>, Publisher<T>> function) {
return Flux.from(function.apply(connection))
.transformDeferred(RetryOperator.of(retry));
}
@Override
public FaultTolerantPubSubConnection<String, String> createPubSubConnection() {
final StatefulRedisClusterPubSubConnection<String, String> pubSubConnection = clusterClient.connectPubSub();
pubSubConnections.add(pubSubConnection);
return new ShardFaultTolerantPubSubConnection<>(name, pubSubConnection, retry, topologyChangedEventRetry,
Schedulers.newSingle(name + "-redisPubSubEvents", true));
}
}

View File

@@ -23,20 +23,22 @@ public class CircuitBreakerUtil {
private static final String BREAKER_NAME_TAG_NAME = "breakerName";
private static final String OUTCOME_TAG_NAME = "outcome";
public static void registerMetrics(CircuitBreaker circuitBreaker, Class<?> clazz) {
public static void registerMetrics(CircuitBreaker circuitBreaker, Class<?> clazz, Tags additionalTags) {
final String breakerName = clazz.getSimpleName() + "/" + circuitBreaker.getName();
final Counter successCounter = Metrics.counter(CIRCUIT_BREAKER_CALL_COUNTER_NAME,
BREAKER_NAME_TAG_NAME, breakerName,
OUTCOME_TAG_NAME, "success");
additionalTags.and(
BREAKER_NAME_TAG_NAME, breakerName,
OUTCOME_TAG_NAME, "success"));
final Counter failureCounter = Metrics.counter(CIRCUIT_BREAKER_CALL_COUNTER_NAME,
BREAKER_NAME_TAG_NAME, breakerName,
OUTCOME_TAG_NAME, "failure");
additionalTags.and(
BREAKER_NAME_TAG_NAME, breakerName,
OUTCOME_TAG_NAME, "failure"));
final Counter unpermittedCounter = Metrics.counter(CIRCUIT_BREAKER_CALL_COUNTER_NAME,
BREAKER_NAME_TAG_NAME, breakerName,
OUTCOME_TAG_NAME, "unpermitted");
additionalTags.and(BREAKER_NAME_TAG_NAME, breakerName,
OUTCOME_TAG_NAME, "unpermitted"));
circuitBreaker.getEventPublisher().onSuccess(event -> {
successCounter.increment();

View File

@@ -29,6 +29,7 @@ import org.whispersystems.textsecuregcm.configuration.dynamic.DynamicConfigurati
import org.whispersystems.textsecuregcm.controllers.SecureStorageController;
import org.whispersystems.textsecuregcm.controllers.SecureValueRecovery2Controller;
import org.whispersystems.textsecuregcm.push.ClientPresenceManager;
import org.whispersystems.textsecuregcm.redis.ClusterFaultTolerantRedisCluster;
import org.whispersystems.textsecuregcm.redis.FaultTolerantRedisCluster;
import org.whispersystems.textsecuregcm.securestorage.SecureStorageClient;
import org.whispersystems.textsecuregcm.securevaluerecovery.SecureValueRecovery2Client;
@@ -107,7 +108,7 @@ public class AssignUsernameCommand extends EnvironmentCommand<WhisperServerConfi
ClientResources redisClusterClientResources = ClientResources.builder().build();
FaultTolerantRedisCluster cacheCluster = new FaultTolerantRedisCluster("main_cache_cluster",
FaultTolerantRedisCluster cacheCluster = new ClusterFaultTolerantRedisCluster("main_cache_cluster",
configuration.getCacheClusterConfiguration(), redisClusterClientResources);
Scheduler messageDeliveryScheduler = Schedulers.fromExecutorService(
@@ -173,13 +174,14 @@ public class AssignUsernameCommand extends EnvironmentCommand<WhisperServerConfi
configuration.getDynamoDbTables().getMessages().getTableName(),
configuration.getDynamoDbTables().getMessages().getExpiration(),
messageDeletionExecutor);
FaultTolerantRedisCluster messageInsertCacheCluster = new FaultTolerantRedisCluster("message_insert_cluster",
FaultTolerantRedisCluster messageInsertCacheCluster = new ClusterFaultTolerantRedisCluster("message_insert_cluster",
configuration.getMessageCacheConfiguration().getRedisClusterConfiguration(), redisClusterClientResources);
FaultTolerantRedisCluster messageReadDeleteCluster = new FaultTolerantRedisCluster("message_read_delete_cluster",
FaultTolerantRedisCluster messageReadDeleteCluster = new ClusterFaultTolerantRedisCluster(
"message_read_delete_cluster",
configuration.getMessageCacheConfiguration().getRedisClusterConfiguration(), redisClusterClientResources);
FaultTolerantRedisCluster clientPresenceCluster = new FaultTolerantRedisCluster("client_presence_cluster",
FaultTolerantRedisCluster clientPresenceCluster = new ClusterFaultTolerantRedisCluster("client_presence_cluster",
configuration.getClientPresenceClusterConfiguration(), redisClusterClientResources);
FaultTolerantRedisCluster rateLimitersCluster = new FaultTolerantRedisCluster("rate_limiters",
FaultTolerantRedisCluster rateLimitersCluster = new ClusterFaultTolerantRedisCluster("rate_limiters",
configuration.getRateLimitersCluster(), redisClusterClientResources);
SecureValueRecovery2Client secureValueRecovery2Client = new SecureValueRecovery2Client(
secureValueRecoveryCredentialsGenerator, secureValueRecoveryExecutor, secureValueRecoveryServiceRetryExecutor,

View File

@@ -29,6 +29,7 @@ import org.whispersystems.textsecuregcm.controllers.SecureStorageController;
import org.whispersystems.textsecuregcm.controllers.SecureValueRecovery2Controller;
import org.whispersystems.textsecuregcm.metrics.MetricsUtil;
import org.whispersystems.textsecuregcm.push.ClientPresenceManager;
import org.whispersystems.textsecuregcm.redis.ClusterFaultTolerantRedisCluster;
import org.whispersystems.textsecuregcm.redis.FaultTolerantRedisCluster;
import org.whispersystems.textsecuregcm.securestorage.SecureStorageClient;
import org.whispersystems.textsecuregcm.securevaluerecovery.SecureValueRecovery2Client;
@@ -90,7 +91,7 @@ record CommandDependencies(
ClientResources redisClusterClientResources = ClientResources.builder().build();
FaultTolerantRedisCluster cacheCluster = new FaultTolerantRedisCluster("main_cache_cluster",
FaultTolerantRedisCluster cacheCluster = new ClusterFaultTolerantRedisCluster("main_cache_cluster",
configuration.getCacheClusterConfiguration(), redisClusterClientResources);
ScheduledExecutorService recurringJobExecutor = environment.lifecycle()
@@ -161,13 +162,14 @@ record CommandDependencies(
configuration.getDynamoDbTables().getMessages().getTableName(),
configuration.getDynamoDbTables().getMessages().getExpiration(),
messageDeletionExecutor);
FaultTolerantRedisCluster messageInsertCacheCluster = new FaultTolerantRedisCluster("message_insert_cluster",
FaultTolerantRedisCluster messageInsertCacheCluster = new ClusterFaultTolerantRedisCluster("message_insert_cluster",
configuration.getMessageCacheConfiguration().getRedisClusterConfiguration(), redisClusterClientResources);
FaultTolerantRedisCluster messageReadDeleteCluster = new FaultTolerantRedisCluster("message_read_delete_cluster",
FaultTolerantRedisCluster messageReadDeleteCluster = new ClusterFaultTolerantRedisCluster(
"message_read_delete_cluster",
configuration.getMessageCacheConfiguration().getRedisClusterConfiguration(), redisClusterClientResources);
FaultTolerantRedisCluster clientPresenceCluster = new FaultTolerantRedisCluster("client_presence_cluster",
FaultTolerantRedisCluster clientPresenceCluster = new ClusterFaultTolerantRedisCluster("client_presence_cluster",
configuration.getClientPresenceClusterConfiguration(), redisClusterClientResources);
FaultTolerantRedisCluster rateLimitersCluster = new FaultTolerantRedisCluster("rate_limiters",
FaultTolerantRedisCluster rateLimitersCluster = new ClusterFaultTolerantRedisCluster("rate_limiters",
configuration.getRateLimitersCluster(), redisClusterClientResources);
SecureValueRecovery2Client secureValueRecovery2Client = new SecureValueRecovery2Client(
secureValueRecoveryCredentialsGenerator, secureValueRecoveryServiceExecutor,

View File

@@ -19,6 +19,7 @@ import org.whispersystems.textsecuregcm.WhisperServerConfiguration;
import org.whispersystems.textsecuregcm.metrics.MetricsUtil;
import org.whispersystems.textsecuregcm.push.APNSender;
import org.whispersystems.textsecuregcm.push.ApnPushNotificationScheduler;
import org.whispersystems.textsecuregcm.redis.ClusterFaultTolerantRedisCluster;
import org.whispersystems.textsecuregcm.redis.FaultTolerantRedisCluster;
import org.whispersystems.textsecuregcm.util.logging.UncaughtExceptionHandler;
@@ -63,7 +64,7 @@ public class ScheduledApnPushNotificationSenderServiceCommand extends ServerComm
});
}
final FaultTolerantRedisCluster pushSchedulerCluster = new FaultTolerantRedisCluster("push_scheduler",
final FaultTolerantRedisCluster pushSchedulerCluster = new ClusterFaultTolerantRedisCluster("push_scheduler",
configuration.getPushSchedulerCluster(), deps.redisClusterClientResources());
final ExecutorService apnSenderExecutor = environment.lifecycle().executorService(name(getClass(), "apnSender-%d"))