Retry Redis commands that time out.

This commit is contained in:
Jon Chambers
2020-09-04 17:18:37 -04:00
committed by Jon Chambers
parent 2d42b478ba
commit 76665dd56e
7 changed files with 128 additions and 19 deletions

View File

@@ -4,7 +4,10 @@ import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.SharedMetricRegistries;
import com.codahale.metrics.Timer;
import io.github.resilience4j.circuitbreaker.CircuitBreaker;
import io.github.resilience4j.retry.Retry;
import io.lettuce.core.cluster.pubsub.StatefulRedisClusterPubSubConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.whispersystems.textsecuregcm.util.CircuitBreakerUtil;
import org.whispersystems.textsecuregcm.util.Constants;
@@ -16,13 +19,18 @@ import static com.codahale.metrics.MetricRegistry.name;
public class FaultTolerantPubSubConnection<K, V> {
private final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection;
private final CircuitBreaker circuitBreaker;
private final Retry retry;
private final Timer executeTimer;
public FaultTolerantPubSubConnection(final String name, final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection, final CircuitBreaker circuitBreaker) {
private static final Logger log = LoggerFactory.getLogger(FaultTolerantPubSubConnection.class);
public FaultTolerantPubSubConnection(final String name, final StatefulRedisClusterPubSubConnection<K, V> pubSubConnection, final CircuitBreaker circuitBreaker, final Retry retry) {
this.pubSubConnection = pubSubConnection;
this.circuitBreaker = circuitBreaker;
this.retry = retry;
CircuitBreakerUtil.registerMetrics(SharedMetricRegistries.getOrCreate(Constants.METRICS_NAME),
this.circuitBreaker,
@@ -36,14 +44,38 @@ public class FaultTolerantPubSubConnection<K, V> {
}
/**
 * Runs the given consumer against the wrapped pub/sub connection.
 *
 * <p>The call is made through {@code retry}, and that retry call is itself made through
 * {@code circuitBreaker}. Each invocation of the consumer is timed individually with
 * {@code executeTimer}. Which failures are retried is decided by the {@code Retry} instance
 * passed to the constructor, not by this method.
 *
 * @param consumer the action to perform with the pub/sub connection
 * @throws RuntimeException if the operation ultimately fails; a failure that is not already a
 *         {@code RuntimeException} is wrapped in one, with the original as its cause
 */
public void usePubSubConnection(final Consumer<StatefulRedisClusterPubSubConnection<K, V>> consumer) {
// NOTE(review): the next two lines look like the pre-change body retained by the diff view
// (no +/- markers survive in this rendering) — confirm against the real file.
try (final Timer.Context ignored = executeTimer.time()) {
this.circuitBreaker.executeRunnable(() -> consumer.accept(pubSubConnection));
try {
circuitBreaker.executeCheckedRunnable(() -> retry.executeRunnable(() -> {
// The timer is started inside the retried lambda, so every attempt is measured separately.
try (final Timer.Context ignored = executeTimer.time()) {
consumer.accept(pubSubConnection);
}
}));
} catch (final Throwable t) {
// Log every failure, then rethrow it unchecked so callers need no checked-exception handling.
log.warn("Redis operation failure", t);
if (t instanceof RuntimeException) {
throw (RuntimeException) t;
} else {
throw new RuntimeException(t);
}
}
}
/**
 * Applies the given function to the wrapped pub/sub connection and returns its result.
 *
 * <p>The call is made through {@code retry}, and that retry call is itself made through
 * {@code circuitBreaker}. Each invocation of the function is timed individually with
 * {@code executeTimer}. Which failures are retried is decided by the {@code Retry} instance
 * passed to the constructor, not by this method.
 *
 * @param function the function to apply to the pub/sub connection
 * @param <T> the type of value produced by the function
 * @return whatever {@code function} returns for the pub/sub connection
 * @throws RuntimeException if the operation ultimately fails; a failure that is not already a
 *         {@code RuntimeException} is wrapped in one, with the original as its cause
 */
public <T> T withPubSubConnection(final Function<StatefulRedisClusterPubSubConnection<K, V>, T> consumer) {
try (final Timer.Context ignored = executeTimer.time()) {
return this.circuitBreaker.executeSupplier(() -> consumer.apply(pubSubConnection));
// NOTE(review): the three lines above look like the pre-change signature and body retained by
// the diff view (no +/- markers survive in this rendering) — confirm against the real file.
public <T> T withPubSubConnection(final Function<StatefulRedisClusterPubSubConnection<K, V>, T> function) {
try {
return circuitBreaker.executeCheckedSupplier(() -> retry.executeCallable(() -> {
// The timer is started inside the retried lambda, so every attempt is measured separately.
try (final Timer.Context ignored = executeTimer.time()) {
return function.apply(pubSubConnection);
}
}));
} catch (final Throwable t) {
// Log every failure, then rethrow it unchecked so callers need no checked-exception handling.
log.warn("Redis operation failure", t);
if (t instanceof RuntimeException) {
throw (RuntimeException) t;
} else {
throw new RuntimeException(t);
}
}
}
}

View File

@@ -5,6 +5,8 @@ import com.codahale.metrics.SharedMetricRegistries;
import com.codahale.metrics.Timer;
import com.google.common.annotations.VisibleForTesting;
import io.github.resilience4j.circuitbreaker.CircuitBreaker;
import io.github.resilience4j.retry.Retry;
import io.lettuce.core.RedisCommandTimeoutException;
import io.lettuce.core.RedisURI;
import io.lettuce.core.cluster.RedisClusterClient;
import io.lettuce.core.cluster.api.StatefulRedisClusterConnection;
@@ -14,6 +16,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.whispersystems.textsecuregcm.configuration.CircuitBreakerConfiguration;
import org.whispersystems.textsecuregcm.configuration.RedisClusterConfiguration;
import org.whispersystems.textsecuregcm.configuration.RetryConfiguration;
import org.whispersystems.textsecuregcm.util.CircuitBreakerUtil;
import org.whispersystems.textsecuregcm.util.Constants;
@@ -42,6 +45,7 @@ public class FaultTolerantRedisCluster {
private final List<StatefulRedisClusterPubSubConnection<?, ?>> pubSubConnections = new ArrayList<>();
private final CircuitBreaker circuitBreaker;
private final Retry retry;
private final Timer executeTimer;
@@ -51,11 +55,12 @@ public class FaultTolerantRedisCluster {
this(name,
RedisClusterClient.create(clusterConfiguration.getUrls().stream().map(RedisURI::create).collect(Collectors.toList())),
clusterConfiguration.getTimeout(),
clusterConfiguration.getCircuitBreakerConfiguration());
clusterConfiguration.getCircuitBreakerConfiguration(),
clusterConfiguration.getRetryConfiguration());
}
@VisibleForTesting
FaultTolerantRedisCluster(final String name, final RedisClusterClient clusterClient, final Duration commandTimeout, final CircuitBreakerConfiguration circuitBreakerConfiguration) {
FaultTolerantRedisCluster(final String name, final RedisClusterClient clusterClient, final Duration commandTimeout, final CircuitBreakerConfiguration circuitBreakerConfiguration, final RetryConfiguration retryConfiguration) {
this.name = name;
this.clusterClient = clusterClient;
@@ -65,10 +70,10 @@ public class FaultTolerantRedisCluster {
this.binaryConnection = clusterClient.connect(ByteArrayCodec.INSTANCE);
this.circuitBreaker = CircuitBreaker.of(name, circuitBreakerConfiguration.toCircuitBreakerConfig());
this.retry = Retry.of(name, retryConfiguration.toRetryConfigBuilder().retryOnException(exception -> exception instanceof RedisCommandTimeoutException).build());
CircuitBreakerUtil.registerMetrics(SharedMetricRegistries.getOrCreate(Constants.METRICS_NAME),
circuitBreaker,
FaultTolerantRedisCluster.class);
CircuitBreakerUtil.registerMetrics(SharedMetricRegistries.getOrCreate(Constants.METRICS_NAME), circuitBreaker, FaultTolerantRedisCluster.class);
CircuitBreakerUtil.registerMetrics(SharedMetricRegistries.getOrCreate(Constants.METRICS_NAME), retry, FaultTolerantRedisCluster.class);
final MetricRegistry metricRegistry = SharedMetricRegistries.getOrCreate(Constants.METRICS_NAME);
@@ -104,11 +109,11 @@ public class FaultTolerantRedisCluster {
private <K, V> void useConnection(final StatefulRedisClusterConnection<K, V> connection, final Consumer<StatefulRedisClusterConnection<K, V>> consumer) {
try {
circuitBreaker.executeCheckedRunnable(() -> {
circuitBreaker.executeCheckedRunnable(() -> retry.executeRunnable(() -> {
try (final Timer.Context ignored = executeTimer.time()) {
consumer.accept(connection);
}
});
}));
} catch (final Throwable t) {
log.warn("Redis operation failure", t);
@@ -122,11 +127,11 @@ public class FaultTolerantRedisCluster {
private <T, K, V> T withConnection(final StatefulRedisClusterConnection<K, V> connection, final Function<StatefulRedisClusterConnection<K, V>, T> function) {
try {
return circuitBreaker.executeCheckedSupplier(() -> {
return circuitBreaker.executeCheckedSupplier(() -> retry.executeCallable(() -> {
try (final Timer.Context ignored = executeTimer.time()) {
return function.apply(connection);
}
});
}));
} catch (final Throwable t) {
log.warn("Redis operation failure", t);
@@ -142,6 +147,6 @@ public class FaultTolerantRedisCluster {
final StatefulRedisClusterPubSubConnection<String, String> pubSubConnection = clusterClient.connectPubSub();
pubSubConnections.add(pubSubConnection);
return new FaultTolerantPubSubConnection<>(name, pubSubConnection, circuitBreaker);
return new FaultTolerantPubSubConnection<>(name, pubSubConnection, circuitBreaker, retry);
}
}