mirror of
https://github.com/signalapp/Signal-Server
synced 2026-04-20 13:07:58 +01:00
Add a framework for running experiments to improve push notification reliability
This commit is contained in:
@@ -0,0 +1,68 @@
|
||||
package org.whispersystems.textsecuregcm.experiment;
|
||||
|
||||
import org.whispersystems.textsecuregcm.storage.Account;
|
||||
import org.whispersystems.textsecuregcm.storage.Device;
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/**
|
||||
* A push notification selects for eligible devices, applies a control or experimental treatment, and provides a
|
||||
* mechanism for comparing device states before and after receiving the treatment.
|
||||
*
|
||||
* @param <T> the type of state object stored for this experiment
|
||||
*/
|
||||
public interface PushNotificationExperiment<T> {
|
||||
|
||||
/**
|
||||
* Returns the unique name of this experiment.
|
||||
*
|
||||
* @return the unique name of this experiment
|
||||
*/
|
||||
String getExperimentName();
|
||||
|
||||
/**
|
||||
* Tests whether a device is eligible for this experiment. An eligible device may be assigned to either the control
|
||||
* or experiment group within an experiment. Ineligible devices will not participate in the experiment in any way.
|
||||
*
|
||||
* @param account the account to which the device belongs
|
||||
* @param device the device to test for eligibility in this experiment
|
||||
*
|
||||
* @return a future that yields a boolean value indicating whether the target device is eligible for this experiment
|
||||
*/
|
||||
CompletableFuture<Boolean> isDeviceEligible(Account account, Device device);
|
||||
|
||||
/**
|
||||
* Generates an experiment specific state "snapshot" of the given device. Experiment results are generally evaluated
|
||||
* by comparing a device's state before a treatment is applied and its state after the treatment is applied.
|
||||
*
|
||||
* @param account the account to which the device belongs
|
||||
* @param device the device for which to generate a state "snapshot"
|
||||
*
|
||||
* @return an experiment-specific state "snapshot" of the given device
|
||||
*/
|
||||
T getState(@Nullable Account account, @Nullable Device device);
|
||||
|
||||
/**
|
||||
* Applies a control treatment to the given device. In many cases (and by default) no action is taken for devices in
|
||||
* the control group.
|
||||
*
|
||||
* @param account the account to which the device belongs
|
||||
* @param device the device to which to apply the control treatment for this experiment
|
||||
*
|
||||
* @return a future that completes when the control treatment has been applied for the given device
|
||||
*/
|
||||
default CompletableFuture<Void> applyControlTreatment(Account account, Device device) {
|
||||
return CompletableFuture.completedFuture(null);
|
||||
};
|
||||
|
||||
/**
|
||||
* Applies an experimental treatment to the given device. This generally involves sending or scheduling a specific
|
||||
* type of push notification for the given device.
|
||||
*
|
||||
* @param account the account to which the device belongs
|
||||
* @param device the device to which to apply the experimental treatment for this experiment
|
||||
*
|
||||
* @return a future that completes when the experimental treatment has been applied for the given device
|
||||
*/
|
||||
CompletableFuture<Void> applyExperimentTreatment(Account account, Device device);
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
package org.whispersystems.textsecuregcm.experiment;
|
||||
|
||||
public record PushNotificationExperimentSample<T>(boolean inExperimentGroup, T initialState, T finalState) {
|
||||
}
|
||||
@@ -0,0 +1,284 @@
|
||||
package org.whispersystems.textsecuregcm.experiment;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.time.Clock;
|
||||
import java.time.Duration;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.whispersystems.textsecuregcm.util.ExceptionUtils;
|
||||
import org.whispersystems.textsecuregcm.util.SystemMapper;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
import reactor.util.function.Tuple2;
|
||||
import reactor.util.function.Tuples;
|
||||
import reactor.util.retry.Retry;
|
||||
import software.amazon.awssdk.core.SdkBytes;
|
||||
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ConditionalCheckFailedException;
|
||||
import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.QueryRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ReturnValue;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ScanRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest;
|
||||
|
||||
public class PushNotificationExperimentSamples {
|
||||
|
||||
private final DynamoDbAsyncClient dynamoDbAsyncClient;
|
||||
private final String tableName;
|
||||
private final Clock clock;
|
||||
|
||||
// Experiment name; DynamoDB string; partition key
|
||||
public static final String KEY_EXPERIMENT_NAME = "N";
|
||||
|
||||
// Combined ACI and device ID; DynamoDB byte array; sort key
|
||||
public static final String ATTR_ACI_AND_DEVICE_ID = "AD";
|
||||
|
||||
// Whether the device is enrolled in the experiment group (as opposed to control group); DynamoDB boolean
|
||||
static final String ATTR_IN_EXPERIMENT_GROUP = "X";
|
||||
|
||||
// The experiment-specific state of the device at the start of the experiment, represented as a JSON blob; DynamoDB
|
||||
// string
|
||||
static final String ATTR_INITIAL_STATE = "I";
|
||||
|
||||
// The experiment-specific state of the device at the end of the experiment, represented as a JSON blob; DynamoDB
|
||||
// string
|
||||
static final String ATTR_FINAL_STATE = "F";
|
||||
|
||||
// The time, in seconds since the epoch, at which this sample should be deleted automatically
|
||||
static final String ATTR_TTL = "E";
|
||||
|
||||
private static final Duration FINAL_SAMPLE_TTL = Duration.ofDays(7);
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(PushNotificationExperimentSamples.class);
|
||||
|
||||
public PushNotificationExperimentSamples(final DynamoDbAsyncClient dynamoDbAsyncClient,
|
||||
final String tableName,
|
||||
final Clock clock) {
|
||||
|
||||
this.dynamoDbAsyncClient = dynamoDbAsyncClient;
|
||||
this.tableName = tableName;
|
||||
this.clock = clock;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the initial state of a device participating in a push notification experiment.
|
||||
*
|
||||
* @param accountIdentifier the account identifier for the account to which the target device is linked
|
||||
* @param deviceId the identifier for the device within the given account
|
||||
* @param experimentName the name of the experiment
|
||||
* @param inExperimentGroup whether the given device is in the experiment group (as opposed to control group)
|
||||
* @param initialState the initial state of the object; must be serializable as a JSON text
|
||||
*
|
||||
* @return a future that completes when the record has been stored; the future yields {@code true} if a new record
|
||||
* was stored or {@code false} if a conflicting record already exists
|
||||
*
|
||||
* @param <T> the type of state object for this sample
|
||||
*
|
||||
* @throws JsonProcessingException if the given {@code initialState} could not be serialized as a JSON text
|
||||
*/
|
||||
public <T> CompletableFuture<Boolean> recordInitialState(final UUID accountIdentifier,
|
||||
final byte deviceId,
|
||||
final String experimentName,
|
||||
final boolean inExperimentGroup,
|
||||
final T initialState) throws JsonProcessingException {
|
||||
|
||||
final AttributeValue initialStateAttributeValue =
|
||||
AttributeValue.fromS(SystemMapper.jsonMapper().writeValueAsString(initialState));
|
||||
|
||||
final AttributeValue inExperimentGroupAttributeValue = AttributeValue.fromBool(inExperimentGroup);
|
||||
|
||||
return dynamoDbAsyncClient.putItem(PutItemRequest.builder()
|
||||
.tableName(tableName)
|
||||
.item(Map.of(
|
||||
KEY_EXPERIMENT_NAME, AttributeValue.fromS(experimentName),
|
||||
ATTR_ACI_AND_DEVICE_ID, buildSortKey(accountIdentifier, deviceId),
|
||||
ATTR_IN_EXPERIMENT_GROUP, inExperimentGroupAttributeValue,
|
||||
ATTR_INITIAL_STATE, initialStateAttributeValue,
|
||||
ATTR_TTL, AttributeValue.fromN(String.valueOf(clock.instant().plus(FINAL_SAMPLE_TTL).getEpochSecond()))))
|
||||
.conditionExpression("(attribute_not_exists(#inExperimentGroup) OR #inExperimentGroup = :inExperimentGroup) AND (attribute_not_exists(#initialState) OR #initialState = :initialState) AND attribute_not_exists(#finalState)")
|
||||
.expressionAttributeNames(Map.of(
|
||||
"#inExperimentGroup", ATTR_IN_EXPERIMENT_GROUP,
|
||||
"#initialState", ATTR_INITIAL_STATE,
|
||||
"#finalState", ATTR_FINAL_STATE))
|
||||
.expressionAttributeValues(Map.of(
|
||||
":inExperimentGroup", inExperimentGroupAttributeValue,
|
||||
":initialState", initialStateAttributeValue))
|
||||
.build())
|
||||
.thenApply(ignored -> true)
|
||||
.exceptionally(throwable -> {
|
||||
if (ExceptionUtils.unwrap(throwable) instanceof ConditionalCheckFailedException) {
|
||||
return false;
|
||||
}
|
||||
|
||||
throw ExceptionUtils.wrap(throwable);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the final state of a device participating in a push notification experiment.
|
||||
*
|
||||
* @param accountIdentifier the account identifier for the account to which the target device is linked
|
||||
* @param deviceId the identifier for the device within the given account
|
||||
* @param experimentName the name of the experiment
|
||||
* @param finalState the final state of the object; must be serializable as a JSON text and of the same type as the
|
||||
* previously-stored initial state
|
||||
|
||||
* @return a future that completes when the final state has been stored; yields a finished sample if an initial sample
|
||||
* was found or empty if no initial sample was found for the given account, device, and experiment
|
||||
*
|
||||
* @param <T> the type of state object for this sample
|
||||
*
|
||||
* @throws JsonProcessingException if the given {@code finalState} could not be serialized as a JSON text
|
||||
*/
|
||||
public <T> CompletableFuture<PushNotificationExperimentSample<T>> recordFinalState(final UUID accountIdentifier,
|
||||
final byte deviceId,
|
||||
final String experimentName,
|
||||
final T finalState) throws JsonProcessingException {
|
||||
|
||||
final AttributeValue aciAndDeviceIdAttributeValue = buildSortKey(accountIdentifier, deviceId);
|
||||
|
||||
return dynamoDbAsyncClient.updateItem(UpdateItemRequest.builder()
|
||||
.tableName(tableName)
|
||||
.key(Map.of(
|
||||
KEY_EXPERIMENT_NAME, AttributeValue.fromS(experimentName),
|
||||
ATTR_ACI_AND_DEVICE_ID, aciAndDeviceIdAttributeValue))
|
||||
// `UpdateItem` will, by default, create a new item if one does not already exist for the given primary key. We
|
||||
// want update-only-if-exists behavior, though, and so check that there's already an existing item for this ACI
|
||||
// and device ID.
|
||||
.conditionExpression("#aciAndDeviceId = :aciAndDeviceId")
|
||||
.updateExpression("SET #finalState = if_not_exists(#finalState, :finalState)")
|
||||
.expressionAttributeNames(Map.of(
|
||||
"#aciAndDeviceId", ATTR_ACI_AND_DEVICE_ID,
|
||||
"#finalState", ATTR_FINAL_STATE))
|
||||
.expressionAttributeValues(Map.of(
|
||||
":aciAndDeviceId", aciAndDeviceIdAttributeValue,
|
||||
":finalState", AttributeValue.fromS(SystemMapper.jsonMapper().writeValueAsString(finalState))))
|
||||
.returnValues(ReturnValue.ALL_NEW)
|
||||
.build())
|
||||
.thenApply(updateItemResponse -> {
|
||||
try {
|
||||
final boolean inExperimentGroup = updateItemResponse.attributes().get(ATTR_IN_EXPERIMENT_GROUP).bool();
|
||||
|
||||
@SuppressWarnings("unchecked") final T parsedInitialState =
|
||||
(T) parseState(updateItemResponse.attributes().get(ATTR_INITIAL_STATE).s(), finalState.getClass());
|
||||
|
||||
@SuppressWarnings("unchecked") final T parsedFinalState =
|
||||
(T) parseState(updateItemResponse.attributes().get(ATTR_FINAL_STATE).s(), finalState.getClass());
|
||||
|
||||
return new PushNotificationExperimentSample<>(inExperimentGroup, parsedInitialState, parsedFinalState);
|
||||
} catch (final JsonProcessingException e) {
|
||||
throw ExceptionUtils.wrap(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a publisher across all samples pending a final state for a given experiment.
|
||||
*
|
||||
* @param experimentName the name of the experiment for which to retrieve samples pending a final state
|
||||
*
|
||||
* @return a publisher across all samples pending a final state for a given experiment
|
||||
*/
|
||||
public Flux<Tuple2<UUID, Byte>> getDevicesPendingFinalState(final String experimentName) {
|
||||
return Flux.from(dynamoDbAsyncClient.queryPaginator(QueryRequest.builder()
|
||||
.tableName(tableName)
|
||||
.keyConditionExpression("#experiment = :experiment")
|
||||
.filterExpression("attribute_not_exists(#finalState)")
|
||||
.expressionAttributeNames(Map.of(
|
||||
"#experiment", KEY_EXPERIMENT_NAME,
|
||||
"#finalState", ATTR_FINAL_STATE))
|
||||
.expressionAttributeValues(Map.of(":experiment", AttributeValue.fromS(experimentName)))
|
||||
.projectionExpression(ATTR_ACI_AND_DEVICE_ID)
|
||||
.build())
|
||||
.items())
|
||||
.map(item -> parseSortKey(item.get(ATTR_ACI_AND_DEVICE_ID)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a publisher across all finished samples (i.e. samples with a recorded final state) for a given experiment.
|
||||
*
|
||||
* @param experimentName the name of the experiment for which to retrieve finished samples
|
||||
* @param stateClass the type of state object for sample in the given experiment
|
||||
*
|
||||
* @return a publisher across all finished samples for the given experiment
|
||||
*
|
||||
* @param <T> the type of the sample's state objects
|
||||
*/
|
||||
public <T> Flux<PushNotificationExperimentSample<T>> getFinishedSamples(final String experimentName,
|
||||
final Class<T> stateClass) {
|
||||
return Flux.from(dynamoDbAsyncClient.queryPaginator(QueryRequest.builder()
|
||||
.tableName(tableName)
|
||||
.keyConditionExpression("#experiment = :experiment")
|
||||
.filterExpression("attribute_exists(#finalState)")
|
||||
.expressionAttributeNames(Map.of(
|
||||
"#experiment", KEY_EXPERIMENT_NAME,
|
||||
"#finalState", ATTR_FINAL_STATE))
|
||||
.expressionAttributeValues(Map.of(":experiment", AttributeValue.fromS(experimentName)))
|
||||
.build())
|
||||
.items())
|
||||
.handle((item, sink) -> {
|
||||
try {
|
||||
final boolean inExperimentGroup = item.get(ATTR_IN_EXPERIMENT_GROUP).bool();
|
||||
final T initialState = parseState(item.get(ATTR_INITIAL_STATE).s(), stateClass);
|
||||
final T finalState = parseState(item.get(ATTR_FINAL_STATE).s(), stateClass);
|
||||
|
||||
sink.next(new PushNotificationExperimentSample<>(inExperimentGroup, initialState, finalState));
|
||||
} catch (final JsonProcessingException e) {
|
||||
sink.error(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public CompletableFuture<Void> discardSamples(final String experimentName, final int maxConcurrency) {
|
||||
final AttributeValue experimentNameAttributeValue = AttributeValue.fromS(experimentName);
|
||||
|
||||
return Flux.from(dynamoDbAsyncClient.scanPaginator(ScanRequest.builder()
|
||||
.tableName(tableName)
|
||||
.filterExpression("#experiment = :experiment")
|
||||
.expressionAttributeNames(Map.of("#experiment", KEY_EXPERIMENT_NAME))
|
||||
.expressionAttributeValues(Map.of(":experiment", experimentNameAttributeValue))
|
||||
.projectionExpression(ATTR_ACI_AND_DEVICE_ID)
|
||||
.build())
|
||||
.items())
|
||||
.map(item -> item.get(ATTR_ACI_AND_DEVICE_ID))
|
||||
.flatMap(aciAndDeviceId -> Mono.fromFuture(() -> dynamoDbAsyncClient.deleteItem(DeleteItemRequest.builder()
|
||||
.tableName(tableName)
|
||||
.key(Map.of(
|
||||
KEY_EXPERIMENT_NAME, experimentNameAttributeValue,
|
||||
ATTR_ACI_AND_DEVICE_ID, aciAndDeviceId))
|
||||
.build()))
|
||||
.retryWhen(Retry.backoff(5, Duration.ofSeconds(5)))
|
||||
.onErrorResume(throwable -> {
|
||||
log.warn("Failed to delete sample for experiment {}", experimentName, throwable);
|
||||
return Mono.empty();
|
||||
}), maxConcurrency)
|
||||
.then()
|
||||
.toFuture();
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
static AttributeValue buildSortKey(final UUID accountIdentifier, final byte deviceId) {
|
||||
return AttributeValue.fromB(SdkBytes.fromByteBuffer(ByteBuffer.allocate(17)
|
||||
.putLong(accountIdentifier.getMostSignificantBits())
|
||||
.putLong(accountIdentifier.getLeastSignificantBits())
|
||||
.put(deviceId)
|
||||
.flip()));
|
||||
}
|
||||
|
||||
private static Tuple2<UUID, Byte> parseSortKey(final AttributeValue sortKey) {
|
||||
final ByteBuffer byteBuffer = sortKey.b().asByteBuffer();
|
||||
|
||||
return Tuples.of(new UUID(byteBuffer.getLong(), byteBuffer.getLong()), byteBuffer.get());
|
||||
}
|
||||
|
||||
private static <T> T parseState(final String state, final Class<T> clazz) throws JsonProcessingException {
|
||||
return SystemMapper.jsonMapper().readValue(state, clazz);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user