Add dynamic scaling to our JobRunner system.

This commit is contained in:
Greyson Parrelli
2025-08-21 11:56:14 -04:00
committed by Jeffrey Starke
parent c117082f23
commit b3d2e31bae
10 changed files with 810 additions and 68 deletions

View File

@@ -4,6 +4,7 @@ import android.app.Application;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import androidx.annotation.VisibleForTesting;
import androidx.annotation.WorkerThread;
import com.annimon.stream.Collectors;
@@ -27,6 +28,9 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
/**
@@ -37,6 +41,8 @@ class JobController {
private static final String TAG = Log.tag(JobController.class);
private static final Predicate<MinimalJobSpec> NO_PREDICATE = spec -> true;
private final Application application;
private final JobStorage jobStorage;
private final JobInstantiator jobInstantiator;
@@ -47,6 +53,18 @@ class JobController {
private final Callback callback;
private final Map<String, Job> runningJobs;
private final int minGeneralRunners;
private final int maxGeneralRunners;
private final long generalRunnerIdleTimeout;
private final AtomicInteger nextRunnerId;
private final List<Predicate<MinimalJobSpec>> reservedRunnerPredicates;
@VisibleForTesting
final AtomicBoolean runnersStarted = new AtomicBoolean(false);
@VisibleForTesting
final List<JobRunner> activeGeneralRunners;
JobController(@NonNull Application application,
@NonNull JobStorage jobStorage,
@NonNull JobInstantiator jobInstantiator,
@@ -54,17 +72,27 @@ class JobController {
@NonNull JobTracker jobTracker,
@NonNull Scheduler scheduler,
@NonNull Debouncer debouncer,
@NonNull Callback callback)
@NonNull Callback callback,
int minGeneralRunners,
int maxGeneralRunners,
long generalRunnerIdleTimeout,
@NonNull List<Predicate<MinimalJobSpec>> reservedRunnerPredicates)
{
this.application = application;
this.jobStorage = jobStorage;
this.jobInstantiator = jobInstantiator;
this.constraintInstantiator = constraintInstantiator;
this.jobTracker = jobTracker;
this.scheduler = scheduler;
this.debouncer = debouncer;
this.callback = callback;
this.runningJobs = new HashMap<>();
this.application = application;
this.jobStorage = jobStorage;
this.jobInstantiator = jobInstantiator;
this.constraintInstantiator = constraintInstantiator;
this.jobTracker = jobTracker;
this.scheduler = scheduler;
this.debouncer = debouncer;
this.callback = callback;
this.runningJobs = new HashMap<>();
this.minGeneralRunners = minGeneralRunners;
this.maxGeneralRunners = maxGeneralRunners;
this.generalRunnerIdleTimeout = generalRunnerIdleTimeout;
this.nextRunnerId = new AtomicInteger(0);
this.activeGeneralRunners = new CopyOnWriteArrayList<>();
this.reservedRunnerPredicates = new ArrayList<>(reservedRunnerPredicates);
}
@WorkerThread
@@ -116,6 +144,7 @@ class JobController {
synchronized (this) {
notifyAll();
maybeScaleUpRunners(jobStorage.getEligibleJobCount(System.currentTimeMillis()));
}
}
@@ -167,6 +196,7 @@ class JobController {
synchronized (this) {
notifyAll();
maybeScaleUpRunners(jobStorage.getEligibleJobCount(System.currentTimeMillis()));
}
}
@@ -202,6 +232,7 @@ class JobController {
synchronized (this) {
notifyAll();
maybeScaleUpRunners(jobStorage.getEligibleJobCount(System.currentTimeMillis()));
}
}
@@ -337,20 +368,30 @@ class JobController {
* - Has no dependencies
* - Has no unmet constraints
*
* This method will block until a job is available or, if {@code timeoutMs} is greater than 0, until that timeout elapses.
* When the job returned from this method has been run, you must call {@link #onJobFinished(Job)}.
* @param predicate Filter for jobs to consider
* @param timeoutMs Maximum time to wait for a job. If 0, waits indefinitely.
* @return Job to execute, or null if the timeout is hit
*/
@WorkerThread
synchronized @NonNull Job pullNextEligibleJobForExecution(@NonNull Predicate<MinimalJobSpec> predicate) {
synchronized @Nullable Job pullNextEligibleJobForExecution(@NonNull Predicate<MinimalJobSpec> predicate, long timeoutMs) {
try {
Job job;
long startTime = System.currentTimeMillis();
while ((job = getNextEligibleJobForExecution(predicate)) == null) {
if (runningJobs.isEmpty()) {
debouncer.publish(callback::onEmpty);
}
wait();
if (timeoutMs > 0) {
long remainingTime = timeoutMs - (System.currentTimeMillis() - startTime);
if (remainingTime <= 0) {
return null;
}
wait(remainingTime);
} else {
wait();
}
}
jobStorage.markJobAsRunning(job.getId(), System.currentTimeMillis());
@@ -411,6 +452,68 @@ class JobController {
return jobStorage.areQueuesEmpty(queueKeys);
}
/**
 * Starts the runner threads and marks the runner system as started.
 *
 * In order, this:
 * - starts one runner per entry in {@code reservedRunnerPredicates} (a null entry falls back to {@code NO_PREDICATE}),
 * - starts {@code minGeneralRunners} general runners with an idle timeout of 0,
 * - asks {@link #maybeScaleUpRunners(int)} to add further general runners for the jobs that are eligible right now,
 * - wakes every thread waiting on this controller's monitor.
 */
@WorkerThread
synchronized void startJobRunners() {
  Log.i(TAG, "Starting JobRunners. (Reserved: " + reservedRunnerPredicates.size() + ", MinGeneral: " + minGeneralRunners + ", MaxGeneral: " + maxGeneralRunners + ", GeneralIdleTimeout: " + generalRunnerIdleTimeout + " ms)");

  // Must be flipped before any scaling call below, otherwise maybeScaleUpRunners() is a no-op.
  runnersStarted.set(true);

  // Reserved runners are created with timeout 0 and are not added to activeGeneralRunners.
  for (Predicate<MinimalJobSpec> reserved : reservedRunnerPredicates) {
    int runnerId = nextRunnerId.incrementAndGet();

    Predicate<MinimalJobSpec> effectivePredicate = NO_PREDICATE;
    if (reserved != null) {
      effectivePredicate = reserved;
    }

    JobRunner reservedRunner = new JobRunner(application, runnerId, this, effectivePredicate, 0);
    reservedRunner.start();
    Log.i(TAG, "Spawned new reserved JobRunner[" + runnerId + "]");
  }

  // The baseline general runners use timeout 0, i.e. no idle timeout.
  for (int remaining = minGeneralRunners; remaining > 0; remaining--) {
    spawnGeneralRunner(0);
  }

  int eligibleNow = jobStorage.getEligibleJobCount(System.currentTimeMillis());
  maybeScaleUpRunners(eligibleNow);

  notifyAll();
}
/**
 * Spawns additional general {@link JobRunner}s when the number of eligible jobs exceeds the number of
 * general runners currently tracked, without ever exceeding {@code maxGeneralRunners}.
 *
 * Does nothing until {@link #startJobRunners()} has set {@code runnersStarted}.
 *
 * NOTE(review): the demand calculation subtracts every tracked general runner, and nothing visible here
 * distinguishes runners that are busy from runners that are idle -- confirm that is the intended heuristic.
 *
 * @param eligibleJobCount Number of jobs that are currently eligible to run.
 */
@VisibleForTesting
synchronized void maybeScaleUpRunners(int eligibleJobCount) {
  if (!runnersStarted.get()) {
    return;
  }

  final int currentlyActive = this.activeGeneralRunners.size();
  final int headroom        = maxGeneralRunners - currentlyActive;
  final int shortfall       = eligibleJobCount - currentlyActive;
  final int spawnCount      = Math.min(shortfall, headroom);

  // Either demand is already covered or the cap has been reached.
  if (spawnCount <= 0) {
    return;
  }

  Log.i(TAG, "Spawning " + spawnCount + " new JobRunner(s) to meet demand. (CurrentActive: " + currentlyActive + ", EligibleJobs: " + eligibleJobCount + ", MaxAllowed: " + maxGeneralRunners + ")");

  // Runners added on demand get the configured idle timeout.
  for (int remaining = spawnCount; remaining > 0; remaining--) {
    spawnGeneralRunner(generalRunnerIdleTimeout);
  }
}
/**
 * Creates and starts a general runner (one that uses {@code NO_PREDICATE}) and records it in
 * {@code activeGeneralRunners}.
 *
 * @param timeOutMs Idle timeout handed to the new {@link JobRunner}. A value of 0 is logged as a "core"
 *                  runner, anything else as a "temporary" runner.
 */
private synchronized void spawnGeneralRunner(long timeOutMs) {
  final int       runnerId = nextRunnerId.incrementAndGet();
  final JobRunner spawned  = new JobRunner(application, runnerId, this, NO_PREDICATE, timeOutMs);

  spawned.start();
  activeGeneralRunners.add(spawned);

  final String flavor;
  if (timeOutMs == 0) {
    flavor = "core";
  } else {
    flavor = "temporary";
  }

  Log.d(TAG, "Spawned new " + flavor + " general JobRunner[" + runnerId + "] (CurrentActive: " + activeGeneralRunners.size() + ")");
}
/**
 * Removes a runner from {@code activeGeneralRunners} once it has decided to stop, so that it no longer
 * counts towards the active total used when scaling up.
 *
 * @param runner The runner that is terminating.
 */
@VisibleForTesting
synchronized void onRunnerTerminated(@NonNull JobRunner runner) {
  activeGeneralRunners.remove(runner);

  // JobRunner extends Thread, so getId() is the JVM thread id rather than the runner id printed by the
  // "Spawned new ... JobRunner[id]" logs. The thread name embeds the runner id, so log that instead to
  // keep spawn and termination lines correlatable.
  Log.i(TAG, runner.getName() + " terminated. (CurrentActive: " + activeGeneralRunners.size() + ")");
}
@WorkerThread
private boolean chainExceedsMaximumInstances(@NonNull List<List<Job>> chain) {
if (chain.size() == 1 && chain.get(0).size() == 1) {

View File

@@ -1,7 +1,5 @@
package org.thoughtcrime.securesms.jobmanager
import android.text.TextUtils
/**
* Provides utilities to create consistent logging for jobs.
*/
@@ -15,7 +13,7 @@ object JobLogger {
@JvmStatic
fun format(job: Job, extraTag: String, event: String): String {
val id = job.id
val tag = if (TextUtils.isEmpty(extraTag)) "" else "[$extraTag]"
val tag = if (extraTag.isBlank()) "" else "[$extraTag]"
val timeSinceSubmission = System.currentTimeMillis() - job.parameters.createTime
val runAttempt = job.runAttempt + 1
val maxAttempts = if (job.parameters.maxAttempts == Job.Parameters.UNLIMITED) "Unlimited" else job.parameters.maxAttempts.toString()

View File

@@ -47,8 +47,6 @@ public class JobManager implements ConstraintObserver.Notifier {
public static final int CURRENT_VERSION = 12;
private static final Predicate<MinimalJobSpec> NO_PREDICATE = spec -> true;
private final Application application;
private final Configuration configuration;
private final Executor executor;
@@ -76,7 +74,11 @@ public class JobManager implements ConstraintObserver.Notifier {
Build.VERSION.SDK_INT < 26 ? new AlarmManagerScheduler(application)
: new CompositeScheduler(new InAppScheduler(this), new JobSchedulerScheduler(application)),
new Debouncer(500),
this::onEmptyQueue);
this::onEmptyQueue,
configuration.getMinGeneralRunners(),
configuration.getMaxGeneralRunners(),
configuration.getGeneralRunnerIdleTimeout(),
configuration.getReservedJobRunners());
executor.execute(() -> {
synchronized (this) {
@@ -111,17 +113,8 @@ public class JobManager implements ConstraintObserver.Notifier {
* Begins the execution of jobs.
*/
public void beginJobLoop() {
runOnExecutor(()-> {
int id = 0;
for (int i = 0; i < configuration.getJobThreadCount(); i++) {
new JobRunner(application, ++id, jobController, NO_PREDICATE).start();
}
for (Predicate<MinimalJobSpec> predicate : configuration.getReservedJobRunners()) {
new JobRunner(application, ++id, jobController, predicate).start();
}
runOnExecutor(() -> {
jobController.startJobRunners();
jobController.wakeUp();
});
}
@@ -596,7 +589,9 @@ public class JobManager implements ConstraintObserver.Notifier {
public static class Configuration {
private final ExecutorFactory executorFactory;
private final int jobThreadCount;
private final int minGeneralRunners;
private final int maxGeneralRunners;
private final long generalRunnerIdleTimeout;
private final JobInstantiator jobInstantiator;
private final ConstraintInstantiator constraintInstantiator;
private final List<ConstraintObserver> constraintObservers;
@@ -605,7 +600,9 @@ public class JobManager implements ConstraintObserver.Notifier {
private final JobTracker jobTracker;
private final List<Predicate<MinimalJobSpec>> reservedJobRunners;
private Configuration(int jobThreadCount,
private Configuration(int minGeneralRunners,
int maxGeneralRunners,
long generalRunnerIdleTimeout,
@NonNull ExecutorFactory executorFactory,
@NonNull JobInstantiator jobInstantiator,
@NonNull ConstraintInstantiator constraintInstantiator,
@@ -615,19 +612,29 @@ public class JobManager implements ConstraintObserver.Notifier {
@NonNull JobTracker jobTracker,
@NonNull List<Predicate<MinimalJobSpec>> reservedJobRunners)
{
this.executorFactory = executorFactory;
this.jobThreadCount = jobThreadCount;
this.jobInstantiator = jobInstantiator;
this.constraintInstantiator = constraintInstantiator;
this.constraintObservers = new ArrayList<>(constraintObservers);
this.jobStorage = jobStorage;
this.jobMigrator = jobMigrator;
this.jobTracker = jobTracker;
this.reservedJobRunners = new ArrayList<>(reservedJobRunners);
this.executorFactory = executorFactory;
this.minGeneralRunners = minGeneralRunners;
this.maxGeneralRunners = maxGeneralRunners;
this.generalRunnerIdleTimeout = generalRunnerIdleTimeout;
this.jobInstantiator = jobInstantiator;
this.constraintInstantiator = constraintInstantiator;
this.constraintObservers = new ArrayList<>(constraintObservers);
this.jobStorage = jobStorage;
this.jobMigrator = jobMigrator;
this.jobTracker = jobTracker;
this.reservedJobRunners = new ArrayList<>(reservedJobRunners);
}
int getJobThreadCount() {
return jobThreadCount;
/** @return The configured minimum number of general runners. */
int getMinGeneralRunners() {
  return this.minGeneralRunners;
}
/** @return The configured maximum number of general runners. */
int getMaxGeneralRunners() {
  return this.maxGeneralRunners;
}
/** @return The configured idle timeout for general runners, as stored by the builder. */
long getGeneralRunnerIdleTimeout() {
  return this.generalRunnerIdleTimeout;
}
@NonNull ExecutorFactory getExecutorFactory() {
@@ -665,18 +672,30 @@ public class JobManager implements ConstraintObserver.Notifier {
public static class Builder {
private ExecutorFactory executorFactory = new DefaultExecutorFactory();
private int jobThreadCount = 8;
private Map<String, Job.Factory> jobFactories = new HashMap<>();
private Map<String, Constraint.Factory> constraintFactories = new HashMap<>();
private List<ConstraintObserver> constraintObservers = new ArrayList<>();
private JobStorage jobStorage = null;
private JobMigrator jobMigrator = null;
private JobTracker jobTracker = new JobTracker();
private List<Predicate<MinimalJobSpec>> reservedJobRunners = new ArrayList<>();
private ExecutorFactory executorFactory = new DefaultExecutorFactory();
private int minGeneralRunners = 4;
private int maxGeneralRunners = 16;
private long generalRunnerIdleTimeout = TimeUnit.MINUTES.toMillis(1);
private Map<String, Job.Factory> jobFactories = new HashMap<>();
private Map<String, Constraint.Factory> constraintFactories = new HashMap<>();
private List<ConstraintObserver> constraintObservers = new ArrayList<>();
private JobStorage jobStorage = null;
private JobMigrator jobMigrator = null;
private JobTracker jobTracker = new JobTracker();
private List<Predicate<MinimalJobSpec>> reservedJobRunners = new ArrayList<>();
public @NonNull Builder setJobThreadCount(int jobThreadCount) {
this.jobThreadCount = jobThreadCount;
/**
 * Sets the minimum number of general runners. No validation is performed here.
 *
 * @param minGeneralRunners Value stored for the built {@code Configuration}.
 * @return This builder, for chaining.
 */
public @NonNull Builder setMinGeneralRunners(int minGeneralRunners) {
this.minGeneralRunners = minGeneralRunners;
return this;
}
/**
 * Sets the maximum number of general runners. No validation is performed here.
 *
 * @param maxGeneralRunners Value stored for the built {@code Configuration}.
 * @return This builder, for chaining.
 */
public @NonNull Builder setMaxGeneralRunners(int maxGeneralRunners) {
this.maxGeneralRunners = maxGeneralRunners;
return this;
}
/**
 * Sets the idle timeout for general runners. No validation is performed here.
 *
 * @param generalRunnerIdleTimeout Value stored for the built {@code Configuration}; presumably in
 *                                 milliseconds, since the default is built with {@code TimeUnit.MINUTES.toMillis(1)}.
 * @return This builder, for chaining.
 */
public @NonNull Builder setGeneralRunnerIdleTimeout(long generalRunnerIdleTimeout) {
this.generalRunnerIdleTimeout = generalRunnerIdleTimeout;
return this;
}
@@ -716,7 +735,9 @@ public class JobManager implements ConstraintObserver.Notifier {
}
public @NonNull Configuration build() {
return new Configuration(jobThreadCount,
return new Configuration(minGeneralRunners,
maxGeneralRunners,
generalRunnerIdleTimeout,
executorFactory,
new JobInstantiator(jobFactories),
new ConstraintInstantiator(constraintFactories),

View File

@@ -5,8 +5,6 @@ import android.os.PowerManager;
import androidx.annotation.NonNull;
import com.annimon.stream.Stream;
import org.signal.core.util.logging.Log;
import org.thoughtcrime.securesms.jobs.MinimalJobSpec;
import org.thoughtcrime.securesms.util.WakeLockUtil;
@@ -33,21 +31,36 @@ class JobRunner extends Thread {
private final int id;
private final JobController jobController;
private final Predicate<MinimalJobSpec> jobPredicate;
private final long idleTimeoutMs;
JobRunner(@NonNull Application application, int id, @NonNull JobController jobController, @NonNull Predicate<MinimalJobSpec> predicate) {
super("signal-JobRunner-" + id);
/**
* @param idleTimeoutMs If the runner experiences no activity within this duration, it will terminate. If set to 0, it will never terminate.
*/
JobRunner(@NonNull Application application, int id, @NonNull JobController jobController, @NonNull Predicate<MinimalJobSpec> predicate, long idleTimeoutMs) {
super("JobRunner-" + (idleTimeoutMs == 0 ? "core-" : "temp-") + id);
this.application = application;
this.id = id;
this.jobController = jobController;
this.jobPredicate = predicate;
this.idleTimeoutMs = idleTimeoutMs;
}
@Override
public synchronized void run() {
//noinspection InfiniteLoopStatement
Log.i(TAG, "JobRunner " + id + " started" + (idleTimeoutMs > 0 ? " with idle timeout " + idleTimeoutMs + "ms" : " with no idle timeout"));
while (true) {
Job job = jobController.pullNextEligibleJobForExecution(jobPredicate);
Job job = jobController.pullNextEligibleJobForExecution(jobPredicate, idleTimeoutMs);
if (job == null && idleTimeoutMs > 0) {
Log.i(TAG, "JobRunner " + id + " terminating due to inactivity");
jobController.onRunnerTerminated(this);
break;
} else if (job == null) {
Log.i(TAG, "JobRunner " + id + " unexpectedly given a null job. Going around the loop.");
continue;
}
Job.Result result = run(job);
jobController.onJobFinished(job);
@@ -60,7 +73,7 @@ class JobRunner extends Thread {
} else if (result.isFailure()) {
List<Job> dependents = jobController.onFailure(job);
job.onFailure();
Stream.of(dependents).forEach(Job::onFailure);
dependents.stream().forEach(Job::onFailure);
if (result.getException() != null) {
throw result.getException();

View File

@@ -20,6 +20,9 @@ interface JobStorage {
@WorkerThread
fun getNextEligibleJob(currentTime: Long, filter: (MinimalJobSpec) -> Boolean): JobSpec?
/**
 * Returns the number of jobs that are eligible to run as of [currentTime].
 *
 * What exactly counts as "eligible" is decided by the implementation.
 */
@WorkerThread
fun getEligibleJobCount(currentTime: Long): Int
@WorkerThread
fun getJobsInQueue(queue: String): List<JobSpec>

View File

@@ -148,6 +148,27 @@ class FastJobStorage(private val jobDatabase: JobDatabase) : JobStorage {
}
}
/**
 * Counts the jobs that could be started at [currentTime].
 *
 * If any migration job exists, only the first one is considered: the result is 1 when it is not
 * running and its run time is eligible, and 0 otherwise. Without migration jobs, the result is the
 * number of entries in `eligibleJobs` that have no recorded dependencies, are not running, and
 * have an eligible run time.
 */
@Synchronized
override fun getEligibleJobCount(currentTime: Long): Int {
  val pendingMigration: MinimalJobSpec? = migrationJobs.firstOrNull()

  return when {
    // No migration pending: count every job that is ready to go.
    pendingMigration == null -> {
      eligibleJobs
        .asSequence()
        .count { spec ->
          dependenciesByJobId[spec.id].isNullOrEmpty() &&
            !spec.isRunning &&
            spec.hasEligibleRunTime(currentTime)
        }
    }

    // A migration job is present, so it is the only candidate.
    !pendingMigration.isRunning && pendingMigration.hasEligibleRunTime(currentTime) -> 1

    else -> 0
  }
}
@Synchronized
override fun getJobsInQueue(queue: String): List<JobSpec> {
return minimalJobs

View File

@@ -76,7 +76,11 @@ class RestoreAttachmentJob private constructor(
"RestoreAttachmentJob::InitialRestore_01",
"RestoreAttachmentJob::InitialRestore_02",
"RestoreAttachmentJob::InitialRestore_03",
"RestoreAttachmentJob::InitialRestore_04"
"RestoreAttachmentJob::InitialRestore_04",
"RestoreAttachmentJob::InitialRestore_05",
"RestoreAttachmentJob::InitialRestore_06",
"RestoreAttachmentJob::InitialRestore_07",
"RestoreAttachmentJob::InitialRestore_08"
)
/** Job queues used when restoring an offloaded attachment. The number of queues in this set determines the level of parallelization. */

View File

@@ -56,10 +56,18 @@ class UploadAttachmentToArchiveJob private constructor(
/** A set of possible queues this job may use. The number of queues determines the parallelism. */
val QUEUES = setOf(
"ArchiveAttachmentJobs_1",
"ArchiveAttachmentJobs_2",
"ArchiveAttachmentJobs_3",
"ArchiveAttachmentJobs_4"
"ArchiveAttachmentJobs_01",
"ArchiveAttachmentJobs_02",
"ArchiveAttachmentJobs_03",
"ArchiveAttachmentJobs_04",
"ArchiveAttachmentJobs_05",
"ArchiveAttachmentJobs_06",
"ArchiveAttachmentJobs_07",
"ArchiveAttachmentJobs_08",
"ArchiveAttachmentJobs_09",
"ArchiveAttachmentJobs_10",
"ArchiveAttachmentJobs_11",
"ArchiveAttachmentJobs_12"
)
}