Split sort in batches (fixes #33613)

This commit is contained in:
Christof Marti
2017-10-02 16:15:47 -07:00
parent 9f304b7a49
commit 0bc9defc62
3 changed files with 146 additions and 41 deletions
+47 -2
View File
@@ -4,6 +4,8 @@
*--------------------------------------------------------------------------------------------*/
'use strict';
import { TPromise } from 'vs/base/common/winjs.base';
/**
* Returns the last element of an array.
* @param array The array.
@@ -218,7 +220,51 @@ export function top<T>(array: T[], compare: (a: T, b: T) => number, n: number):
return [];
}
const result = array.slice(0, n).sort(compare);
for (let i = n, m = array.length; i < m; i++) {
topStep(array, compare, result, n, array.length);
return result;
}
/**
 * Asynchronous variant of `top()` allowing for splitting up work in batches between which the event loop can run.
 *
 * Returns the top N elements from the array.
 *
 * Faster than sorting the entire array when the array is a lot larger than N.
 *
 * Cancelling the returned promise stops the work before the next batch is examined.
 *
 * @param array The unsorted array. It is not modified.
 * @param compare A sort function for the elements.
 * @param n The number of elements to return.
 * @param batch The number of elements to examine before yielding to the event loop.
 *              Values that are fractional are rounded down; values below 1 (or NaN) are treated as 1.
 * @return A promise for the first n elements from array when sorted with compare.
 */
export function topAsync<T>(array: T[], compare: (a: T, b: T) => number, n: number, batch: number): TPromise<T[]> {
	if (n === 0) {
		return TPromise.as([]);
	}

	// Guard the batch size: with `batch <= 0` the window [i, m) would never advance (endless loop that
	// never yields), with NaN the loop would exit immediately and skip all remaining elements, and with a
	// fractional value `i` would turn into a non-integer array index.
	const step = batch >= 1 ? Math.floor(batch) : 1;

	let canceled = false;
	return new TPromise((resolve, reject) => {
		(async () => {
			const o = array.length;
			// Seed the result with the first n elements; every batch then merges better candidates into it.
			const result = array.slice(0, n).sort(compare);
			for (let i = n, m = Math.min(n + step, o); i < o; i = m, m = Math.min(m + step, o)) {
				if (i > n) {
					// Yield between batches (not before the first one).
					await new Promise(resolve => setTimeout(resolve)); // nextTick() would starve I/O.
				}
				if (canceled) {
					throw new Error('canceled');
				}
				topStep(array, compare, result, i, m);
			}
			return result;
		})()
			.then(resolve, reject);
	}, () => {
		// Cancellation is cooperative: the flag is checked before each batch.
		canceled = true;
	});
}
function topStep<T>(array: T[], compare: (a: T, b: T) => number, result: T[], i: number, m: number): void {
for (const n = result.length; i < m; i++) {
const element = array[i];
if (compare(element, result[n - 1]) < 0) {
result.pop();
@@ -226,7 +272,6 @@ export function top<T>(array: T[], compare: (a: T, b: T) => number, n: number):
result.splice(j, 0, element);
}
}
return result;
}
/**
+54
View File
@@ -5,6 +5,7 @@
'use strict';
import * as assert from 'assert';
import { TPromise } from 'vs/base/common/winjs.base';
import arrays = require('vs/base/common/arrays');
suite('Arrays', () => {
@@ -215,5 +216,58 @@ suite('Arrays', () => {
assert.deepEqual(arrays.top([3, 2, 1], cmp, 3), [1, 2, 3]);
assert.deepEqual(arrays.top([4, 6, 2, 7, 8, 3, 5, 1], cmp, 3), [1, 2, 3]);
});
// Runs the shared topAsync scenarios twice, first with a batch size of 1 and then with a batch size of 2.
test('topAsync', function (done) {
	// Ascending numeric order; also verifies that only numbers are ever handed to the comparator.
	function ascending(left, right) {
		assert.strictEqual(typeof left, 'number', 'typeof a');
		assert.strictEqual(typeof right, 'number', 'typeof b');
		return left - right;
	}

	const batchOfOne = testTopAsync(ascending, 1);
	const batchOfTwo = batchOfOne.then(() => testTopAsync(ascending, 2));
	// Any assertion failure is forwarded to mocha through the error callback.
	batchOfTwo.then(done, done);
});
/**
 * Runs a fixed list of `arrays.topAsync` scenarios strictly one after another.
 *
 * @param cmp The comparator handed to `topAsync`.
 * @param m The batch size handed to `topAsync`.
 * @return A promise that completes once every scenario has been checked.
 */
function testTopAsync(cmp: any, m: number) {
	const scenarios: { input: number[]; count: number; expected: number[] }[] = [
		{ input: [], count: 1, expected: [] },
		{ input: [1], count: 0, expected: [] },
		{ input: [1, 2], count: 1, expected: [1] },
		{ input: [2, 1], count: 1, expected: [1] },
		{ input: [1, 3, 2], count: 2, expected: [1, 2] },
		{ input: [3, 2, 1], count: 3, expected: [1, 2, 3] },
		{ input: [4, 6, 2, 7, 8, 3, 5, 1], count: 3, expected: [1, 2, 3] }
	];

	// Append each scenario to the chain so that it only starts after the previous one has passed.
	let chain: TPromise<any> = TPromise.as(null);
	scenarios.forEach(scenario => {
		chain = chain.then(() => {
			return arrays.topAsync(scenario.input, cmp, scenario.count, m)
				.then(result => {
					assert.deepEqual(result, scenario.expected);
				});
		});
	});
	return chain;
}
});
@@ -170,23 +170,26 @@ export class SearchService implements IRawSearchService {
allResultsPromise = this.preventCancellation(allResultsPromise);
}
let chained: TPromise<void>;
return new PPromise<[ISerializedSearchComplete, IRawFileMatch[]], IProgress>((c, e, p) => {
allResultsPromise.then(([result, results]) => {
chained = allResultsPromise.then(([result, results]) => {
const scorerCache: ScorerCache = cache ? cache.scorerCache : Object.create(null);
const unsortedResultTime = Date.now();
const sortedResults = this.sortResults(config, results, scorerCache);
const sortedResultTime = Date.now();
return this.sortResults(config, results, scorerCache)
.then(sortedResults => {
const sortedResultTime = Date.now();
c([{
stats: objects.assign({}, result.stats, {
unsortedResultTime,
sortedResultTime
}),
limitHit: result.limitHit || typeof config.maxResults === 'number' && results.length > config.maxResults
}, sortedResults]);
c([{
stats: objects.assign({}, result.stats, {
unsortedResultTime,
sortedResultTime
}),
limitHit: result.limitHit || typeof config.maxResults === 'number' && results.length > config.maxResults
}, sortedResults]);
});
}, e, p);
}, () => {
allResultsPromise.cancel();
chained.cancel();
});
}
@@ -207,47 +210,50 @@ export class SearchService implements IRawSearchService {
const cacheLookupStartTime = Date.now();
const cached = this.getResultsFromCache(cache, config.filePattern);
if (cached) {
let chained: TPromise<void>;
return new PPromise<[ISerializedSearchComplete, IRawFileMatch[]], IProgress>((c, e, p) => {
cached.then(([result, results, cacheStats]) => {
chained = cached.then(([result, results, cacheStats]) => {
const cacheLookupResultTime = Date.now();
const sortedResults = this.sortResults(config, results, cache.scorerCache);
const sortedResultTime = Date.now();
return this.sortResults(config, results, cache.scorerCache)
.then(sortedResults => {
const sortedResultTime = Date.now();
const stats: ICachedSearchStats = {
fromCache: true,
cacheLookupStartTime: cacheLookupStartTime,
cacheFilterStartTime: cacheStats.cacheFilterStartTime,
cacheLookupResultTime: cacheLookupResultTime,
cacheEntryCount: cacheStats.cacheFilterResultCount,
resultCount: results.length
};
if (config.sortByScore) {
stats.unsortedResultTime = cacheLookupResultTime;
stats.sortedResultTime = sortedResultTime;
}
if (!cacheStats.cacheWasResolved) {
stats.joined = result.stats;
}
c([
{
limitHit: result.limitHit || typeof config.maxResults === 'number' && results.length > config.maxResults,
stats: stats
},
sortedResults
]);
const stats: ICachedSearchStats = {
fromCache: true,
cacheLookupStartTime: cacheLookupStartTime,
cacheFilterStartTime: cacheStats.cacheFilterStartTime,
cacheLookupResultTime: cacheLookupResultTime,
cacheEntryCount: cacheStats.cacheFilterResultCount,
resultCount: results.length
};
if (config.sortByScore) {
stats.unsortedResultTime = cacheLookupResultTime;
stats.sortedResultTime = sortedResultTime;
}
if (!cacheStats.cacheWasResolved) {
stats.joined = result.stats;
}
c([
{
limitHit: result.limitHit || typeof config.maxResults === 'number' && results.length > config.maxResults,
stats: stats
},
sortedResults
]);
});
}, e, p);
}, () => {
cached.cancel();
chained.cancel();
});
}
return undefined;
}
private sortResults(config: IRawSearch, results: IRawFileMatch[], scorerCache: ScorerCache): IRawFileMatch[] {
private sortResults(config: IRawSearch, results: IRawFileMatch[], scorerCache: ScorerCache): TPromise<IRawFileMatch[]> {
const filePattern = config.filePattern;
const normalizedSearchValue = strings.stripWildcards(filePattern).toLowerCase();
const compare = (elementA: IRawFileMatch, elementB: IRawFileMatch) => compareResourcesByScore(elementA, elementB, FileMatchResourceAccessor, filePattern, normalizedSearchValue, scorerCache);
return arrays.top(results, compare, config.maxResults);
return arrays.topAsync(results, compare, config.maxResults, 10000);
}
private sendProgress(results: ISerializedFileMatch[], progressCb: (batch: ISerializedFileMatch[]) => void, batchSize: number) {