Fix workspace symbol search filtering when query contains # or * (#277922)

* Initial plan

* Fix: Strip special characters (#, *) from workspace symbol queries for fuzzy matching

- Add # to the list of characters removed during query normalization (after PR feedback, only a trailing # is stripped; a leading, interior, or standalone # is kept)
- Update doScoreFuzzy2Single to use normalized query instead of original
- Add comprehensive tests for workspace symbol search with special characters
- All existing tests still pass (5816 passing)

Co-authored-by: dmitrivMS <9581278+dmitrivMS@users.noreply.github.com>

* Update JSDoc for normalized field to document all removed characters

Updated the IPreparedQueryPiece.normalized JSDoc comment to accurately reflect that quotes, ellipsis, and trailing hash characters are also removed, in addition to whitespace and wildcards.

Co-authored-by: dmitrivMS <9581278+dmitrivMS@users.noreply.github.com>

* PR feedback

* PR feedback

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: dmitrivMS <9581278+dmitrivMS@users.noreply.github.com>
Co-authored-by: Dmitriy Vasyura <dmitriv@microsoft.com>
This commit is contained in:
Copilot
2025-12-19 04:34:48 +00:00
committed by GitHub
parent 8fec28c14c
commit 5c306a9967
2 changed files with 62 additions and 3 deletions

View File

@@ -322,7 +322,7 @@ function doScoreFuzzy2Multiple(target: string, query: IPreparedQueryPiece[], pat
}
function doScoreFuzzy2Single(target: string, query: IPreparedQueryPiece, patternStart: number, wordStart: number): FuzzyScore2 {
const score = fuzzyScore(query.original, query.originalLowercase, patternStart, target, target.toLowerCase(), wordStart, { firstMatchCanBeWeak: true, boostFullMatch: true });
const score = fuzzyScore(query.normalized, query.normalizedLowercase, patternStart, target, target.toLowerCase(), wordStart, { firstMatchCanBeWeak: true, boostFullMatch: true });
if (!score) {
return NO_SCORE2;
}
@@ -811,7 +811,7 @@ export interface IPreparedQueryPiece {
/**
* In addition to the normalized path, will have
* whitespace and wildcards removed.
* whitespace, wildcards, quotes, ellipsis, and trailing hash characters removed.
*/
normalized: string;
normalizedLowercase: string;
@@ -905,7 +905,8 @@ function normalizeQuery(original: string): { pathNormalized: string; normalized:
// - wildcards: are used for fuzzy matching
// - whitespace: are used to separate queries
// - ellipsis: sometimes used to indicate any path segments
const normalized = pathNormalized.replace(/[\*\u2026\s"]/g, '');
// - trailing hash: used by some language servers (e.g. rust-analyzer) as query modifiers
const normalized = pathNormalized.replace(/[\*\u2026\s"]/g, '').replace(/(?<=.)#$/, '');
return {
pathNormalized,

View File

@@ -1141,6 +1141,10 @@ suite('Fuzzy Scorer', () => {
test('prepareQuery', () => {
assert.strictEqual(prepareQuery(' f*a ').normalized, 'fa');
assert.strictEqual(prepareQuery(' f…a ').normalized, 'fa');
assert.strictEqual(prepareQuery('main#').normalized, 'main');
assert.strictEqual(prepareQuery('main#').original, 'main#');
assert.strictEqual(prepareQuery('foo*').normalized, 'foo');
assert.strictEqual(prepareQuery('foo*').original, 'foo*');
assert.strictEqual(prepareQuery('model Tester.ts').original, 'model Tester.ts');
assert.strictEqual(prepareQuery('model Tester.ts').originalLowercase, 'model Tester.ts'.toLowerCase());
assert.strictEqual(prepareQuery('model Tester.ts').normalized, 'modelTester.ts');
@@ -1295,5 +1299,59 @@ suite('Fuzzy Scorer', () => {
assert.strictEqual(score[1][1], 8);
});
test('Workspace symbol search with special characters (#, *)', function () {
	// Scenario from the issue: rust-analyzer uses # and * as query modifiers.
	// The original query (special characters included) is what should reach the
	// language server, while the normalized query (special characters stripped)
	// is what fuzzy matching should run against.
	type SymbolQueryCase = {
		/** Symbol name as returned by the language server. */
		readonly symbol: string;
		/** Query text exactly as typed by the user. */
		readonly typed: string;
		/** Expected value of the prepared query's `normalized` field. */
		readonly normalized: string;
		/** Failure message for the score assertion. */
		readonly scoreMessage: string;
		/** Optional failure message for the `normalized` assertion. */
		readonly normalizedMessage?: string;
	};

	const cases: readonly SymbolQueryCase[] = [
		// Trailing #: user types "main#", language server returns "main"
		{ symbol: 'main', typed: 'main#', normalized: 'main', scoreMessage: 'Should match "main" symbol when query is "main#"' },
		// Trailing *: user types "foo*", language server returns "foo"
		{ symbol: 'foo', typed: 'foo*', normalized: 'foo', scoreMessage: 'Should match "foo" symbol when query is "foo*"' },
		// Both modifiers: "MyClass#*" should match "MyClass"
		{ symbol: 'MyClass', typed: 'MyClass#*', normalized: 'MyClass', scoreMessage: 'Should match "MyClass" symbol when query is "MyClass#*"' },
		// Fuzzy matching still works: "MC#" should match "MyClass"
		{ symbol: 'MyClass', typed: 'MC#', normalized: 'MC', scoreMessage: 'Should fuzzy match "MyClass" symbol when query is "MC#"' },
		// A leading # (or one in the middle) is not removed
		{ symbol: '#SpecialFunction', typed: '#SpecialFunction', normalized: '#SpecialFunction', scoreMessage: 'Should match "#SpecialFunction" symbol when query is "#SpecialFunction"' },
		// A standalone # is not removed
		{ symbol: '#', typed: '#', normalized: '#', scoreMessage: 'Should match "#" symbol when query is "#"', normalizedMessage: 'Standalone # should not be removed' }
	];

	for (const { symbol, typed, normalized, scoreMessage, normalizedMessage } of cases) {
		const prepared = prepareQuery(typed);
		assert.strictEqual(prepared.original, typed); // Sent to language server
		assert.strictEqual(prepared.normalized, normalized, normalizedMessage); // Used for fuzzy matching

		const [score, matches] = _doScore2(symbol, typed);
		assert.ok(typeof score === 'number' && score > 0, scoreMessage);
		assert.ok(matches.length > 0);
	}
});
ensureNoDisposablesAreLeakedInTestSuite();
});