make the chat prompt decoder emit @mentions

This commit is contained in:
Oleg Solomko
2025-03-24 10:48:02 -07:00
parent 0160d1de1e
commit 4e15134bd2
5 changed files with 236 additions and 8 deletions
@@ -4,11 +4,14 @@
*--------------------------------------------------------------------------------------------*/
import { PromptToken } from './tokens/promptToken.js';
import { PromptAtMention } from './tokens/promptAtMention.js';
import { VSBuffer } from '../../../../../../base/common/buffer.js';
import { assertNever } from '../../../../../../base/common/assert.js';
import { ReadableStream } from '../../../../../../base/common/stream.js';
import { PartialPromptAtMention } from './parsers/promptAtMentionParser.js';
import { BaseDecoder } from '../../../../../../base/common/codecs/baseDecoder.js';
import { PromptVariable, PromptVariableWithData } from './tokens/promptVariable.js';
import { At } from '../../../../../../editor/common/codecs/simpleCodec/tokens/at.js';
import { Hash } from '../../../../../../editor/common/codecs/simpleCodec/tokens/hash.js';
import { MarkdownLink } from '../../../../../../editor/common/codecs/markdownCodec/tokens/markdownLink.js';
import { PartialPromptVariableName, PartialPromptVariableWithData } from './parsers/promptVariableParser.js';
@@ -17,7 +20,7 @@ import { MarkdownDecoder, TMarkdownToken } from '../../../../../../editor/common
/**
* Tokens produced by this decoder.
*/
export type TChatPromptToken = MarkdownLink | PromptVariable | PromptVariableWithData;
export type TChatPromptToken = MarkdownLink | (PromptVariable | PromptVariableWithData) | PromptAtMention;
/**
* Decoder for the common chatbot prompt message syntax.
@@ -29,7 +32,7 @@ export class ChatPromptDecoder extends BaseDecoder<TChatPromptToken, TMarkdownTo
* tokens, for instance, a `#file:/path/to/file.md` link that consists of `hash`,
* `word`, and `colon` tokens sequence plus the `file path` part that follows.
*/
private current?: PartialPromptVariableName | PartialPromptVariableWithData;
private current?: (PartialPromptVariableName | PartialPromptVariableWithData) | PartialPromptAtMention;
constructor(
stream: ReadableStream<VSBuffer>,
@@ -41,14 +44,23 @@ export class ChatPromptDecoder extends BaseDecoder<TChatPromptToken, TMarkdownTo
// prompt variables always start with the `#` character, hence
// initiate a parser object if we encounter respective token and
// there is no active parser object present at the moment
if (token instanceof Hash && !this.current) {
if ((token instanceof Hash) && !this.current) {
this.current = new PartialPromptVariableName(token);
return;
}
// if current parser was not yet initiated, - we are in the general
// "text" parsing mode, therefore re-emit the token immediately and return
// prompt @mentions always start with the `@` character, hence
// initiate a parser object if we encounter respective token and
// there is no active parser object present at the moment
if ((token instanceof At) && !this.current) {
this.current = new PartialPromptAtMention(token);
return;
}
// if current parser was not yet initiated, - we are in the general "text"
// parsing mode, therefore re-emit the token immediately and continue
if (!this.current) {
// at the moment, the decoder outputs only specific markdown tokens, like
// the `markdown link` one, so re-emit only these tokens ignoring the rest
@@ -123,6 +135,10 @@ export class ChatPromptDecoder extends BaseDecoder<TChatPromptToken, TMarkdownTo
return this._onData.fire(this.current.asPromptVariableWithData());
}
if (this.current instanceof PartialPromptAtMention) {
return this._onData.fire(this.current.asPromptAtMention());
}
assertNever(
this.current,
`Unknown parser object '${this.current}'`,
@@ -0,0 +1,127 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { PromptAtMention } from '../tokens/promptAtMention.js';
import { pick } from '../../../../../../../base/common/arrays.js';
import { assert } from '../../../../../../../base/common/assert.js';
import { Range } from '../../../../../../../editor/common/core/range.js';
import { At } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/at.js';
import { Tab } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/tab.js';
import { Hash } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/hash.js';
import { Space } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/space.js';
import { Colon } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/colon.js';
import { NewLine } from '../../../../../../../editor/common/codecs/linesCodec/tokens/newLine.js';
import { FormFeed } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/formFeed.js';
import { TSimpleToken } from '../../../../../../../editor/common/codecs/simpleCodec/simpleDecoder.js';
import { VerticalTab } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/verticalTab.js';
import { CarriageReturn } from '../../../../../../../editor/common/codecs/linesCodec/tokens/carriageReturn.js';
import { ExclamationMark } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/exclamationMark.js';
import { LeftBracket, RightBracket } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/brackets.js';
import { LeftAngleBracket, RightAngleBracket } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/angleBrackets.js';
import { assertNotConsumed, ParserBase, TAcceptTokenResult } from '../../../../../../../editor/common/codecs/simpleCodec/parserBase.js';
/**
 * Symbols that end a prompt at-mention sequence once encountered.
 * Built from the `symbol` of the `Space`, `Tab`, `NewLine`, `CarriageReturn`,
 * `VerticalTab`, `FormFeed`, `At` and `Colon` token classes.
 */
export const STOP_CHARACTERS: readonly string[] = [
	Space,
	Tab,
	NewLine,
	CarriageReturn,
	VerticalTab,
	FormFeed,
	At,
	Colon,
].map(({ symbol }) => symbol);
/**
 * Symbols that are not allowed inside an at-mention name.
 *
 * Note that the `At` and `Colon` symbols listed here are also present
 * in the {@link STOP_CHARACTERS} list.
 */
export const INVALID_NAME_CHARACTERS: readonly string[] = [
	Hash,
	At,
	Colon,
	ExclamationMark,
	LeftAngleBracket,
	RightAngleBracket,
	LeftBracket,
	RightBracket,
].map(({ symbol }) => symbol);
/**
 * The parser responsible for parsing a `prompt @mention` sequence,
 * e.g., `@workspace` or `@github`.
 *
 * The parser is created from the starting `@` token and appends every
 * following token to the mention name until one of the {@link STOP_CHARACTERS}
 * is received, at which point it tries to produce a {@link PromptAtMention}
 * token. Receiving one of the {@link INVALID_NAME_CHARACTERS} fails
 * the parsing process instead.
 */
export class PartialPromptAtMention extends ParserBase<TSimpleToken, PartialPromptAtMention | PromptAtMention> {
	constructor(token: At) {
		super([token]);
	}

	/**
	 * Handle the next token of the sequence.
	 *
	 * The result is one of the following:
	 *  - `success` with a {@link PromptAtMention} as the next parser and the token
	 *    left unconsumed, when a stop character ends a complete mention;
	 *  - `failure` with the token left unconsumed, when a stop character arrives
	 *    but the tokens collected so far cannot form a mention, or when the token
	 *    is one of the invalid name characters;
	 *  - `success` with this parser as the next parser and the token consumed,
	 *    when the token was appended to the mention name.
	 */
	@assertNotConsumed
	public accept(token: TSimpleToken): TAcceptTokenResult<PartialPromptAtMention | PromptAtMention> {
		const { text } = token;

		// a `stop` character always ends the parsing process
		if (STOP_CHARACTERS.includes(text)) {
			let mention: PromptAtMention | undefined;

			try {
				mention = this.asPromptAtMention();
			} catch (_error) {
				// tokens collected so far cannot be converted to an @mention,
				// hence `mention` stays `undefined` and a failure is reported
			}

			// whether the conversion worked or not, this parser is done
			this.isConsumed = true;

			if (mention === undefined) {
				return {
					result: 'failure',
					wasTokenConsumed: false,
				};
			}

			return {
				result: 'success',
				nextParser: mention,
				wasTokenConsumed: false,
			};
		}

		// @mention names cannot contain any of the {@link INVALID_NAME_CHARACTERS}
		if (INVALID_NAME_CHARACTERS.includes(text)) {
			this.isConsumed = true;

			return {
				result: 'failure',
				wasTokenConsumed: false,
			};
		}

		// the token is a valid part of the name, so remember it
		// and keep the parsing process going
		this.currentTokens.push(token);

		return {
			result: 'success',
			nextParser: this,
			wasTokenConsumed: true,
		};
	}

	/**
	 * Try to convert current parser instance into a fully-parsed {@link PromptAtMention} token.
	 *
	 * @throws if the sequence of tokens received so far does not constitute a valid
	 *         prompt @mention, for instance, if only the starting `@` token is available.
	 */
	public asPromptAtMention(): PromptAtMention {
		const tokens = this.currentTokens;

		// a lone starting `@` token without any name tokens is not a mention
		assert(
			tokens.length > 1,
			'Cannot create a prompt @mention out of incomplete token sequence.',
		);

		// the first token is the starting `@` one, the rest form the mention name
		const [atToken, ...nameTokens] = tokens;
		const finalToken = tokens[tokens.length - 1];

		// the name text excludes the starting `@` character
		const mentionName = nameTokens.map(pick('text')).join('');

		// the resulting range spans from the `@` token to the last name token
		const { startLineNumber, startColumn } = atToken.range;
		const { endLineNumber, endColumn } = finalToken.range;

		return new PromptAtMention(
			new Range(startLineNumber, startColumn, endLineNumber, endColumn),
			mentionName,
		);
	}
}
@@ -21,6 +21,10 @@ import { LeftBracket, RightBracket } from '../../../../../../../editor/common/co
import { LeftAngleBracket, RightAngleBracket } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/angleBrackets.js';
import { assertNotConsumed, ParserBase, TAcceptTokenResult } from '../../../../../../../editor/common/codecs/simpleCodec/parserBase.js';
/**
* TODO: @legomushroom - the new @ character should stop variable parsing
*/
/**
* List of characters that terminate the prompt variable sequence.
*/
@@ -35,7 +39,7 @@ export const INVALID_NAME_CHARACTERS: readonly string[] = [Hash, Colon, Exclamat
/**
* The parser responsible for parsing a `prompt variable name`.
* E.g., `#selection` or `#workspace` variable. If the `:` character follows
* E.g., `#selection` or `#codebase` variable. If the `:` character follows
* the variable name, the parser transitions to {@link PartialPromptVariableWithData}
* that is also able to parse the `data` part of the variable. E.g., the `#file` part
* of the `#file:/path/to/something.md` sequence.
@@ -0,0 +1,72 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { PromptToken } from './promptToken.js';
import { assert } from '../../../../../../../base/common/assert.js';
import { Range } from '../../../../../../../editor/common/core/range.js';
import { BaseToken } from '../../../../../../../editor/common/codecs/baseToken.js';
import { INVALID_NAME_CHARACTERS, STOP_CHARACTERS } from '../parsers/promptVariableParser.js';
/**
 * The character every prompt at-mention begins with.
 */
const START_CHARACTER = '@';
/**
 * A `@mention` token found in a prompt text, e.g., `@workspace`.
 */
export class PromptAtMention extends PromptToken {
	/**
	 * @throws if the provided `name` contains a character present in either
	 *         the `INVALID_NAME_CHARACTERS` or the `STOP_CHARACTERS` list.
	 */
	constructor(
		range: Range,
		/**
		 * Mention name without the leading `@` character.
		 */
		public readonly name: string,
	) {
		// validate every character of the name before constructing the token
		//
		// NOTE(review): both character lists used here are imported from the prompt
		// variable parser module, not from the at-mention parser one - confirm
		// that this is intended
		for (const character of name) {
			const isInvalid = INVALID_NAME_CHARACTERS.includes(character);
			const isStop = STOP_CHARACTERS.includes(character);

			assert(
				!isInvalid && !isStop,
				`Mention 'name' cannot contain character '${character}', got '${name}'.`,
			);
		}

		super(range);
	}

	/**
	 * Full text of the token: the `@` character followed by the mention name.
	 */
	public get text(): string {
		return `${START_CHARACTER}${this.name}`;
	}

	/**
	 * Check if this token is equal to another one: the other token must have
	 * the same range, be a {@link PromptAtMention} and have the same text.
	 */
	public override equals<T extends BaseToken>(other: T): boolean {
		const isSameRange = super.sameRange(other.range);

		if (!isSameRange || !(other instanceof PromptAtMention)) {
			return false;
		}

		const ownText = this.text;
		const otherText = other.text;

		return (ownText.length === otherText.length) && (ownText === otherText);
	}

	/**
	 * String representation of the token: its text immediately followed by its range.
	 */
	public override toString(): string {
		const { text, range } = this;

		return `${text}${range}`;
	}
}
@@ -8,6 +8,7 @@ import { Range } from '../../../../../../../editor/common/core/range.js';
import { newWriteableStream } from '../../../../../../../base/common/stream.js';
import { TestDecoder } from '../../../../../../../editor/test/common/utils/testDecoder.js';
import { FileReference } from '../../../../common/promptSyntax/codecs/tokens/fileReference.js';
import { PromptAtMention } from '../../../../common/promptSyntax/codecs/tokens/promptAtMention.js';
import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../../../base/test/common/utils.js';
import { MarkdownLink } from '../../../../../../../editor/common/codecs/markdownCodec/tokens/markdownLink.js';
import { ChatPromptDecoder, TChatPromptToken } from '../../../../common/promptSyntax/codecs/chatPromptDecoder.js';
@@ -53,18 +54,22 @@ suite('ChatPromptDecoder', () => {
const contents = [
'',
'haalo!',
'haalo! @workspace',
' message 👾 message #file:./path/to/file1.md',
'',
'## Heading Title',
' \t#file:a/b/c/filename2.md\t🖖\t#file:other-file.md',
' [#file:reference.md](./reference.md)some text #file:/some/file/with/absolute/path.md',
'text text #file: another text',
'text text #file: another @github text',
];
await test.run(
contents,
[
new PromptAtMention(
new Range(2, 8, 2, 18),
'workspace',
),
new FileReference(
new Range(3, 21, 3, 21 + 24),
'./path/to/file1.md',
@@ -91,6 +96,10 @@ suite('ChatPromptDecoder', () => {
new Range(8, 11, 8, 11 + 6),
'',
),
new PromptAtMention(
new Range(8, 26, 8, 33),
'github',
),
],
);
});