Fix flickering when using browser screenshot tool (#298080)

* Fix flickering when using browser screenshot tool

* feedback

* fix

* feedback
This commit is contained in:
Kyle Cutler
2026-02-27 09:03:35 -08:00
committed by GitHub
parent 77ec1740b7
commit d228467dd3
9 changed files with 125 additions and 69 deletions
@@ -34,6 +34,7 @@ export interface IBrowserViewState {
lastFavicon: string | undefined;
lastError: IBrowserViewLoadError | undefined;
storageScope: BrowserViewStorageScope;
zoomFactor: number;
}
export interface IBrowserViewNavigationEvent {
@@ -153,6 +154,14 @@ export interface IBrowserViewService {
*/
destroyBrowserView(id: string): Promise<void>;
/**
* Get the state of an existing browser view by ID, or throw if it doesn't exist
* @param id The browser view identifier
* @return The state of the browser view for the given ID
* @throws If no browser view exists for the given ID
*/
getState(id: string): Promise<IBrowserViewState>;
/**
* Update the bounds of a browser view
* @param id The browser view identifier
@@ -4,7 +4,6 @@
*--------------------------------------------------------------------------------------------*/
import { Event } from '../../../base/common/event.js';
import { VSBuffer } from '../../../base/common/buffer.js';
import { createDecorator } from '../../instantiation/common/instantiation.js';
export const IPlaywrightService = createDecorator<IPlaywrightService>('playwrightService');
@@ -63,7 +62,17 @@ export interface IPlaywrightService {
getSummary(pageId: string): Promise<string>;
/**
* Run a function with access to a Playwright page.
* Run a function with access to a Playwright page and return its raw result, or throw an error.
* The first function argument is always the Playwright `page` object, and additional arguments can be passed after.
* @param pageId The browser view ID identifying the page to operate on.
* @param fnDef The function code to execute. Should contain the function definition but not its invocation, e.g. `async (page, arg1, arg2) => { ... }`.
* @param args Additional arguments to pass to the function after the `page` object.
* @returns The result of the function execution.
*/
invokeFunctionRaw<T>(pageId: string, fnDef: string, ...args: unknown[]): Promise<T>;
/**
* Run a function with access to a Playwright page and return a result for tool output, including error handling.
* The first function argument is always the Playwright `page` object, and additional arguments can be passed after.
* @param pageId The browser view ID identifying the page to operate on.
* @param fnDef The function code to execute. Should contain the function definition but not its invocation, e.g. `async (page, arg1, arg2) => { ... }`.
@@ -72,15 +81,6 @@ export interface IPlaywrightService {
*/
invokeFunction(pageId: string, fnDef: string, ...args: unknown[]): Promise<{ result: unknown; summary: string }>;
/**
* Takes a screenshot of the current page viewport and returns it as a VSBuffer.
* @param pageId The browser view ID identifying the page to capture.
* @param selector Optional Playwright selector to capture a specific element instead of the viewport.
* @param fullPage Whether to capture the full scrollable page instead of just the viewport.
* @returns The screenshot image data.
*/
captureScreenshot(pageId: string, selector?: string, fullPage?: boolean): Promise<VSBuffer>;
/**
* Responds to a file chooser dialog on the given page.
* @param pageId The browser view ID identifying the page.
@@ -359,7 +359,8 @@ export class BrowserView extends Disposable implements ICDPTarget {
lastScreenshot: this._lastScreenshot,
lastFavicon: this._lastFavicon,
lastError: this._lastError,
storageScope: this.session.storageScope
storageScope: this.session.storageScope,
zoomFactor: webContents.getZoomFactor()
};
}
@@ -509,13 +510,6 @@ export class BrowserView extends Disposable implements ICDPTarget {
}
}
/**
* Set the zoom factor of this view
*/
async setZoomFactor(zoomFactor: number): Promise<void> {
await this._view.webContents.setZoomFactor(zoomFactor);
}
/**
* Focus this view
*/
@@ -278,6 +278,10 @@ export class BrowserViewMainService extends Disposable implements IBrowserViewMa
return this._getBrowserView(id).onDidClose;
}
async getState(id: string): Promise<IBrowserViewState> {
return this._getBrowserView(id).getState();
}
async destroyBrowserView(id: string): Promise<void> {
return this.browserViews.deleteAndDispose(id);
}
@@ -330,10 +334,6 @@ export class BrowserViewMainService extends Disposable implements IBrowserViewMa
return this._getBrowserView(id).dispatchKeyEvent(keyEvent);
}
async setZoomFactor(id: string, zoomFactor: number): Promise<void> {
return this._getBrowserView(id).setZoomFactor(zoomFactor);
}
async focus(id: string): Promise<void> {
return this._getBrowserView(id).focus();
}
@@ -10,7 +10,6 @@ import { ILogService } from '../../log/common/log.js';
import { IPlaywrightService } from '../common/playwrightService.js';
import { IBrowserViewGroupRemoteService } from '../node/browserViewGroupRemoteService.js';
import { IBrowserViewGroup } from '../common/browserViewGroup.js';
import { VSBuffer } from '../../../base/common/buffer.js';
import { PlaywrightTab } from './playwrightTab.js';
// eslint-disable-next-line local/code-import-patterns
@@ -125,18 +124,22 @@ export class PlaywrightService extends Disposable implements IPlaywrightService
return this._pages.getSummary(pageId, true);
}
async invokeFunctionRaw<T>(pageId: string, fnDef: string, ...args: unknown[]): Promise<T> {
await this.initialize();
const vm = await import('vm');
const fn = vm.compileFunction(`return (${fnDef})(page, ...args)`, ['page', 'args'], { parsingContext: vm.createContext() });
return this._pages.runAgainstPage(pageId, (page) => fn(page, args));
}
async invokeFunction(pageId: string, fnDef: string, ...args: unknown[]): Promise<{ result: unknown; summary: string }> {
this.logService.info(`[PlaywrightService] Invoking function on view ${pageId}`);
try {
await this.initialize();
const vm = await import('vm');
const fn = vm.compileFunction(`return (${fnDef})(page, ...args)`, ['page', 'args'], { parsingContext: vm.createContext() });
let result;
try {
result = await this._pages.runAgainstPage(pageId, (page) => fn(page, args));
result = await this.invokeFunctionRaw(pageId, fnDef, ...args);
} catch (err: unknown) {
result = err instanceof Error ? err.message : String(err);
}
@@ -155,16 +158,6 @@ export class PlaywrightService extends Disposable implements IPlaywrightService
}
}
async captureScreenshot(pageId: string, selector?: string, fullPage?: boolean): Promise<VSBuffer> {
await this.initialize();
return this._pages.runAgainstPage(pageId, async page => {
const screenshotBuffer = selector
? await page.locator(selector).screenshot({ type: 'jpeg', quality: 80 })
: await page.screenshot({ type: 'jpeg', quality: 80, fullPage: fullPage ?? false });
return VSBuffer.wrap(screenshotBuffer);
});
}
async replyToFileChooser(pageId: string, files: string[]): Promise<{ summary: string }> {
await this.initialize();
const summary = await this._pages.replyToFileChooser(pageId, files);
@@ -76,6 +76,14 @@ export interface IBrowserViewWorkbenchService {
*/
getOrCreateBrowserViewModel(id: string): Promise<IBrowserViewModel>;
/**
* Get an existing browser view model for the given ID
* @param id The browser view identifier
* @returns A browser view model that proxies to the main process
* @throws If no browser view exists for the given ID
*/
getBrowserViewModel(id: string): Promise<IBrowserViewModel>;
/**
* Clear all storage data for the global browser session
*/
@@ -105,9 +113,9 @@ export interface IBrowserViewModel extends IDisposable {
readonly isDevToolsOpen: boolean;
readonly canGoForward: boolean;
readonly error: IBrowserViewLoadError | undefined;
readonly storageScope: BrowserViewStorageScope;
readonly sharedWithAgent: boolean;
readonly zoomFactor: number;
readonly onDidChangeSharedWithAgent: Event<boolean>;
readonly onDidNavigate: Event<IBrowserViewNavigationEvent>;
@@ -123,7 +131,7 @@ export interface IBrowserViewModel extends IDisposable {
readonly onDidClose: Event<void>;
readonly onWillDispose: Event<void>;
initialize(): Promise<void>;
initialize(create: boolean): Promise<void>;
layout(bounds: IBrowserViewBounds): Promise<void>;
setVisible(visible: boolean): Promise<void>;
@@ -156,6 +164,7 @@ export class BrowserViewModel extends Disposable implements IBrowserViewModel {
private _error: IBrowserViewLoadError | undefined = undefined;
private _storageScope: BrowserViewStorageScope = BrowserViewStorageScope.Ephemeral;
private _sharedWithAgent: boolean = false;
private _zoomFactor: number = 1;
private readonly _onDidChangeSharedWithAgent = this._register(new Emitter<boolean>());
readonly onDidChangeSharedWithAgent: Event<boolean> = this._onDidChangeSharedWithAgent.event;
@@ -190,6 +199,7 @@ export class BrowserViewModel extends Disposable implements IBrowserViewModel {
get error(): IBrowserViewLoadError | undefined { return this._error; }
get storageScope(): BrowserViewStorageScope { return this._storageScope; }
get sharedWithAgent(): boolean { return this._sharedWithAgent; }
get zoomFactor(): number { return this._zoomFactor; }
get onDidNavigate(): Event<IBrowserViewNavigationEvent> {
return this.browserViewService.onDynamicDidNavigate(this.id);
@@ -236,9 +246,11 @@ export class BrowserViewModel extends Disposable implements IBrowserViewModel {
}
/**
* Initialize the model with the current state from the main process
* Initialize the model with the current state from the main process.
* @param create Whether to create the browser view if it doesn't already exist.
* @throws If the browser view doesn't exist and `create` is false, or if initialization fails
*/
async initialize(): Promise<void> {
async initialize(create: boolean): Promise<void> {
const dataStorageSetting = this.configurationService.getValue<BrowserViewStorageScope>(
'workbench.browser.dataStorage'
) ?? BrowserViewStorageScope.Global;
@@ -253,7 +265,9 @@ export class BrowserViewModel extends Disposable implements IBrowserViewModel {
const dataStorage = isWorkspaceUntrusted ? BrowserViewStorageScope.Ephemeral : dataStorageSetting;
const workspaceId = this.workspaceContextService.getWorkspace().id;
const state = await this.browserViewService.getOrCreateBrowserView(this.id, dataStorage, workspaceId);
const state = create
? await this.browserViewService.getOrCreateBrowserView(this.id, dataStorage, workspaceId)
: await this.browserViewService.getState(this.id);
this._url = state.url;
this._title = state.title;
@@ -268,6 +282,7 @@ export class BrowserViewModel extends Disposable implements IBrowserViewModel {
this._error = state.lastError;
this._storageScope = state.storageScope;
this._sharedWithAgent = await this.playwrightService.isPageTracked(this.id);
this._zoomFactor = state.zoomFactor;
// Set up state synchronization
@@ -314,6 +329,7 @@ export class BrowserViewModel extends Disposable implements IBrowserViewModel {
}
async layout(bounds: IBrowserViewBounds): Promise<void> {
this._zoomFactor = bounds.zoomFactor;
return this.browserViewService.layout(this.id, bounds);
}
@@ -27,23 +27,11 @@ export class BrowserViewWorkbenchService implements IBrowserViewWorkbenchService
}
async getOrCreateBrowserViewModel(id: string): Promise<IBrowserViewModel> {
let model = this._models.get(id);
if (model) {
return model;
}
return this._getBrowserViewModel(id, true);
}
model = this.instantiationService.createInstance(BrowserViewModel, id, this._browserViewService);
this._models.set(id, model);
// Initialize the model with current state
await model.initialize();
// Clean up model when disposed
Event.once(model.onWillDispose)(() => {
this._models.delete(id);
});
return model;
async getBrowserViewModel(id: string): Promise<IBrowserViewModel> {
return this._getBrowserViewModel(id, false);
}
async clearGlobalStorage(): Promise<void> {
@@ -54,4 +42,29 @@ export class BrowserViewWorkbenchService implements IBrowserViewWorkbenchService
const workspaceId = this.workspaceContextService.getWorkspace().id;
return this._browserViewService.clearWorkspaceStorage(workspaceId);
}
private async _getBrowserViewModel(id: string, create: boolean): Promise<IBrowserViewModel> {
let model = this._models.get(id);
if (model) {
return model;
}
model = this.instantiationService.createInstance(BrowserViewModel, id, this._browserViewService);
this._models.set(id, model);
// Initialize the model with current state
try {
await model.initialize(create);
} catch (e) {
this._models.delete(id);
throw e;
}
// Clean up model when disposed
Event.once(model.onWillDispose)(() => {
this._models.delete(id);
});
return model;
}
}
@@ -9,6 +9,18 @@ import { IToolResult } from '../../../chat/common/tools/languageModelToolsServic
// eslint-disable-next-line local/code-import-patterns
import type { Page } from 'playwright-core';
/**
* Shared helper for running a Playwright function against a page and returning its result.
*/
export async function playwrightInvokeRaw<TArgs extends unknown[], TReturn>(
playwrightService: IPlaywrightService,
pageId: string,
fn: (page: Page, ...args: TArgs) => Promise<TReturn>,
...args: TArgs
): Promise<TReturn> {
return playwrightService.invokeFunctionRaw(pageId, fn.toString(), ...args);
}
/**
* Shared helper for running a Playwright function against a page and returning
* a tool result. Handles success/error formatting.
@@ -8,7 +8,8 @@ import { Codicon } from '../../../../../base/common/codicons.js';
import { localize } from '../../../../../nls.js';
import { IPlaywrightService } from '../../../../../platform/browserView/common/playwrightService.js';
import { ToolDataSource, type CountTokensCallback, type IPreparedToolInvocation, type IToolData, type IToolImpl, type IToolInvocation, type IToolInvocationPreparationContext, type IToolResult, type ToolProgress } from '../../../chat/common/tools/languageModelToolsService.js';
import { errorResult } from './browserToolHelpers.js';
import { IBrowserViewWorkbenchService } from '../../common/browserView.js';
import { errorResult, playwrightInvokeRaw } from './browserToolHelpers.js';
import { OpenPageToolId } from './openBrowserTool.js';
import { ReadBrowserToolData } from './readBrowserTool.js';
@@ -29,16 +30,16 @@ export const ScreenshotBrowserToolData: IToolData = {
},
selector: {
type: 'string',
description: 'Playwright selector of an element to capture. If omitted, captures the whole page.'
description: 'Playwright selector of an element to capture. If omitted, captures the whole viewport.'
},
ref: {
type: 'string',
description: 'Element reference to capture. If omitted, captures the whole page.'
description: 'Element reference to capture. If omitted, captures the whole viewport.'
},
fullPage: {
scrollIntoViewIfNeeded: {
type: 'boolean',
description: 'Set to true to capture the full scrollable page instead of just the viewport. Incompatible with selector/ref.'
},
description: 'Whether to scroll the element into view before capturing. Defaults to false.',
}
},
required: ['pageId'],
},
@@ -48,11 +49,12 @@ interface IScreenshotBrowserToolParams {
pageId: string;
selector?: string;
ref?: string;
fullPage?: boolean;
scrollIntoViewIfNeeded?: boolean;
}
export class ScreenshotBrowserTool implements IToolImpl {
constructor(
@IBrowserViewWorkbenchService private readonly browserViewWorkbenchService: IBrowserViewWorkbenchService,
@IPlaywrightService private readonly playwrightService: IPlaywrightService,
) { }
@@ -75,7 +77,24 @@ export class ScreenshotBrowserTool implements IToolImpl {
selector = `aria-ref=${params.ref}`;
}
const screenshot = await this.playwrightService.captureScreenshot(params.pageId, selector, params.fullPage);
// Note that we don't use Playwright's screenshot methods because they cause brief flashing on the page,
// and also doesn't handle zooming well.
const browserViewModel = await this.browserViewWorkbenchService.getBrowserViewModel(params.pageId); // Throws if the given pageId doesn't exist
const bounds = selector && await playwrightInvokeRaw(this.playwrightService, params.pageId, async (page, selector, scrollIntoViewIfNeeded) => {
const locator = page.locator(selector);
if (scrollIntoViewIfNeeded) {
await locator.scrollIntoViewIfNeeded();
}
return locator.boundingBox();
}, selector, params.scrollIntoViewIfNeeded) || undefined;
const zoomFactor = browserViewModel.zoomFactor;
if (bounds) {
bounds.x *= zoomFactor;
bounds.y *= zoomFactor;
bounds.width *= zoomFactor;
bounds.height *= zoomFactor;
}
const screenshot = await browserViewModel.captureScreenshot({ rect: bounds });
return {
content: [