Updates to perform all operations in worker

This commit is contained in:
Don Jayamanne
2024-09-09 11:37:44 +10:00
parent fc8fda56e5
commit ddd0f1fe34
10 changed files with 273 additions and 167 deletions

View File

@@ -65,7 +65,3 @@ export interface CellMetadata {
execution_count?: number;
}
/**
 * Payload handed to the notebook serialization worker: the notebook content
 * to serialize together with the indentation string to use.
 */
export interface notebookSerializationWorkerData {
/** Notebook content (possibly partial) that gets serialized to JSON. */
notebookContent: Partial<nbformat.INotebookContent>;
/** Indentation string, passed as the third ("space") argument of JSON.stringify. */
indentAmount: string;
}

View File

@@ -3,9 +3,23 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as vscode from 'vscode';
import type { DocumentSelector } from 'vscode';
export const defaultNotebookFormat = { major: 4, minor: 2 };
export const ATTACHMENT_CLEANUP_COMMANDID = 'ipynb.cleanInvalidImageAttachment';
export const JUPYTER_NOTEBOOK_MARKDOWN_SELECTOR: vscode.DocumentSelector = { notebookType: 'jupyter-notebook', language: 'markdown' };
export const JUPYTER_NOTEBOOK_MARKDOWN_SELECTOR: DocumentSelector = { notebookType: 'jupyter-notebook', language: 'markdown' };
// Copied from NotebookCellKind.Markup as we cannot import it from vscode directly in worker threads.
export const NotebookCellKindMarkup = 1;
// Copied from NotebookCellKind.Code as we cannot import it from vscode directly in worker threads.
export const NotebookCellKindCode = 2;
/**
 * Mime type identifiers for the error, stderr and stdout notebook cell output items.
 */
export enum CellOutputMimeTypes {
error = 'application/vnd.code.notebook.error',
stderr = 'application/vnd.code.notebook.stderr',
stdout = 'application/vnd.code.notebook.stdout'
}
/**
 * Plain text, markdown and latex mime types plus the stderr and stdout mime types declared above.
 * NOTE(review): presumably the set of mime types whose payload is handled as text - confirm at the usage sites.
 */
export const textMimeTypes = ['text/plain', 'text/markdown', 'text/latex', CellOutputMimeTypes.stderr, CellOutputMimeTypes.stdout];

View File

@@ -6,6 +6,7 @@
import type * as nbformat from '@jupyterlab/nbformat';
import { extensions, NotebookCellData, NotebookCellExecutionSummary, NotebookCellKind, NotebookCellOutput, NotebookCellOutputItem, NotebookData } from 'vscode';
import { CellMetadata, CellOutputMetadata } from './common';
import { textMimeTypes } from './constants';
const jupyterLanguageToMonacoLanguageMapping = new Map([
['c#', 'csharp'],
@@ -89,15 +90,6 @@ function sortOutputItemsBasedOnDisplayOrder(outputItems: NotebookCellOutputItem[
.sort((outputItemA, outputItemB) => outputItemA.index - outputItemB.index).map(item => item.item);
}
enum CellOutputMimeTypes {
error = 'application/vnd.code.notebook.error',
stderr = 'application/vnd.code.notebook.stderr',
stdout = 'application/vnd.code.notebook.stdout'
}
export const textMimeTypes = ['text/plain', 'text/markdown', 'text/latex', CellOutputMimeTypes.stderr, CellOutputMimeTypes.stdout];
function concatMultilineString(str: string | string[], trim?: boolean): string {
const nonLineFeedWhiteSpaceTrim = /(^[\t\f\v\r ]+|[\t\f\v\r ]+$)/g;
if (Array.isArray(str)) {

View File

@@ -3,6 +3,8 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { CancellationError } from 'vscode';
export function deepClone<T>(obj: T): T {
if (!obj || typeof obj !== 'object') {
return obj;
@@ -140,3 +142,119 @@ export class Delayer<T> {
export interface ITask<T> {
(): T;
}
/**
 * Generates a random (version 4) UUID in the canonical lowercase
 * `xxxxxxxx-xxxx-4xxx-[89ab]xxx-xxxxxxxxxxxx` form.
 *
 * Copied from src/vs/base/common/uuid.ts
 *
 * @returns A 36 character UUID string.
 */
export function generateUuid() {
	// Use `crypto.getRandomValues` if possible; it is looked up through `globalThis` so that
	// runtimes without a global `crypto` simply fall back to `Math.random`.
	const cryptoApi = (globalThis as { crypto?: { getRandomValues?(bucket: Uint8Array): Uint8Array } }).crypto;
	function getRandomValues(bucket: Uint8Array): Uint8Array {
		if (cryptoApi && typeof cryptoApi.getRandomValues === 'function') {
			return cryptoApi.getRandomValues(bucket);
		}
		for (let i = 0; i < bucket.length; i++) {
			bucket[i] = Math.floor(Math.random() * 256);
		}
		return bucket;
	}

	// get data
	const data = getRandomValues(new Uint8Array(16));

	// set version bits (version 4) and variant bits (RFC 4122)
	data[6] = (data[6] & 0x0f) | 0x40;
	data[8] = (data[8] & 0x3f) | 0x80;

	// print as string: 4-2-2-2-6 bytes, two hex digits per byte
	const hex = Array.from(data, byte => byte.toString(16).padStart(2, '0'));
	return [
		hex.slice(0, 4),
		hex.slice(4, 6),
		hex.slice(6, 8),
		hex.slice(8, 10),
		hex.slice(10, 16)
	].map(group => group.join('')).join('-');
}
/** Signature of the resolve callback of a promise of `T`. */
export type ValueCallback<T = unknown> = (value: T | Promise<T>) => void;

/** How a {@link DeferredPromise} was settled. */
const enum DeferredOutcome {
	Resolved,
	Rejected
}

/**
 * Creates a promise whose resolution or rejection can be controlled imperatively.
 *
 * The promise is exposed as `p`. The first call to `complete`, `error` or `cancel`
 * settles it; any later call is ignored so that `isResolved`, `isRejected` and
 * `value` always agree with the actual state of `p`.
 */
export class DeferredPromise<T> {

	private completeCallback!: ValueCallback<T>;
	private errorCallback!: (err: unknown) => void;
	private outcome?: { outcome: DeferredOutcome.Rejected; value: any } | { outcome: DeferredOutcome.Resolved; value: T };

	/** `true` once the promise has been rejected via `error` or `cancel`. */
	public get isRejected() {
		return this.outcome?.outcome === DeferredOutcome.Rejected;
	}

	/** `true` once the promise has been resolved via `complete`. */
	public get isResolved() {
		return this.outcome?.outcome === DeferredOutcome.Resolved;
	}

	/** `true` once the promise has been either resolved or rejected. */
	public get isSettled() {
		return !!this.outcome;
	}

	/** The value passed to `complete`, or `undefined` when not (yet) resolved. */
	public get value() {
		return this.outcome?.outcome === DeferredOutcome.Resolved ? this.outcome?.value : undefined;
	}

	/** The promise controlled by this instance. */
	public readonly p: Promise<T>;

	constructor() {
		// The executor runs synchronously, so both callbacks are assigned before the constructor returns.
		this.p = new Promise<T>((c, e) => {
			this.completeCallback = c;
			this.errorCallback = e;
		});
	}

	/**
	 * Resolves `p` with `value`. Does nothing when the promise is already settled.
	 * @returns A promise that resolves once the outcome has been recorded.
	 */
	public complete(value: T) {
		return new Promise<void>(resolve => {
			// First settlement wins: a settled promise cannot change state, so the
			// recorded outcome must not be overwritten either.
			if (!this.outcome) {
				this.completeCallback(value);
				this.outcome = { outcome: DeferredOutcome.Resolved, value };
			}
			resolve();
		});
	}

	/**
	 * Rejects `p` with `err`. Does nothing when the promise is already settled.
	 * @returns A promise that resolves once the outcome has been recorded.
	 */
	public error(err: unknown) {
		return new Promise<void>(resolve => {
			if (!this.outcome) {
				this.errorCallback(err);
				this.outcome = { outcome: DeferredOutcome.Rejected, value: err };
			}
			resolve();
		});
	}

	/** Rejects `p` with a `CancellationError`. */
	public cancel() {
		return this.error(new CancellationError());
	}
}

View File

@@ -8,6 +8,7 @@ import { NotebookSerializer } from './notebookSerializer';
import { activate as keepNotebookModelStoreInSync } from './notebookModelStoreSync';
import { notebookImagePasteSetup } from './notebookImagePaste';
import { AttachmentCleaner } from './notebookAttachmentCleaner';
import { serializeNotebookToString } from './serializers';
// From {nbformat.INotebookMetadata} in @jupyterlab/coreutils
type NotebookMetadata = {
@@ -106,7 +107,7 @@ export function activate(context: vscode.ExtensionContext) {
return true;
},
exportNotebook: (notebook: vscode.NotebookData): Promise<string> => {
return exportNotebook(notebook, serializer);
return Promise.resolve(serializeNotebookToString(notebook));
},
setNotebookMetadata: async (resource: vscode.Uri, metadata: Partial<NotebookMetadata>): Promise<boolean> => {
const document = vscode.workspace.notebookDocuments.find(doc => doc.uri.toString() === resource.toString());
@@ -127,8 +128,4 @@ export function activate(context: vscode.ExtensionContext) {
};
}
function exportNotebook(notebook: vscode.NotebookData, serializer: NotebookSerializer): Promise<string> {
return serializer.serializeNotebookToString(notebook);
}
export function deactivate() { }

View File

@@ -4,10 +4,10 @@
*--------------------------------------------------------------------------------------------*/
import { Disposable, ExtensionContext, NotebookCellKind, NotebookDocument, NotebookDocumentChangeEvent, NotebookEdit, workspace, WorkspaceEdit, type NotebookCell, type NotebookDocumentWillSaveEvent } from 'vscode';
import { getCellMetadata, getVSCodeCellLanguageId, removeVSCodeCellLanguageId, setVSCodeCellLanguageId, sortObjectPropertiesRecursively } from './serializers';
import { getCellMetadata, getVSCodeCellLanguageId, removeVSCodeCellLanguageId, setVSCodeCellLanguageId, sortObjectPropertiesRecursively, getNotebookMetadata } from './serializers';
import { CellMetadata } from './common';
import { getNotebookMetadata } from './notebookSerializer';
import type * as nbformat from '@jupyterlab/nbformat';
import { generateUuid } from './helper';
const noop = () => {
//
@@ -242,55 +242,3 @@ function generateCellId(notebook: NotebookDocument) {
}
}
/**
 * Generates a random (version 4) UUID in the canonical lowercase
 * `xxxxxxxx-xxxx-4xxx-[89ab]xxx-xxxxxxxxxxxx` form.
 *
 * Copied from src/vs/base/common/uuid.ts
 *
 * @returns A 36 character UUID string.
 */
function generateUuid() {
	// Use `crypto.getRandomValues` if possible; it is looked up through `globalThis` so that
	// runtimes without a global `crypto` simply fall back to `Math.random`.
	const cryptoApi = (globalThis as { crypto?: { getRandomValues?(bucket: Uint8Array): Uint8Array } }).crypto;
	function getRandomValues(bucket: Uint8Array): Uint8Array {
		if (cryptoApi && typeof cryptoApi.getRandomValues === 'function') {
			return cryptoApi.getRandomValues(bucket);
		}
		for (let i = 0; i < bucket.length; i++) {
			bucket[i] = Math.floor(Math.random() * 256);
		}
		return bucket;
	}

	// get data
	const data = getRandomValues(new Uint8Array(16));

	// set version bits (version 4) and variant bits (RFC 4122)
	data[6] = (data[6] & 0x0f) | 0x40;
	data[8] = (data[8] & 0x3f) | 0x80;

	// print as string: 4-2-2-2-6 bytes, two hex digits per byte
	const hex = Array.from(data, byte => byte.toString(16).padStart(2, '0'));
	return [
		hex.slice(0, 4),
		hex.slice(4, 6),
		hex.slice(6, 8),
		hex.slice(8, 10),
		hex.slice(10, 16)
	].map(group => group.join('')).join('-');
}

View File

@@ -6,14 +6,34 @@
import type * as nbformat from '@jupyterlab/nbformat';
import * as detectIndent from 'detect-indent';
import * as vscode from 'vscode';
import { defaultNotebookFormat } from './constants';
import { getPreferredLanguage, jupyterNotebookModelToNotebookData } from './deserializers';
import { createJupyterCellFromNotebookCell, pruneCell, sortObjectPropertiesRecursively } from './serializers';
import * as fnv from '@enonic/fnv-plus';
import { notebookSerializationWorkerData } from './common';
import { DeferredPromise, generateUuid } from './helper';
import { serializeNotebookToString } from './serializers';
export class NotebookSerializer extends vscode.Disposable implements vscode.NotebookSerializer {
private experimentalSave = vscode.workspace.getConfiguration('ipynb').get('experimental.serialization', false);
private disposed: boolean = false;
private worker?: import('node:worker_threads').Worker;
private tasks = new Map<string, DeferredPromise<Uint8Array>>();
export class NotebookSerializer implements vscode.NotebookSerializer {
constructor(readonly context: vscode.ExtensionContext) {
super(() => { });
context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(e => {
if (e.affectsConfiguration('ipynb.experimental.serialization')) {
this.experimentalSave = vscode.workspace.getConfiguration('ipynb').get('experimental.serialization', false);
}
}));
}
/**
 * Marks the serializer as disposed and terminates the worker thread, if one was started.
 */
override dispose() {
// Set the flag before terminating: the worker's exit/error handlers only log when it is not set.
this.disposed = true;
try {
void this.worker?.terminate();
} catch {
// Best effort: a failure to terminate the worker is deliberately ignored.
}
super.dispose();
}
public async deserializeNotebook(content: Uint8Array, _token: vscode.CancellationToken): Promise<vscode.NotebookData> {
@@ -72,67 +92,66 @@ export class NotebookSerializer implements vscode.NotebookSerializer {
}
public async serializeNotebook(data: vscode.NotebookData, _token: vscode.CancellationToken): Promise<Uint8Array> {
return new TextEncoder().encode(await this.serializeNotebookToString(data));
}
private async serializeViaWorker(workerData: notebookSerializationWorkerData): Promise<string> {
const workerThreads = await import('node:worker_threads');
const path = await import('node:path');
const { Worker } = workerThreads;
return await new Promise((resolve, reject) => {
const workerFile = path.join(__dirname, 'notebookSerializerWorker.js');
const worker = new Worker(workerFile, { workerData });
worker.on('message', resolve);
worker.on('error', reject);
worker.on('exit', (code) => {
if (code !== 0) {
reject(new Error(`Worker stopped with exit code ${code}`));
}
});
});
}
private serializeNotebookToJSON(notebookContent: Partial<nbformat.INotebookContent>, indentAmount: string): Promise<string> {
const isInNodeJSContext = typeof process !== 'undefined' && process.release && process.release.name === 'node';
const experimentalSave = vscode.workspace.getConfiguration('ipynb').get('experimental.serialization', false);
if (isInNodeJSContext && experimentalSave) {
return this.serializeViaWorker({
notebookContent,
indentAmount
});
} else {
// ipynb always ends with a trailing new line (we add this so that SCMs do not show unnecessary changes, resulting from a missing trailing new line).
const sorted = sortObjectPropertiesRecursively(notebookContent);
return Promise.resolve(JSON.stringify(sorted, undefined, indentAmount) + '\n');
if (this.disposed) {
return new Uint8Array(0);
}
if (this.experimentalSave) {
return this.serializeViaWorker2(data);
}
const serialized = serializeNotebookToString(data);
return new TextEncoder().encode(serialized);
}
public serializeNotebookToString(data: vscode.NotebookData): Promise<string> {
const notebookContent = getNotebookMetadata(data);
// use the preferred language from document metadata or the first cell language as the notebook preferred cell language
const preferredCellLanguage = notebookContent.metadata?.language_info?.name ?? data.cells.find(cell => cell.kind === vscode.NotebookCellKind.Code)?.languageId;
/**
 * Lazily starts the serialization worker thread and wires up its event handlers.
 *
 * Results posted by the worker are matched to their pending task through the `id`
 * of the message. When the worker errors or exits, every task that is still
 * pending is rejected so that callers do not wait forever for a reply.
 *
 * @returns The running worker (an existing one is reused).
 * @throws If the serializer has been disposed.
 */
private async startWorker() {
	if (this.disposed) {
		throw new Error('Serializer disposed');
	}
	if (this.worker) {
		return this.worker;
	}
	const { Worker } = await import('worker_threads');
	// The import above yields, so another caller may have created the worker (or the
	// serializer may have been disposed) in the meantime. Re-check to avoid spawning
	// a second worker that would never be terminated.
	if (this.disposed) {
		throw new Error('Serializer disposed');
	}
	if (this.worker) {
		return this.worker;
	}
	const outputDir = getOutputDir(this.context);
	const worker = new Worker(vscode.Uri.joinPath(this.context.extensionUri, outputDir, 'notebookSerializerWorker.js').fsPath, {});
	this.worker = worker;

	// Rejects all requests that have not been answered yet.
	const failPendingTasks = (reason: unknown) => {
		const pending = [...this.tasks.values()];
		this.tasks.clear();
		for (const task of pending) {
			void task.error(reason);
		}
	};

	worker.on('exit', (exitCode) => {
		if (!this.disposed) {
			console.error(`IPynb Notebook Serializer Worker exited unexpectedly`, exitCode);
		}
		// Only forget the worker if it is still the current one.
		if (this.worker === worker) {
			this.worker = undefined;
		}
		// No reply can arrive from a worker that has exited.
		failPendingTasks(new Error(`IPynb Notebook Serializer Worker exited with code ${exitCode}`));
	});
	worker.on('message', (result: { data: Uint8Array; id: string }) => {
		const task = this.tasks.get(result.id);
		if (task) {
			void task.complete(result.data);
			this.tasks.delete(result.id);
		}
	});
	worker.on('error', (err) => {
		if (!this.disposed) {
			console.error(`IPynb Notebook Serializer Worker errored unexpectedly`, err);
		}
		// Surface the actual failure to the callers that are waiting for a result.
		failPendingTasks(err);
	});
	return worker;
}
private async serializeViaWorker2(data: vscode.NotebookData): Promise<Uint8Array> {
const worker = await this.startWorker();
const id = generateUuid();
const start1 = performance.now();
notebookContent.cells = data.cells
.map(cell => createJupyterCellFromNotebookCell(cell, preferredCellLanguage))
.map(pruneCell);
const deferred = new DeferredPromise<Uint8Array>();
deferred.p.finally(() => {
const time0 = performance.now() - start1;
console.log(`Got Ba ck Buffers`, time0);
});
this.tasks.set(id, deferred);
worker.postMessage({ data, id });
const indentAmount = data.metadata && 'indentAmount' in data.metadata && typeof data.metadata.indentAmount === 'string' ?
data.metadata.indentAmount :
' ';
return this.serializeNotebookToJSON(notebookContent, indentAmount);
return deferred.p;
}
}
export function getNotebookMetadata(document: vscode.NotebookDocument | vscode.NotebookData) {
const existingContent: Partial<nbformat.INotebookContent> = document.metadata || {};
const notebookContent: Partial<nbformat.INotebookContent> = {};
notebookContent.cells = existingContent.cells || [];
notebookContent.nbformat = existingContent.nbformat || defaultNotebookFormat.major;
notebookContent.nbformat_minor = existingContent.nbformat_minor ?? defaultNotebookFormat.minor;
notebookContent.metadata = existingContent.metadata || {};
return notebookContent;
/**
 * Determines the folder that holds the compiled extension files, based on the
 * `main` entry of the extension's package.json.
 *
 * @param context The extension context whose `extension.packageJSON.main` is inspected.
 * @returns `'dist'` when `main` contains a `/dist/` path segment, otherwise `'out'`.
 */
function getOutputDir(context: vscode.ExtensionContext): string {
	// `packageJSON` is untyped, so `main` may be missing or not a string; treat that like a non-dist build.
	const main: unknown = context.extension.packageJSON.main;
	return typeof main === 'string' && main.includes('/dist/') ? 'dist' : 'out';
}

View File

@@ -3,28 +3,17 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { notebookSerializationWorkerData } from './common';
import { workerData, parentPort } from 'node:worker_threads';
import { parentPort } from 'worker_threads';
import { serializeNotebookToString } from './serializers';
import type { NotebookData } from 'vscode';
/**
 * Returns a copy of the given value in which the keys of every non-empty object
 * are inserted in sorted order, recursing into arrays and nested objects.
 * Primitives, `null`, `undefined` and objects without keys are returned unchanged.
 */
function sortObjectPropertiesRecursively(obj: any): any {
	if (Array.isArray(obj)) {
		return obj.map(entry => sortObjectPropertiesRecursively(entry));
	}
	if (obj === undefined || obj === null || typeof obj !== 'object') {
		return obj;
	}
	const keys = Object.keys(obj);
	if (keys.length === 0) {
		return obj;
	}
	const sortedObj: Record<string, any> = {};
	for (const prop of keys.sort()) {
		sortedObj[prop] = sortObjectPropertiesRecursively(obj[prop]);
	}
	return sortedObj as any;
}
if (parentPort) {
const { notebookContent, indentAmount } = <notebookSerializationWorkerData>workerData;
const json = JSON.stringify(sortObjectPropertiesRecursively(notebookContent), undefined, indentAmount) + '\n';
parentPort.postMessage(json);
parentPort.on('message', ({ id, data }: { id: string; data: NotebookData }) => {
if (parentPort) {
const json = serializeNotebookToString(data);
const bytes = new TextEncoder().encode(json);
parentPort.postMessage({ id, data: bytes });
}
});
}

View File

@@ -4,24 +4,18 @@
*--------------------------------------------------------------------------------------------*/
import type * as nbformat from '@jupyterlab/nbformat';
import { NotebookCell, NotebookCellData, NotebookCellKind, NotebookCellOutput } from 'vscode';
import type { NotebookCell, NotebookCellData, NotebookCellOutput, NotebookData, NotebookDocument } from 'vscode';
import { CellOutputMetadata, type CellMetadata } from './common';
import { textMimeTypes } from './deserializers';
import { textMimeTypes, NotebookCellKindMarkup, CellOutputMimeTypes, defaultNotebookFormat } from './constants';
const textDecoder = new TextDecoder();
enum CellOutputMimeTypes {
error = 'application/vnd.code.notebook.error',
stderr = 'application/vnd.code.notebook.stderr',
stdout = 'application/vnd.code.notebook.stdout'
}
export function createJupyterCellFromNotebookCell(
vscCell: NotebookCellData,
preferredLanguage: string | undefined
preferredLanguage: string | undefined,
): nbformat.IRawCell | nbformat.IMarkdownCell | nbformat.ICodeCell {
let cell: nbformat.IRawCell | nbformat.IMarkdownCell | nbformat.ICodeCell;
if (vscCell.kind === NotebookCellKind.Markup) {
if (vscCell.kind === NotebookCellKindMarkup) {
cell = createMarkdownCellFromNotebookCell(vscCell);
} else if (vscCell.languageId === 'raw') {
cell = createRawCellFromNotebookCell(vscCell);
@@ -97,7 +91,7 @@ function createCodeCellFromNotebookCell(cell: NotebookCellData, preferredLanguag
removeVSCodeCellLanguageId(cellMetadata);
}
const codeCell: any = {
const codeCell: nbformat.ICodeCell = {
cell_type: 'code',
// Metadata should always contain the execution_count.
// When ever execution summary data changes we will update the metadata to contain the execution count.
@@ -451,3 +445,36 @@ function fixupOutput(output: nbformat.IOutput): nbformat.IOutput {
}
return result;
}
/**
 * Serializes notebook data into the JSON text of an ipynb file.
 *
 * @param data The notebook (metadata and cells) to serialize.
 * @returns The JSON string produced by `serializeNotebookToJSON`.
 */
export function serializeNotebookToString(data: NotebookData): string {
	const notebookContent = getNotebookMetadata(data);

	// use the preferred language from document metadata or the first cell language as the notebook preferred cell language
	let preferredCellLanguage = notebookContent.metadata?.language_info?.name;
	if (preferredCellLanguage === undefined || preferredCellLanguage === null) {
		// 2 is the numeric value of NotebookCellKind.Code.
		const firstCodeCell = data.cells.find(cell => cell.kind === 2);
		preferredCellLanguage = firstCodeCell?.languageId;
	}

	const jupyterCells = data.cells.map(cell => createJupyterCellFromNotebookCell(cell, preferredCellLanguage));
	notebookContent.cells = jupyterCells.map(pruneCell);

	// Honor the indentation stored in the notebook metadata, otherwise indent with a single space.
	const metadata = data.metadata;
	let indentAmount = ' ';
	if (metadata && 'indentAmount' in metadata && typeof metadata.indentAmount === 'string') {
		indentAmount = metadata.indentAmount;
	}

	return serializeNotebookToJSON(notebookContent, indentAmount);
}
/**
 * Turns the notebook content into JSON text, after passing it through
 * `sortObjectPropertiesRecursively`, using `indentAmount` as the indentation.
 *
 * @returns The JSON text followed by a single new line.
 */
function serializeNotebookToJSON(notebookContent: Partial<nbformat.INotebookContent>, indentAmount: string): string {
	const json = JSON.stringify(sortObjectPropertiesRecursively(notebookContent), undefined, indentAmount);
	// ipynb always ends with a trailing new line (we add this so that SCMs do not show unnecessary changes, resulting from a missing trailing new line).
	return `${json}\n`;
}
/**
 * Builds the top-level notebook content (cells, nbformat, nbformat_minor, metadata)
 * from the metadata of the given notebook, filling in defaults for missing values.
 *
 * @param document The notebook document or notebook data whose `metadata` is read.
 * @returns A new object; missing format versions fall back to `defaultNotebookFormat`.
 */
export function getNotebookMetadata(document: NotebookDocument | NotebookData) {
	const existingContent: Partial<nbformat.INotebookContent> = document.metadata || {};
	const notebookContent: Partial<nbformat.INotebookContent> = {
		cells: existingContent.cells || [],
		nbformat: existingContent.nbformat || defaultNotebookFormat.major,
		nbformat_minor: existingContent.nbformat_minor ?? defaultNotebookFormat.minor,
		metadata: existingContent.metadata || {}
	};
	return notebookContent;
}