Ensure we always normalize cell source (#251670)

* Ensure we always normalize cell source

* oops
This commit is contained in:
Don Jayamanne
2025-06-17 13:46:59 +10:00
committed by GitHub
parent 4bfe82aa94
commit 4ff7a87b13
2 changed files with 27 additions and 11 deletions
+15 -8
View File
@@ -90,8 +90,16 @@ function sortOutputItemsBasedOnDisplayOrder(outputItems: NotebookCellOutputItem[
.sort((outputItemA, outputItemB) => outputItemA.index - outputItemB.index).map(item => item.item);
}
function concatMultilineString(str: string | string[], trim?: boolean): string {
const nonLineFeedWhiteSpaceTrim = /(^[\t\f\v\r ]+|[\t\f\v\r ]+$)/g;
/**
 * Builds the text of a cell from its stored `source`, which may be either a
 * single string or an array of strings.
 *
 * The value is first flattened into one string by `concatMultilineString`;
 * afterwards every CRLF (`\r\n`) sequence is rewritten as a lone LF (`\n`).
 * The serializer applies the same CRLF-to-LF normalization to cell source.
 *
 * @param source Cell source, as a string or an array of strings.
 * @returns The flattened source text with CRLF sequences replaced by LF.
 */
function concatMultilineCellSource(source: string | string[]): string {
	const joined = concatMultilineString(source);
	return joined.replace(/\r\n/g, '\n');
}
function concatMultilineString(str: string | string[]): string {
if (Array.isArray(str)) {
let result = '';
for (let i = 0; i < str.length; i += 1) {
@@ -103,10 +111,9 @@ function concatMultilineString(str: string | string[], trim?: boolean): string {
}
}
// Just trim whitespace. Leave \n in place
return trim ? result.replace(nonLineFeedWhiteSpaceTrim, '') : result;
return result;
}
return trim ? str.toString().replace(nonLineFeedWhiteSpaceTrim, '') : str.toString();
return str.toString();
}
function convertJupyterOutputToBuffer(mime: string, value: unknown): NotebookCellOutputItem {
@@ -289,7 +296,7 @@ export function jupyterCellOutputToCellOutput(output: nbformat.IOutput): Noteboo
}
function createNotebookCellDataFromRawCell(cell: nbformat.IRawCell): NotebookCellData {
const cellData = new NotebookCellData(NotebookCellKind.Code, concatMultilineString(cell.source), 'raw');
const cellData = new NotebookCellData(NotebookCellKind.Code, concatMultilineCellSource(cell.source), 'raw');
cellData.outputs = [];
cellData.metadata = getNotebookCellMetadata(cell);
return cellData;
@@ -297,7 +304,7 @@ function createNotebookCellDataFromRawCell(cell: nbformat.IRawCell): NotebookCel
function createNotebookCellDataFromMarkdownCell(cell: nbformat.IMarkdownCell): NotebookCellData {
const cellData = new NotebookCellData(
NotebookCellKind.Markup,
concatMultilineString(cell.source),
concatMultilineCellSource(cell.source),
'markdown'
);
cellData.outputs = [];
@@ -309,7 +316,7 @@ function createNotebookCellDataFromCodeCell(cell: nbformat.ICodeCell, cellLangua
const outputs = cellOutputs.map(jupyterCellOutputToCellOutput);
const hasExecutionCount = typeof cell.execution_count === 'number' && cell.execution_count > 0;
const source = concatMultilineString(cell.source);
const source = concatMultilineCellSource(cell.source);
const executionSummary: NotebookCellExecutionSummary = hasExecutionCount
? { executionOrder: cell.execution_count as number }
+12 -3
View File
@@ -103,7 +103,7 @@ function createCodeCellFromNotebookCell(cell: NotebookCellData, preferredLanguag
// & in that case execution summary could contain the data, but metadata will not.
// In such cases we do not want to re-set the metadata with the value from execution summary (remember, user reverted that).
execution_count: cellMetadata.execution_count ?? null,
source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
source: splitCellSourceIntoMultilineString(cell.value),
outputs: (cell.outputs || []).map(translateCellDisplayOutput),
metadata: cellMetadata.metadata
};
@@ -117,7 +117,7 @@ function createRawCellFromNotebookCell(cell: NotebookCellData): nbformat.IRawCel
const cellMetadata = getCellMetadata({ cell });
const rawCell: any = {
cell_type: 'raw',
source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
source: splitCellSourceIntoMultilineString(cell.value),
metadata: cellMetadata?.metadata || {} // This cannot be empty.
};
if (cellMetadata?.attachments) {
@@ -129,6 +129,15 @@ function createRawCellFromNotebookCell(cell: NotebookCellData): nbformat.IRawCel
return rawCell;
}
/**
 * Converts the text of a cell into the string-array form used for the
 * serialized `source` field.
 *
 * Every CRLF (`\r\n`) sequence is rewritten as a lone LF (`\n`) first, and the
 * normalized text is then handed to `splitMultilineString` to be split.
 * The deserializer applies the same CRLF-to-LF normalization to cell source.
 *
 * @param source Cell text as a single string.
 * @returns The array of strings returned by `splitMultilineString` for the normalized text.
 */
function splitCellSourceIntoMultilineString(source: string): string[] {
	const normalized = source.replace(/\r\n/g, '\n');
	return splitMultilineString(normalized);
}
function splitMultilineString(source: nbformat.MultilineString): string[] {
if (Array.isArray(source)) {
return source as string[];
@@ -368,7 +377,7 @@ export function createMarkdownCellFromNotebookCell(cell: NotebookCellData): nbfo
const cellMetadata = getCellMetadata({ cell });
const markdownCell: any = {
cell_type: 'markdown',
source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
source: splitCellSourceIntoMultilineString(cell.value),
metadata: cellMetadata?.metadata || {} // This cannot be empty.
};
if (cellMetadata?.attachments) {