Ensure we always normalize cell source (#251670)

* Ensure we always normalize cell source
* oops
2026-07-12 09:38:26 +01:00 · 2025-06-17 13:46:59 +10:00
parent 4bfe82aa94
commit 4ff7a87b13
2 changed files with 27 additions and 11 deletions
@@ -90,8 +90,16 @@ function sortOutputItemsBasedOnDisplayOrder(outputItems: NotebookCellOutputItem[
 		.sort((outputItemA, outputItemB) => outputItemA.index - outputItemB.index).map(item => item.item);
 }

-function concatMultilineString(str: string | string[], trim?: boolean): string {
-	const nonLineFeedWhiteSpaceTrim = /(^[\t\f\v\r ]+|[\t\f\v\r ]+$)/g;
+/**
+ * Joins a cell's source (a single string or an array of strings) into one string
+ * and replaces every CRLF (`\r\n`) with LF (`\n`); a lone `\r` is left untouched.
+ * The serializer's `splitCellSourceIntoMultilineString` applies the same normalization.
+ */
+function concatMultilineCellSource(source: string | string[]): string {
+	return concatMultilineString(source).replace(/\r\n/g, '\n');
+}
+
+function concatMultilineString(str: string | string[]): string {
 	if (Array.isArray(str)) {
 		let result = '';
 		for (let i = 0; i < str.length; i += 1) {
@@ -103,10 +111,9 @@ function concatMultilineString(str: string | string[], trim?: boolean): string {
 			}
 		}

-		// Just trim whitespace. Leave \n in place
-		return trim ? result.replace(nonLineFeedWhiteSpaceTrim, '') : result;
+		return result;
 	}
-	return trim ? str.toString().replace(nonLineFeedWhiteSpaceTrim, '') : str.toString();
+	return str.toString();
 }

 function convertJupyterOutputToBuffer(mime: string, value: unknown): NotebookCellOutputItem {
@@ -289,7 +296,7 @@ export function jupyterCellOutputToCellOutput(output: nbformat.IOutput): Noteboo
 }

 function createNotebookCellDataFromRawCell(cell: nbformat.IRawCell): NotebookCellData {
-	const cellData = new NotebookCellData(NotebookCellKind.Code, concatMultilineString(cell.source), 'raw');
+	const cellData = new NotebookCellData(NotebookCellKind.Code, concatMultilineCellSource(cell.source), 'raw');
 	cellData.outputs = [];
 	cellData.metadata = getNotebookCellMetadata(cell);
 	return cellData;
@@ -297,7 +304,7 @@ function createNotebookCellDataFromRawCell(cell: nbformat.IRawCell): NotebookCel
 function createNotebookCellDataFromMarkdownCell(cell: nbformat.IMarkdownCell): NotebookCellData {
 	const cellData = new NotebookCellData(
 		NotebookCellKind.Markup,
-		concatMultilineString(cell.source),
+		concatMultilineCellSource(cell.source),
 		'markdown'
 	);
 	cellData.outputs = [];
@@ -309,7 +316,7 @@ function createNotebookCellDataFromCodeCell(cell: nbformat.ICodeCell, cellLangua
 	const outputs = cellOutputs.map(jupyterCellOutputToCellOutput);
 	const hasExecutionCount = typeof cell.execution_count === 'number' && cell.execution_count > 0;

-	const source = concatMultilineString(cell.source);
+	const source = concatMultilineCellSource(cell.source);

 	const executionSummary: NotebookCellExecutionSummary = hasExecutionCount
 		? { executionOrder: cell.execution_count as number }
@@ -103,7 +103,7 @@ function createCodeCellFromNotebookCell(cell: NotebookCellData, preferredLanguag
 		// & in that case execution summary could contain the data, but metadata will not.
 		// In such cases we do not want to re-set the metadata with the value from execution summary (remember, user reverted that).
 		execution_count: cellMetadata.execution_count ?? null,
-		source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
+		source: splitCellSourceIntoMultilineString(cell.value),
 		outputs: (cell.outputs || []).map(translateCellDisplayOutput),
 		metadata: cellMetadata.metadata
 	};
@@ -117,7 +117,7 @@ function createRawCellFromNotebookCell(cell: NotebookCellData): nbformat.IRawCel
 	const cellMetadata = getCellMetadata({ cell });
 	const rawCell: any = {
 		cell_type: 'raw',
-		source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
+		source: splitCellSourceIntoMultilineString(cell.value),
 		metadata: cellMetadata?.metadata || {} // This cannot be empty.
 	};
 	if (cellMetadata?.attachments) {
@@ -129,6 +129,15 @@ function createRawCellFromNotebookCell(cell: NotebookCellData): nbformat.IRawCel
 	return rawCell;
 }

+/**
+ * Converts a cell's source text into the `string[]` form produced by `splitMultilineString`.
+ * Every CRLF (`\r\n`) is replaced with LF (`\n`) before splitting; a lone `\r` is left untouched.
+ * The deserializer's `concatMultilineCellSource` applies the same CRLF-to-LF normalization.
+ */
+function splitCellSourceIntoMultilineString(source: string): string[] {
+	return splitMultilineString(source.replace(/\r\n/g, '\n'));
+}
+
 function splitMultilineString(source: nbformat.MultilineString): string[] {
 	if (Array.isArray(source)) {
 		return source as string[];
@@ -368,7 +377,7 @@ export function createMarkdownCellFromNotebookCell(cell: NotebookCellData): nbfo
 	const cellMetadata = getCellMetadata({ cell });
 	const markdownCell: any = {
 		cell_type: 'markdown',
-		source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
+		source: splitCellSourceIntoMultilineString(cell.value),
 		metadata: cellMetadata?.metadata || {} // This cannot be empty.
 	};
 	if (cellMetadata?.attachments) {