From 4ba9cfe9f2a86778f231712ad758fa5ee3fe1f82 Mon Sep 17 00:00:00 2001 From: Ben Villalobos Date: Tue, 31 Mar 2026 16:39:26 -0700 Subject: [PATCH] Shadow mode: Dynamic NOTICE generation scripts Add dynamic NOTICE file generation that merges CG output with LICENSE files discovered from node_modules/ and cgmanifest.json. New files: - build/azure-pipelines/oss/generate-notices.ts (merge script) - build/azure-pipelines/oss/validate-notices.ts (validation) - build/azure-pipelines/oss/static-notices.json (undiscoverable packages) Shadow mode runs after CG detection with continueOnError: true. Publishes merged NOTICE + report as pipeline artifact for comparison. No impact on shipping builds. --- build/azure-pipelines/oss/generate-notices.ts | 774 ++++++++++++++++++ build/azure-pipelines/oss/static-notices.json | 31 + build/azure-pipelines/oss/validate-notices.ts | 288 +++++++ .../product-quality-checks.yml | 33 +- 4 files changed, 1124 insertions(+), 2 deletions(-) create mode 100644 build/azure-pipelines/oss/generate-notices.ts create mode 100644 build/azure-pipelines/oss/static-notices.json create mode 100644 build/azure-pipelines/oss/validate-notices.ts diff --git a/build/azure-pipelines/oss/generate-notices.ts b/build/azure-pipelines/oss/generate-notices.ts new file mode 100644 index 00000000000..6579bc377c3 --- /dev/null +++ b/build/azure-pipelines/oss/generate-notices.ts @@ -0,0 +1,774 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import * as fs from 'fs'; +import * as path from 'path'; + +// -- Types -------------------------------------------------------------------- + +interface NoticeEntry { + name: string; + version: string; + license: string; + url: string; + licenseText: string; +} + +interface StaticNoticesFile { + version: 1; + /** Packages where LICENSE cannot be discovered automatically */ + packages: StaticPackage[]; + /** Dev deps that leak into CG output — remove from final NOTICE */ + devDepFilter: string[]; + /** Override CG's license classification for known mis-classifications */ + overrides: Record; + /** Packages that MUST appear in final file — validation fails if missing */ + requiredPackages: string[]; +} + +interface StaticPackage { + name: string; + version: string; + license: string; + licenseText: string; + url: string; +} + +interface CgManifestRegistration { + component: { + type: string; + git?: { name: string; repositoryUrl: string; commitHash: string }; + npm?: { name: string; version: string }; + other?: { name: string; downloadUrl?: string }; + }; + license?: string; + licenseDetail?: string[] | string[][]; + version?: string; +} + +interface MergeReport { + timestamp: string; + cgEntryCount: number; + dynamicGapsFilled: number; + staticEntriesAdded: number; + devDepsFiltered: string[]; + overridesApplied: string[]; + totalCount: number; + quality: string; + /** Packages covered dynamically — suggest filing CD curations */ + dynamicCoverageList: string[]; + /** Packages from static-notices.json that CG or node_modules now covers */ + staleStaticEntries: string[]; + /** Packages we could not find a license for */ + missingLicense: string[]; +} + +// -- Constants ---------------------------------------------------------------- + +const SEPARATOR = '---------------------------------------------------------'; +const SEPARATOR_REGEX = /^-{50,}$/m; + +const 
HEADER_INSIDER = `NOTICES AND INFORMATION +Do Not Translate or Localize + +This software incorporates material from third parties. +Microsoft makes certain open source code available at https://3rdpartysource.microsoft.com, +or you may send a check or money order for US $5.00, including the product name, +the open source component name, platform, and version number, to: + +Source Code Compliance Team +Microsoft Corporation +One Microsoft Way +Redmond, WA 98052 +USA + +Notwithstanding any other terms, you may reverse engineer this software to the extent +required to debug changes to any libraries licensed under the GNU Lesser General Public License.`; + +const HEADER_STABLE = HEADER_INSIDER; // Same header for both qualities + +// -- Parsing ------------------------------------------------------------------ + +/** + * Parse a ThirdPartyNotices.txt (CG or mixin format) into structured entries. + * The format uses separator lines (50+ dashes). Each component block has a + * header line followed by license text. 
+ */ +function parseNoticeFile(content: string): NoticeEntry[] { + const entries: NoticeEntry[] = []; + const blocks = content.split(SEPARATOR_REGEX); + + // First block is the file header — skip it + for (let i = 1; i < blocks.length; i++) { + const block = blocks[i].trim(); + if (!block) { + continue; + } + + const lines = block.split('\n'); + // Find the first non-empty line — that's the component header + let headerLine = ''; + let headerIdx = 0; + for (let j = 0; j < lines.length; j++) { + if (lines[j].trim()) { + headerLine = lines[j].trim(); + headerIdx = j; + break; + } + } + + if (!headerLine) { + continue; + } + + // Parse header: "name version - license" + // Name may contain @scope/name, version starts with digit + const headerMatch = headerLine.match(/^(.+?)\s+([\d][^\s]*)\s+-\s+(.+)$/); + + let name: string; + let version: string; + let license: string; + let url = ''; + + if (headerMatch) { + name = headerMatch[1]; + version = headerMatch[2]; + license = headerMatch[3]; + } else { + // Fallback: treat entire line as name + name = headerLine; + version = ''; + license = ''; + } + + // Next non-empty line after header might be URL + const remainingLines = lines.slice(headerIdx + 1); + let licenseStartIdx = 0; + for (let j = 0; j < remainingLines.length; j++) { + const line = remainingLines[j].trim(); + if (!line) { + continue; + } + if (line.startsWith('http://') || line.startsWith('https://')) { + url = line; + licenseStartIdx = j + 1; + } else { + licenseStartIdx = j; + } + break; + } + + const licenseText = remainingLines.slice(licenseStartIdx).join('\n').trim(); + + entries.push({ name, version, license, url, licenseText }); + } + + return entries; +} + +// -- License Discovery -------------------------------------------------------- + +/** + * Try to find a LICENSE file in node_modules for the given package name. + * Returns the license text if found, undefined otherwise. 
+ */ +function findLicenseInNodeModules(packageName: string, repoRoot: string): { licenseText: string; licensePath: string } | undefined { + const searchDirs = [ + path.join(repoRoot, 'node_modules', packageName), + path.join(repoRoot, 'remote', 'node_modules', packageName), + path.join(repoRoot, 'remote', 'web', 'node_modules', packageName), + ]; + + // Also search extension node_modules + const extensionsDir = path.join(repoRoot, 'extensions'); + if (fs.existsSync(extensionsDir)) { + for (const ext of fs.readdirSync(extensionsDir)) { + searchDirs.push(path.join(extensionsDir, ext, 'node_modules', packageName)); + searchDirs.push(path.join(extensionsDir, ext, 'server', 'node_modules', packageName)); + searchDirs.push(path.join(extensionsDir, ext, 'server', 'lib', 'node_modules', packageName)); + } + } + + for (const dir of searchDirs) { + if (!fs.existsSync(dir)) { + continue; + } + + try { + const files = fs.readdirSync(dir); + const licenseFile = files.find(f => + /^license(\.md|\.txt|\.mit|\.bsd|\.apache)?$/i.test(f) || + /^licence(\.md|\.txt)?$/i.test(f) + ); + + if (licenseFile) { + const licensePath = path.join(dir, licenseFile); + const licenseText = fs.readFileSync(licensePath, 'utf8').trim(); + return { licenseText, licensePath }; + } + } catch { + // Directory exists but can't be read — skip + } + } + + return undefined; +} + +/** + * Read package.json license field and version for a package in node_modules. + */ +function readPackageJson(packageName: string, repoRoot: string): { version: string; license: string; repository: string } | undefined { + const pkgPath = path.join(repoRoot, 'node_modules', packageName, 'package.json'); + if (!fs.existsSync(pkgPath)) { + return undefined; + } + + try { + const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')); + const repo = typeof pkg.repository === 'string' ? pkg.repository : pkg.repository?.url || ''; + return { + version: pkg.version || '', + license: typeof pkg.license === 'string' ? 
pkg.license : '', + repository: repo.replace(/^git\+/, '').replace(/\.git$/, ''), + }; + } catch { + return undefined; + } +} + +/** + * Normalize a repository URL so different forms of the same repo can be compared. + * Strips protocol, .git suffix, and subpaths (monorepo packages often use /tree/master/... paths). + * e.g. "https://github.com/xtermjs/xterm.js/tree/master/addons/addon-serialize" → "github.com/xtermjs/xterm.js" + */ +function normalizeRepoUrl(url: string): string { + let normalized = url + .replace(/^git\+/, '') + .replace(/^https?:\/\//, '') + .replace(/^git:\/\//, '') + .replace(/\.git$/, '') + .replace(/\/$/, '') + .toLowerCase(); + + // Strip subpaths like /tree/master/..., /blob/main/..., /packages/..., /addons/... + // Keep only owner/repo (first two path segments after the host) + const parts = normalized.split('/'); + if (parts.length > 3) { + // parts[0] = "github.com", parts[1] = "owner", parts[2] = "repo", parts[3+] = subpath + normalized = parts.slice(0, 3).join('/'); + } + + return normalized; +} + +/** + * When a package has no LICENSE file, look for a sibling package in node_modules + * that shares the same repository URL and DOES have a LICENSE file. + * Same repo = same license (monorepo pattern). + */ +function findSiblingLicense(packageName: string, repoUrl: string, repoRoot: string): { licenseText: string; siblingName: string } | undefined { + const normalizedRepo = normalizeRepoUrl(repoUrl); + if (!normalizedRepo) { + return undefined; + } + + // Determine the scope directory to search for siblings + const scope = packageName.startsWith('@') ? packageName.split('/')[0] : null; + const searchDir = scope + ? path.join(repoRoot, 'node_modules', scope) + : path.join(repoRoot, 'node_modules'); + + if (!fs.existsSync(searchDir)) { + return undefined; + } + + try { + const candidates = fs.readdirSync(searchDir); + for (const candidate of candidates) { + const candidateName = scope ? 
`${scope}/${candidate}` : candidate; + if (candidateName.toLowerCase() === packageName.toLowerCase()) { + continue; // Skip self + } + + const candidatePkgPath = path.join(searchDir, candidate, 'package.json'); + if (!fs.existsSync(candidatePkgPath)) { + continue; + } + + try { + const candidatePkg = JSON.parse(fs.readFileSync(candidatePkgPath, 'utf8')); + const candidateRepo = typeof candidatePkg.repository === 'string' + ? candidatePkg.repository + : candidatePkg.repository?.url || ''; + const normalizedCandidateRepo = normalizeRepoUrl(candidateRepo); + + if (normalizedCandidateRepo === normalizedRepo) { + // Same repo — check if this sibling has a LICENSE file + const licenseResult = findLicenseInNodeModules(candidateName, repoRoot); + if (licenseResult) { + return { licenseText: licenseResult.licenseText, siblingName: candidateName }; + } + } + } catch { + // Can't read sibling's package.json — skip + } + } + } catch { + // Can't read directory — skip + } + + return undefined; +} + +/** + * Parse all cgmanifest.json files and return entries with their license data. 
+ */ +function parseCgManifests(repoRoot: string): NoticeEntry[] { + const entries: NoticeEntry[] = []; + const cgManifests = findCgManifestFiles(repoRoot); + + for (const manifestPath of cgManifests) { + try { + const data = JSON.parse(fs.readFileSync(manifestPath, 'utf8')); + const registrations: CgManifestRegistration[] = data.registrations || data.Registrations || []; + + for (const reg of registrations) { + const comp = reg.component; + const inner = comp.git || comp.npm || comp.other || {}; + const name = (inner as { name?: string }).name || ''; + if (!name) { + continue; + } + + let licenseText = ''; + if (reg.licenseDetail && reg.licenseDetail.length > 0) { + // licenseDetail can be string[] or string[][] (nested array) + if (Array.isArray(reg.licenseDetail[0])) { + licenseText = (reg.licenseDetail[0] as string[]).join('\n'); + } else { + licenseText = (reg.licenseDetail as string[]).join('\n'); + } + } + + const url = comp.git?.repositoryUrl || comp.other?.downloadUrl || ''; + const version = reg.version || comp.git?.commitHash?.substring(0, 7) || ''; + const license = typeof reg.license === 'string' ? 
reg.license : ''; + + entries.push({ name, version, license, url, licenseText }); + } + } catch { + console.warn(`WARN: Could not parse ${manifestPath}`); + } + } + + return entries; +} + +function findCgManifestFiles(repoRoot: string): string[] { + const results: string[] = []; + + function walk(dir: string, depth: number): void { + if (depth > 5) { + return; + } + + try { + for (const entry of fs.readdirSync(dir)) { + // Skip directories that shouldn't contain cgmanifests for our purposes + if (entry === 'node_modules' || entry === '.git' || entry === 'out' || entry === 'test') { + continue; + } + + const full = path.join(dir, entry); + if (entry === 'cgmanifest.json') { + results.push(full); + } else if (fs.statSync(full).isDirectory()) { + walk(full, depth + 1); + } + } + } catch { + // Permission error or similar — skip + } + } + + walk(repoRoot, 0); + return results; +} + +// -- Main --------------------------------------------------------------------- + +function main(): void { + const args = parseArgs(process.argv.slice(2)); + + const cgPath = args['cg']; + const staticPath = args['static']; + const outputPath = args['output']; + const reportPath = args['report']; + const repoRoot = args['repo'] || process.cwd(); + const quality = process.env['VSCODE_QUALITY'] || 'stable'; + + if (!cgPath || !staticPath || !outputPath || !reportPath) { + console.error('Usage: generate-notices.ts --cg --static --output --report [--repo ]'); + process.exit(1); + } + + // -- Step 1: Read inputs ---------------------------------------------- + + if (!fs.existsSync(cgPath)) { + console.error(`ERROR: CG output file not found at ${cgPath}. 
Was the notice@0 task successful?`); + process.exit(1); + } + + if (!fs.existsSync(staticPath)) { + console.error(`ERROR: Static notices file not found at ${staticPath}`); + process.exit(1); + } + + const cgContent = fs.readFileSync(cgPath, 'utf8'); + if (cgContent.length === 0) { + console.error('ERROR: CG output is empty — notice@0 may have failed silently'); + process.exit(1); + } + + const staticData: StaticNoticesFile = JSON.parse(fs.readFileSync(staticPath, 'utf8')); + if (staticData.version !== 1) { + console.error(`ERROR: Unsupported static-notices.json version: ${staticData.version}. Expected 1.`); + process.exit(1); + } + + if (quality !== 'insider' && quality !== 'stable' && quality !== 'exploration') { + console.warn(`WARN: VSCODE_QUALITY is "${quality}", expected "insider" or "stable". Using default header.`); + } + + const cgSizeMB = cgContent.length / (1024 * 1024); + if (cgSizeMB < 1) { + console.warn(`WARN: CG output is unusually small (${cgSizeMB.toFixed(2)} MB). Expected ~5 MB.`); + } + if (cgSizeMB > 20) { + console.warn(`WARN: CG output is unusually large (${cgSizeMB.toFixed(2)} MB). 
Expected ~5 MB.`); + } + + // -- Step 2: Parse CG output ------------------------------------------ + + console.log('Parsing CG output...'); + const cgEntries = parseNoticeFile(cgContent); + console.log(` Parsed ${cgEntries.length} entries from CG output`); + + if (cgEntries.length === 0) { + console.error('ERROR: 0 entries parsed from CG output — format may have changed'); + process.exit(1); + } + + // Build map keyed by lowercase name for dedup + const mergedMap = new Map(); + for (const entry of cgEntries) { + const key = entry.name.toLowerCase(); + if (!mergedMap.has(key)) { + mergedMap.set(key, entry); + } + } + + // -- Step 3: Filter dev dependency leaks ------------------------------ + + const filteredDevDeps: string[] = []; + for (const devDep of staticData.devDepFilter) { + const key = devDep.toLowerCase(); + if (mergedMap.has(key)) { + const entry = mergedMap.get(key)!; + console.log(` FILTERED dev dep: ${entry.name} ${entry.version}`); + mergedMap.delete(key); + filteredDevDeps.push(devDep); + } + } + + // -- Step 4: Apply overrides ------------------------------------------ + + const overridesApplied: string[] = []; + for (const [pkgName, override] of Object.entries(staticData.overrides)) { + const key = pkgName.toLowerCase(); + if (mergedMap.has(key)) { + const entry = mergedMap.get(key)!; + console.log(` OVERRIDE: ${entry.name} license changed to ${override.license}`); + entry.license = override.license; + overridesApplied.push(pkgName); + } + } + + const cgEntryCount = mergedMap.size; + + // -- Step 5: Dynamic gap filling -------------------------------------- + + console.log('Finding gaps — packages that need coverage but CG missed...'); + const dynamicCoverageList: string[] = []; + const missingLicense: string[] = []; + + // 5a: Check all production dependencies from package.json + const pkgJsonPath = path.join(repoRoot, 'package.json'); + if (fs.existsSync(pkgJsonPath)) { + const rootPkg = JSON.parse(fs.readFileSync(pkgJsonPath, 'utf8')); + const 
prodDeps = Object.keys(rootPkg.dependencies || {}); + const devDeps = new Set(Object.keys(rootPkg.devDependencies || {}).map(d => d.toLowerCase())); + + for (const dep of prodDeps) { + const key = dep.toLowerCase(); + if (mergedMap.has(key)) { + continue; // Already covered by CG + } + if (devDeps.has(key)) { + continue; // Also listed as dev dep, skip + } + + const licenseResult = findLicenseInNodeModules(dep, repoRoot); + const pkgInfo = readPackageJson(dep, repoRoot); + + if (licenseResult) { + const entry: NoticeEntry = { + name: dep, + version: pkgInfo?.version || '', + license: pkgInfo?.license || '', + url: pkgInfo?.repository || '', + licenseText: licenseResult.licenseText, + }; + mergedMap.set(key, entry); + dynamicCoverageList.push(`${dep} — LICENSE found at ${licenseResult.licensePath}`); + console.log(` DYNAMIC: ${dep} ${pkgInfo?.version || ''} — LICENSE from node_modules`); + } else { + // Try same-repo sibling fallback: find another package in node_modules + // with the same repository URL that DOES have a LICENSE file + const siblingResult = pkgInfo?.repository + ? 
findSiblingLicense(dep, pkgInfo.repository, repoRoot) + : undefined; + + if (siblingResult) { + const entry: NoticeEntry = { + name: dep, + version: pkgInfo?.version || '', + license: pkgInfo?.license || '', + url: pkgInfo?.repository || '', + licenseText: siblingResult.licenseText, + }; + mergedMap.set(key, entry); + dynamicCoverageList.push(`${dep} — LICENSE from sibling ${siblingResult.siblingName} (same repo: ${pkgInfo!.repository})`); + console.log(` DYNAMIC (sibling): ${dep} ${pkgInfo?.version || ''} — LICENSE from ${siblingResult.siblingName}`); + } else { + missingLicense.push(dep); + console.warn(` MISSING: ${dep} — no LICENSE in node_modules, no same-repo sibling, not in CG`); + } + } + } + } + + // 5b: Check extension dependencies (packages bundled into extensions) + const extensionsDir = path.join(repoRoot, 'extensions'); + if (fs.existsSync(extensionsDir)) { + for (const ext of fs.readdirSync(extensionsDir)) { + const extPkgPaths = [ + path.join(extensionsDir, ext, 'package.json'), + path.join(extensionsDir, ext, 'server', 'package.json'), + ]; + + for (const extPkgPath of extPkgPaths) { + if (!fs.existsSync(extPkgPath)) { + continue; + } + + try { + const extPkg = JSON.parse(fs.readFileSync(extPkgPath, 'utf8')); + const extDeps = Object.keys(extPkg.dependencies || {}); + + for (const dep of extDeps) { + const key = dep.toLowerCase(); + if (mergedMap.has(key)) { + continue; + } + + const licenseResult = findLicenseInNodeModules(dep, repoRoot); + const pkgInfo = readPackageJson(dep, repoRoot); + + if (licenseResult) { + const entry: NoticeEntry = { + name: dep, + version: pkgInfo?.version || '', + license: pkgInfo?.license || '', + url: pkgInfo?.repository || '', + licenseText: licenseResult.licenseText, + }; + mergedMap.set(key, entry); + dynamicCoverageList.push(`${dep} — LICENSE from extension ${ext}`); + console.log(` DYNAMIC: ${dep} — LICENSE from extension ${ext}`); + } else { + // Try same-repo sibling fallback for extension deps too + const 
siblingResult = pkgInfo?.repository + ? findSiblingLicense(dep, pkgInfo.repository, repoRoot) + : undefined; + + if (siblingResult) { + const entry: NoticeEntry = { + name: dep, + version: pkgInfo?.version || '', + license: pkgInfo?.license || '', + url: pkgInfo?.repository || '', + licenseText: siblingResult.licenseText, + }; + mergedMap.set(key, entry); + dynamicCoverageList.push(`${dep} — LICENSE from sibling ${siblingResult.siblingName} (extension ${ext})`); + console.log(` DYNAMIC (sibling): ${dep} — LICENSE from ${siblingResult.siblingName} (extension ${ext})`); + } + // Don't log missing for extension deps — many are dev deps + } + } + } catch { + // Invalid package.json — skip + } + } + } + } + + // 5c: Check cgmanifest.json entries + console.log('Scanning cgmanifest.json entries...'); + const cgManifestEntries = parseCgManifests(repoRoot); + for (const entry of cgManifestEntries) { + const key = entry.name.toLowerCase(); + if (mergedMap.has(key)) { + continue; // Already covered + } + + if (entry.licenseText) { + mergedMap.set(key, entry); + dynamicCoverageList.push(`${entry.name} — licenseDetail from cgmanifest.json`); + console.log(` DYNAMIC: ${entry.name} — licenseDetail from cgmanifest`); + } else { + // No licenseDetail — try to find LICENSE in the extension directory + // This is a gap that should be fixed by populating licenseDetail + missingLicense.push(`${entry.name} (cgmanifest — no licenseDetail, needs backfill)`); + console.warn(` MISSING: ${entry.name} — cgmanifest entry without licenseDetail`); + } + } + + // -- Step 6: Add static entries --------------------------------------- + + let staticEntriesAdded = 0; + const staleStaticEntries: string[] = []; + + for (const pkg of staticData.packages) { + const key = pkg.name.toLowerCase(); + if (mergedMap.has(key)) { + // Static entry is stale — CG or dynamic already covers this + staleStaticEntries.push(pkg.name); + console.log(` STALE: ${pkg.name} is in static-notices.json but already covered by 
CG/dynamic`); + continue; + } + + mergedMap.set(key, { + name: pkg.name, + version: pkg.version, + license: pkg.license, + url: pkg.url, + licenseText: pkg.licenseText, + }); + staticEntriesAdded++; + console.log(` STATIC: ${pkg.name} ${pkg.version}`); + } + + // -- Step 7: Sort ----------------------------------------------------- + + const sorted = [...mergedMap.values()].sort((a, b) => + a.name.toLowerCase().localeCompare(b.name.toLowerCase()) + ); + + // -- Step 8: Render output -------------------------------------------- + + const header = quality === 'insider' ? HEADER_INSIDER : HEADER_STABLE; + let output = header + '\n\n'; + + for (const entry of sorted) { + output += '\n' + SEPARATOR + '\n\n'; + output += entry.name; + if (entry.version) { + output += ' ' + entry.version; + } + if (entry.license) { + output += ' - ' + entry.license; + } + output += '\n'; + if (entry.url) { + output += entry.url + '\n'; + } + output += '\n'; + if (entry.licenseText) { + output += entry.licenseText + '\n'; + } + } + + output += '\n' + SEPARATOR + '\n'; + + // -- Step 9: Write outputs -------------------------------------------- + + const outputDir = path.dirname(outputPath); + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); + } + + fs.writeFileSync(outputPath, output, 'utf8'); + + const report: MergeReport = { + timestamp: new Date().toISOString(), + cgEntryCount, + dynamicGapsFilled: dynamicCoverageList.length, + staticEntriesAdded, + devDepsFiltered: filteredDevDeps, + overridesApplied, + totalCount: sorted.length, + quality, + dynamicCoverageList, + staleStaticEntries, + missingLicense, + }; + + fs.writeFileSync(reportPath, JSON.stringify(report, null, 2), 'utf8'); + + // -- Summary ---------------------------------------------------------- + + console.log(''); + console.log(`Merged NOTICE: ${cgEntryCount} CG + ${dynamicCoverageList.length} dynamic + ${staticEntriesAdded} static - ${filteredDevDeps.length} filtered = ${sorted.length} 
total`); + console.log(`Output: ${outputPath} (${(output.length / 1024 / 1024).toFixed(2)} MB)`); + console.log(`Report: ${reportPath}`); + + if (dynamicCoverageList.length > 0) { + console.log(`\n${dynamicCoverageList.length} packages covered dynamically — consider filing ClearlyDefined curations:`); + for (const item of dynamicCoverageList) { + console.log(` → ${item}`); + } + } + + if (staleStaticEntries.length > 0) { + console.log(`\n${staleStaticEntries.length} static-notices.json entries are now redundant (CG/dynamic covers them):`); + for (const name of staleStaticEntries) { + console.log(` → ${name}`); + } + } + + if (missingLicense.length > 0) { + console.warn(`\nWARNING: ${missingLicense.length} packages have no discoverable license:`); + for (const name of missingLicense) { + console.warn(` x ${name}`); + } + } +} + +// -- Arg parsing -------------------------------------------------------------- + +function parseArgs(argv: string[]): Record { + const args: Record = {}; + for (let i = 0; i < argv.length; i++) { + if (argv[i].startsWith('--') && i + 1 < argv.length) { + args[argv[i].substring(2)] = argv[i + 1]; + i++; + } + } + return args; +} + +main(); diff --git a/build/azure-pipelines/oss/static-notices.json b/build/azure-pipelines/oss/static-notices.json new file mode 100644 index 00000000000..61440b3c039 --- /dev/null +++ b/build/azure-pipelines/oss/static-notices.json @@ -0,0 +1,31 @@ +{ + "version": 1, + "packages": [ + { + "name": "ffmpeg", + "version": "6.0", + "license": "LGPL-2.1-or-later", + "url": "https://ffmpeg.org/", + "licenseText": "ffmpeg is released under the GNU Lesser General Public License version 2.1 or later (LGPL v2.1+). See https://ffmpeg.org/legal.html for full license text. FFmpeg includes code under various licenses including LGPL, GPL, and permissive licenses. VS Code uses FFmpeg through Electron for media codec support." 
+ }, + { + "name": "lib-oniguruma", + "version": "6.9.8", + "license": "BSD-2-Clause", + "url": "https://github.com/kkos/oniguruma", + "licenseText": "BSD 2-Clause License\n\nCopyright (c) 2002-2021 K.Kosako \nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice,\n this list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n this list of conditions and the following disclaimer in the documentation\n and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." 
+ } + ], + "devDepFilter": [ + "mocha", + "mocha-junit-reporter" + ], + "overrides": { + "@microsoft/applicationinsights-web-basic": { + "license": "MIT" + } + }, + "requiredPackages": [ + "ffmpeg" + ] +} diff --git a/build/azure-pipelines/oss/validate-notices.ts b/build/azure-pipelines/oss/validate-notices.ts new file mode 100644 index 00000000000..e1de5d912c0 --- /dev/null +++ b/build/azure-pipelines/oss/validate-notices.ts @@ -0,0 +1,288 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import * as fs from 'fs'; +import * as path from 'path'; + +// -- Types -------------------------------------------------------------------- + +interface StaticNoticesFile { + version: 1; + packages: { name: string; version: string; license: string; licenseText: string; url: string }[]; + devDepFilter: string[]; + overrides: Record; + requiredPackages: string[]; +} + +interface ValidationResult { + passed: boolean; + errors: string[]; + warnings: string[]; + stats: { + totalComponents: number; + productionDepsChecked: number; + productionDepsCovered: number; + lgplComponents: string[]; + }; +} + +// -- Main --------------------------------------------------------------------- + +function main(): void { + const args = parseArgs(process.argv.slice(2)); + + const noticesPath = args['notices']; + const staticPath = args['static']; + const packageJsonPath = args['package-json']; + + if (!noticesPath || !staticPath || !packageJsonPath) { + console.error('Usage: validate-notices.ts --notices --static --package-json '); + process.exit(1); + } + + const result: ValidationResult = { + passed: true, + errors: [], + warnings: [], + stats: { + totalComponents: 0, + productionDepsChecked: 
0, + productionDepsCovered: 0, + lgplComponents: [], + }, + }; + + // -- Check 1: File exists and is non-empty ---------------------------- + + if (!fs.existsSync(noticesPath)) { + result.errors.push(`NOTICE file not found at ${noticesPath}`); + result.passed = false; + reportAndExit(result); + return; + } + + const content = fs.readFileSync(noticesPath, 'utf8'); + if (content.length === 0) { + result.errors.push('NOTICE file is empty'); + result.passed = false; + reportAndExit(result); + return; + } + + // -- Check 2: Header present ------------------------------------------ + + if (!content.startsWith('NOTICES AND INFORMATION')) { + result.errors.push('Invalid header — file does not start with "NOTICES AND INFORMATION"'); + result.passed = false; + } + + // -- Check 3: Size in range ------------------------------------------- + + const sizeMB = content.length / (1024 * 1024); + if (sizeMB < 0.5 || sizeMB > 20) { + result.errors.push(`File size ${sizeMB.toFixed(2)} MB outside expected range (0.5-20 MB)`); + result.passed = false; + } + + // -- Extract component names from NOTICE file ------------------------- + + const componentNames = new Set(); + const separatorRegex = /^-{50,}$/gm; + const blocks = content.split(separatorRegex); + + for (let i = 1; i < blocks.length; i++) { + const block = blocks[i].trim(); + if (!block) { + continue; + } + + const lines = block.split('\n'); + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) { + continue; + } + + // Parse "name version - license" or just "name" + const match = trimmed.match(/^(.+?)\s+[\d]/); + if (match) { + componentNames.add(match[1].toLowerCase()); + } else { + componentNames.add(trimmed.toLowerCase()); + } + break; // Only first non-empty line is the header + } + } + + result.stats.totalComponents = componentNames.size; + + // -- Check 4: Production deps covered --------------------------------- + + if (fs.existsSync(packageJsonPath)) { + const pkg = 
JSON.parse(fs.readFileSync(packageJsonPath, 'utf8')); + const prodDeps = Object.keys(pkg.dependencies || {}); + result.stats.productionDepsChecked = prodDeps.length; + + const missingProdDeps: string[] = []; + for (const dep of prodDeps) { + if (componentNames.has(dep.toLowerCase())) { + result.stats.productionDepsCovered++; + } else { + // Some deps are internal (@github/copilot, etc.) — check if they're known internal + if (!isKnownInternalPackage(dep)) { + missingProdDeps.push(dep); + } else { + result.stats.productionDepsCovered++; + } + } + } + + if (missingProdDeps.length > 0) { + for (const dep of missingProdDeps) { + result.errors.push( + `Production dependency missing from NOTICE file: ${dep}\n` + + `\n` + + ` This package is not covered by Component Governance and was not found\n` + + ` dynamically. To fix:\n` + + `\n` + + ` 1. Check if the package has a LICENSE file — if so, this is a bug in the script\n` + + ` 2. Otherwise, add an entry to build/azure-pipelines/oss/static-notices.json\n` + + ` 3. 
File a ClearlyDefined curation request for long-term coverage\n` + ); + } + result.passed = false; + } + } + + // -- Check 5: Dev deps excluded --------------------------------------- + + const staticData: StaticNoticesFile = JSON.parse(fs.readFileSync(staticPath, 'utf8')); + const leakedDevDeps: string[] = []; + for (const devDep of staticData.devDepFilter) { + if (componentNames.has(devDep.toLowerCase())) { + leakedDevDeps.push(devDep); + } + } + + if (leakedDevDeps.length > 0) { + for (const dep of leakedDevDeps) { + result.errors.push(`Dev dependency leaked into NOTICE file: ${dep} — should be filtered`); + } + result.passed = false; + } + + // -- Check 6: Required packages present ------------------------------- + + for (const required of staticData.requiredPackages) { + if (!componentNames.has(required.toLowerCase())) { + result.errors.push(`Required package missing from NOTICE file: ${required}`); + result.passed = false; + } + } + + // -- Check 7: LGPL audit (warn only) ---------------------------------- + + for (let i = 1; i < blocks.length; i++) { + const block = blocks[i].trim(); + if (!block) { + continue; + } + + const firstLine = block.split('\n').find(l => l.trim())?.trim() || ''; + if (firstLine.toUpperCase().includes('LGPL')) { + const match = firstLine.match(/^(.+?)\s+([\d][^\s]*)/); + const name = match ? `${match[1]} ${match[2]}` : firstLine; + result.warnings.push(`LGPL component: ${name} — verify this is an accepted dependency`); + result.stats.lgplComponents.push(name); + } + } + + // -- Check 8: Stale static entries (on version bumps) ----------------- + + // This check only warns — stale entries are harmless + for (const pkg of staticData.packages) { + if (componentNames.has(pkg.name.toLowerCase())) { + // The package is in the NOTICE file — but did CG or dynamic cover it? + // If so, the static entry is redundant. We can't tell from here alone, + // so we check the merge report if available. 
+ result.warnings.push( + `static-notices.json entry for "${pkg.name}" may be redundant — ` + + `the package appears in the final NOTICE (possibly from CG or dynamic coverage)` + ); + } + } + + reportAndExit(result); +}
+
+// -- Helpers ------------------------------------------------------------------
+
+/**
+ * Returns true for production dependencies that are exempt from the
+ * "must appear in the NOTICE file" check. The production-dependency check
+ * counts these as covered even when no matching component is present.
+ *
+ * The exemptions are the private `@github/copilot*` packages, internal
+ * packages, and dev tooling that does not ship.
+ */
+function isKnownInternalPackage(name: string): boolean {
+	// @github/copilot* packages are private
+	if (name.startsWith('@github/copilot')) {
+		return true;
+	}
+	switch (name) {
+		case 'tas-client': // internal
+		case 'v8-inspect-profiler': // internal
+		case 'playwright-core': // dev tooling that doesn't ship
+			return true;
+		default:
+			return false;
+	}
+}
+
+/**
+ * Prints the validation report and terminates the process.
+ *
+ * The summary and section headings are written with `console.log`, individual
+ * warnings with `console.warn` and individual errors with `console.error`.
+ * The process exits with code 0 when `result.passed` is true and with code 1
+ * otherwise, so this function never returns to its caller.
+ */
+function reportAndExit(result: ValidationResult): void {
+	console.log('');
+	console.log('=== NOTICE Validation Report ===');
+	console.log(`Components: ${result.stats.totalComponents}`);
+	console.log(`Prod deps checked: ${result.stats.productionDepsChecked}`);
+	console.log(`Prod deps covered: ${result.stats.productionDepsCovered}`);
+
+	if (result.warnings.length > 0) {
+		console.log(`\nWarnings (${result.warnings.length}):`);
+		for (const w of result.warnings) {
+			console.warn(` ⚠ ${w}`);
+		}
+	}
+
+	if (result.errors.length > 0) {
+		console.log(`\nErrors (${result.errors.length}):`);
+		for (const e of result.errors) {
+			console.error(` x ${e}`);
+		}
+	}
+
+	if (result.passed) {
+		console.log(`\nPASS: VALIDATION PASSED: ${result.stats.totalComponents} components, ${result.stats.productionDepsCovered} production deps covered`);
+		process.exit(0);
+	} else {
+		console.error(`\nFAIL: VALIDATION FAILED: ${result.errors.length} error(s)`);
+		process.exit(1);
+	}
+}
+
+/**
+ * Parses `--key value` pairs from an argument vector into a key/value map.
+ *
+ * Tokens that do not start with `--` are ignored. A flag that has no value
+ * after it, or that is directly followed by another `--flag`, is skipped
+ * instead of consuming the next flag as its value, so `--a --b v` yields
+ * `{ b: 'v' }`.
+ */
+function parseArgs(argv: string[]): Record<string, string> {
+	const args: Record<string, string> = {};
+	for (let i = 0; i < argv.length; i++) {
+		const token = argv[i];
+		if (!token.startsWith('--')) {
+			continue;
+		}
+		const value = argv[i + 1];
+		// Another `--flag` is not a value; leave it for the next iteration.
+		if (value === undefined || value.startsWith('--')) {
+			continue;
+		}
+		args[token.substring(2)] = value;
+		i++; // skip the value that was just consumed
+	}
+	return args;
+}
+
+main();
diff --git a/build/azure-pipelines/product-quality-checks.yml b/build/azure-pipelines/product-quality-checks.yml index 983a0a4b25a..f371508b66c 100644 --- a/build/azure-pipelines/product-quality-checks.yml +++ b/build/azure-pipelines/product-quality-checks.yml @@ -141,7 +141,6 @@ jobs: env: GITHUB_TOKEN: "$(github-distro-mixin-password)" displayName: Download component details of built-in extensions - condition: and(succeeded(), eq(lower(variables['VSCODE_PUBLISH']), 'true')) - task: ms.vss-governance-buildtask.governance-build-task-component-detection.ComponentGovernanceComponentDetection@0 displayName: "Component Detection" @@ -149,7 +148,37 @@ jobs: sourceScanPath: $(Build.SourcesDirectory) alertWarningLevel: Medium continueOnError: true - condition: and(succeeded(), eq(lower(variables['VSCODE_PUBLISH']), 'true')) + + # -- Shadow Mode: Dynamic NOTICE Generation -- + - task: notice@0 + displayName: "[Shadow] Generate CG NOTICE file" + inputs: + outputfile: $(Build.SourcesDirectory)/ThirdPartyNotices.generated.txt + outputformat: text + continueOnError: true + + - script: | + npx tsc
--skipLibCheck --module nodenext --moduleResolution nodenext --esModuleInterop --target es2022 --outDir .oss-build-out build/azure-pipelines/oss/validate-notices.ts + node .oss-build-out/validate-notices.js \ + --notices "$(Build.ArtifactStagingDirectory)/ThirdPartyNotices.dynamic.txt" \ + --static "build/azure-pipelines/oss/static-notices.json" \ + --package-json "package.json" + displayName: "[Shadow] Validate NOTICE coverage" + continueOnError: true + # -- End Shadow Mode -- - task: AzureCLI@2 displayName: Fetch secrets