/*--------------------------------------------------------------------------------------------- * Copyright (c) Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ import * as vscode from 'vscode'; import * as uri from 'vscode-uri'; import { ILogger } from '../logging'; import { IMdParser } from '../markdownEngine'; import { getLine, ITextDocument } from '../types/textDocument'; import { noopToken } from '../util/cancellation'; import { Disposable } from '../util/dispose'; import { Schemes } from '../util/schemes'; import { MdDocumentInfoCache } from '../util/workspaceCache'; import { IMdWorkspace } from '../workspace'; export interface ExternalHref { readonly kind: 'external'; readonly uri: vscode.Uri; } export interface InternalHref { readonly kind: 'internal'; readonly path: vscode.Uri; readonly fragment: string; } export interface ReferenceHref { readonly kind: 'reference'; readonly ref: string; } export type LinkHref = ExternalHref | InternalHref | ReferenceHref; function resolveLink( document: ITextDocument, link: string, ): ExternalHref | InternalHref | undefined { const cleanLink = stripAngleBrackets(link); if (/^[a-z\-][a-z\-]+:/i.test(cleanLink)) { // Looks like a uri return { kind: 'external', uri: vscode.Uri.parse(cleanLink) }; } // Assume it must be an relative or absolute file path // Use a fake scheme to avoid parse warnings const tempUri = vscode.Uri.parse(`vscode-resource:${link}`); let resourceUri: vscode.Uri | undefined; if (!tempUri.path) { resourceUri = document.uri; } else if (tempUri.path[0] === '/') { const root = getWorkspaceFolder(document); if (root) { resourceUri = vscode.Uri.joinPath(root, tempUri.path); } } else { if (document.uri.scheme === Schemes.untitled) { const root = getWorkspaceFolder(document); if (root) { resourceUri = vscode.Uri.joinPath(root, tempUri.path); } } else { const base = uri.Utils.dirname(document.uri); resourceUri = vscode.Uri.joinPath(base, tempUri.path); } } if (!resourceUri) { return undefined; } // If we are in a notebook cell, resolve relative to notebook instead if (resourceUri.scheme === Schemes.notebookCell) { const notebook = vscode.workspace.notebookDocuments .find(notebook => notebook.getCells().some(cell => cell.document === document)); if (notebook) { resourceUri = resourceUri.with({ scheme: notebook.uri.scheme }); } } return { kind: 'internal', path: resourceUri.with({ fragment: '' }), fragment: tempUri.fragment, }; } function getWorkspaceFolder(document: ITextDocument) { return vscode.workspace.getWorkspaceFolder(document.uri)?.uri || vscode.workspace.workspaceFolders?.[0]?.uri; } export interface MdLinkSource { /** * The full range of the link. */ readonly range: vscode.Range; /** * The file where the link is defined. */ readonly resource: vscode.Uri; /** * The original text of the link destination in code. */ readonly hrefText: string; /** * The original text of just the link's path in code. */ readonly pathText: string; /** * The range of the path. */ readonly hrefRange: vscode.Range; /** * The range of the fragment within the path. */ readonly fragmentRange: vscode.Range | undefined; } export interface MdInlineLink { readonly kind: 'link'; readonly source: MdLinkSource; readonly href: LinkHref; } export interface MdLinkDefinition { readonly kind: 'definition'; readonly source: MdLinkSource; readonly ref: { readonly range: vscode.Range; readonly text: string; }; readonly href: ExternalHref | InternalHref; } export type MdLink = MdInlineLink | MdLinkDefinition; function extractDocumentLink( document: ITextDocument, pre: string, rawLink: string, matchIndex: number, fullMatch: string, ): MdLink | undefined { const isAngleBracketLink = rawLink.startsWith('<'); const link = stripAngleBrackets(rawLink); let linkTarget: ExternalHref | InternalHref | undefined; try { linkTarget = resolveLink(document, link); } catch { return undefined; } if (!linkTarget) { return undefined; } const linkStart = document.positionAt(matchIndex); const linkEnd = linkStart.translate(0, fullMatch.length); const hrefStart = linkStart.translate(0, pre.length + (isAngleBracketLink ? 1 : 0)); const hrefEnd = hrefStart.translate(0, link.length); return { kind: 'link', href: linkTarget, source: { hrefText: link, resource: document.uri, range: new vscode.Range(linkStart, linkEnd), hrefRange: new vscode.Range(hrefStart, hrefEnd), ...getLinkSourceFragmentInfo(document, link, hrefStart, hrefEnd), } }; } function getFragmentRange(text: string, start: vscode.Position, end: vscode.Position): vscode.Range | undefined { const index = text.indexOf('#'); if (index < 0) { return undefined; } return new vscode.Range(start.translate({ characterDelta: index + 1 }), end); } function getLinkSourceFragmentInfo(document: ITextDocument, link: string, linkStart: vscode.Position, linkEnd: vscode.Position): { fragmentRange: vscode.Range | undefined; pathText: string } { const fragmentRange = getFragmentRange(link, linkStart, linkEnd); return { pathText: document.getText(new vscode.Range(linkStart, fragmentRange ? fragmentRange.start.translate(0, -1) : linkEnd)), fragmentRange, }; } const angleBracketLinkRe = /^<(.*)>$/; /** * Used to strip brackets from the markdown link * * will be transformed to http://example.com */ function stripAngleBrackets(link: string) { return link.replace(angleBracketLinkRe, '$1'); } const r = String.raw; /** * Matches `[text](link)` or `[text]()` */ const linkPattern = new RegExp( // text r`(\[` + // open prefix match --> /**/r`(?:` + /*****/r`[^\[\]\\]|` + // Non-bracket chars, or... /*****/r`\\.|` + // Escaped char, or... /*****/r`\[[^\[\]]*\]` + // Matched bracket pair /**/r`)*` + r`\]` + // Destination r`\(\s*)` + // <-- close prefix match /**/r`(` + /*****/r`[^\s\(\)\<](?:[^\s\(\)]|\([^\s\(\)]*?\))*|` + // Link without whitespace, or... /*****/r`<[^<>]+>` + // In angle brackets /**/r`)` + // Title /**/r`\s*(?:"[^"]*"|'[^']*'|\([^\(\)]*\))?\s*` + r`\)`, 'g'); /** * Matches `[text][ref]` or `[shorthand]` */ const referenceLinkPattern = /(^|[^\]\\])(?:(?:(\[((?:\\\]|[^\]])+)\]\[\s*?)([^\s\]]*?)\]|\[\s*?([^\s\\\]]*?)\])(?![\:\(]))/gm; /** * Matches `` */ const autoLinkPattern = /\<(\w+:[^\>\s]+)\>/g; /** * Matches `[text]: link` */ const definitionPattern = /^([\t ]*\[(?!\^)((?:\\\]|[^\]])+)\]:\s*)([^<]\S*|<[^>]+>)/gm; const inlineCodePattern = /(?:^|[^`])(`+)(?:.+?|.*?(?:(?:\r?\n).+?)*?)(?:\r?\n)?\1(?:$|[^`])/gm; class NoLinkRanges { public static async compute(tokenizer: IMdParser, document: ITextDocument): Promise { const tokens = await tokenizer.tokenize(document); const multiline = tokens.filter(t => (t.type === 'code_block' || t.type === 'fence' || t.type === 'html_block') && !!t.map).map(t => t.map) as [number, number][]; const inlineRanges = new Map(); const text = document.getText(); for (const match of text.matchAll(inlineCodePattern)) { const startOffset = match.index ?? 0; const startPosition = document.positionAt(startOffset); const range = new vscode.Range(startPosition, document.positionAt(startOffset + match[0].length)); for (let line = range.start.line; line <= range.end.line; ++line) { let entry = inlineRanges.get(line); if (!entry) { entry = []; inlineRanges.set(line, entry); } entry.push(range); } } return new NoLinkRanges(multiline, inlineRanges); } private constructor( /** * code blocks and fences each represented by [line_start,line_end). */ public readonly multiline: ReadonlyArray<[number, number]>, /** * Inline code spans where links should not be detected */ public readonly inline: Map ) { } contains(position: vscode.Position): boolean { return this.multiline.some(interval => position.line >= interval[0] && position.line < interval[1]) || !!this.inline.get(position.line)?.some(inlineRange => inlineRange.contains(position)); } concatInline(inlineRanges: Iterable): NoLinkRanges { const newInline = new Map(this.inline); for (const range of inlineRanges) { for (let line = range.start.line; line <= range.end.line; ++line) { let entry = newInline.get(line); if (!entry) { entry = []; newInline.set(line, entry); } entry.push(range); } } return new NoLinkRanges(this.multiline, newInline); } } /** * Stateless object that extracts link information from markdown files. */ export class MdLinkComputer { constructor( private readonly tokenizer: IMdParser, ) { } public async getAllLinks(document: ITextDocument, token: vscode.CancellationToken): Promise { const noLinkRanges = await NoLinkRanges.compute(this.tokenizer, document); if (token.isCancellationRequested) { return []; } const inlineLinks = Array.from(this.getInlineLinks(document, noLinkRanges)); return Array.from([ ...inlineLinks, ...this.getReferenceLinks(document, noLinkRanges.concatInline(inlineLinks.map(x => x.source.range))), ...this.getLinkDefinitions(document, noLinkRanges), ...this.getAutoLinks(document, noLinkRanges), ]); } private *getInlineLinks(document: ITextDocument, noLinkRanges: NoLinkRanges): Iterable { const text = document.getText(); for (const match of text.matchAll(linkPattern)) { const matchLinkData = extractDocumentLink(document, match[1], match[2], match.index ?? 0, match[0]); if (matchLinkData && !noLinkRanges.contains(matchLinkData.source.hrefRange.start)) { yield matchLinkData; // Also check link destination for links for (const innerMatch of match[1].matchAll(linkPattern)) { const innerData = extractDocumentLink(document, innerMatch[1], innerMatch[2], (match.index ?? 0) + (innerMatch.index ?? 0), innerMatch[0]); if (innerData) { yield innerData; } } } } } private *getAutoLinks(document: ITextDocument, noLinkRanges: NoLinkRanges): Iterable { const text = document.getText(); for (const match of text.matchAll(autoLinkPattern)) { const linkOffset = (match.index ?? 0); const linkStart = document.positionAt(linkOffset); if (noLinkRanges.contains(linkStart)) { continue; } const link = match[1]; const linkTarget = resolveLink(document, link); if (!linkTarget) { continue; } const linkEnd = linkStart.translate(0, match[0].length); const hrefStart = linkStart.translate(0, 1); const hrefEnd = hrefStart.translate(0, link.length); yield { kind: 'link', href: linkTarget, source: { hrefText: link, resource: document.uri, hrefRange: new vscode.Range(hrefStart, hrefEnd), range: new vscode.Range(linkStart, linkEnd), ...getLinkSourceFragmentInfo(document, link, hrefStart, hrefEnd), } }; } } private *getReferenceLinks(document: ITextDocument, noLinkRanges: NoLinkRanges): Iterable { const text = document.getText(); for (const match of text.matchAll(referenceLinkPattern)) { const linkStartOffset = (match.index ?? 0) + match[1].length; const linkStart = document.positionAt(linkStartOffset); if (noLinkRanges.contains(linkStart)) { continue; } let hrefStart: vscode.Position; let hrefEnd: vscode.Position; let reference = match[4]; if (reference === '') { // [ref][], reference = match[3]; const offset = linkStartOffset + 1; hrefStart = document.positionAt(offset); hrefEnd = document.positionAt(offset + reference.length); } else if (reference) { // [text][ref] const pre = match[2]; const offset = linkStartOffset + pre.length; hrefStart = document.positionAt(offset); hrefEnd = document.positionAt(offset + reference.length); } else if (match[5]) { // [ref] reference = match[5]; const offset = linkStartOffset + 1; hrefStart = document.positionAt(offset); const line = getLine(document, hrefStart.line); // See if link looks like a checkbox const checkboxMatch = line.match(/^\s*[\-\*]\s*\[x\]/i); if (checkboxMatch && hrefStart.character <= checkboxMatch[0].length) { continue; } hrefEnd = document.positionAt(offset + reference.length); } else { continue; } const linkEnd = linkStart.translate(0, match[0].length - match[1].length); yield { kind: 'link', source: { hrefText: reference, pathText: reference, resource: document.uri, range: new vscode.Range(linkStart, linkEnd), hrefRange: new vscode.Range(hrefStart, hrefEnd), fragmentRange: undefined, }, href: { kind: 'reference', ref: reference, } }; } } private *getLinkDefinitions(document: ITextDocument, noLinkRanges: NoLinkRanges): Iterable { const text = document.getText(); for (const match of text.matchAll(definitionPattern)) { const offset = (match.index ?? 0); const linkStart = document.positionAt(offset); if (noLinkRanges.contains(linkStart)) { continue; } const pre = match[1]; const reference = match[2]; const rawLinkText = match[3].trim(); const target = resolveLink(document, rawLinkText); if (!target) { continue; } const isAngleBracketLink = angleBracketLinkRe.test(rawLinkText); const linkText = stripAngleBrackets(rawLinkText); const hrefStart = linkStart.translate(0, pre.length + (isAngleBracketLink ? 1 : 0)); const hrefEnd = hrefStart.translate(0, linkText.length); const hrefRange = new vscode.Range(hrefStart, hrefEnd); const refStart = linkStart.translate(0, 1); const refRange = new vscode.Range(refStart, refStart.translate({ characterDelta: reference.length })); const linkEnd = linkStart.translate(0, match[0].length); yield { kind: 'definition', source: { hrefText: linkText, resource: document.uri, range: new vscode.Range(linkStart, linkEnd), hrefRange, ...getLinkSourceFragmentInfo(document, rawLinkText, hrefStart, hrefEnd), }, ref: { text: reference, range: refRange }, href: target, }; } } } interface MdDocumentLinks { readonly links: readonly MdLink[]; readonly definitions: LinkDefinitionSet; } /** * Stateful object which provides links for markdown files the workspace. */ export class MdLinkProvider extends Disposable { private readonly _linkCache: MdDocumentInfoCache; private readonly linkComputer: MdLinkComputer; constructor( tokenizer: IMdParser, workspace: IMdWorkspace, logger: ILogger, ) { super(); this.linkComputer = new MdLinkComputer(tokenizer); this._linkCache = this._register(new MdDocumentInfoCache(workspace, async doc => { logger.verbose('LinkProvider', `compute - ${doc.uri}`); const links = await this.linkComputer.getAllLinks(doc, noopToken); return { links, definitions: new LinkDefinitionSet(links), }; })); } public async getLinks(document: ITextDocument): Promise { return this._linkCache.getForDocument(document); } } export class LinkDefinitionSet implements Iterable<[string, MdLinkDefinition]> { private readonly _map = new Map(); constructor(links: Iterable) { for (const link of links) { if (link.kind === 'definition') { this._map.set(link.ref.text, link); } } } public [Symbol.iterator](): Iterator<[string, MdLinkDefinition]> { return this._map.entries(); } public lookup(ref: string): MdLinkDefinition | undefined { return this._map.get(ref); } }