feat(link-utils): update link processing with anchor protection and trailing punctuation handling

This commit is contained in:
Ivan
2026-04-16 21:55:38 -05:00
parent ae6d42d302
commit eee4ed1ea2
3 changed files with 194 additions and 7 deletions

View File

@@ -6,6 +6,50 @@ function defaultNomadPagePath() {
}
export default class LinkUtils {
static protectAnchors(text) {
const anchors = [];
const protectedText = text.replace(/<a\b[^>]*>[\s\S]*?<\/a>/gi, (anchor) => {
const token = `[[ANCHOR_${anchors.length}]]`;
anchors.push(anchor);
return token;
});
return { protectedText, anchors };
}
static restoreAnchors(text, anchors) {
return text.replace(/\[\[ANCHOR_(\d+)\]\]/g, (match, idx) => {
const i = Number(idx);
return Number.isInteger(i) && i >= 0 && i < anchors.length ? anchors[i] : match;
});
}
static splitTrailingPunctuation(url) {
let core = url;
let suffix = "";
const alwaysTrim = new Set([".", ",", "!", "?", ":", ";"]);
while (core.length > 0) {
const ch = core.at(-1);
if (alwaysTrim.has(ch)) {
suffix = ch + suffix;
core = core.slice(0, -1);
continue;
}
if (ch === ")" || ch === "]") {
const open = ch === ")" ? "(" : "[";
const close = ch;
const opens = [...core].filter((c) => c === open).length;
const closes = [...core].filter((c) => c === close).length;
if (closes > opens) {
suffix = ch + suffix;
core = core.slice(0, -1);
continue;
}
}
break;
}
return { core, suffix };
}
/**
* Detects and wraps Reticulum (NomadNet and LXMF) links in HTML.
* Supports nomadnet://<hash>, nomadnet@<hash>, lxmf://<hash>, lxmf@<hash> and bare <hash>
@@ -45,10 +89,13 @@ export default class LinkUtils {
static renderStandardLinks(text) {
// Falsy input yields an empty string.
if (!text) return "";
// NOTE(review): two `urlRegex` declarations and two `text.replace` calls follow. This listing
// looks like a diff shown without +/- markers (earlier body first, replacement after it) —
// confirm against the real file before editing.
// Simple regex for URLs
const urlRegex = /(https?:\/\/[^\s<]+)/g;
return text.replace(urlRegex, (url) => {
return `<a href="${url}" target="_blank" rel="noopener noreferrer" class="text-blue-600 dark:text-blue-400 hover:underline">${url}</a>`;
// Matches an http(s) URL only at the start of the text or after a character that is not a word
// character, a quote or "="; that preceding character is captured as `prefix`.
const urlRegex = /(^|[^\w"'=])(https?:\/\/[^\s<]+)/g;
return text.replace(urlRegex, (match, prefix, url) => {
// Trailing punctuation is split off so it is emitted after the closing </a>.
const { core, suffix } = this.splitTrailingPunctuation(url);
// Nothing left to link after trimming: keep the matched text unchanged.
if (!core) {
return match;
}
return `${prefix}<a href="${core}" target="_blank" rel="noopener noreferrer" class="text-blue-600 dark:text-blue-400 hover:underline">${core}</a>${suffix}`;
});
}
@@ -56,8 +103,9 @@ export default class LinkUtils {
* Applies all link rendering.
*/
static renderAllLinks(text) {
// NOTE(review): the first three statements and the four after them look like the earlier and
// the replacement body of a diff shown without +/- markers — confirm against the real file.
text = this.renderStandardLinks(text);
text = this.renderReticulumLinks(text);
return text;
// Swap existing <a> elements for placeholders before running the link passes.
const { protectedText, anchors } = this.protectAnchors(text);
let rendered = this.renderStandardLinks(protectedText);
rendered = this.renderReticulumLinks(rendered);
// Put the original anchor markup back in place of its placeholders.
return this.restoreAnchors(rendered, anchors);
}
}

View File

@@ -58,6 +58,27 @@ describe("LinkUtils.js", () => {
const result = LinkUtils.renderStandardLinks(text);
expect(result).toContain('<a href="https://example.com/path?query=1"');
});
// A comma directly after the URL must land outside the anchor.
it("trims trailing punctuation from detected urls", () => {
    const html = LinkUtils.renderStandardLinks("visit https://example.com/path?x=1, now");
    expect(html).toContain('href="https://example.com/path?x=1"');
    expect(html).toContain("</a>, now");
});

// Balanced "(v1)" is part of the URL; the extra ")" closing the sentence parenthesis is not.
it("keeps balanced parenthesis in url but trims unmatched trailing one", () => {
    const expectedHref = 'href="https://example.com/path_(v1)"';

    const bare = LinkUtils.renderStandardLinks("see https://example.com/path_(v1)");
    expect(bare).toContain(expectedHref);

    const parenthesised = LinkUtils.renderStandardLinks("see (https://example.com/path_(v1))");
    expect(parenthesised).toContain(expectedHref);
    expect(parenthesised).toContain("</a>)");
});

// An already-escaped "&amp;" inside the query string is carried into the href unchanged.
it("keeps escaped entity query content in href", () => {
    const html = LinkUtils.renderStandardLinks("visit https://example.com/search?q=a&amp;lang=en");
    expect(html).toContain('href="https://example.com/search?q=a&amp;lang=en"');
});
});
describe("renderAllLinks", () => {
@@ -77,6 +98,19 @@ describe("LinkUtils.js", () => {
expect(result).toContain("Just some words.");
expect(result).not.toContain("<a ");
});
// Input that is already an anchor must come back unchanged, with a single <a> tag.
it("does not double-wrap urls inside existing anchors", () => {
    const input = 'already linked <a href="https://example.com">https://example.com</a>';
    const output = LinkUtils.renderAllLinks(input);
    expect(output).toBe(input);
    const anchorOpenings = output.match(/<a /g) || [];
    expect(anchorOpenings.length).toBe(1);
});

// Underscores in a Reticulum page path must reach the data attribute untouched.
it("keeps reticulum path underscores", () => {
    const html = LinkUtils.renderAllLinks("1dfeb0d794963579bd21ac8f153c77a4:/page/meshchatx_on_pi.mu");
    expect(html).toContain('data-nomadnet-url="1dfeb0d794963579bd21ac8f153c77a4:/page/meshchatx_on_pi.mu"');
});
});
describe("risky: no script or data URLs in href", () => {

View File

@@ -88,6 +88,58 @@ describe("MarkdownRenderer.js", () => {
expect(result).not.toContain("<em>on</em>");
expect(result).not.toContain("<em>raspberry</em>");
});
// Two URLs on one line produce exactly two anchors, each with its own href.
it("renders multiple urls in one line without corruption", () => {
    const first = "https://example.com/docs/meshchatx_on_pi.md";
    const second = "https://example.com/plain";
    const html = MarkdownRenderer.render(`links: ${first} and ${second}`);
    for (const target of [first, second]) {
        expect(html).toContain(`href="${target}"`);
    }
    const anchorOpenings = html.match(/<a href=/g) || [];
    expect(anchorOpenings.length).toBe(2);
});

// Sentence punctuation after the URL stays visible but outside the link.
it("trims trailing punctuation around links while keeping display punctuation", () => {
    const html = MarkdownRenderer.render("Check (https://example.com/path_(v1)), and continue.");
    expect(html).toContain('href="https://example.com/path_(v1)"');
    expect(html).toContain("</a>), and continue.");
});

// Percent-encoding and a balanced "(v1)" both survive in the href.
it("supports encoded chars and balanced parentheses in link path", () => {
    const target = "https://example.com/docs/file%5Fname_(v1).md";
    const html = MarkdownRenderer.render(`open ${target}`);
    expect(html).toContain(`href="${target}"`);
});

// The renderer escapes "&" again, so "&amp;" in the input shows up as "&amp;amp;".
it("keeps escaped entities in query string links", () => {
    const source = "https://example.com/search?q=a&amp;lang=en";
    const html = MarkdownRenderer.render(`lookup ${source}`);
    expect(html).toContain('href="https://example.com/search?q=a&amp;amp;lang=en"');
    expect(html).toContain("https://example.com/search?q=a&amp;amp;lang=en");
});

// A URL ending at a newline is linked in full and the newline becomes <br>.
it("handles links at line boundaries with newline conversion", () => {
    const target = "https://example.com/meshchatx_on_pi.md";
    const html = MarkdownRenderer.render(`${target}\nnext line`);
    expect(html).toContain(`href="${target}"`);
    expect(html).toContain("<br>next line");
});

// Underscore emphasis around a URL works without italicising parts of the URL.
it("mixes underscore markdown with underscore urls safely", () => {
    const target = "https://example.com/meshchatx_on_raspberry_pi.md";
    const html = MarkdownRenderer.render(`_label_ ${target} _tail_`);
    expect(html).toContain("<em>label</em>");
    expect(html).toContain("<em>tail</em>");
    expect(html).toContain(`href="${target}"`);
    expect(html).not.toContain("<em>on</em>");
});

// Raw HTML in the input is escaped rather than passed through.
it("escapes pre-rendered html input safely instead of nesting raw anchors", () => {
    const markup = '<p><a href="https://example.com/path_(v1)">https://example.com/path_(v1)</a></p>';
    const html = MarkdownRenderer.render(markup);
    expect(html).toContain("&lt;p&gt;");
    expect(html).toContain("&lt;a href=&quot;");
    expect(html).not.toContain("<script");
});
});
describe("security: XSS prevention", () => {
@@ -255,6 +307,35 @@ describe("MarkdownRenderer.js", () => {
expect(() => MarkdownRenderer.render(text)).not.toThrow();
});
});
// Fuzzes the renderer with underscore-heavy text and checks that every <em>/<strong> that is
// opened is also closed. The generator is seeded so a failing input can be reproduced on
// re-run; Math.random() would produce a different corpus each time.
it("underscore-heavy fuzz input does not create unbalanced emphasis tags", () => {
    // Small deterministic PRNG (mulberry32) returning floats in [0, 1).
    const createSeededRandom = (seed) => {
        let state = seed >>> 0;
        return () => {
            state = (state + 0x6d2b79f5) >>> 0;
            let t = state;
            t = Math.imul(t ^ (t >>> 15), t | 1);
            t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
            return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
        };
    };
    const random = createSeededRandom(0x5eed1234);

    const parts = [
        "_",
        "__",
        "___",
        "snake_case",
        "meshchatx_on_pi",
        " ",
        "text",
        "https://example.com/meshchatx_on_pi.md",
    ];
    const randomUnderscoreText = () => {
        let out = "";
        for (let i = 0; i < 200; i++) {
            out += parts[Math.floor(random() * parts.length)];
        }
        return out;
    };
    const countMatches = (haystack, pattern) => (haystack.match(pattern) || []).length;

    for (let i = 0; i < 50; i++) {
        const rendered = MarkdownRenderer.render(randomUnderscoreText());
        const opensEm = countMatches(rendered, /<em>/g);
        const closesEm = countMatches(rendered, /<\/em>/g);
        const opensStrong = countMatches(rendered, /<strong>/g);
        const closesStrong = countMatches(rendered, /<\/strong>/g);
        expect(opensEm).toBe(closesEm);
        expect(opensStrong).toBe(closesStrong);
    }
});
});
describe("isSingleEmojiMessage", () => {
@@ -397,5 +478,29 @@ describe("MarkdownRenderer.js", () => {
const r = MarkdownRenderer.render(msg);
expect(typeof r).toBe("string");
});
// End-to-end check on a message mixing a heading, an https link followed by a comma, a
// nomadnet link, inline code, emphasis and a fenced code block.
it("renders a real-world mixed message body safely", () => {
    const lines = [
        "# Deploy notes",
        "",
        "Read https://git.quad4.io/RNS-Things/MeshChatX/src/branch/dev/docs/meshchatx_on_raspberry_pi.md, then ping",
        "nomadnet://1dfeb0d794963579bd21ac8f153c77a4:/page/meshchatx_on_pi.mu",
        "",
        "`inline_code` and _italic_ and snake_case stay sane.",
        "",
        "```txt",
        "https://example.com/not_linked_inside_code",
        "```",
    ];
    const html = MarkdownRenderer.render(lines.join("\n"));

    const expectedFragments = [
        "<h1",
        'href="https://git.quad4.io/RNS-Things/MeshChatX/src/branch/dev/docs/meshchatx_on_raspberry_pi.md"',
        'data-nomadnet-url="1dfeb0d794963579bd21ac8f153c77a4:/page/meshchatx_on_pi.mu"',
        "<code",
        "<pre",
    ];
    for (const fragment of expectedFragments) {
        expect(html).toContain(fragment);
    }
    // "snake_case" must not be read as emphasis.
    expect(html).not.toContain("<em>case</em>");
});
});
});