Refactor MarkdownRenderer regex patterns for improved matching

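- Updated the bold/italic regex patterns in the MarkdownRenderer from `(.*?)` to `(.+?)`. Both forms are non-greedy; the change is that the emphasis markers must now enclose at least one character, so a pair of markers no longer matches with empty content.
- Enhanced the test for header rendering to ensure correct HTML structure (the output must start with `<h1` and end with `</h1>`), moved the input filtering into the Hypothesis strategy (non-empty, no newline, no `#`), and added an assertion that content without markdown special characters appears HTML-escaped in the result.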
2026-07-30 15:29:53 +00:00 · 2026-01-14 18:16:41 -06:00
parent c08cdb65b6
commit 16d5f2d497
2 changed files with 17 additions and 11 deletions
@@ -70,12 +70,12 @@ class MarkdownRenderer:
        )

        # Bold and Italic
-        text = re.sub(r"\*\*\*(.*?)\*\*\*", r"<strong><em>\1</em></strong>", text)
-        text = re.sub(r"\*\*(.*?)\*\*", r"<strong>\1</strong>", text)
-        text = re.sub(r"\*(?!\s)(.*?)(?<!\s)\*", r"<em>\1</em>", text)
-        text = re.sub(r"___(.*?)___", r"<strong><em>\1</em></strong>", text)
-        text = re.sub(r"__(.*?)__", r"<strong>\1</strong>", text)
-        text = re.sub(r"_(?!\s)(.*?)(?<!\s)_", r"<em>\1</em>", text)
+        text = re.sub(r"\*\*\*(.+?)\*\*\*", r"<strong><em>\1</em></strong>", text)
+        text = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", text)
+        text = re.sub(r"\*(?!\s)(.+?)(?<!\s)\*", r"<em>\1</em>", text)
+        text = re.sub(r"___(.+?)___", r"<strong><em>\1</em></strong>", text)
+        text = re.sub(r"__(.+?)__", r"<strong>\1</strong>", text)
+        text = re.sub(r"_(?!\s)(.+?)(?<!\s)_", r"<em>\1</em>", text)

        # Strikethrough
        text = re.sub(r"~~(.*?)~~", r"<del>\1</del>", text)
@@ -333,12 +333,18 @@ def test_markdown_renderer_xss_protection(text):
    assert "&lt;script&gt;" in result


-@given(content=st.text())
+@given(content=st.text().filter(lambda x: x and "\n" not in x and "#" not in x))
 def test_markdown_renderer_headers(content):
-    if content and "\n" not in content:
-        input_text = f"# {content}"
-        result = MarkdownRenderer.render(input_text)
-        assert "<h1" in result
+    input_text = f"# {content}"
+    result = MarkdownRenderer.render(input_text)
+    assert "<h1" in result
+    # Check that it's correctly wrapped in h1
+    assert result.startswith('<h1')
+    assert result.endswith('</h1>')
+
+    # If the content doesn't contain markdown special chars, we can expect it to be there escaped
+    # This is a safer assertion for property-based testing
+    if not any(c in content for c in "*_~`[]()"):
        assert html.escape(content) in result