Refactor MarkdownRenderer regex patterns for improved matching

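- Updated the bold/italic regex patterns in the MarkdownRenderer from `(.*?)` to `(.+?)`: matching was already non-greedy, and the patterns now also require at least one character between the delimiters, for better accuracy in text rendering.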
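- Enhanced the header rendering test: generated content now excludes empty strings, newlines and `#`; the result must start with `<h1` and end with `</h1>`; and, when the content contains no markdown special characters, its HTML-escaped form must appear in the output.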
This commit is contained in:
Sudo-Ivan
2026-01-14 18:16:41 -06:00
parent c08cdb65b6
commit 16d5f2d497
2 changed files with 17 additions and 11 deletions
+6 -6
View File
@@ -70,12 +70,12 @@ class MarkdownRenderer:
)
# Bold and Italic
text = re.sub(r"\*\*\*(.*?)\*\*\*", r"<strong><em>\1</em></strong>", text)
text = re.sub(r"\*\*(.*?)\*\*", r"<strong>\1</strong>", text)
text = re.sub(r"\*(?!\s)(.*?)(?<!\s)\*", r"<em>\1</em>", text)
text = re.sub(r"___(.*?)___", r"<strong><em>\1</em></strong>", text)
text = re.sub(r"__(.*?)__", r"<strong>\1</strong>", text)
text = re.sub(r"_(?!\s)(.*?)(?<!\s)_", r"<em>\1</em>", text)
text = re.sub(r"\*\*\*(.+?)\*\*\*", r"<strong><em>\1</em></strong>", text)
text = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", text)
text = re.sub(r"\*(?!\s)(.+?)(?<!\s)\*", r"<em>\1</em>", text)
text = re.sub(r"___(.+?)___", r"<strong><em>\1</em></strong>", text)
text = re.sub(r"__(.+?)__", r"<strong>\1</strong>", text)
text = re.sub(r"_(?!\s)(.+?)(?<!\s)_", r"<em>\1</em>", text)
# Strikethrough
text = re.sub(r"~~(.*?)~~", r"<del>\1</del>", text)
+11 -5
View File
@@ -333,12 +333,18 @@ def test_markdown_renderer_xss_protection(text):
assert "&lt;script&gt;" in result
@given(content=st.text())
@given(content=st.text().filter(lambda x: x and "\n" not in x and "#" not in x))
def test_markdown_renderer_headers(content):
if content and "\n" not in content:
input_text = f"# {content}"
result = MarkdownRenderer.render(input_text)
assert "<h1" in result
input_text = f"# {content}"
result = MarkdownRenderer.render(input_text)
assert "<h1" in result
# Check that it's correctly wrapped in h1
assert result.startswith('<h1')
assert result.endswith('</h1>')
# If the content doesn't contain markdown special chars, we can expect it to be there escaped
# This is a safer assertion for property-based testing
if not any(c in content for c in "*_~`[]()"):
assert html.escape(content) in result