FossilRepo
Fix wiki Markdown/HTML detection and render both formats properly
- Smart content detection: check for Markdown patterns (# headings, code fences, [text][ref] reference links) BEFORE any transformations
- Markdown pages are rendered through the Python markdown library
- Fossil wiki/HTML pages get link conversion and pass through as HTML
- Fossil [/path|text] links are converted to Markdown links in MD mode and to HTML anchors in HTML mode
- <verbatim> blocks are converted to code fences (MD) or <pre><code> (HTML)
- Fixes Fossil-NG and other Markdown wiki pages rendering as raw text
Commit
50843fb4d000506a1a3eee51fd4eca43fe23c610f0e85e7e1670f79eabd328a3
Parent
0fb1c2724e23568…
1 file changed
+34
-9
+34
-9
| --- fossil/views.py | ||
| +++ fossil/views.py | ||
| @@ -22,24 +22,49 @@ | ||
| 22 | 22 | - Markdown (newer pages) |
| 23 | 23 | """ |
| 24 | 24 | if not content: |
| 25 | 25 | return "" |
| 26 | 26 | |
| 27 | - # Convert Fossil-specific syntax | |
| 28 | - # [/path|text] -> <a href="/path">text</a> | |
| 27 | + # Detect format from the raw content BEFORE any transformations | |
| 28 | + is_markdown = _is_markdown(content) | |
| 29 | + | |
| 30 | + if is_markdown: | |
| 31 | + # Markdown: convert Fossil [/path|text] links to markdown links first | |
| 32 | + content = re.sub(r"\[(/[^|\]]+)\|([^\]]+)\]", r"[\2](\1)", content) | |
| 33 | + content = re.sub(r"<verbatim>(.*?)</verbatim>", r"```\n\1\n```", content, flags=re.DOTALL) | |
| 34 | + return md.markdown(content, extensions=["fenced_code", "tables", "toc"]) | |
| 35 | + | |
| 36 | + # Fossil wiki / HTML: convert Fossil-specific syntax to HTML | |
| 29 | 37 | content = re.sub(r"\[(/[^|\]]+)\|([^\]]+)\]", r'<a href="\1">\2</a>', content) |
| 30 | - # [url|text] -> <a href="url">text</a> | |
| 31 | 38 | content = re.sub(r"\[(https?://[^|\]]+)\|([^\]]+)\]", r'<a href="\1">\2</a>', content) |
| 32 | - # <verbatim>...</verbatim> -> <pre><code>...</code></pre> | |
| 33 | 39 | content = re.sub(r"<verbatim>(.*?)</verbatim>", r"<pre><code>\1</code></pre>", content, flags=re.DOTALL) |
| 40 | + return content | |
| 34 | 41 | |
| 35 | - # If content looks like it has HTML tags, treat as HTML (Fossil wiki) | |
| 36 | - if re.search(r"<(h[1-6]|p|ol|ul|li|div|table|pre|a|br)\b", content, re.IGNORECASE): | |
| 37 | - return content | |
| 38 | 42 | |
| 39 | - # Otherwise try markdown | |
| 40 | - return md.markdown(content, extensions=["fenced_code", "tables", "toc"]) | |
| 43 | +def _is_markdown(content: str) -> bool: | |
| 44 | + """Detect if content is Markdown vs Fossil wiki/HTML. | |
| 45 | + | |
| 46 | + Heuristic: if the content starts with markdown-style headings (#), | |
| 47 | + or has significant markdown syntax patterns, treat as markdown. | |
| 48 | + """ | |
| 49 | + stripped = content.strip() | |
| 50 | + # Starts with markdown heading | |
| 51 | + if re.match(r"^#{1,6}\s", stripped): | |
| 52 | + return True | |
| 53 | + # Has multiple markdown headings | |
| 54 | + if len(re.findall(r"^#{1,6}\s", stripped, re.MULTILINE)) >= 2: | |
| 55 | + return True | |
| 56 | + # Has markdown link references [text][ref] | |
| 57 | + if re.search(r"\[.+\]\[.+\]", stripped): | |
| 58 | + return True | |
| 59 | + # Has markdown code fences | |
| 60 | + if "```" in stripped: | |
| 61 | + return True | |
| 62 | + # Starts with HTML block element — it's Fossil wiki/HTML | |
| 63 | + if re.match(r"<(h[1-6]|p|ol|ul|div|table)\b", stripped, re.IGNORECASE): | |
| 64 | + return False | |
| 65 | + return False | |
| 41 | 66 | |
| 42 | 67 | |
| 43 | 68 | def _get_repo_and_reader(slug): |
| 44 | 69 | """Return (project, fossil_repo, reader) or raise 404.""" |
| 45 | 70 | project = get_object_or_404(Project, slug=slug, deleted_at__isnull=True) |
| 46 | 71 |
| --- fossil/views.py | |
| +++ fossil/views.py | |
| @@ -22,24 +22,49 @@ | |
| 22 | - Markdown (newer pages) |
| 23 | """ |
| 24 | if not content: |
| 25 | return "" |
| 26 | |
| 27 | # Convert Fossil-specific syntax |
| 28 | # [/path|text] -> <a href="/path">text</a> |
| 29 | content = re.sub(r"\[(/[^|\]]+)\|([^\]]+)\]", r'<a href="\1">\2</a>', content) |
| 30 | # [url|text] -> <a href="url">text</a> |
| 31 | content = re.sub(r"\[(https?://[^|\]]+)\|([^\]]+)\]", r'<a href="\1">\2</a>', content) |
| 32 | # <verbatim>...</verbatim> -> <pre><code>...</code></pre> |
| 33 | content = re.sub(r"<verbatim>(.*?)</verbatim>", r"<pre><code>\1</code></pre>", content, flags=re.DOTALL) |
| 34 | |
| 35 | # If content looks like it has HTML tags, treat as HTML (Fossil wiki) |
| 36 | if re.search(r"<(h[1-6]|p|ol|ul|li|div|table|pre|a|br)\b", content, re.IGNORECASE): |
| 37 | return content |
| 38 | |
| 39 | # Otherwise try markdown |
| 40 | return md.markdown(content, extensions=["fenced_code", "tables", "toc"]) |
| 41 | |
| 42 | |
| 43 | def _get_repo_and_reader(slug): |
| 44 | """Return (project, fossil_repo, reader) or raise 404.""" |
| 45 | project = get_object_or_404(Project, slug=slug, deleted_at__isnull=True) |
| 46 |
| --- fossil/views.py | |
| +++ fossil/views.py | |
| @@ -22,24 +22,49 @@ | |
| 22 | - Markdown (newer pages) |
| 23 | """ |
| 24 | if not content: |
| 25 | return "" |
| 26 | |
| 27 | # Detect format from the raw content BEFORE any transformations |
| 28 | is_markdown = _is_markdown(content) |
| 29 | |
| 30 | if is_markdown: |
| 31 | # Markdown: convert Fossil [/path|text] links to markdown links first |
| 32 | content = re.sub(r"\[(/[^|\]]+)\|([^\]]+)\]", r"[\2](\1)", content) |
| 33 | content = re.sub(r"<verbatim>(.*?)</verbatim>", r"```\n\1\n```", content, flags=re.DOTALL) |
| 34 | return md.markdown(content, extensions=["fenced_code", "tables", "toc"]) |
| 35 | |
| 36 | # Fossil wiki / HTML: convert Fossil-specific syntax to HTML |
| 37 | content = re.sub(r"\[(/[^|\]]+)\|([^\]]+)\]", r'<a href="\1">\2</a>', content) |
| 38 | content = re.sub(r"\[(https?://[^|\]]+)\|([^\]]+)\]", r'<a href="\1">\2</a>', content) |
| 39 | content = re.sub(r"<verbatim>(.*?)</verbatim>", r"<pre><code>\1</code></pre>", content, flags=re.DOTALL) |
| 40 | return content |
| 41 | |
| 42 | |
| 43 | def _is_markdown(content: str) -> bool: |
| 44 | """Detect if content is Markdown vs Fossil wiki/HTML. |
| 45 | |
| 46 | Heuristic: if the content starts with markdown-style headings (#), |
| 47 | or has significant markdown syntax patterns, treat as markdown. |
| 48 | """ |
| 49 | stripped = content.strip() |
| 50 | # Starts with markdown heading |
| 51 | if re.match(r"^#{1,6}\s", stripped): |
| 52 | return True |
| 53 | # Has multiple markdown headings |
| 54 | if len(re.findall(r"^#{1,6}\s", stripped, re.MULTILINE)) >= 2: |
| 55 | return True |
| 56 | # Has markdown link references [text][ref] |
| 57 | if re.search(r"\[.+\]\[.+\]", stripped): |
| 58 | return True |
| 59 | # Has markdown code fences |
| 60 | if "```" in stripped: |
| 61 | return True |
| 62 | # Starts with HTML block element — it's Fossil wiki/HTML |
| 63 | if re.match(r"<(h[1-6]|p|ol|ul|div|table)\b", stripped, re.IGNORECASE): |
| 64 | return False |
| 65 | return False |
| 66 | |
| 67 | |
| 68 | def _get_repo_and_reader(slug): |
| 69 | """Return (project, fossil_repo, reader) or raise 404.""" |
| 70 | project = get_object_or_404(Project, slug=slug, deleted_at__isnull=True) |
| 71 |