From 9e6335e4ba0ab741fb3f9657c854b6dbd93579b1 Mon Sep 17 00:00:00 2001 From: i-ky Date: Wed, 8 Nov 2023 15:09:46 +0000 Subject: [PATCH] rss-bot: Add option to convert body to Markdown --- pyproject.toml | 1 + zulip/integrations/rss/requirements.txt | 1 + zulip/integrations/rss/rss-bot | 22 +++++++++++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index aad2b4dbc..def77b8a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ module = [ "google_auth_oauthlib.*", "googleapiclient.*", "irc.*", + "markdownify.*", "mercurial.*", "nio.*", "oauth2client.*", diff --git a/zulip/integrations/rss/requirements.txt b/zulip/integrations/rss/requirements.txt index 7bbec3415..96e1b4d72 100644 --- a/zulip/integrations/rss/requirements.txt +++ b/zulip/integrations/rss/requirements.txt @@ -1 +1,2 @@ feedparser>=6.0.10 +markdownify>=0.11.6 diff --git a/zulip/integrations/rss/rss-bot b/zulip/integrations/rss/rss-bot index 49c82fb62..60437d3f9 100755 --- a/zulip/integrations/rss/rss-bot +++ b/zulip/integrations/rss/rss-bot @@ -13,10 +13,12 @@ import re import sys import time import urllib.parse +from collections.abc import Callable from html.parser import HTMLParser from typing import Any, Dict, List, Optional, Tuple import feedparser +from markdownify import markdownify from typing_extensions import override import zulip @@ -92,6 +94,19 @@ parser.add_argument( help="Convert $ to $$ (for KaTeX processing)", default=False, ) +body = parser.add_mutually_exclusive_group() +body.add_argument( + "--strip", + dest="strip", + action="store_true", + help="Strip HTML tags from body", +) +body.add_argument( + "--markdownify", + dest="strip", + action="store_false", + help="Convert body from HTML to Markdown", +) opts = parser.parse_args() @@ -178,7 +193,12 @@ def send_zulip(entry: Any, feed_name: str) -> Dict[str, Any]: body = unwrap_text(body) title = f"**[{entry.title}]({entry.link})**\n" if hasattr(entry, "title") else "" - content = f"{title}{strip_tags(body)}\n{entry.link}" + + def md(html: str) -> str: + return markdownify(html, escape_underscores=False) + + convert: Callable[[str], str] = strip_tags if opts.strip else md + content = f"{title}{convert(body)}\n{entry.link}" if opts.math: content = content.replace("$", "$$")