From 41de1c9b5b9deadb3df4cadf958f7a0799376cde Mon Sep 17 00:00:00 2001
From: x p k
Date: Mon, 4 Dec 2023 15:09:02 +0800
Subject: [PATCH] NEW: html2text dumps html as plaintext

---
 py/html2text.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 py/html2text.py

diff --git a/py/html2text.py b/py/html2text.py
new file mode 100644
index 0000000..740e7ce
--- /dev/null
+++ b/py/html2text.py
@@ -0,0 +1,46 @@
+"""Fetch a web page and print it as plain text via html2text.
+
+Usage: python html2text.py [URL]
+"""
+import os
+import sys
+from urllib import request
+
+# This script has the same name as the third-party ``html2text`` package.
+# When it is run directly, its own directory is first on sys.path, so a
+# plain ``import html2text`` would load this file instead of the package.
+# Remove that directory from the search path before importing.
+_SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+sys.path[:] = [
+    entry for entry in sys.path
+    if os.path.realpath(entry or os.curdir) != _SCRIPT_DIR
+]
+
+import html2text  # noqa: E402  (must come after the sys.path fix)
+
+DEFAULT_URL = 'https://duckduckgo.com'
+TIMEOUT_SECONDS = 30
+
+
+def fetch_html(url, timeout=TIMEOUT_SECONDS):
+    """Download *url* and return the response body as text.
+
+    The charset declared in the Content-Type header is used for decoding,
+    falling back to UTF-8; undecodable bytes are replaced, not fatal.
+    """
+    # The context manager closes the connection even if read() fails.
+    with request.urlopen(url, timeout=timeout) as response:
+        charset = response.headers.get_content_charset() or 'utf-8'
+        return response.read().decode(charset, errors='replace')
+
+
+def main(argv=None):
+    """Print the page at argv[0] (default: DEFAULT_URL) as plain text."""
+    args = sys.argv[1:] if argv is None else argv
+    url = args[0] if args else DEFAULT_URL
+    print(html2text.html2text(fetch_html(url)))
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())