From e91584066759b45f22db3a425b65fa4e7209fd72 Mon Sep 17 00:00:00 2001 From: Kai Date: Sun, 23 Jun 2024 22:02:43 +0800 Subject: [PATCH] Handle relative image URLs --- markdownify/__init__.py | 12 ++++++++++++ tests/test_conversions.py | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index cd66a39..f17e5da 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -2,6 +2,7 @@ from textwrap import fill import re import six +from urllib.parse import urlparse convert_heading_re = re.compile(r'convert_h(\d+)') @@ -86,6 +87,7 @@ class DefaultOptions: sup_symbol = '' wrap = False wrap_width = 80 + base_url = None class Options(DefaultOptions): pass @@ -295,6 +297,16 @@ def convert_img(self, el, text, convert_as_inline): src = el.attrs.get('src', None) or '' title = el.attrs.get('title', None) or '' title_part = ' "%s"' % title.replace('"', r'\"') if title else '' + + if not src.startswith(('http://', 'https://', 'data:')): + if self.options['base_url']: + if src.startswith('/'): + parsed_url = urlparse(self.options['base_url']) + base_path = f"{parsed_url.scheme}://{parsed_url.netloc}" + src = f"{base_path}{src}" + else: + src = f"{self.options['base_url'].rstrip('/')}/{src.lstrip('/')}" + if (convert_as_inline and el.parent.name not in self.options['keep_inline_images_in']): return alt diff --git a/tests/test_conversions.py b/tests/test_conversions.py index a35b982..77deb10 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -289,3 +289,12 @@ def callback(el): assert md('
test\n    foo\nbar
', code_language_callback=callback) == '\n```python\ntest\n foo\nbar\n```\n' assert md('
test\n    foo\nbar
', code_language_callback=callback) == '\n```javascript\ntest\n foo\nbar\n```\n' assert md('
test\n    foo\nbar
', code_language_callback=callback) == '\n```javascript\ntest\n foo\nbar\n```\n' + +def test_img(): + assert md('Alt text') == '![Alt text](/path/to/img.jpg "Optional title")' + assert md('Alt text') == '![Alt text](/path/to/img.jpg)' + assert md('Alt text', base_url='https://example.com') == '![Alt text](https://example.com/path/to/img.jpg)' + assert md('Alt text', base_url='https://example.com/otherpath') == '![Alt text](https://example.com/path/to/img.jpg)' + assert md('Alt text', base_url='https://example.com/otherpath/') == '![Alt text](https://example.com/path/to/img.jpg)' + assert md('Alt text', base_url='https://example.com/anypage') == '![Alt text](https://example.com/anypage/path/to/img.jpg)' + assert md('Alt text', base_url='https://example.com/anypage/') == '![Alt text](https://example.com/anypage/path/to/img.jpg)'