From 40e1b7aa22a4a32fef002f9d5462bcaaa782f3bf Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Tue, 11 Jul 2023 16:10:44 +0200 Subject: [PATCH] Better documentation & errors when facing HTML rendering limitations for `<table>` tags - close #845 (#852) --- CHANGELOG.md | 1 + docs/HTML.md | 17 ++++++++++++++--- fpdf/fpdf.py | 12 ++++++++++++ fpdf/html.py | 15 +++++++++++++++ test/html/test_html_table.py | 16 ++++++++++++++++ 5 files changed, 58 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fda90ac23..82d0ada25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default', - [`FPDF.table()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.table): new optional parameters `gutter_height`, `gutter_width` and `wrapmode`. Links can also be added to cells by passing a `link` parameter to [`Row.cell()`](https://pyfpdf.github.io/fpdf2/fpdf/table.html#fpdf.table.Row.cell) - [`FPDF.multi_cell()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.multi_cell): has a new optional `center` parameter to position the cell horizontally at the center of the page - Added Tutorial in Khmer language - thanks to @kuth-chi +- Better documentation & errors when facing HTML rendering limitations for `<table>` tags:
### Fixed - [`FPDF.table()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.table): the `colspan` setting has been fixed - [documentation](https://pyfpdf.github.io/fpdf2/Tables.html#column-span) - [`FPDF.image()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.image): allowing images path starting with `data` to be passed as input diff --git a/docs/HTML.md b/docs/HTML.md index f854d06a1..866b0545b 100644 --- a/docs/HTML.md +++ b/docs/HTML.md @@ -1,4 +1,4 @@ -# HTML # +# HTML `fpdf2` supports basic rendering from HTML. @@ -12,9 +12,9 @@ you may want to check [Reportlab](https://www.reportlab.com) (or [xhtml2pdf](htt or [borb](https://github.com/jorisschellekens/borb-examples/#76-exporting-html-as-pdf). -## write_html usage example ## +## write_html usage example -HTML rendering require the use of `write_html` method: +HTML rendering requires the use of [`FPDF.write_html()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html): ```python from fpdf import FPDF @@ -91,3 +91,14 @@ pdf.output("html.pdf") + `<tr>`: rows (with `align`, `bgcolor` attributes)
+ `<th>`: heading cells (with `align`, `bgcolor`, `width` attributes) * `<td>`: cells (with `align`, `bgcolor`, `width` attributes) + + +## Known limitations + +`fpdf2` HTML renderer does not support many configurations of nested tags. +For example: +* `
` cannot be used as a parent for several elements - _cf._ [issue #640](https://github.com/PyFPDF/fpdf2/issues/640) +* `<table>` cells can contain `<td><b><em>nested tags forming a single text block</em></b></td>`, but **not** `<td><b>arbitrarily</b> nested <em>tags</em></td>` - _cf._ [issue #845](https://github.com/PyFPDF/fpdf2/issues/845) + +You can also check the currently open GitHub issues with the tag `html`: +https://github.com/PyFPDF/fpdf2/issues?q=is%3Aopen+label%3Ahtml diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py index 5a15d0820..66c10ab22 100644 --- a/fpdf/fpdf.py +++ b/fpdf/fpdf.py @@ -391,6 +391,18 @@ def write_html(self, text, *args, **kwargs): """ Parse HTML and convert it to PDF. cf. https://pyfpdf.github.io/fpdf2/HTML.html + + Args: + text (str): HTML content to render + image_map (function): an optional one-argument function that maps "src" + to new image URLs + li_tag_indent (int): numeric indentation of <li> elements + dd_tag_indent (int): numeric indentation of <dd> elements + table_line_separators (bool): enable horizontal line separators in <table> + ul_bullet_char (str): bullet character for <ul> elements + heading_sizes (dict): font size per heading level names ("h1", "h2"...) + pre_code_font (str): font to use for <pre> & <code> blocks
      +            warn_on_tags_not_matching (bool): control warnings production for unmatched HTML tags
               """
               kwargs2 = vars(self)
               # Method arguments must override class & instance attributes:
      diff --git a/fpdf/html.py b/fpdf/html.py
      index db678cb32..736705467 100644
      --- a/fpdf/html.py
      +++ b/fpdf/html.py
      @@ -219,6 +219,9 @@ def __init__(
                   dd_tag_indent (int): numeric indentation of <dd> elements
                   table_line_separators (bool): enable horizontal line separators in <table>
                   ul_bullet_char (str): bullet character for <ul> elements
      +            heading_sizes (dict): font size per heading level names ("h1", "h2"...)
      +            pre_code_font (str): font to use for <pre> & <code> blocks
      +            warn_on_tags_not_matching (bool): control warnings production for unmatched HTML tags
               """
               super().__init__()
               self.pdf = pdf
      @@ -262,6 +265,17 @@ def handle_data(self, data):
                   data = data.strip()
                   if not data:
                       return
      +            if "inserted" in self.td_th:
      +                tag = self.td_th["tag"]
      +                raise NotImplementedError(
      +                    f"Unsupported nested HTML tags inside <{tag}> element"
      +                )
      +                # We could potentially support nested <b> / <em> / <font> tags
      +                # by building a list of Fragment instances from the HTML cell content
      +                # and then passing those fragments to Row.cell().
      +                # However there should be an incoming refactoring of this code
      +                # dedicated to text layout, and we should probably wait for that
      +                # before supporting this feature.
                   align = self.td_th.get("align", self.tr.get("align"))
                   if align:
                       align = align.upper()
      @@ -454,6 +468,7 @@ def handle_starttag(self, tag, attrs):
                   if not self.table_row:
                       raise FPDFException(f"Invalid HTML: <{tag}> used outside any 
    ") self.td_th = {k.lower(): v for k, v in attrs.items()} + self.td_th["tag"] = tag if tag == "th": self.td_th["align"] = "CENTER" self.td_th["b"] = True diff --git a/test/html/test_html_table.py b/test/html/test_html_table.py index cdd45e758..f26c3b5a3 100644 --- a/test/html/test_html_table.py +++ b/test/html/test_html_table.py @@ -278,3 +278,19 @@ def test_html_table_invalid(caplog): pdf.write_html("") assert str(error.value) == "Invalid HTML: used outside any
    " assert caplog.text == "" + + +def test_html_table_with_nested_tags(): # issue 845 + pdf = FPDF() + pdf.set_font_size(24) + pdf.add_page() + with pytest.raises(NotImplementedError): + pdf.write_html( + """
    + + + + + +
    LEFTRIGHT
    This is supportedThis is not supported
    """ + )