From 40e1b7aa22a4a32fef002f9d5462bcaaa782f3bf Mon Sep 17 00:00:00 2001
From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com>
Date: Tue, 11 Jul 2023 16:10:44 +0200
Subject: [PATCH] Better documentation & errors when facing HTML rendering
 limitations for `<table>` tags - close #845 (#852)

---
 CHANGELOG.md                 |  1 +
 docs/HTML.md                 | 17 ++++++++++++++---
 fpdf/fpdf.py                 | 12 ++++++++++++
 fpdf/html.py                 | 15 +++++++++++++++
 test/html/test_html_table.py | 16 ++++++++++++++++
 5 files changed, 58 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fda90ac23..82d0ada25 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
 - [`FPDF.table()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.table): new optional parameters `gutter_height`, `gutter_width` and `wrapmode`. Links can also be added to cells by passing a `link` parameter to [`Row.cell()`](https://pyfpdf.github.io/fpdf2/fpdf/table.html#fpdf.table.Row.cell)
 - [`FPDF.multi_cell()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.multi_cell): has a new optional `center` parameter to position the cell horizontally at the center of the page
 - Added Tutorial in Khmer language - thanks to @kuth-chi
+- Better documentation & errors when facing HTML rendering limitations for `<table>` tags: <https://pyfpdf.github.io/fpdf2/HTML.html>
 ### Fixed
 - [`FPDF.table()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.table): the `colspan` setting has been fixed - [documentation](https://pyfpdf.github.io/fpdf2/Tables.html#column-span)
 - [`FPDF.image()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.image): allowing images path starting with `data` to be passed as input
diff --git a/docs/HTML.md b/docs/HTML.md
index f854d06a1..866b0545b 100644
--- a/docs/HTML.md
+++ b/docs/HTML.md
@@ -1,4 +1,4 @@
-# HTML #
+# HTML
 
 `fpdf2` supports basic rendering from HTML.
 
@@ -12,9 +12,9 @@ you may want to check [Reportlab](https://www.reportlab.com) (or [xhtml2pdf](htt
 or [borb](https://github.com/jorisschellekens/borb-examples/#76-exporting-html-as-pdf).
 
 
-## write_html usage example ##
+## write_html usage example
 
-HTML rendering require the use of `write_html` method:
+HTML rendering requires the use of [`FPDF.write_html()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html):
 
 ```python
 from fpdf import FPDF
@@ -91,3 +91,14 @@ pdf.output("html.pdf")
     + `<tr>`: rows (with `align`, `bgcolor` attributes)
     + `<th>`: heading cells (with `align`, `bgcolor`, `width` attributes)
     * `<td>`: cells (with `align`, `bgcolor`, `width` attributes)
+
+
+## Known limitations
+
+The `fpdf2` HTML renderer does not support many configurations of nested tags.
+For example:
+* `<center>` cannot be used as a parent for several elements - _cf._ [issue #640](https://github.com/PyFPDF/fpdf2/issues/640)
+* `<table>` cells can contain `<td><b><em>nested tags forming a single text block</em></b></td>`, but **not** `<td><b>arbitrarily</b> nested <em>tags</em></td>` - _cf._ [issue #845](https://github.com/PyFPDF/fpdf2/issues/845)
+
+You can also check the currently open GitHub issues with the tag `html`:
+https://github.com/PyFPDF/fpdf2/issues?q=is%3Aopen+label%3Ahtml
diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py
index 5a15d0820..66c10ab22 100644
--- a/fpdf/fpdf.py
+++ b/fpdf/fpdf.py
@@ -391,6 +391,18 @@ def write_html(self, text, *args, **kwargs):
         """
         Parse HTML and convert it to PDF.
         cf. https://pyfpdf.github.io/fpdf2/HTML.html
+
+        Args:
+            text (str): HTML content to render
+            image_map (function): an optional one-argument function that maps <img> "src"
+                to new image URLs
+            li_tag_indent (int): numeric indentation of <li> elements
+            dd_tag_indent (int): numeric indentation of <dd> elements
+            table_line_separators (bool): enable horizontal line separators in <table>
+            ul_bullet_char (str): bullet character for <ul> elements
+            heading_sizes (dict): font size per heading level names ("h1", "h2"...)
+            pre_code_font (str): font to use for <pre> & <code> blocks
+            warn_on_tags_not_matching (bool): control whether warnings are emitted for unmatched HTML tags
         """
         kwargs2 = vars(self)
         # Method arguments must override class & instance attributes:
diff --git a/fpdf/html.py b/fpdf/html.py
index db678cb32..736705467 100644
--- a/fpdf/html.py
+++ b/fpdf/html.py
@@ -219,6 +219,9 @@ def __init__(
             dd_tag_indent (int): numeric indentation of <dd> elements
             table_line_separators (bool): enable horizontal line separators in <table>
             ul_bullet_char (str): bullet character for <ul> elements
+            heading_sizes (dict): font size per heading level names ("h1", "h2"...)
+            pre_code_font (str): font to use for <pre> & <code> blocks
+            warn_on_tags_not_matching (bool): control whether warnings are emitted for unmatched HTML tags
         """
         super().__init__()
         self.pdf = pdf
@@ -262,6 +265,17 @@ def handle_data(self, data):
             data = data.strip()
             if not data:
                 return
+            if "inserted" in self.td_th:
+                tag = self.td_th["tag"]
+                raise NotImplementedError(
+                    f"Unsupported nested HTML tags inside <{tag}> element"
+                )
+                # We could potentially support nested <b> / <em> / <font> tags
+                # by building a list of Fragment instances from the HTML cell content
+                # and then passing those fragments to Row.cell().
+                # However there should be an incoming refactoring of this code
+                # dedicated to text layout, and we should probably wait for that
+                # before supporting this feature.
             align = self.td_th.get("align", self.tr.get("align"))
             if align:
                 align = align.upper()
@@ -454,6 +468,7 @@ def handle_starttag(self, tag, attrs):
             if not self.table_row:
                 raise FPDFException(f"Invalid HTML: <{tag}> used outside any <tr>")
             self.td_th = {k.lower(): v for k, v in attrs.items()}
+            self.td_th["tag"] = tag
             if tag == "th":
                 self.td_th["align"] = "CENTER"
                 self.td_th["b"] = True
diff --git a/test/html/test_html_table.py b/test/html/test_html_table.py
index cdd45e758..f26c3b5a3 100644
--- a/test/html/test_html_table.py
+++ b/test/html/test_html_table.py
@@ -278,3 +278,19 @@ def test_html_table_invalid(caplog):
         pdf.write_html("<tr></tr>")
     assert str(error.value) == "Invalid HTML: <tr> used outside any <table>"
     assert caplog.text == ""
+
+
+def test_html_table_with_nested_tags():  # issue 845
+    pdf = FPDF()
+    pdf.set_font_size(24)
+    pdf.add_page()
+    with pytest.raises(NotImplementedError):
+        pdf.write_html(
+            """<table><tr>
+            <th>LEFT</th>
+            <th>RIGHT</th>
+        </tr><tr>
+            <td><font size=7>This is supported</font></td>
+            <td>This <font size=20>is not</font> <b>supported</b></td>
+        </tr></table>"""
+        )