diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d2d411d..e06074e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All changes to the Ox gem are documented here. Releases follow semantic versioning. +## [2.14.17] - 2023-07-14 + +### Fixed + +- The sax parser in html mode now allows unquoted attribute values with complaints. + ## [2.14.16] - 2023-04-11 ### Fixed diff --git a/ext/ox/sax.c b/ext/ox/sax.c index 7dfde8de..a26426a0 100644 --- a/ext/ox/sax.c +++ b/ext/ox/sax.c @@ -55,7 +55,7 @@ static char read_text(SaxDrive dr); static char read_jump(SaxDrive dr, const char *pat); static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h); static char read_name_token(SaxDrive dr); -static char read_quoted_value(SaxDrive dr); +static char read_quoted_value(SaxDrive dr, bool inst); static void hint_clear_empty(SaxDrive dr); static Nv hint_try_close(SaxDrive dr, const char *name); @@ -1219,6 +1219,7 @@ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, c = buf_next_non_white(&dr->buf); } if ('=' != c) { + // TBD allow in smart mode if (eq_req) { dr->err = 1; return c; @@ -1230,7 +1231,7 @@ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, pos = dr->buf.pos + 1; line = dr->buf.line; col = dr->buf.col + 1; - c = read_quoted_value(dr); + c = read_quoted_value(dr, '?' == termc); attr_value = dr->buf.str; if (is_encoding) { @@ -1297,10 +1298,11 @@ static char read_name_token(SaxDrive dr) { return '\0'; } -/* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one - * past that. dr->buf.str will point to the token which will be '\0' terminated. +/* The character after the quote or if there is no quote, the character after + * the word is returned. dr->buf.tail is one past that. dr->buf.str will point + * to the token which will be '\0' terminated. */ -static char read_quoted_value(SaxDrive dr) { +static char read_quoted_value(SaxDrive dr, bool inst) { char c; c = buf_get(&dr->buf); @@ -1324,19 +1326,27 @@ static char read_quoted_value(SaxDrive dr) { } // not quoted, look for something that terminates the string dr->buf.str = dr->buf.tail - 1; - ox_sax_drive_error(dr, WRONG_CHAR "attribute value not in quotes"); + // TBD if smart or html then no error + if (!(dr->options.smart && ox_hints_html() != dr->options.hints)) { + ox_sax_drive_error(dr, WRONG_CHAR "attribute value not in quotes"); + } while ('\0' != (c = buf_get(&dr->buf))) { switch (c) { case ' ': // case '/': case '>': - case '?': // for instructions case '\t': case '\n': case '\r': *(dr->buf.tail - 1) = '\0'; /* terminate value */ // dr->buf.tail is in the correct position, one after the word terminator return c; + case '?': // for instructions + if (inst) { + *(dr->buf.tail - 1) = '\0'; /* terminate value */ + return c; + } + break; default: break; } } diff --git a/lib/ox/version.rb b/lib/ox/version.rb index 8e6fc08d..5dd9c25e 100644 --- a/lib/ox/version.rb +++ b/lib/ox/version.rb @@ -1,4 +1,4 @@ module Ox # Current version of the module. - VERSION = '2.14.16' + VERSION = '2.14.17' end diff --git a/test/sax/sax_test.rb b/test/sax/sax_test.rb index fa9d2d59..c7f1745f 100755 --- a/test/sax/sax_test.rb +++ b/test/sax/sax_test.rb @@ -1419,4 +1419,28 @@ def test_sax_html_abort [:abort, :table] ], handler.calls) end + + def test_sax_html_attr + Ox.default_options = $ox_sax_options + handler = AllSax.new + overlay = Ox.sax_html_overlay + html = %{ + + + + +} + Ox.sax_html(handler, html, overlay: overlay, skip: :skip_white) + assert_equal([ + [:doctype, ' HTML'], + [:start_element, :html], + [:attr, :lang, 'en'], + [:start_element, :head], + [:attr, :url, "http://ohler.com?x=2"], + [:text, " "], + [:end_element, :head], + [:end_element, :html], + ], handler.calls) + end + end