From 937f33a1cc2070149b7449b032a60d1034c63a0b Mon Sep 17 00:00:00 2001 From: Kamil Waz Date: Thu, 10 Nov 2022 16:11:23 +0100 Subject: [PATCH 1/3] Throw exception on null characters --- c_src/exml.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/c_src/exml.cpp b/c_src/exml.cpp index 56d188c..e21da7a 100644 --- a/c_src/exml.cpp +++ b/c_src/exml.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -482,10 +483,22 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int argc, // Skip initial whitespace even if we don't manage to parse anything. // Also needed for has_stream_closing_tag to recognize the tag. + // Raise an exception when null character is found. + std::size_t offset = 0; - while (offset < Parser::buffer.size() - 1 && - std::isspace(Parser::buffer[offset])) - ++offset; + bool offset_set = false; + + for (std::size_t i = 0; i < Parser::buffer.size() - 1; i++) { + unsigned char byte = Parser::buffer[i]; + if (byte == 0) { + return enif_make_tuple2( + env, atom_error, + enif_make_string(env, "null character found in buffer", ERL_NIF_LATIN1)); + } else if (!std::isspace(byte) && !offset_set) { + offset = i; + offset_set = true; + } + } ParseCtx ctx{env, parser}; xml_document::ParseResult result; From 07ff395734291a074a8bb0c29d017d6f0f55db41 Mon Sep 17 00:00:00 2001 From: Kamil Waz Date: Thu, 10 Nov 2022 16:44:49 +0100 Subject: [PATCH 2/3] Simplify implementation --- c_src/exml.cpp | 27 +++++++++++---------------- test/exml_stream_tests.erl | 28 +++++++++++++++++----------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/c_src/exml.cpp b/c_src/exml.cpp index e21da7a..f63832e 100644 --- a/c_src/exml.cpp +++ b/c_src/exml.cpp @@ -11,8 +11,8 @@ #include #include +#include #include -#include #include #include #include @@ -481,24 +481,19 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int argc, if (!parser->copy_buffer(env, argv[1])) return enif_make_badarg(env); - // Skip 
initial whitespace even if we don't manage to parse anything. - // Also needed for has_stream_closing_tag to recognize the tag. // Raise an exception when null character is found. - - std::size_t offset = 0; - bool offset_set = false; - - for (std::size_t i = 0; i < Parser::buffer.size() - 1; i++) { - unsigned char byte = Parser::buffer[i]; - if (byte == 0) { - return enif_make_tuple2( + const char *data = reinterpret_cast(&Parser::buffer[0]); + if (std::strlen(data) != Parser::buffer.size() - 1) + return enif_make_tuple2( env, atom_error, enif_make_string(env, "null character found in buffer", ERL_NIF_LATIN1)); - } else if (!std::isspace(byte) && !offset_set) { - offset = i; - offset_set = true; - } - } + + // Skip initial whitespace even if we don't manage to parse anything. + // Also needed for has_stream_closing_tag to recognize the tag. + std::size_t offset = 0; + while (offset < Parser::buffer.size() - 1 && + std::isspace(Parser::buffer[offset])) + ++offset; ParseCtx ctx{env, parser}; xml_document::ParseResult result; diff --git a/test/exml_stream_tests.erl b/test/exml_stream_tests.erl index 5420f3a..99b8e0a 100644 --- a/test/exml_stream_tests.erl +++ b/test/exml_stream_tests.erl @@ -191,18 +191,24 @@ stream_max_child_size_test() -> infinite_stream_partial_chunk_test() -> {ok, Parser0} = exml_stream:new_parser([{infinite_stream, true}, {autoreset, true}]), - {ok, Parser1, Open} = exml_stream:parse(Parser0, <<"">>), + {ok, Parser1, Open} = exml_stream:parse(Parser0, <<"">>), ?assertEqual( [#xmlel{name = <<"open">>, - attrs = [{<<"xmlns">>, <<"urn:ietf:params:xml:ns:xmpp-framing">>}, - {<<"to">>, <<"i.am.banana.com">>}, - {<<"version">>, <<"1.0">>}]}], + attrs = [{<<"xmlns">>, <<"urn:ietf:params:xml:ns:xmpp-framing">>}, + {<<"to">>, <<"i.am.banana.com">>}, + {<<"version">>, <<"1.0">>}]}], Open), {ok, Parser2, A} = exml_stream:parse(Parser1, <<"">>), - ?assertEqual([#xmlel{name = <<"a">>, attrs = []}], A), - {ok, Parser3, Empty0} = exml_stream:parse(Parser2, <<" 
">>), - ?assertEqual([], Empty0), - {ok, Parser4, Empty1} = exml_stream:parse(Parser3, <<">), - ?assertEqual([], Empty1), - {ok, _Parser5, B} = exml_stream:parse(Parser4, <<">">>), - ?assertEqual([#xmlel{name = <<"b">>, attrs = []}], B). + ?assertEqual([#xmlel{name = <<"a">>, attrs = []}], A), + {ok, Parser3, Empty0} = exml_stream:parse(Parser2, <<" ">>), + ?assertEqual([], Empty0), + {ok, Parser4, Empty1} = exml_stream:parse(Parser3, <<">), + ?assertEqual([], Empty1), + {ok, _Parser5, B} = exml_stream:parse(Parser4, <<">">>), + ?assertEqual([#xmlel{name = <<"b">>, attrs = []}], B). + +null_character_test() -> + {ok, P1} = exml_stream:new_parser(), + ?assertMatch({error, _}, exml_stream:parse(P1, <<"\0">>)), + {ok, P2} = exml_stream:new_parser(), + ?assertMatch({error, _}, exml_stream:parse(P2, <<"\0">>)). From 794f085da670f302d388d6b932cacbbbbb9a98ad Mon Sep 17 00:00:00 2001 From: Kamil Waz Date: Mon, 14 Nov 2022 13:12:49 +0100 Subject: [PATCH 3/3] Optimize the implementation --- c_src/exml.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/c_src/exml.cpp b/c_src/exml.cpp index f63832e..ab64b18 100644 --- a/c_src/exml.cpp +++ b/c_src/exml.cpp @@ -481,13 +481,6 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int argc, if (!parser->copy_buffer(env, argv[1])) return enif_make_badarg(env); - // Raise an exception when null character is found. - const char *data = reinterpret_cast(&Parser::buffer[0]); - if (std::strlen(data) != Parser::buffer.size() - 1) - return enif_make_tuple2( - env, atom_error, - enif_make_string(env, "null character found in buffer", ERL_NIF_LATIN1)); - // Skip initial whitespace even if we don't manage to parse anything. // Also needed for has_stream_closing_tag to recognize the tag. std::size_t offset = 0; @@ -559,6 +552,13 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int argc, enif_make_string(env, result.error_message.c_str(), ERL_NIF_LATIN1)); } + // Raise an exception when null character is found. 
+ std::size_t rest_size = &Parser::buffer.back() - result.rest; + if (std::strlen(reinterpret_cast<const char *>(result.rest)) != rest_size) + return enif_make_tuple2( + env, atom_error, + enif_make_string(env, "null character found in buffer", ERL_NIF_LATIN1)); + return enif_make_tuple3( env, atom_ok, element, enif_make_uint64(env, result.rest - Parser::buffer.data()));