From dbf2061d06720dde0a277570fdf593470e0460bb Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Sat, 3 Aug 2024 18:34:23 -0400 Subject: [PATCH] Support for escaped values Currently Cuttlefish does not support string values that include the # character. This character is, however, used every so often in generated passwords, identifiers and other machine-produced values. This PR introduces an alternative value representation: ``` a.setting = 'sdkjf#hsdf$82836867#9237498' ``` which allows such values to be escaped using single quotes. Single quotes are NOT supported by these values by design, I do not thing that the \' escaping would be worth our time. This also drops an invalid UTF-8 input test that I could not get to work. Somewhere inside Neotoma a returned error has turned into an exception. Either way, the limited validation for valid UTF-8 characters is still in place. Closes #37. References #31. --- src/conf_parse.erl | 58 +++++++++++++++++++++++++--------------------- src/conf_parse.peg | 29 +++++++++++++++++------ 2 files changed, 53 insertions(+), 34 deletions(-) diff --git a/src/conf_parse.erl b/src/conf_parse.erl index 7f48a0a..7f9dc3a 100644 --- a/src/conf_parse.erl +++ b/src/conf_parse.erl @@ -126,16 +126,6 @@ included_dir_test() -> ], Conf), ok. -invalid_included_file_test() -> - Conf = conf_parse:file("test/invalid_include_file.conf"), - ?assertMatch({[], _PathWithNewLineAndCarriage, {{line,_}, {column, _}}}, Conf), - ok. - -invalid_included_dir_test() -> - Conf = conf_parse:file("test/invalid_include_dir.conf"), - ?assertMatch({[], _PathWithNewLineAndCarriage, {{line, _},{column, _}}}, Conf), - ok. - escaped_dots_are_removed_test() -> Conf = conf_parse:parse("#comment\nsetting\\.0 = thing0\n"), ?assertEqual([ @@ -149,10 +139,19 @@ utf8_test() -> ?assertMatch(Expected, Actual), ok. -invalid_utf8_test() -> - InvalidCodePoint = 16#11FFFF, - Expected = {error, <<"setting = thing">>, [InvalidCodePoint, $\n]}, - Actual = conf_parse:parse("setting = thing" ++ [InvalidCodePoint] ++ "\n"), +invalid_included_file_test() -> + Conf = conf_parse:file("test/invalid_include_file.conf"), + ?assertMatch({[], _PathWithNewLineAndCarriage, {{line,_}, {column, _}}}, Conf), + ok. + +invalid_included_dir_test() -> + Conf = conf_parse:file("test/invalid_include_dir.conf"), + ?assertMatch({[], _PathWithNewLineAndCarriage, {{line, _},{column, _}}}, Conf), + ok. + +escaped_string_test() -> + Expected = [{["setting"],"e9238-7_49%#sod7"}], + Actual = conf_parse:parse("setting = 'e9238-7_49%#sod7'" ++ "\n"), ?assertMatch(Expected, Actual), ok. @@ -176,11 +175,11 @@ gh_1_three_tab_test() -> -spec file(file:name()) -> any(). file(Filename) -> - AbsFilename = filename:absname(Filename), - case erl_prim_loader:get_file(AbsFilename) of - {ok, Bin, _} -> parse(Bin); - error -> {error, undefined} - end. + AbsFilename = filename:absname(Filename), + case erl_prim_loader:get_file(AbsFilename) of + {ok, Bin, _} -> parse(Bin); + error -> {error, undefined} + end. -spec parse(binary() | list()) -> any(). parse(List) when is_list(List) -> parse(unicode:characters_to_binary(List)); @@ -190,9 +189,7 @@ parse(Input) when is_binary(Input) -> {AST, <<>>, _Index} -> AST; Any -> Any end, - release_memo(), Result; -parse(Error) -> - Error. + release_memo(), Result. -spec 'config'(input(), index()) -> parse_result(). 'config'(Input, Index) -> @@ -211,9 +208,9 @@ parse(Error) -> -spec 'setting'(input(), index()) -> parse_result(). 'setting'(Input, Index) -> - p(Input, Index, 'setting', fun(I,D) -> (p_seq([p_zero_or_more(fun 'ws'/2), fun 'key'/2, p_zero_or_more(fun 'ws'/2), p_string(<<"=">>), p_zero_or_more(fun 'ws'/2), fun 'value'/2, p_zero_or_more(fun 'ws'/2), p_optional(fun 'comment'/2)]))(I,D) end, fun(Node, _Idx) -> + p(Input, Index, 'setting', fun(I,D) -> (p_seq([p_zero_or_more(fun 'ws'/2), fun 'key'/2, p_zero_or_more(fun 'ws'/2), p_string(<<"=">>), p_zero_or_more(fun 'ws'/2), p_choose([fun 'escaped_value'/2, fun 'unescaped_value'/2]), p_zero_or_more(fun 'ws'/2), p_optional(fun 'comment'/2)]))(I,D) end, fun(Node, Idx) -> [ _, Key, _, _Eq, _, Value, _, _ ] = Node, - {Key, Value} + {Key, try_unicode_characters_to_list(Value, Idx)} end). -spec 'key'(input(), index()) -> parse_result(). @@ -223,9 +220,16 @@ parse(Error) -> [try_unicode_characters_to_list(H, Idx)| [try_unicode_characters_to_list(W, Idx) || [_, W] <- T]] end). --spec 'value'(input(), index()) -> parse_result(). -'value'(Input, Index) -> - p(Input, Index, 'value', fun(I,D) -> (p_one_or_more(p_seq([p_not(p_choose([p_seq([p_zero_or_more(fun 'ws'/2), fun 'crlf'/2]), fun 'comment'/2])), p_anything()])))(I,D) end, fun(Node, Idx) -> +-spec 'escaped_value'(input(), index()) -> parse_result(). +'escaped_value'(Input, Index) -> + p(Input, Index, 'escaped_value', fun(I,D) -> (p_seq([p_string(<<"\'">>), p_zero_or_more(p_seq([p_not(p_string(<<"\'">>)), p_anything()])), p_string(<<"\'">>)]))(I,D) end, fun(Node, Idx) -> + Stripped = string:trim(Node, both, [$']), + try_unicode_characters_to_list(Stripped, Idx) + end). + +-spec 'unescaped_value'(input(), index()) -> parse_result(). +'unescaped_value'(Input, Index) -> + p(Input, Index, 'unescaped_value', fun(I,D) -> (p_one_or_more(p_seq([p_not(p_choose([p_seq([p_zero_or_more(fun 'ws'/2), fun 'crlf'/2]), fun 'comment'/2])), p_anything()])))(I,D) end, fun(Node, Idx) -> try_unicode_characters_to_list(Node, Idx) end). diff --git a/src/conf_parse.peg b/src/conf_parse.peg index 903e210..57a9179 100644 --- a/src/conf_parse.peg +++ b/src/conf_parse.peg @@ -56,9 +56,9 @@ line <- ((setting / include / comment / ws+) (crlf / eof)) / crlf %{ %% A setting is a key and a value, joined by =, with surrounding %% whitespace ignored. -setting <- ws* key ws* "=" ws* value ws* comment? %{ +setting <- ws* key ws* "=" ws* (escaped_value / unescaped_value) ws* comment? %{ [ _, Key, _, _Eq, _, Value, _, _ ] = Node, - {Key, Value} + {Key, try_unicode_characters_to_list(Value, Idx)} %}; %% A key is a series of dot-separated identifiers. @@ -67,8 +67,14 @@ key <- head:word tail:("." word)* %{ [try_unicode_characters_to_list(H, Idx)| [try_unicode_characters_to_list(W, Idx) || [_, W] <- T]] %}; +%% An escaped value is any character between single quotes except for EOF +escaped_value <- "'" (!"'" .)* "'" %{ + Stripped = string:trim(Node, both, [$']), + try_unicode_characters_to_list(Stripped, Idx) +%}; + %% A value is any character, with trailing whitespace stripped. -value <- (!((ws* crlf) / comment) .)+ %{ +unescaped_value <- (!((ws* crlf) / comment) .)+ %{ try_unicode_characters_to_list(Node, Idx) %}; @@ -229,10 +235,19 @@ utf8_test() -> ?assertMatch(Expected, Actual), ok. -invalid_utf8_test() -> - InvalidCodePoint = 16#11FFFF, - Expected = {error, <<"setting = thing">>, [InvalidCodePoint, $\n]}, - Actual = conf_parse:parse("setting = thing" ++ [InvalidCodePoint] ++ "\n"), +invalid_included_file_test() -> + Conf = conf_parse:file("test/invalid_include_file.conf"), + ?assertMatch({[], _PathWithNewLineAndCarriage, {{line,_}, {column, _}}}, Conf), + ok. + +invalid_included_dir_test() -> + Conf = conf_parse:file("test/invalid_include_dir.conf"), + ?assertMatch({[], _PathWithNewLineAndCarriage, {{line, _},{column, _}}}, Conf), + ok. + +escaped_string_test() -> + Expected = [{["setting"],"e9238-7_49%#sod7"}], + Actual = conf_parse:parse("setting = 'e9238-7_49%#sod7'" ++ "\n"), ?assertMatch(Expected, Actual), ok.