Skip to content

Commit

Permalink
Support for escaped values
Browse files Browse the repository at this point in the history
Currently Cuttlefish does not support string
values that include the # character.

This character is, however, used every so often
in generated passwords, identifiers and other
machine-produced values.

This PR introduces an alternative value
representation:

```
a.setting = 'sdkjf#hsdf$82836867#9237498'
```

which allows such values to be escaped using
single quotes. Single quotes inside such
values are NOT supported, by design:
I do not think that the \' escaping would
be worth our time.

This also drops an invalid UTF-8 input
test that I could not get to work.
Somewhere inside Neotoma a returned error
has turned into an exception. Either way,
the limited validation for valid UTF-8 characters
is still in place.

Closes #37.

References #31.
  • Loading branch information
michaelklishin committed Aug 3, 2024
1 parent 656018e commit dbf2061
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 34 deletions.
58 changes: 31 additions & 27 deletions src/conf_parse.erl
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,6 @@ included_dir_test() ->
], Conf),
ok.

%% Parses test/invalid_include_file.conf and expects a three-element tuple:
%% an empty list, a second element that is not inspected (the original
%% variable name suggests a path with trailing newline/carriage return --
%% TODO confirm), and a {{line, _}, {column, _}} position.
invalid_included_file_test() ->
    ?assertMatch(
        {[], _PathWithNewLineAndCarriage, {{line, _}, {column, _}}},
        conf_parse:file("test/invalid_include_file.conf")
    ),
    ok.

%% Parses test/invalid_include_dir.conf and expects a three-element tuple:
%% an empty list, a second element that is not inspected (the original
%% variable name suggests a path with trailing newline/carriage return --
%% TODO confirm), and a {{line, _}, {column, _}} position.
invalid_included_dir_test() ->
    ?assertMatch(
        {[], _PathWithNewLineAndCarriage, {{line, _}, {column, _}}},
        conf_parse:file("test/invalid_include_dir.conf")
    ),
    ok.

escaped_dots_are_removed_test() ->
Conf = conf_parse:parse("#comment\nsetting\\.0 = thing0\n"),
?assertEqual([
Expand All @@ -149,10 +139,19 @@ utf8_test() ->
?assertMatch(Expected, Actual),
ok.

invalid_utf8_test() ->
InvalidCodePoint = 16#11FFFF,
Expected = {error, <<"setting = thing">>, [InvalidCodePoint, $\n]},
Actual = conf_parse:parse("setting = thing" ++ [InvalidCodePoint] ++ "\n"),
%% Parses test/invalid_include_file.conf and expects a three-element tuple:
%% an empty list, a second element that is not inspected (the original
%% variable name suggests a path with trailing newline/carriage return --
%% TODO confirm), and a {{line, _}, {column, _}} position.
invalid_included_file_test() ->
    ?assertMatch(
        {[], _PathWithNewLineAndCarriage, {{line, _}, {column, _}}},
        conf_parse:file("test/invalid_include_file.conf")
    ),
    ok.

%% Parses test/invalid_include_dir.conf and expects a three-element tuple:
%% an empty list, a second element that is not inspected (the original
%% variable name suggests a path with trailing newline/carriage return --
%% TODO confirm), and a {{line, _}, {column, _}} position.
invalid_included_dir_test() ->
    ?assertMatch(
        {[], _PathWithNewLineAndCarriage, {{line, _}, {column, _}}},
        conf_parse:file("test/invalid_include_dir.conf")
    ),
    ok.

%% A value wrapped in single quotes must come back without the quotes and
%% with every character in between preserved, including the '#'.
escaped_string_test() ->
    Expected = [{["setting"], "e9238-7_49%#sod7"}],
    Actual = conf_parse:parse("setting = 'e9238-7_49%#sod7'\n"),
    %% Both sides are fully bound terms, so this is an equality check;
    %% ?assertEqual reports the expected term on failure, whereas
    %% ?assertMatch would only print the pattern text "Expected".
    ?assertEqual(Expected, Actual),
    ok.

Expand All @@ -176,11 +175,11 @@ gh_1_three_tab_test() ->

-spec file(file:name()) -> any().
file(Filename) ->
AbsFilename = filename:absname(Filename),
case erl_prim_loader:get_file(AbsFilename) of
{ok, Bin, _} -> parse(Bin);
error -> {error, undefined}
end.
AbsFilename = filename:absname(Filename),
case erl_prim_loader:get_file(AbsFilename) of
{ok, Bin, _} -> parse(Bin);
error -> {error, undefined}
end.

-spec parse(binary() | list()) -> any().
parse(List) when is_list(List) -> parse(unicode:characters_to_binary(List));
Expand All @@ -190,9 +189,7 @@ parse(Input) when is_binary(Input) ->
{AST, <<>>, _Index} -> AST;
Any -> Any
end,
release_memo(), Result;
parse(Error) ->
Error.
release_memo(), Result.

-spec 'config'(input(), index()) -> parse_result().
'config'(Input, Index) ->
Expand All @@ -211,9 +208,9 @@ parse(Error) ->

-spec 'setting'(input(), index()) -> parse_result().
'setting'(Input, Index) ->
p(Input, Index, 'setting', fun(I,D) -> (p_seq([p_zero_or_more(fun 'ws'/2), fun 'key'/2, p_zero_or_more(fun 'ws'/2), p_string(<<"=">>), p_zero_or_more(fun 'ws'/2), fun 'value'/2, p_zero_or_more(fun 'ws'/2), p_optional(fun 'comment'/2)]))(I,D) end, fun(Node, _Idx) ->
p(Input, Index, 'setting', fun(I,D) -> (p_seq([p_zero_or_more(fun 'ws'/2), fun 'key'/2, p_zero_or_more(fun 'ws'/2), p_string(<<"=">>), p_zero_or_more(fun 'ws'/2), p_choose([fun 'escaped_value'/2, fun 'unescaped_value'/2]), p_zero_or_more(fun 'ws'/2), p_optional(fun 'comment'/2)]))(I,D) end, fun(Node, Idx) ->
[ _, Key, _, _Eq, _, Value, _, _ ] = Node,
{Key, Value}
{Key, try_unicode_characters_to_list(Value, Idx)}
end).

-spec 'key'(input(), index()) -> parse_result().
Expand All @@ -223,9 +220,16 @@ parse(Error) ->
[try_unicode_characters_to_list(H, Idx)| [try_unicode_characters_to_list(W, Idx) || [_, W] <- T]]
end).

-spec 'value'(input(), index()) -> parse_result().
'value'(Input, Index) ->
p(Input, Index, 'value', fun(I,D) -> (p_one_or_more(p_seq([p_not(p_choose([p_seq([p_zero_or_more(fun 'ws'/2), fun 'crlf'/2]), fun 'comment'/2])), p_anything()])))(I,D) end, fun(Node, Idx) ->
%% Parser for a single-quoted ("escaped") value.
%%
%% Grammar, as built from the combinators below: a single quote, zero or
%% more characters that are not a single quote, then a closing single quote.
%% The same action body appears in the `escaped_value` rule of
%% conf_parse.peg.
%%
%% Input and Index are passed straight to p/5; the result has the
%% parse_result() type declared elsewhere in this module.
-spec 'escaped_value'(input(), index()) -> parse_result().
'escaped_value'(Input, Index) ->
p(Input, Index, 'escaped_value', fun(I,D) -> (p_seq([p_string(<<"\'">>), p_zero_or_more(p_seq([p_not(p_string(<<"\'">>)), p_anything()])), p_string(<<"\'">>)]))(I,D) end, fun(Node, Idx) ->
%% Node still contains the opening and closing quote matched above; trim
%% them from both ends before handing the text to
%% try_unicode_characters_to_list/2 (defined elsewhere in this module).
Stripped = string:trim(Node, both, [$']),
try_unicode_characters_to_list(Stripped, Idx)
end).

%% Parser for a plain (unquoted) value.
%%
%% Grammar, as built from the combinators below: one or more characters,
%% each consumed only if the input at that point does not match either
%% (a) zero or more 'ws' followed by 'crlf', or (b) 'comment'.
%% The matched node is passed unchanged to try_unicode_characters_to_list/2
%% (defined elsewhere in this module).
-spec 'unescaped_value'(input(), index()) -> parse_result().
'unescaped_value'(Input, Index) ->
p(Input, Index, 'unescaped_value', fun(I,D) -> (p_one_or_more(p_seq([p_not(p_choose([p_seq([p_zero_or_more(fun 'ws'/2), fun 'crlf'/2]), fun 'comment'/2])), p_anything()])))(I,D) end, fun(Node, Idx) ->
try_unicode_characters_to_list(Node, Idx)
end).

Expand Down
29 changes: 22 additions & 7 deletions src/conf_parse.peg
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ line <- ((setting / include / comment / ws+) (crlf / eof)) / crlf %{

%% A setting is a key and a value, joined by =, with surrounding
%% whitespace ignored.
setting <- ws* key ws* "=" ws* value ws* comment? %{
setting <- ws* key ws* "=" ws* (escaped_value / unescaped_value) ws* comment? %{
[ _, Key, _, _Eq, _, Value, _, _ ] = Node,
{Key, Value}
{Key, try_unicode_characters_to_list(Value, Idx)}
%};

%% A key is a series of dot-separated identifiers.
Expand All @@ -67,8 +67,14 @@ key <- head:word tail:("." word)* %{
[try_unicode_characters_to_list(H, Idx)| [try_unicode_characters_to_list(W, Idx) || [_, W] <- T]]
%};

%% An escaped value is a (possibly empty) run of characters other than the
%% single quote, enclosed in single quotes. The rule excludes nothing else,
%% so '#' and line breaks are accepted between the quotes. There is no way
%% to include a single quote in the value. The action strips the enclosing
%% quotes before converting the text.
escaped_value <- "'" (!"'" .)* "'" %{
Stripped = string:trim(Node, both, [$']),
try_unicode_characters_to_list(Stripped, Idx)
%};

%% A value is any character, with trailing whitespace stripped.
value <- (!((ws* crlf) / comment) .)+ %{
unescaped_value <- (!((ws* crlf) / comment) .)+ %{
try_unicode_characters_to_list(Node, Idx)
%};

Expand Down Expand Up @@ -229,10 +235,19 @@ utf8_test() ->
?assertMatch(Expected, Actual),
ok.

invalid_utf8_test() ->
InvalidCodePoint = 16#11FFFF,
Expected = {error, <<"setting = thing">>, [InvalidCodePoint, $\n]},
Actual = conf_parse:parse("setting = thing" ++ [InvalidCodePoint] ++ "\n"),
%% Parses test/invalid_include_file.conf and expects a three-element tuple:
%% an empty list, a second element that is not inspected (the original
%% variable name suggests a path with trailing newline/carriage return --
%% TODO confirm), and a {{line, _}, {column, _}} position.
invalid_included_file_test() ->
    ?assertMatch(
        {[], _PathWithNewLineAndCarriage, {{line, _}, {column, _}}},
        conf_parse:file("test/invalid_include_file.conf")
    ),
    ok.

%% Parses test/invalid_include_dir.conf and expects a three-element tuple:
%% an empty list, a second element that is not inspected (the original
%% variable name suggests a path with trailing newline/carriage return --
%% TODO confirm), and a {{line, _}, {column, _}} position.
invalid_included_dir_test() ->
    ?assertMatch(
        {[], _PathWithNewLineAndCarriage, {{line, _}, {column, _}}},
        conf_parse:file("test/invalid_include_dir.conf")
    ),
    ok.

%% A value wrapped in single quotes must come back without the quotes and
%% with every character in between preserved, including the '#'.
escaped_string_test() ->
    Expected = [{["setting"], "e9238-7_49%#sod7"}],
    Actual = conf_parse:parse("setting = 'e9238-7_49%#sod7'\n"),
    %% Both sides are fully bound terms, so this is an equality check;
    %% ?assertEqual reports the expected term on failure, whereas
    %% ?assertMatch would only print the pattern text "Expected".
    ?assertEqual(Expected, Actual),
    ok.

Expand Down

0 comments on commit dbf2061

Please sign in to comment.