Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(parser): ignore or push out trivia tokens around arguments and groups #79

Merged
merged 4 commits into from
Dec 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 35 additions & 2 deletions crates/mitex-parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ pub struct Parser<'a, S: BumpTokenStream<'a> = ()> {
spec: CommandSpec,
/// Argument matcher builder containing cached regexes
arg_matchers: ArgMatcherBuilder,
/// Trivia tokens held back by `hold_trivia` until they are either emitted or discarded
trivia_buffer: Vec<(Token, &'a str)>,

/// State used by item_list/argument_list parser
/// The current state
Expand All @@ -153,6 +155,7 @@ impl<'a> Parser<'a> {
spec,
arg_matchers: ArgMatcherBuilder::default(),
list_state: Default::default(),
trivia_buffer: Vec::new(),
}
}

Expand All @@ -165,6 +168,7 @@ impl<'a> Parser<'a> {
spec,
arg_matchers: ArgMatcherBuilder::default(),
list_state: Default::default(),
trivia_buffer: Vec::new(),
}
}
}
Expand Down Expand Up @@ -231,7 +235,25 @@ impl<'a, S: BumpTokenStream<'a>> Parser<'a, S> {
/// Conditionally consume a token: when `self.peek()` reports a token of `kind`,
/// call `self.eat()`; otherwise do nothing.
fn eat_if(&mut self, kind: Token) {
if self.peek() == Some(kind) {
self.eat();
// NOTE(review): this page renders the diff without +/- markers, so this call may be
// a line the change removes rather than current behavior — confirm against parser.rs.
self.trivia();
}
}

/// Lexer Interface
/// Hold the next trivia token: take the next token from the lexer and append it to
/// `trivia_buffer` instead of handing it to the builder right away.
///
/// # Panics
///
/// Panics (via `unwrap`) if `self.lexer.eat()` has no token to return. The caller
/// visible in this diff only invokes it after peeking a line break, whitespace or
/// line comment token.
fn hold_trivia(&mut self) {
self.trivia_buffer.push(self.lexer.eat().unwrap());
}

/// Lexer Interface
/// Discard every trivia token currently held in `trivia_buffer`; the cleared tokens
/// are not passed to the builder.
fn ignore_holding_trivia(&mut self) {
self.trivia_buffer.clear();
}

/// Lexer Interface
/// Emit every held trivia token to the builder, in the order they were held, and
/// leave `trivia_buffer` empty.
fn extract_holding_trivia(&mut self) {
// `drain(..)` removes the entries while iterating, so nothing is emitted twice.
for (kind, text) in self.trivia_buffer.drain(..) {
// Convert the lexer token kind to a `SyntaxKind` first, then into the kind type
// that `builder.token` accepts.
let kind: SyntaxKind = kind.into();
self.builder.token(kind.into(), text);
}
}

Expand Down Expand Up @@ -500,6 +522,8 @@ impl<'a, S: BumpTokenStream<'a>> Parser<'a, S> {

self.builder.finish_node();

self.extract_holding_trivia();

!is_greedy
}

Expand Down Expand Up @@ -527,6 +551,8 @@ impl<'a, S: BumpTokenStream<'a>> Parser<'a, S> {
}

self.builder.finish_node();

self.extract_holding_trivia();
}

self.item_list(ParseScope::Environment);
Expand Down Expand Up @@ -620,6 +646,7 @@ impl<'a, S: BumpTokenStream<'a>> Parser<'a, S> {
f: impl FnOnce(&mut Parser<'a, S>) -> T,
) -> T {
if k_wrap_args!() {
this.ignore_holding_trivia();
this.builder.start_node(ClauseArgument.into());
let res = f(this);
this.builder.finish_node();
Expand All @@ -638,7 +665,13 @@ impl<'a, S: BumpTokenStream<'a>> Parser<'a, S> {
while let Some(kind) = self.peek() {
match kind {
// trivials
Token::LineBreak | Token::Whitespace | Token::LineComment => self.eat(),
Token::LineBreak | Token::Whitespace | Token::LineComment => {
if GREEDY {
self.eat();
} else {
self.hold_trivia();
}
}
// Argument matching stops at these tokens
// However, newline is also a command (with name `\`), hence this is different from
// mark and (`&`)
Expand Down
17 changes: 9 additions & 8 deletions crates/mitex-parser/tests/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,22 @@ mod ast {
mod arg_match;

#[cfg(test)]
mod command;
mod attachment;

#[cfg(test)]
mod environment;
mod block_comment;

#[cfg(test)]
mod attachment;
mod command;

#[cfg(test)]
mod environment;

#[cfg(test)]
mod left_right;

#[cfg(test)]
mod block_comment;
mod trivia;

/// Convenient function to launch/debug a test case
#[test]
Expand Down Expand Up @@ -76,11 +79,9 @@ mod ast {
||args
|||cmd
||||cmd-name("\\frac")
||||space'(" ")
||||args(word'("1"))
||||space'(" ")
||||args(word'("2"))
||||space'(" ")
|||space'(" ")
||underscore'("_")
||word'("3")
"###);
Expand Down Expand Up @@ -108,7 +109,7 @@ mod ast {
||||lbrace'("{")
||||text(word'("d"))
||||rbrace'("}")
||||space'(" ")
|space'(" ")
|text(word'("x"))
"###);
}
Expand Down
31 changes: 14 additions & 17 deletions crates/mitex-parser/tests/ast/arg_match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ fn split_char() {
root
|cmd
||cmd-name("\\frac")
||space'(" ")
||args(word'("a"))
||args(word'("b"))
|text(word'("cd"))
Expand All @@ -15,15 +14,13 @@ fn split_char() {
root
|cmd
||cmd-name("\\frac")
||space'(" ")
||args(word'("a"))
||args(word'("b"))
"###);
assert_debug_snapshot!(parse(r#"\frac a"#), @r###"
root
|cmd
||cmd-name("\\frac")
||space'(" ")
||args(word'("a"))
"###);
}
Expand Down Expand Up @@ -52,7 +49,7 @@ fn eat_regular_brace() {
||cmd-name("\\mathrm")
||args
|||cmd(cmd-name("\\lbrace"))
||space'(" ")
|space'(" ")
|text(word'("x"),space'(" "))
|cmd(cmd-name("\\rbrace"))
"###);
Expand Down Expand Up @@ -140,7 +137,7 @@ fn special_marks() {
|||||lbrace'("{")
|||||text(word'("1"))
|||||rbrace'("}")
|||||space'(" ")
||space'(" ")
||ampersand'("&")
||space'(" ")
||curly
Expand All @@ -166,9 +163,8 @@ fn special_marks() {
|||||lbrace'("{")
|||||text(word'("1"))
|||||rbrace'("}")
|||||space'(" ")
|||args(newline("\\\\"))
|||space'(" ")
||space'(" ")
||curly
|||lbrace'("{")
|||text(word'("2"))
Expand Down Expand Up @@ -220,7 +216,7 @@ fn special_marks_in_env() {
||||||lbrace'("{")
||||||text(word'("2"))
||||||rbrace'("}")
||||||space'(" ")
|||space'(" ")
|||newline("\\\\")
|||space'(" ")
|||cmd
Expand All @@ -235,9 +231,9 @@ fn special_marks_in_env() {
||||||lbrace'("{")
||||||text(word'("2"))
||||||rbrace'("}")
||||||space'(" ")
||||||br'("\n")
||||||space'(" ")
|||space'(" ")
|||br'("\n")
|||space'(" ")
"###);
assert_debug_snapshot!(parse(r#"
\left. \displaystyle \frac{1}{2} \\ \frac{1}{2} \right.
Expand All @@ -264,7 +260,7 @@ fn special_marks_in_env() {
|||||||lbrace'("{")
|||||||text(word'("2"))
|||||||rbrace'("}")
|||||||space'(" ")
||||space'(" ")
||||newline("\\\\")
||||space'(" ")
||||cmd
Expand All @@ -279,7 +275,7 @@ fn special_marks_in_env() {
|||||||lbrace'("{")
|||||||text(word'("2"))
|||||||rbrace'("}")
|||||||space'(" ")
||||space'(" ")
||clause-lr(cmd-name("\\right"),word'("."))
|br'("\n")
|space'(" ")
Expand Down Expand Up @@ -311,7 +307,7 @@ fn special_marks_in_env() {
|||||||||lbrace'("{")
|||||||||text(word'("2"))
|||||||||rbrace'("}")
|||||||||space'(" ")
||||||space'(" ")
||||||newline("\\\\")
||||||space'(" ")
||||||cmd
Expand All @@ -326,10 +322,12 @@ fn special_marks_in_env() {
|||||||||lbrace'("{")
|||||||||text(word'("2"))
|||||||||rbrace'("}")
|||||||||space'(" ")
||||||space'(" ")
||||rbracket'("]")
||args
|||curly(lbrace'("{"),rbrace'("}"),br'("\n"),space'(" "))
|||curly(lbrace'("{"),rbrace'("}"))
|br'("\n")
|space'(" ")
"###);
assert_debug_snapshot!(parse(r#"
\begin{matrix}a \over b \\ c\end{matrix}
Expand Down Expand Up @@ -361,7 +359,6 @@ fn sqrt_pattern() {
root
|cmd
||cmd-name("\\sqrt")
||space'(" ")
||args(word'("1"))
|text(word'("2"))
"###);
Expand Down
4 changes: 0 additions & 4 deletions crates/mitex-parser/tests/ast/attachment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ fn test_attachment_may_weird() {
||args
|||cmd
||||cmd-name("\\frac")
||||space'(" ")
||||args(word'("a"))
||||args(word'("b"))
||underscore'("_")
Expand All @@ -219,7 +218,6 @@ fn test_attachment_may_weird() {
||args
|||cmd
||||cmd-name("\\frac")
||||space'(" ")
||||args(word'("a"))
||underscore'("_")
||word'("c")
Expand All @@ -230,7 +228,6 @@ fn test_attachment_may_weird() {
root
|cmd
||cmd-name("\\frac")
||space'(" ")
||args
|||curly
||||lbrace'("{")
Expand All @@ -240,7 +237,6 @@ fn test_attachment_may_weird() {
|||||underscore'("_")
|||||word'("c")
||||rbrace'("}")
||||space'(" ")
||args(word'("b"))
"###);
}
2 changes: 1 addition & 1 deletion crates/mitex-parser/tests/ast/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ fn right_greedy() {
||||space'(" ")
||||text(word'("a"),space'(" "),word'("b"))
||rbrace'("}")
||space'(" ")
|space'(" ")
|text(word'("c"))
"###);
// Description: doesn't affect left side
Expand Down
2 changes: 1 addition & 1 deletion crates/mitex-parser/tests/ast/environment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ c & d
|||||lbrace'("{")
|||||text(word'("lc"))
|||||rbrace'("}")
|||||br'("\n")
||br'("\n")
||text(word'("a"),space'(" "))
||ampersand'("&")
||space'(" ")
Expand Down
Loading