Skip to content

Commit

Permalink
dev: merge json and rkyv command spec structs (#155)
Browse files Browse the repository at this point in the history
  • Loading branch information
Myriad-Dreamin committed Apr 18, 2024
1 parent b308f2f commit 76f63f8
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 161 deletions.
8 changes: 4 additions & 4 deletions crates/mitex-parser/src/arg_match.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use core::fmt;
use std::sync::Arc;

use crate::{argument_kind::ARGUMENT_KIND_TERM, ArgPattern};
use mitex_glob::glob_match_prefix;
use mitex_spec::GlobStr;

/// A matcher for arguments of a TeX command
/// It is created by `ArgMatcherBuilder`
Expand Down Expand Up @@ -30,7 +30,7 @@ pub enum ArgMatcher {
/// - t: it later matches a single term, e.g. `\sqrt[3]{a}` or `\sqrt{a}`
/// Note: any prefix of the glob is valid in the parse stage, hence you need to
/// check whether it is complete in a later stage.
Glob { re: Arc<str>, prefix: String },
Glob { re: GlobStr, prefix: String },
}

impl ArgMatcher {
Expand Down Expand Up @@ -69,7 +69,7 @@ impl ArgMatcher {
}
Self::Glob { ref re, prefix } => {
prefix.push(text);
glob_match_prefix(re, prefix)
glob_match_prefix(&re.0, prefix)
}
}
}
Expand All @@ -88,7 +88,7 @@ impl ArgMatcherBuilder {
pub fn start_match(&mut self, pat_meta: &ArgPattern) -> ArgMatcher {
match pat_meta {
ArgPattern::None => ArgMatcher::None,
ArgPattern::RangeLenTerm(_, mx) | ArgPattern::FixedLenTerm(mx) => {
ArgPattern::RangeLenTerm { max: mx, .. } | ArgPattern::FixedLenTerm { len: mx } => {
if mx == &0 {
ArgMatcher::None
} else {
Expand Down
9 changes: 6 additions & 3 deletions crates/mitex-parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,10 @@ impl<'a, S: TokenStream<'a>> Parser<'a, S> {
let cmd_name = self.lexer.peek_text().unwrap().strip_prefix('\\').unwrap();
let arg_shape = self.spec.get_cmd(cmd_name).map(|cmd| &cmd.args);
let right_pat = match arg_shape {
None | Some(ArgShape::Right(ArgPattern::None | ArgPattern::FixedLenTerm(0))) => {
None
| Some(ArgShape::Right {
pattern: ArgPattern::None | ArgPattern::FixedLenTerm { len: 0 },
}) => {
self.builder.start_node(ItemCmd.into());
self.eat();
self.builder.finish_node();
Expand All @@ -501,7 +504,7 @@ impl<'a, S: TokenStream<'a>> Parser<'a, S> {
self.builder.finish_node();
return false;
}
Some(ArgShape::Right(pattern)) => {
Some(ArgShape::Right { pattern }) => {
self.builder.start_node(ItemCmd.into());
pattern
}
Expand Down Expand Up @@ -546,7 +549,7 @@ impl<'a, S: TokenStream<'a>> Parser<'a, S> {

let arg_shape = self.spec.get_env(env_name);
let right_pat = match arg_shape.map(|cmd| &cmd.args) {
None | Some(ArgPattern::None | ArgPattern::FixedLenTerm(0)) => None,
None | Some(ArgPattern::None | ArgPattern::FixedLenTerm { len: 0 }) => None,
Some(pattern) => Some(pattern),
};
let searcher = right_pat.map(|right_pat| self.arg_matchers.start_match(right_pat));
Expand Down
4 changes: 2 additions & 2 deletions crates/mitex-spec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ harness = false

[dependencies]
rkyv = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"] }
serde = { workspace = true, features = ["derive"], optional = true }
serde_json.workspace = true
fxhash.workspace = true

[features]

rkyv = ["dep:rkyv", "rkyv/alloc", "rkyv/archive_le"]
rkyv-validation = ["dep:rkyv", "rkyv/validation"]
default = ["rkyv", "rkyv-validation"]
default = ["serde", "rkyv", "rkyv-validation"]

[dev-dependencies]
once_cell = "1"
Expand Down
8 changes: 4 additions & 4 deletions crates/mitex-spec/benches/spec_constructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
//! The trusted (unsafe) Rkyv parsing is not used yet.

use divan::{AllocProfiler, Bencher};
use mitex_spec::CommandSpec;
use mitex_spec::{ArgPattern, ArgShape, CmdShape, CommandSpec};

#[global_allocator]
static ALLOC: AllocProfiler = AllocProfiler::system();
Expand Down Expand Up @@ -46,9 +46,9 @@ fn prelude_100000() {

fn bench_json_deserialize(bencher: Bencher, n: i32) {
use mitex_spec::query;
const JSON_TEX_SYMBOL: query::CommandSpecItem = query::CommandSpecItem::Cmd(query::CmdShape {
args: query::ArgShape::Right {
pattern: query::ArgPattern::None,
const JSON_TEX_SYMBOL: query::CommandSpecItem = query::CommandSpecItem::Cmd(CmdShape {
args: ArgShape::Right {
pattern: ArgPattern::None,
},
alias: None,
});
Expand Down
76 changes: 72 additions & 4 deletions crates/mitex-spec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

use std::sync::Arc;

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

#[cfg(feature = "rkyv")]
use rkyv::{Archive, Deserialize as rDeser, Serialize as rSer};

Expand All @@ -27,6 +30,7 @@ pub use query::CommandSpecRepr as JsonCommandSpec;
/// [Command Syntax]: https://latexref.xyz/LaTeX-command-syntax.html
/// [Environment Syntax]: https://latexref.xyz/Environment-syntax.html
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub enum CommandSpecItem {
Expand All @@ -40,6 +44,7 @@ pub enum CommandSpecItem {

/// Command specification that contains a set of commands and environments.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct CommandSpecRepr {
Expand Down Expand Up @@ -129,6 +134,7 @@ impl CommandSpec {

/// Shape of a TeX command.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct CmdShape {
Expand All @@ -141,6 +147,7 @@ pub struct CmdShape {

/// Shape of a TeX environment.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct EnvShape {
Expand All @@ -165,6 +172,36 @@ pub mod argument_kind {
pub const ARGUMENT_KIND_PAREN: char = 'p';
}

/// A glob pattern held as a reference-counted string slice.
///
/// Cloning a `GlobStr` clones the inner [`Arc`], so the pattern text is
/// shared between the clones instead of being copied.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct GlobStr(pub Arc<str>);

impl From<&str> for GlobStr {
    /// Copies `pattern` into a newly allocated shared string.
    fn from(pattern: &str) -> Self {
        GlobStr(Arc::from(pattern))
    }
}
#[cfg(feature = "serde")]
mod glob_str_impl {
    //! Hand-written serde support for [`GlobStr`], which is read and written
    //! as a plain string.

    use super::GlobStr;
    use serde::{Deserialize, Deserializer, Serialize, Serializer};

    impl Serialize for GlobStr {
        /// Emits the pattern text as a string.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            serializer.serialize_str(&self.0)
        }
    }

    impl<'de> Deserialize<'de> for GlobStr {
        /// Reads an owned string and converts it into the shared pattern.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            let pattern = String::deserialize(deserializer)?;
            Ok(GlobStr(pattern.into()))
        }
    }
}

/// An efficient pattern used for argument matching.
///
/// There are four kinds of pattern. The most powerful one is
Expand Down Expand Up @@ -193,13 +230,16 @@ pub mod argument_kind {
/// Note: any prefix of the argument pattern is matched during the parse stage,
/// so you need to check whether it is complete in later stages.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub enum ArgPattern {
/// No arguments are passed, i.e. this is processed as a variable in Typst.
///
/// E.g. `\alpha` => `$alpha$`, where `\alpha` has an argument pattern of
/// `None`
#[cfg_attr(feature = "serde", serde(rename = "none"))]
None,
/// Fixed length pattern, equivalent to repeating `{,t}` `x` times
///
Expand All @@ -208,15 +248,26 @@ pub enum ArgPattern {
///
/// E.g. `1 \sum\limits` => `$1 limits(sum)$`, where `\limits` has an
/// argument pattern of `FixedLenTerm(1)`
FixedLenTerm(u8),
#[cfg_attr(feature = "serde", serde(rename = "fixed-len"))]
FixedLenTerm {
/// The number of arguments that should be matched
len: u8,
},
/// Range length pattern (matches as much as possible), equivalent to
/// repeating `t` `x` times, then repeating `{,t}` `y` times.
///
/// No example
RangeLenTerm(u8, u8),
#[cfg_attr(feature = "serde", serde(rename = "range-len"))]
RangeLenTerm {
/// The minimum number of arguments that should be matched
min: u8,
/// The maximum number of arguments that should be matched
max: u8,
},
/// Receives as many items as possible, equivalent to `*`.
///
/// E.g. \over, \displaystyle
#[cfg_attr(feature = "serde", serde(rename = "greedy"))]
Greedy,
/// The most powerful pattern, but slightly slow.
/// Note that the glob must accept the whole prefix of the input.
Expand All @@ -226,7 +277,8 @@ pub enum ArgPattern {
/// Description of the glob pattern:
/// - {,b}: first, it matches a bracket option, e.g. `\sqrt[3]`
/// - t: it then matches a single term, e.g. `\sqrt[3]{a}` or `\sqrt{a}`
Glob(Arc<str>),
#[cfg_attr(feature = "serde", serde(rename = "glob"))]
Glob(GlobStr),
}

// struct ArgShape(ArgPattern, Direction);
Expand All @@ -238,40 +290,56 @@ pub enum ArgPattern {
/// - `Direction::Left` with `ArgPattern::FixedLenTerm(1)`
/// - `Direction::Infix` with `ArgPattern::Greedy`
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub enum ArgShape {
/// A command that associates with the right side of items.
///
/// E.g. `\hat`
Right(ArgPattern),
#[cfg_attr(feature = "serde", serde(rename = "right"))]
Right {
/// The pattern to match the arguments
pattern: ArgPattern,
},
/// A command that associates with the left side of items, and with
/// `ArgPattern::FixedLenTerm(1)`.
///
/// E.g. `\limits`
#[cfg_attr(feature = "serde", serde(rename = "left1"))]
Left1,
/// A command that associates with both sides of items, and with
/// `ArgPattern::Greedy`, also known as infix operators.
///
/// E.g. `\over`
#[cfg_attr(feature = "serde", serde(rename = "infix-greedy"))]
InfixGreedy,
}

/// Describes how the body of an environment should be parsed.
///
/// With the `serde` feature enabled, the variant is identified by a `kind`
/// tag holding the kebab-case variant name (e.g. `is-math`).
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(tag = "kind", rename_all = "kebab-case")
)]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub enum ContextFeature {
    /// The content gets no special treatment.
    None,
    /// The content is parsed like a math environment.
    IsMath,
    /// The content is parsed like mat arguments.
    IsMatrix,
    /// The content is parsed like cases.
    IsCases,
    /// The content is parsed like itemize.
    IsItemize,
    /// The content is parsed like enumerate.
    IsEnumerate,
}
Loading

0 comments on commit 76f63f8

Please sign in to comment.