newt/src/internal/parse.rs
2024-05-19 01:52:05 -04:00

306 lines
6.5 KiB
Rust

/// A dotted value path such as `.user.name`, stored as its individual segments.
///
/// The bare path `.` is represented by an empty vector.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ValuePath(pub Vec<String>);

/// An expression inside a template block. Currently this is always a [`ValuePath`].
pub type Expr = ValuePath;

/// A conditional block: `then` is used when `expr` holds, otherwise the optional `else_`.
///
/// NOTE(review): the parser visible in this file does not produce this node yet.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct If {
    /// The expression the block is conditional on.
    pub expr: Expr,
    /// Nodes of the main branch.
    pub then: Ast,
    /// Nodes of the alternative branch, if one was given.
    pub else_: Option<Ast>,
}

/// A loop block, written `{for .path}…{/for}` in a template.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct For {
    /// The path named in the opening tag.
    pub expr: Expr,
    /// Nodes found between the opening tag and the matching `{/for}`.
    pub body: Ast,
}

/// A block whose `body` is associated with the presence of `expr`.
///
/// NOTE(review): the parser visible in this file does not produce this node yet,
/// so its exact semantics should be confirmed against the renderer.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Has {
    /// The expression being tested.
    pub expr: Box<Expr>,
    /// Nodes contained in the block.
    pub body: Ast,
}

/// A print block, written `{.path}` in a template.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Print(pub Expr);

/// One node of a parsed template.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum AstKind {
    /// Literal text copied verbatim from the template.
    Text(String),
    /// A `{.path}` block.
    Print(Print),
    /// A [`Has`] block.
    Has(Has),
    /// An [`If`] block.
    If(If),
    /// A `{for .path}…{/for}` block.
    For(For),
}

/// A parsed template (or block body): a sequence of nodes in source order.
pub type Ast = Vec<AstKind>;
/// Cursor-based parser state over a single template string.
///
/// All offsets are byte offsets into `template`.
pub struct ParseState<'a> {
    /// The complete template text being parsed.
    template: &'a str,
    /// Position of the next unread byte.
    current_byte_offset: usize,
    /// Position at which the input is considered exhausted.
    last_byte_offset: usize,
    /// Start (inclusive) of the literal-text span that has not been pushed to `ast` yet.
    speculative_text_starts_at: usize,
    /// End (exclusive) of the literal-text span that has not been pushed to `ast` yet.
    speculative_text_ends_at: usize,
    /// Nodes produced so far.
    ast: Ast,
}
impl<'a> ParseState<'a> {
    /// Returns `true` once the cursor has reached (or passed) the end of the input.
    pub fn out_of_input(&self) -> bool {
        self.current_byte_offset >= self.last_byte_offset
    }

    /// Returns `true` while there is still unread input.
    pub fn has_input(&self) -> bool {
        !self.out_of_input()
    }

    /// Returns the unread tail of the template, starting at the cursor.
    pub fn remaining(&self) -> &str {
        &self.template[self.current_byte_offset..]
    }

    /// Pushes the pending literal-text span onto the AST as a `Text` node.
    ///
    /// Does nothing when the span is empty. The span itself is left untouched;
    /// callers reset it with [`Self::new_empty_text`].
    pub fn add_current_text_to_ast(&mut self) {
        let template = self.template;
        let text = &template[self.speculative_text_starts_at..self.speculative_text_ends_at];
        if !text.is_empty() {
            self.ast.push(AstKind::Text(text.to_string()));
        }
    }

    /// Marks the cursor position as the (tentative) end of the pending literal text.
    pub fn text_might_end(&mut self) {
        self.speculative_text_ends_at = self.current_byte_offset;
    }

    /// Parses all remaining input, then flushes any trailing literal text.
    pub fn parse(&mut self) {
        while self.has_input() {
            self.parse_one();
        }
        self.text_might_end();
        self.add_current_text_to_ast();
    }

    /// Consumes either one block attempt (when the cursor is on `{`) or one
    /// character of literal text.
    ///
    /// # Panics
    ///
    /// Panics if there is no input left, or if the cursor did not move.
    pub fn parse_one(&mut self) {
        assert!(self.has_input());
        let started_at = self.current_byte_offset;
        let next_char = self.remaining().chars().next();
        match next_char {
            Some('{') => {
                self.text_might_end();
                self.maybe_parse_block();
            }
            // Step over the whole character, not a single byte, so the cursor
            // always stays on a UTF-8 character boundary.
            Some(ch) => self.current_byte_offset += ch.len_utf8(),
            None => {}
        }
        if started_at == self.current_byte_offset {
            panic!("Parser made no progress");
        }
    }

    /// Tries to parse a block starting at `{`.
    ///
    /// The `{` and any whitespace after it are always consumed. If what follows
    /// is not a recognised block, nothing is pushed and the consumed characters
    /// stay part of the pending literal text.
    ///
    /// # Panics
    ///
    /// Panics if the cursor is not on `{`.
    pub fn maybe_parse_block(&mut self) {
        assert!(self.remaining().starts_with('{'));
        self.current_byte_offset += '{'.len_utf8();
        self.skip_whitespace();
        if self.remaining().starts_with('.') {
            self.maybe_parse_print();
        } else if self.remaining().starts_with("for") {
            self.maybe_parse_for();
        }
    }

    /// Tries to parse the rest of a print block (`.path}`) and push a `Print` node.
    ///
    /// On failure the cursor stays wherever parsing stopped and no node is pushed.
    ///
    /// # Panics
    ///
    /// Panics if the cursor is not on `.`.
    pub fn maybe_parse_print(&mut self) {
        assert!(self.remaining().starts_with('.'));
        let path = match self.parse_path() {
            Some(path) => path,
            None => return,
        };
        self.skip_whitespace();
        if !self.consume("}") {
            return;
        }
        self.push_node_and_text(AstKind::Print(Print(path)));
        self.new_empty_text();
    }

    /// Tries to parse the rest of a loop block (`for .path}…{/for}`) and push a
    /// `For` node.
    ///
    /// The body is parsed by a nested `ParseState` that stops at the first
    /// `{/for}` it encounters at a node boundary. If the opening tag is malformed
    /// (for example `{for}` or `{format}`) or the closing tag is missing, no node
    /// is pushed and the consumed input remains literal text.
    ///
    /// # Panics
    ///
    /// Panics if the cursor is not on `for`.
    pub fn maybe_parse_for(&mut self) {
        assert!(self.remaining().starts_with("for"));
        self.current_byte_offset += "for".len();
        self.skip_whitespace();
        // `parse_path` yields `None` when no `.path` follows the keyword.
        let path = match self.parse_path() {
            Some(path) => path,
            None => return,
        };
        self.skip_whitespace();
        if !self.consume("}") {
            return;
        }

        let mut inner = ParseState {
            template: self.template,
            last_byte_offset: self.last_byte_offset,
            speculative_text_starts_at: self.current_byte_offset,
            speculative_text_ends_at: self.current_byte_offset,
            current_byte_offset: self.current_byte_offset,
            ast: Ast::new(),
        };
        while inner.has_input() && !inner.remaining().starts_with("{/for}") {
            inner.parse_one();
        }
        inner.text_might_end();
        inner.add_current_text_to_ast();

        // Resume where the body parser stopped, whether or not it found the end tag.
        self.current_byte_offset = inner.current_byte_offset;
        if !self.consume("{/for}") {
            return;
        }
        let body = inner.ast;
        self.push_node_and_text(AstKind::For(For { expr: path, body }));
        self.new_empty_text();
    }

    /// Parses a dotted path starting at the cursor.
    ///
    /// The path runs from the leading `.` up to (not including) the next `}` or
    /// whitespace character, or to the end of the input, and is split on `.`.
    /// Returns `None`, without moving the cursor, when the cursor is not on `.`.
    pub fn parse_path(&mut self) -> Option<ValuePath> {
        if !self.remaining().starts_with('.') {
            return None;
        }
        self.current_byte_offset += '.'.len_utf8();
        let rest = self.remaining();
        let end = rest
            .find(|ch: char| ch == '}' || ch.is_whitespace())
            .unwrap_or(rest.len());
        let segments: Vec<String> = rest[..end]
            .split_terminator('.')
            .map(String::from)
            .collect();
        self.current_byte_offset += end;
        Some(ValuePath(segments))
    }

    /// Flushes the pending literal text, then appends `node`.
    pub fn push_node_and_text(&mut self, node: AstKind) {
        self.add_current_text_to_ast();
        self.ast.push(node);
    }

    /// Starts a new, empty literal-text span at the cursor.
    pub fn new_empty_text(&mut self) {
        self.speculative_text_starts_at = self.current_byte_offset;
        self.speculative_text_ends_at = self.speculative_text_starts_at;
    }

    /// Advances the cursor past any whitespace characters.
    pub fn skip_whitespace(&mut self) {
        let rest = self.remaining();
        let skipped = rest.len() - rest.trim_start().len();
        self.current_byte_offset += skipped;
    }

    /// Advances past `token` and returns `true` if the unread input starts with
    /// it; otherwise leaves the cursor alone and returns `false`.
    fn consume(&mut self, token: &str) -> bool {
        let matched = self.remaining().starts_with(token);
        if matched {
            self.current_byte_offset += token.len();
        }
        matched
    }
}
/// Parses `template` from start to end and returns the resulting nodes.
///
/// Input that does not form a recognised block is kept as literal `Text`.
pub fn parse(template: &str) -> Ast {
    let input_len = template.len();
    let mut parser = ParseState {
        template,
        current_byte_offset: 0,
        last_byte_offset: input_len,
        speculative_text_starts_at: 0,
        speculative_text_ends_at: 0,
        ast: Ast::new(),
    };
    parser.parse();
    // Only the accumulated nodes are of interest to the caller.
    let ParseState { ast, .. } = parser;
    ast
}
#[cfg(test)]
mod test {
    use super::{parse, Ast, AstKind, For, Print, ValuePath};

    /// Builds a `ValuePath` from its segments.
    fn path(segments: &[&str]) -> ValuePath {
        ValuePath(segments.iter().map(|segment| segment.to_string()).collect())
    }

    /// Builds a `Text` node.
    fn text_node(literal: &str) -> AstKind {
        AstKind::Text(literal.to_string())
    }

    /// Builds a `Print` node for the given path segments.
    fn print_node(segments: &[&str]) -> AstKind {
        AstKind::Print(Print(path(segments)))
    }

    /// Builds a `For` node over the given path segments with the given body.
    fn for_node(segments: &[&str], body: Ast) -> AstKind {
        AstKind::For(For {
            expr: path(segments),
            body,
        })
    }

    /// Parses `tmpl` and asserts that the result equals `rhs`.
    fn go(tmpl: &str, rhs: Ast) {
        let ast = parse(tmpl);
        assert_eq!(ast, rhs);
    }

    #[test]
    fn parse_print_only() {
        go("{.}", vec![print_node(&[])]);
    }

    #[test]
    fn parse_print() {
        go(
            "hello, {.}!",
            vec![text_node("hello, "), print_node(&[]), text_node("!")],
        );
    }

    #[test]
    fn parse_print_lookup() {
        go(
            "hello, {.name}!",
            vec![text_node("hello, "), print_node(&["name"]), text_node("!")],
        );
    }

    #[test]
    fn parse_for() {
        go("{for .}{.}{/for}", vec![for_node(&[], vec![print_node(&[])])]);
    }

    #[test]
    fn parse_for_2() {
        go(
            "{for .}{.}, {/for}",
            vec![for_node(&[], vec![print_node(&[]), text_node(", ")])],
        );
    }

    #[test]
    fn parse_for_nested() {
        let innermost = vec![print_node(&[]), text_node(",")];
        go(
            "{for .items}{for .names}{.},{/for}{/for}",
            vec![for_node(&["items"], vec![for_node(&["names"], innermost)])],
        );
    }
}