239 lines
5 KiB
Rust
239 lines
5 KiB
Rust
use crate::wald::{NodeRef, NodeStorage, Text};
|
|
|
|
/// Tag describing what a syntax-tree node represents.
///
/// The enum is `#[repr(u16)]` and relies on implicit, consecutive
/// discriminants starting at zero: the `From<u16>` conversion in this file
/// accepts every value below [`NodeKind::_Last`]. Variant order is therefore
/// significant; new variants must be inserted before `_Last` (and error kinds
/// between `_ErrFirst` and `_ErrLast`).
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(u16)]
pub enum NodeKind {
    // Tokens
    // These are the "leaf" nodes
    /// An opening parenthesis, `(`.
    LPar,
    /// A closing parenthesis, `)`.
    RPar,
    /// A run of ASCII whitespace.
    Whitespace,
    /// A run of bytes that are neither whitespace nor parentheses.
    Word,

    // Composite
    /// A generic node that wraps multiple tokens. This is useful for e.g.
    /// attaching whitespace to a Word.
    Multi,
    /// A parenthesised list: optional leading whitespace, `(`, the elements
    /// and, when present in the input, the closing `)`.
    List,
    /// The node under which all top-level elements are collected.
    Root,

    /// Marker for the start of the error kinds; not a real node kind.
    #[doc(hidden)]
    _ErrFirst,
    /// The input ended while a list was still open.
    ErrUnexpectedEOF,
    /// Marker for the end of the error kinds; not a real node kind.
    #[doc(hidden)]
    _ErrLast,
    /// Marker one past the last valid discriminant; not a real node kind.
    #[doc(hidden)]
    _Last,
}
|
|
impl From<u16> for NodeKind {
|
|
fn from(value: u16) -> Self {
|
|
assert!(value < NodeKind::_Last as u16);
|
|
|
|
unsafe { core::mem::transmute::<u16, NodeKind>(value) }
|
|
}
|
|
}
|
|
|
|
impl From<NodeKind> for u16 {
    /// Yields the raw `u16` discriminant of `val`.
    fn from(val: NodeKind) -> u16 {
        let raw = val as u16;
        raw
    }
}
|
|
|
|
/// Reports whether byte `b` terminates a word: either parenthesis or any
/// ASCII whitespace byte.
pub fn breaks_word(b: u8) -> bool {
    matches!(b, b'(' | b')') || b.is_ascii_whitespace()
}
|
|
|
|
pub struct Parser<'a> {
|
|
input: &'a str,
|
|
at: usize,
|
|
nodes: &'a mut NodeStorage,
|
|
}
|
|
|
|
impl<'a> Parser<'a> {
|
|
pub fn new(nodes: &'a mut NodeStorage, input: &'a str) -> Self {
|
|
Self {
|
|
nodes,
|
|
input,
|
|
at: 0,
|
|
}
|
|
}
|
|
|
|
pub fn head(&self) -> Option<u8> {
|
|
self.input[self.at..].bytes().next()
|
|
}
|
|
|
|
pub fn skip(&mut self, amt: usize) {
|
|
self.at += amt;
|
|
}
|
|
|
|
pub fn skip_while(&mut self, mut f: impl FnMut(u8) -> bool) {
|
|
while let Some(head) = self.head() {
|
|
if !f(head) {
|
|
break;
|
|
}
|
|
|
|
self.skip(1);
|
|
}
|
|
}
|
|
|
|
pub fn parse_whitespace(&mut self) -> Option<NodeRef> {
|
|
let start = self.at;
|
|
self.skip_while(|b| b.is_ascii_whitespace());
|
|
let end = self.at;
|
|
let span = start..end;
|
|
if span.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
let node = self.nodes.new_node(NodeKind::Whitespace.into());
|
|
self.nodes.set_text(node, Text::Span(span));
|
|
|
|
Some(node)
|
|
}
|
|
|
|
pub fn parse_word(&mut self, whitespace: Option<NodeRef>) -> NodeRef {
|
|
let start = self.at;
|
|
self.skip_while(|b| !breaks_word(b));
|
|
let end = self.at;
|
|
|
|
let node_word = self.nodes.new_node(NodeKind::Word.into());
|
|
self.nodes.set_text(node_word, Text::Span(start..end));
|
|
|
|
let node_multi = self.nodes.new_node(NodeKind::Multi.into());
|
|
if let Some(node_whitespace) = whitespace {
|
|
self.nodes.append_child(node_multi, node_whitespace);
|
|
}
|
|
self.nodes.append_child(node_multi, node_word);
|
|
|
|
node_multi
|
|
}
|
|
|
|
pub fn parse_list(&mut self, whitespace: Option<NodeRef>) -> NodeRef {
|
|
assert!(self.head().unwrap() == b'(');
|
|
|
|
let node_list = self.nodes.new_node(NodeKind::List.into());
|
|
if let Some(node_whitespace) = whitespace {
|
|
self.nodes.append_child(node_list, node_whitespace);
|
|
}
|
|
|
|
// LPar
|
|
let start = self.at;
|
|
self.skip(1);
|
|
let end = self.at;
|
|
|
|
let node_lpar = self.nodes.new_node(NodeKind::LPar.into());
|
|
self.nodes.set_text(node_lpar, Text::Span(start..end));
|
|
self.nodes.append_child(node_list, node_lpar);
|
|
|
|
loop {
|
|
let head = match self.head() {
|
|
None => {
|
|
let node_err =
|
|
self.nodes.new_node(NodeKind::ErrUnexpectedEOF.into());
|
|
self.nodes.append_child(node_list, node_err);
|
|
|
|
break;
|
|
},
|
|
Some(h) => h,
|
|
};
|
|
|
|
if head == b')' {
|
|
let start = self.at;
|
|
self.skip(1);
|
|
let end = self.at;
|
|
|
|
let node_rpar = self.nodes.new_node(NodeKind::RPar.into());
|
|
self.nodes.set_text(node_rpar, Text::Span(start..end));
|
|
self.nodes.append_child(node_list, node_rpar);
|
|
|
|
break;
|
|
}
|
|
|
|
let node_child = self.parse_one().unwrap();
|
|
self.nodes.append_child(node_list, node_child);
|
|
}
|
|
|
|
node_list
|
|
}
|
|
|
|
pub fn parse_one(&mut self) -> Option<NodeRef> {
|
|
let whitespace = self.parse_whitespace();
|
|
|
|
let head = self.head()?;
|
|
let node = match head {
|
|
b'(' => self.parse_list(whitespace),
|
|
_ => self.parse_word(whitespace),
|
|
};
|
|
|
|
Some(node)
|
|
}
|
|
|
|
pub fn parse(&mut self) {
|
|
assert!(self.nodes.nodes().next().is_none());
|
|
|
|
let node_root = self.nodes.new_node(NodeKind::Root.into());
|
|
|
|
while let Some(node) = self.parse_one() {
|
|
self.nodes.append_child(node_root, node);
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn parse(storage: &mut NodeStorage, input: &str) {
|
|
let mut parser = Parser::new(storage, input);
|
|
parser.parse();
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use crate::{
        syn::{parse, NodeKind},
        wald::NodeStorage,
    };

    /// Parsing a well-formed list produces no error-tagged nodes, and
    /// displaying the tree from its root reproduces the input text exactly.
    #[test]
    fn simple_print_input_exactly() {
        let input = r#"(+ 3 4)"#;
        let mut storage = NodeStorage::new();
        parse(&mut storage, input);

        // No node may carry a tag inside the error-kind range.
        let has_error_node = storage.nodes().any(|node| {
            let tag = storage.tag(node);
            (NodeKind::_ErrFirst.into()..NodeKind::_ErrLast.into()).contains(&tag)
        });
        assert!(!has_error_node);

        let root = storage
            .nodes()
            .find(|&node| storage.tag(node) == NodeKind::Root.into())
            .unwrap();

        let output = storage.display_syntax(input, root).to_string();
        assert_eq!(input, output);
    }

    /// Walks from the word `3` to its parent, over to the next sibling, and
    /// down to the word `4`, checking that both wrappers share one parent.
    #[test]
    fn traversal() {
        let input = r#"(+ 3 4)"#;
        let mut nodes = NodeStorage::new();
        parse(&mut nodes, input);

        let three = nodes
            .nodes()
            .find(|&node| nodes.text(node).as_str(input) == "3")
            .unwrap();
        let three_multi = nodes.parent(three).unwrap();

        let four_multi = nodes.sibling_next(three_multi).unwrap();
        let four = nodes
            .children(four_multi)
            .find(|&child| nodes.tag(child) == NodeKind::Word.into())
            .unwrap();
        assert_eq!(nodes.text(four).as_str(input), "4");

        let list = nodes.parent(three_multi).unwrap();
        assert_eq!(list, nodes.parent(four_multi).unwrap());
    }
}
|