use crate::wald::{NodeRef, NodeStorage, Text}; #[derive(Copy, Clone, Eq, PartialEq)] #[repr(u16)] pub enum NodeKind { // Tokens // These are the "leaf" nodes LPar, RPar, Whitespace, Word, // Composite /// A generic node that wraps multiple tokens. This is useful for e.g. /// attaching whitespace to a Word. Multi, List, Root, #[doc(hidden)] _ErrFirst, ErrUnexpectedEOF, #[doc(hidden)] _ErrLast, #[doc(hidden)] _Last, } impl From for NodeKind { fn from(value: u16) -> Self { assert!(value < NodeKind::_Last as u16); unsafe { core::mem::transmute::(value) } } } impl From for u16 { fn from(val: NodeKind) -> Self { val as u16 } } pub fn breaks_word(b: u8) -> bool { b.is_ascii_whitespace() || b == b')' || b == b'(' } pub struct Parser<'a> { input: &'a str, at: usize, nodes: &'a mut NodeStorage, } impl<'a> Parser<'a> { pub fn new(nodes: &'a mut NodeStorage, input: &'a str) -> Self { Self { nodes, input, at: 0, } } pub fn head(&self) -> Option { self.input[self.at..].bytes().next() } pub fn skip(&mut self, amt: usize) { self.at += amt; } pub fn skip_while(&mut self, mut f: impl FnMut(u8) -> bool) { while let Some(head) = self.head() { if !f(head) { break; } self.skip(1); } } pub fn parse_whitespace(&mut self) -> Option { let start = self.at; self.skip_while(|b| b.is_ascii_whitespace()); let end = self.at; let span = start..end; if span.is_empty() { return None; } let node = self.nodes.new_node(NodeKind::Whitespace.into()); self.nodes.set_text(node, Text::Span(span)); Some(node) } pub fn parse_word(&mut self, whitespace: Option) -> NodeRef { let start = self.at; self.skip_while(|b| !breaks_word(b)); let end = self.at; let node_word = self.nodes.new_node(NodeKind::Word.into()); self.nodes.set_text(node_word, Text::Span(start..end)); let node_multi = self.nodes.new_node(NodeKind::Multi.into()); if let Some(node_whitespace) = whitespace { self.nodes.append_child(node_multi, node_whitespace); } self.nodes.append_child(node_multi, node_word); node_multi } pub fn parse_list(&mut self, whitespace: Option) -> NodeRef { assert!(self.head().unwrap() == b'('); let node_list = self.nodes.new_node(NodeKind::List.into()); if let Some(node_whitespace) = whitespace { self.nodes.append_child(node_list, node_whitespace); } // LPar let start = self.at; self.skip(1); let end = self.at; let node_lpar = self.nodes.new_node(NodeKind::LPar.into()); self.nodes.set_text(node_lpar, Text::Span(start..end)); self.nodes.append_child(node_list, node_lpar); loop { let head = match self.head() { None => { let node_err = self.nodes.new_node(NodeKind::ErrUnexpectedEOF.into()); self.nodes.append_child(node_list, node_err); break; }, Some(h) => h, }; if head == b')' { let start = self.at; self.skip(1); let end = self.at; let node_rpar = self.nodes.new_node(NodeKind::RPar.into()); self.nodes.set_text(node_rpar, Text::Span(start..end)); self.nodes.append_child(node_list, node_rpar); break; } let node_child = self.parse_one().unwrap(); self.nodes.append_child(node_list, node_child); } node_list } pub fn parse_one(&mut self) -> Option { let whitespace = self.parse_whitespace(); let head = self.head()?; let node = match head { b'(' => self.parse_list(whitespace), _ => self.parse_word(whitespace), }; Some(node) } pub fn parse(&mut self) { assert!(self.nodes.nodes().next().is_none()); let node_root = self.nodes.new_node(NodeKind::Root.into()); while let Some(node) = self.parse_one() { self.nodes.append_child(node_root, node); } } } pub fn parse(storage: &mut NodeStorage, input: &str) { let mut parser = Parser::new(storage, input); parser.parse(); } #[cfg(test)] mod test { use crate::{ syn::{parse, NodeKind}, wald::NodeStorage, }; #[test] fn simple_print_input_exactly() { let input = r#"(+ 3 4)"#; let mut storage = NodeStorage::new(); parse(&mut storage, input); assert!(!storage .nodes() .any(|n| (NodeKind::_ErrFirst.into()..NodeKind::_ErrLast.into()) .contains(&storage.tag(n)))); let root = storage .nodes() .find(|&n| storage.tag(n) == NodeKind::Root.into()) .unwrap(); let display = storage.display_syntax(input, root); let output = format!("{display}"); assert_eq!(input, output); } #[test] fn traversal() { let input = r#"(+ 3 4)"#; let mut nodes = NodeStorage::new(); parse(&mut nodes, input); let three = nodes .nodes() .find(|&n| nodes.text(n).as_str(input) == "3") .unwrap(); let three_multi = nodes.parent(three).unwrap(); let four_multi = nodes.sibling_next(three_multi).unwrap(); let four = nodes .children(four_multi) .find(|&n| nodes.tag(n) == NodeKind::Word.into()) .unwrap(); assert_eq!(nodes.text(four).as_str(input), "4"); let list = nodes.parent(three_multi).unwrap(); assert_eq!(list, nodes.parent(four_multi).unwrap()); } }