// atelier/bake/lib/syn/mod.rs
// 2025-01-16 20:05:09 -05:00
//
// 239 lines
// 5 KiB
// Rust

use crate::wald::{NodeRef, NodeStorage, Text};
/// Tag identifying what a syntax-tree node represents.
///
/// The enum is `#[repr(u16)]` with implicit discriminants, so variants are
/// numbered consecutively from 0 in declaration order. The `u16` conversions
/// rely on that numbering: do not reorder variants or assign explicit
/// discriminants without revisiting them.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
#[repr(u16)]
pub enum NodeKind {
    // Tokens
    // These are the "leaf" nodes
    /// An opening parenthesis, `(`.
    LPar,
    /// A closing parenthesis, `)`.
    RPar,
    /// A run of ASCII whitespace.
    Whitespace,
    /// A run of bytes containing neither whitespace nor parentheses.
    Word,

    // Composite
    /// A generic node that wraps multiple tokens. This is useful for e.g.
    /// attaching whitespace to a Word.
    Multi,
    /// A parenthesized list: its tokens and the nodes nested between them.
    List,
    /// The node under which all top-level nodes of a parse are collected.
    Root,

    // Error kinds are declared between `_ErrFirst` and `_ErrLast`, so a tag
    // can be classified as an error with a single range check.
    #[doc(hidden)]
    _ErrFirst,
    /// The input ended while a list was still open.
    ErrUnexpectedEOF,
    #[doc(hidden)]
    _ErrLast,

    // One past the last valid discriminant; used as the exclusive upper bound
    // when validating a raw `u16` tag.
    #[doc(hidden)]
    _Last,
}
impl From<u16> for NodeKind {
    /// Reinterprets a raw `u16` tag as the [`NodeKind`] with that discriminant.
    ///
    /// # Panics
    ///
    /// Panics if `value` is not strictly below `NodeKind::_Last as u16`, i.e.
    /// if it does not name a declared variant.
    fn from(value: u16) -> Self {
        let limit = NodeKind::_Last as u16;
        assert!(
            value < limit,
            "invalid NodeKind tag {value}: expected a value below {limit}"
        );
        // SAFETY: `NodeKind` is `#[repr(u16)]` and declares no explicit
        // discriminants, so its variants occupy every value in `0..=_Last`.
        // The assertion above guarantees `value` is inside that range and is
        // therefore a valid bit pattern for the enum.
        unsafe { core::mem::transmute::<u16, NodeKind>(value) }
    }
}
impl From<NodeKind> for u16 {
    /// Returns the `u16` discriminant of `kind`, i.e. the raw tag for that
    /// node kind.
    fn from(kind: NodeKind) -> u16 {
        kind as u16
    }
}
/// Reports whether byte `b` terminates a word: either parenthesis or any
/// ASCII whitespace byte.
pub fn breaks_word(b: u8) -> bool {
    match b {
        b'(' | b')' => true,
        other => other.is_ascii_whitespace(),
    }
}
/// Cursor-based parser that reads source text and records the nodes it
/// recognises in a [`NodeStorage`].
pub struct Parser<'a> {
    /// Storage that receives every node the parser creates.
    nodes: &'a mut NodeStorage,
    /// The complete source text being parsed.
    input: &'a str,
    /// Byte offset into `input` of the next unread byte.
    at: usize,
}
impl<'a> Parser<'a> {
pub fn new(nodes: &'a mut NodeStorage, input: &'a str) -> Self {
Self {
nodes,
input,
at: 0,
}
}
pub fn head(&self) -> Option<u8> {
self.input[self.at..].bytes().next()
}
pub fn skip(&mut self, amt: usize) {
self.at += amt;
}
pub fn skip_while(&mut self, mut f: impl FnMut(u8) -> bool) {
while let Some(head) = self.head() {
if !f(head) {
break;
}
self.skip(1);
}
}
pub fn parse_whitespace(&mut self) -> Option<NodeRef> {
let start = self.at;
self.skip_while(|b| b.is_ascii_whitespace());
let end = self.at;
let span = start..end;
if span.is_empty() {
return None;
}
let node = self.nodes.new_node(NodeKind::Whitespace.into());
self.nodes.set_text(node, Text::Span(span));
Some(node)
}
pub fn parse_word(&mut self, whitespace: Option<NodeRef>) -> NodeRef {
let start = self.at;
self.skip_while(|b| !breaks_word(b));
let end = self.at;
let node_word = self.nodes.new_node(NodeKind::Word.into());
self.nodes.set_text(node_word, Text::Span(start..end));
let node_multi = self.nodes.new_node(NodeKind::Multi.into());
if let Some(node_whitespace) = whitespace {
self.nodes.append_child(node_multi, node_whitespace);
}
self.nodes.append_child(node_multi, node_word);
node_multi
}
pub fn parse_list(&mut self, whitespace: Option<NodeRef>) -> NodeRef {
assert!(self.head().unwrap() == b'(');
let node_list = self.nodes.new_node(NodeKind::List.into());
if let Some(node_whitespace) = whitespace {
self.nodes.append_child(node_list, node_whitespace);
}
// LPar
let start = self.at;
self.skip(1);
let end = self.at;
let node_lpar = self.nodes.new_node(NodeKind::LPar.into());
self.nodes.set_text(node_lpar, Text::Span(start..end));
self.nodes.append_child(node_list, node_lpar);
loop {
let head = match self.head() {
None => {
let node_err =
self.nodes.new_node(NodeKind::ErrUnexpectedEOF.into());
self.nodes.append_child(node_list, node_err);
break;
},
Some(h) => h,
};
if head == b')' {
let start = self.at;
self.skip(1);
let end = self.at;
let node_rpar = self.nodes.new_node(NodeKind::RPar.into());
self.nodes.set_text(node_rpar, Text::Span(start..end));
self.nodes.append_child(node_list, node_rpar);
break;
}
let node_child = self.parse_one().unwrap();
self.nodes.append_child(node_list, node_child);
}
node_list
}
pub fn parse_one(&mut self) -> Option<NodeRef> {
let whitespace = self.parse_whitespace();
let head = self.head()?;
let node = match head {
b'(' => self.parse_list(whitespace),
_ => self.parse_word(whitespace),
};
Some(node)
}
pub fn parse(&mut self) {
assert!(self.nodes.nodes().next().is_none());
let node_root = self.nodes.new_node(NodeKind::Root.into());
while let Some(node) = self.parse_one() {
self.nodes.append_child(node_root, node);
}
}
}
/// Parses `input` in full, recording the resulting nodes in `storage`.
///
/// Convenience wrapper that builds a [`Parser`] over the two arguments and
/// runs [`Parser::parse`] on it.
pub fn parse(storage: &mut NodeStorage, input: &str) {
    Parser::new(storage, input).parse();
}
#[cfg(test)]
mod test {
    use crate::{
        syn::{parse, NodeKind},
        wald::NodeStorage,
    };

    /// A well-formed list parses without any error node and prints back
    /// exactly as it was written.
    #[test]
    fn simple_print_input_exactly() {
        let source = r#"(+ 3 4)"#;
        let mut tree = NodeStorage::new();
        parse(&mut tree, source);

        // No node may carry a tag from the reserved error range.
        assert!(tree.nodes().all(|node| {
            !(NodeKind::_ErrFirst.into()..NodeKind::_ErrLast.into()).contains(&tree.tag(node))
        }));

        let root = tree
            .nodes()
            .find(|&node| tree.tag(node) == NodeKind::Root.into())
            .unwrap();
        let printed = format!("{}", tree.display_syntax(source, root));
        assert_eq!(source, printed);
    }

    /// Parent, sibling and child links connect the nodes for `3` and `4`
    /// inside the same list.
    #[test]
    fn traversal() {
        let source = r#"(+ 3 4)"#;
        let mut tree = NodeStorage::new();
        parse(&mut tree, source);

        // Start from the word `3` and climb to the node that wraps it.
        let word_three = tree
            .nodes()
            .find(|&node| tree.text(node).as_str(source) == "3")
            .unwrap();
        let multi_three = tree.parent(word_three).unwrap();

        // Its next sibling wraps the word `4`.
        let multi_four = tree.sibling_next(multi_three).unwrap();
        let word_four = tree
            .children(multi_four)
            .find(|&node| tree.tag(node) == NodeKind::Word.into())
            .unwrap();
        assert_eq!(tree.text(word_four).as_str(source), "4");

        // Both wrappers hang off the same parent.
        let list = tree.parent(multi_three).unwrap();
        assert_eq!(list, tree.parent(multi_four).unwrap());
    }
}