.
This commit is contained in:
parent
ce38e751ac
commit
4f589a07af
|
|
@ -1,3 +1,6 @@
|
|||
mod syn;
|
||||
mod wald;
|
||||
|
||||
pub type Result<T> = core::result::Result<T, Error>;
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
|
|||
23
bake/lib/syn/ast.rs
Normal file
23
bake/lib/syn/ast.rs
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
use super::{
|
||||
cst::{self, NodeKind},
|
||||
tok::{self, TokenKind},
|
||||
};
|
||||
|
||||
pub struct Atom<'a>(&'a cst::Node);
|
||||
impl cst::Node {
|
||||
pub fn as_atom(&self) -> Option<Atom> {
|
||||
if *self.kind() != NodeKind::Atom {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Atom(self))
|
||||
}
|
||||
}
|
||||
impl Atom<'_> {
|
||||
pub fn value(&self) -> &tok::Atom {
|
||||
match &self.0.token().unwrap().kind {
|
||||
TokenKind::Atom(a) => a,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
167
bake/lib/syn/cst.rs
Normal file
167
bake/lib/syn/cst.rs
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
use super::{
|
||||
ast,
|
||||
tok::{Token, TokenKind, Tokens},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Tree {
|
||||
nodes: Vec<Node>,
|
||||
}
|
||||
impl Tree {
|
||||
pub fn new() -> Self {
|
||||
let mut out = Self { nodes: vec![] };
|
||||
out.register({
|
||||
let mut node = Node::unregistered();
|
||||
node.kind = NodeKind::Root;
|
||||
|
||||
node
|
||||
});
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
pub fn register(&mut self, mut node: Node) -> NodeRef {
|
||||
node.id = self.nodes.len();
|
||||
let out = NodeRef(node.id);
|
||||
|
||||
self.nodes.push(node);
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
pub fn root(&self) -> NodeRef {
|
||||
NodeRef(0)
|
||||
}
|
||||
|
||||
pub fn add_child(&mut self, parent: NodeRef, child: NodeRef) {
|
||||
parent.resolve_mut(self).children.push(child);
|
||||
child.resolve_mut(self).parent = Some(parent);
|
||||
}
|
||||
|
||||
pub fn nth_child(
|
||||
&self,
|
||||
parent: NodeRef,
|
||||
child_index: usize,
|
||||
) -> Option<NodeRef> {
|
||||
parent.resolve(self).children.get(child_index).copied()
|
||||
}
|
||||
|
||||
pub fn children_of(
|
||||
&self,
|
||||
parent: NodeRef,
|
||||
) -> impl Iterator<Item = NodeRef> {
|
||||
parent.resolve(self).children.iter().copied()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
|
||||
struct NodeRef(usize);
|
||||
impl NodeRef {
|
||||
fn resolve<'a>(&self, tree: &'a Tree) -> &'a Node {
|
||||
&tree.nodes[self.0]
|
||||
}
|
||||
|
||||
fn resolve_mut<'a>(&self, tree: &'a mut Tree) -> &'a mut Node {
|
||||
&mut tree.nodes[self.0]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
pub struct Node {
|
||||
id: usize,
|
||||
parent: Option<NodeRef>,
|
||||
children: Vec<NodeRef>,
|
||||
kind: NodeKind,
|
||||
whitespace: Option<Token>,
|
||||
token: Option<Token>,
|
||||
}
|
||||
impl Node {
|
||||
fn unregistered() -> Self {
|
||||
Self {
|
||||
id: usize::MAX,
|
||||
parent: None,
|
||||
children: vec![],
|
||||
kind: NodeKind::Unknown,
|
||||
whitespace: None,
|
||||
token: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn kind(&self) -> &NodeKind {
|
||||
&self.kind
|
||||
}
|
||||
|
||||
pub fn token(&self) -> Option<&Token> {
|
||||
self.token.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Eq, PartialEq)]
|
||||
pub enum NodeKind {
|
||||
#[default]
|
||||
Unknown,
|
||||
List,
|
||||
Token,
|
||||
Atom,
|
||||
Root,
|
||||
}
|
||||
|
||||
pub fn parse(corpus: &str) -> Tree {
|
||||
let mut tree = Tree::new();
|
||||
let mut tokens = Tokens::new(corpus);
|
||||
|
||||
while let Some(nr) = parse_one(&mut tree, &mut tokens) {
|
||||
let root = tree.root();
|
||||
tree.add_child(root, nr);
|
||||
}
|
||||
|
||||
tree
|
||||
}
|
||||
|
||||
pub fn parse_one(tree: &mut Tree, tokens: &mut Tokens) -> Option<NodeRef> {
|
||||
let tok = tokens.next()?;
|
||||
let (ws, tok) = match tok.kind {
|
||||
TokenKind::Whitespace(_) => (Some(tok), tokens.next()),
|
||||
_ => (None, Some(tok)),
|
||||
};
|
||||
|
||||
let mut node = Node::unregistered();
|
||||
node.whitespace = ws;
|
||||
let tok = match tok {
|
||||
Some(tok) => tok,
|
||||
None => {
|
||||
node.kind = NodeKind::Token;
|
||||
return Some(tree.register(node));
|
||||
},
|
||||
};
|
||||
|
||||
match &tok.kind {
|
||||
TokenKind::Atom(_) => {
|
||||
node.kind = NodeKind::Atom;
|
||||
},
|
||||
_ => todo!(),
|
||||
}
|
||||
|
||||
node.token = Some(tok);
|
||||
|
||||
Some(tree.register(node))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::syn::tok::Atom;

    use super::parse;

    /// A lone integer literal becomes the only child of the root, and that
    /// child is an atom holding the parsed integer.
    #[test]
    fn atom1() {
        let tree = parse("32");
        let mut children =
            tree.children_of(tree.root()).map(|n| n.resolve(&tree));

        // Keep the wrapper alive in a binding while its value is inspected.
        let first = children.next().unwrap().as_atom().unwrap();

        assert_eq!(*first.value(), Atom::Integer(32));
        assert!(children.next().is_none());
    }
}
|
||||
238
bake/lib/syn/mod.rs
Normal file
238
bake/lib/syn/mod.rs
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
use crate::wald::{NodeRef, NodeStorage, Text};
|
||||
|
||||
/// Kind tag of a syntax node, stored in node storage as its `u16`
/// discriminant.
///
/// Variants have implicit discriminants, so they are numbered consecutively
/// from zero in declaration order. The hidden `_ErrFirst`/`_ErrLast` markers
/// surround the error kinds, and `_Last` is the exclusive upper bound used
/// when converting from `u16`.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
#[repr(u16)]
pub enum NodeKind {
    // Tokens
    // These are the "leaf" nodes
    LPar,
    RPar,
    Whitespace,
    Word,

    // Composite
    /// A generic node that wraps multiple tokens. This is useful for e.g.
    /// attaching whitespace to a Word.
    Multi,
    List,
    Root,

    #[doc(hidden)]
    _ErrFirst,
    /// Input ended before a list was closed.
    ErrUnexpectedEOF,
    #[doc(hidden)]
    _ErrLast,
    #[doc(hidden)]
    _Last,
}
|
||||
impl From<u16> for NodeKind {
|
||||
fn from(value: u16) -> Self {
|
||||
assert!(value < NodeKind::_Last as u16);
|
||||
|
||||
unsafe { core::mem::transmute::<u16, NodeKind>(value) }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NodeKind> for u16 {
    /// Yields the `u16` discriminant of `kind`.
    fn from(kind: NodeKind) -> Self {
        kind as Self
    }
}
|
||||
|
||||
/// Reports whether byte `b` terminates a word: either parenthesis or any
/// ASCII whitespace byte.
pub fn breaks_word(b: u8) -> bool {
    matches!(b, b'(' | b')') || b.is_ascii_whitespace()
}
|
||||
|
||||
pub struct Parser<'a> {
|
||||
input: &'a str,
|
||||
at: usize,
|
||||
nodes: &'a mut NodeStorage,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub fn new(nodes: &'a mut NodeStorage, input: &'a str) -> Self {
|
||||
Self {
|
||||
nodes,
|
||||
input,
|
||||
at: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn head(&self) -> Option<u8> {
|
||||
self.input[self.at..].bytes().next()
|
||||
}
|
||||
|
||||
pub fn skip(&mut self, amt: usize) {
|
||||
self.at += amt;
|
||||
}
|
||||
|
||||
pub fn skip_while(&mut self, mut f: impl FnMut(u8) -> bool) {
|
||||
while let Some(head) = self.head() {
|
||||
if !f(head) {
|
||||
break;
|
||||
}
|
||||
|
||||
self.skip(1);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_whitespace(&mut self) -> Option<NodeRef> {
|
||||
let start = self.at;
|
||||
self.skip_while(|b| b.is_ascii_whitespace());
|
||||
let end = self.at;
|
||||
let span = start..end;
|
||||
if span.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let node = self.nodes.new_node(NodeKind::Whitespace.into());
|
||||
self.nodes.set_text(node, Text::Span(span));
|
||||
|
||||
Some(node)
|
||||
}
|
||||
|
||||
pub fn parse_word(&mut self, whitespace: Option<NodeRef>) -> NodeRef {
|
||||
let start = self.at;
|
||||
self.skip_while(|b| !breaks_word(b));
|
||||
let end = self.at;
|
||||
|
||||
let node_word = self.nodes.new_node(NodeKind::Word.into());
|
||||
self.nodes.set_text(node_word, Text::Span(start..end));
|
||||
|
||||
let node_multi = self.nodes.new_node(NodeKind::Multi.into());
|
||||
if let Some(node_whitespace) = whitespace {
|
||||
self.nodes.append_child(node_multi, node_whitespace);
|
||||
}
|
||||
self.nodes.append_child(node_multi, node_word);
|
||||
|
||||
node_multi
|
||||
}
|
||||
|
||||
pub fn parse_list(&mut self, whitespace: Option<NodeRef>) -> NodeRef {
|
||||
assert!(self.head().unwrap() == b'(');
|
||||
|
||||
let node_list = self.nodes.new_node(NodeKind::List.into());
|
||||
if let Some(node_whitespace) = whitespace {
|
||||
self.nodes.append_child(node_list, node_whitespace);
|
||||
}
|
||||
|
||||
// LPar
|
||||
let start = self.at;
|
||||
self.skip(1);
|
||||
let end = self.at;
|
||||
|
||||
let node_lpar = self.nodes.new_node(NodeKind::LPar.into());
|
||||
self.nodes.set_text(node_lpar, Text::Span(start..end));
|
||||
self.nodes.append_child(node_list, node_lpar);
|
||||
|
||||
loop {
|
||||
let head = match self.head() {
|
||||
None => {
|
||||
let node_err =
|
||||
self.nodes.new_node(NodeKind::ErrUnexpectedEOF.into());
|
||||
self.nodes.append_child(node_list, node_err);
|
||||
|
||||
break;
|
||||
},
|
||||
Some(h) => h,
|
||||
};
|
||||
|
||||
if head == b')' {
|
||||
let start = self.at;
|
||||
self.skip(1);
|
||||
let end = self.at;
|
||||
|
||||
let node_rpar = self.nodes.new_node(NodeKind::RPar.into());
|
||||
self.nodes.set_text(node_rpar, Text::Span(start..end));
|
||||
self.nodes.append_child(node_list, node_rpar);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
let node_child = self.parse_one().unwrap();
|
||||
self.nodes.append_child(node_list, node_child);
|
||||
}
|
||||
|
||||
node_list
|
||||
}
|
||||
|
||||
pub fn parse_one(&mut self) -> Option<NodeRef> {
|
||||
let whitespace = self.parse_whitespace();
|
||||
|
||||
let head = self.head()?;
|
||||
let node = match head {
|
||||
b'(' => self.parse_list(whitespace),
|
||||
_ => self.parse_word(whitespace),
|
||||
};
|
||||
|
||||
Some(node)
|
||||
}
|
||||
|
||||
pub fn parse(&mut self) {
|
||||
assert!(self.nodes.nodes().next().is_none());
|
||||
|
||||
let node_root = self.nodes.new_node(NodeKind::Root.into());
|
||||
|
||||
while let Some(node) = self.parse_one() {
|
||||
self.nodes.append_child(node_root, node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(storage: &mut NodeStorage, input: &str) {
|
||||
let mut parser = Parser::new(storage, input);
|
||||
parser.parse();
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::{
|
||||
syn::{parse, NodeKind},
|
||||
wald::NodeStorage,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn simple_print_input_exactly() {
|
||||
let input = r#"(+ 3 4)"#;
|
||||
let mut storage = NodeStorage::new();
|
||||
parse(&mut storage, input);
|
||||
|
||||
assert!(!storage
|
||||
.nodes()
|
||||
.any(|n| (NodeKind::_ErrFirst.into()..NodeKind::_ErrLast.into())
|
||||
.contains(&storage.tag(n))));
|
||||
|
||||
let root = storage
|
||||
.nodes()
|
||||
.find(|&n| storage.tag(n) == NodeKind::Root.into())
|
||||
.unwrap();
|
||||
let display = storage.display_syntax(input, root);
|
||||
|
||||
let output = format!("{display}");
|
||||
assert_eq!(input, output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn traversal() {
|
||||
let input = r#"(+ 3 4)"#;
|
||||
let mut nodes = NodeStorage::new();
|
||||
parse(&mut nodes, input);
|
||||
|
||||
let three = nodes
|
||||
.nodes()
|
||||
.find(|&n| nodes.text(n).as_str(input) == "3")
|
||||
.unwrap();
|
||||
|
||||
let three_multi = nodes.parent(three).unwrap();
|
||||
let four_multi = nodes.sibling_next(three_multi).unwrap();
|
||||
let four = nodes
|
||||
.children(four_multi)
|
||||
.find(|&n| nodes.tag(n) == NodeKind::Word.into())
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(nodes.text(four).as_str(input), "4");
|
||||
|
||||
let list = nodes.parent(three_multi).unwrap();
|
||||
assert_eq!(list, nodes.parent(four_multi).unwrap());
|
||||
}
|
||||
}
|
||||
200
bake/lib/syn/tok.rs
Normal file
200
bake/lib/syn/tok.rs
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
use std::ops::Range;
|
||||
|
||||
#[derive(Eq, PartialEq, Debug)]
|
||||
pub struct Span(Range<usize>);
|
||||
|
||||
impl From<Range<usize>> for Span {
|
||||
fn from(value: Range<usize>) -> Self {
|
||||
Self(value)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq, Debug)]
|
||||
pub enum TokenError {
|
||||
InvalidByteInNumericLiteral,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Debug)]
|
||||
pub enum TokenKind {
|
||||
Whitespace(String),
|
||||
LPar(u8),
|
||||
RPar(u8),
|
||||
Atom(Atom),
|
||||
Error(TokenError),
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Debug)]
|
||||
pub struct Token {
|
||||
pub kind: TokenKind,
|
||||
pub span: Span,
|
||||
}
|
||||
impl Token {
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Debug)]
|
||||
pub enum Atom {
|
||||
Keyword(String),
|
||||
Identifier(String),
|
||||
String(String),
|
||||
Integer(i64),
|
||||
Float(f64),
|
||||
}
|
||||
|
||||
/// Reports whether `b` may appear inside a numeric literal: an ASCII digit
/// or a decimal point.
fn is_numlit(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'.')
}
|
||||
|
||||
fn is_identifier(b: u8) -> bool {
|
||||
!ends_literal(b)
|
||||
}
|
||||
|
||||
/// Reports whether `b` ends a literal: either parenthesis or any ASCII
/// whitespace byte.
fn ends_literal(b: u8) -> bool {
    matches!(b, b'(' | b')') || b.is_ascii_whitespace()
}
|
||||
|
||||
pub struct Tokens<'a> {
|
||||
at: usize,
|
||||
corpus: &'a str,
|
||||
}
|
||||
impl<'a> Tokens<'a> {
|
||||
pub fn new(corpus: &'a str) -> Self {
|
||||
Self { corpus, at: 0 }
|
||||
}
|
||||
|
||||
fn head(&self) -> Option<u8> {
|
||||
if self.at >= self.corpus.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
self.corpus[self.at..].bytes().next()
|
||||
}
|
||||
|
||||
fn pop_head(&mut self) -> Option<u8> {
|
||||
let out = self.head()?;
|
||||
self.at += 1;
|
||||
|
||||
Some(out)
|
||||
}
|
||||
|
||||
fn chomp_while(&mut self, mut f: impl FnMut(u8) -> bool) {
|
||||
loop {
|
||||
let ch = match self.pop_head() {
|
||||
None => return,
|
||||
Some(ch) => ch,
|
||||
};
|
||||
|
||||
if !f(ch) {
|
||||
self.at -= 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Option<Token> {
|
||||
let start = self.at;
|
||||
let ch = self.pop_head()?;
|
||||
|
||||
let tk = match ch {
|
||||
b'(' => TokenKind::LPar(ch),
|
||||
b')' => TokenKind::RPar(ch),
|
||||
_ if ch.is_ascii_whitespace() => {
|
||||
self.chomp_while(|b| b.is_ascii_whitespace());
|
||||
|
||||
TokenKind::Whitespace(self.corpus[start..self.at].to_string())
|
||||
},
|
||||
_ if ch.is_ascii_digit() => {
|
||||
let mut is_float = false;
|
||||
|
||||
self.chomp_while(|b| {
|
||||
if b == b'.' {
|
||||
is_float = true;
|
||||
}
|
||||
|
||||
is_numlit(b)
|
||||
});
|
||||
|
||||
if !self.head().map(ends_literal).unwrap_or(true) {
|
||||
TokenKind::Error(TokenError::InvalidByteInNumericLiteral)
|
||||
} else {
|
||||
TokenKind::Atom(if is_float {
|
||||
Atom::Float(
|
||||
self.corpus[start..self.at].parse().unwrap(),
|
||||
)
|
||||
} else {
|
||||
Atom::Integer(
|
||||
self.corpus[start..self.at].parse().unwrap(),
|
||||
)
|
||||
})
|
||||
}
|
||||
},
|
||||
b':' => {
|
||||
self.chomp_while(is_identifier);
|
||||
|
||||
TokenKind::Atom(Atom::Keyword(
|
||||
self.corpus[start..self.at].to_string(),
|
||||
))
|
||||
},
|
||||
_ => {
|
||||
self.chomp_while(is_identifier);
|
||||
|
||||
TokenKind::Atom(Atom::Identifier(
|
||||
self.corpus[start..self.at].to_string(),
|
||||
))
|
||||
},
|
||||
};
|
||||
|
||||
Some(Token {
|
||||
kind: tk,
|
||||
span: Span::from(start..self.at),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Tokens<'_> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
Self::next(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_tokenize {
|
||||
use super::{Atom, TokenKind};
|
||||
|
||||
use super::Tokens;
|
||||
|
||||
#[test]
|
||||
fn simple1() {
|
||||
let tokens: Vec<_> =
|
||||
Tokens::new("(:hello)").map(|tk| tk.kind).collect();
|
||||
|
||||
assert_eq!(
|
||||
tokens,
|
||||
[
|
||||
TokenKind::LPar(b'('),
|
||||
TokenKind::Atom(Atom::Keyword(":hello".to_string())),
|
||||
TokenKind::RPar(b')'),
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple2() {
|
||||
let tokens: Vec<_> =
|
||||
Tokens::new("(-> 1 2.4)").map(|t| t.kind).collect();
|
||||
|
||||
assert_eq!(
|
||||
tokens,
|
||||
[
|
||||
TokenKind::LPar(b'('),
|
||||
TokenKind::Atom(Atom::Identifier("->".to_string())),
|
||||
TokenKind::Whitespace(" ".to_string()),
|
||||
TokenKind::Atom(Atom::Integer(1)),
|
||||
TokenKind::Whitespace(" ".to_string()),
|
||||
TokenKind::Atom(Atom::Float(2.4)),
|
||||
TokenKind::RPar(b')'),
|
||||
]
|
||||
)
|
||||
}
|
||||
}
|
||||
164
bake/lib/wald/mod.rs
Normal file
164
bake/lib/wald/mod.rs
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
use std::{fmt::Display, ops::Range};
|
||||
|
||||
pub enum Text {
|
||||
Span(Range<usize>),
|
||||
Static(&'static str),
|
||||
String(String),
|
||||
}
|
||||
impl Default for Text {
|
||||
fn default() -> Self {
|
||||
Self::Static("")
|
||||
}
|
||||
}
|
||||
|
||||
impl Text {
|
||||
pub fn empty() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
Self::Span(s) => s.is_empty(),
|
||||
Self::Static(s) => s.is_empty(),
|
||||
Self::String(s) => s.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str<'a>(&'a self, text: &'a str) -> &'a str {
|
||||
match self {
|
||||
Self::Span(s) => &text[s.clone()],
|
||||
Self::Static(s) => s,
|
||||
Self::String(s) => s.as_str(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
|
||||
pub struct NodeRef(usize);
|
||||
#[derive(Default)]
|
||||
pub struct NodeStorage {
|
||||
tags: Vec<u16>,
|
||||
parents: Vec<Option<NodeRef>>,
|
||||
children: Vec<Vec<NodeRef>>,
|
||||
siblings_prev: Vec<Option<NodeRef>>,
|
||||
siblings_next: Vec<Option<NodeRef>>,
|
||||
texts: Vec<Text>,
|
||||
}
|
||||
impl NodeStorage {
|
||||
pub fn nodes(&self) -> impl Iterator<Item = NodeRef> {
|
||||
(0..self.tags.len()).map(NodeRef)
|
||||
}
|
||||
|
||||
pub fn new_node(&mut self, tag: u16) -> NodeRef {
|
||||
let node = NodeRef(self.tags.len());
|
||||
self.tags.push(tag);
|
||||
self.parents.push(None);
|
||||
self.children.push(vec![]);
|
||||
self.siblings_prev.push(None);
|
||||
self.siblings_next.push(None);
|
||||
self.texts.push(Text::default());
|
||||
|
||||
node
|
||||
}
|
||||
|
||||
pub fn tag(&self, node: NodeRef) -> u16 {
|
||||
self.tags[node.0]
|
||||
}
|
||||
|
||||
pub fn parent(&self, node: NodeRef) -> Option<NodeRef> {
|
||||
self.parents[node.0]
|
||||
}
|
||||
|
||||
pub fn set_parent(&mut self, child: NodeRef, parent: NodeRef) {
|
||||
self.parents[child.0] = Some(parent);
|
||||
}
|
||||
|
||||
pub fn children(&self, node: NodeRef) -> impl Iterator<Item = NodeRef> {
|
||||
self.children[node.0].iter().copied()
|
||||
}
|
||||
|
||||
pub fn append_child(&mut self, parent: NodeRef, child: NodeRef) {
|
||||
self.set_parent(child, parent);
|
||||
|
||||
let children = &mut self.children[parent.0];
|
||||
let child_index = children.len();
|
||||
|
||||
children.push(child);
|
||||
|
||||
if child_index > 0 {
|
||||
let prev_index = child_index - 1;
|
||||
let prev_sibling = children[prev_index];
|
||||
self.set_sibling_next(prev_sibling, child);
|
||||
self.set_sibling_prev(child, prev_sibling);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sibling_next(&mut self, node: NodeRef) -> Option<NodeRef> {
|
||||
self.siblings_next[node.0]
|
||||
}
|
||||
|
||||
pub fn set_sibling_next(&mut self, this: NodeRef, next: NodeRef) {
|
||||
self.siblings_next[this.0] = Some(next);
|
||||
}
|
||||
|
||||
pub fn sibling_prev(&mut self, node: NodeRef) -> Option<NodeRef> {
|
||||
self.siblings_prev[node.0]
|
||||
}
|
||||
|
||||
pub fn set_sibling_prev(&mut self, this: NodeRef, prev: NodeRef) {
|
||||
self.siblings_prev[this.0] = Some(prev);
|
||||
}
|
||||
|
||||
pub fn text(&self, node: NodeRef) -> &Text {
|
||||
&self.texts[node.0]
|
||||
}
|
||||
|
||||
pub fn set_text(&mut self, node: NodeRef, text: Text) {
|
||||
self.texts[node.0] = text;
|
||||
}
|
||||
|
||||
pub fn display_syntax<'a>(
|
||||
&'a self,
|
||||
text: &'a str,
|
||||
node: NodeRef,
|
||||
) -> impl Display + 'a {
|
||||
struct DisplaySyntax<'a> {
|
||||
nodes: &'a NodeStorage,
|
||||
node: NodeRef,
|
||||
text: &'a str,
|
||||
}
|
||||
|
||||
impl Display for DisplaySyntax<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let s = match self.nodes.text(self.node) {
|
||||
Text::Span(s) => &self.text[s.clone()],
|
||||
Text::Static(s) => s,
|
||||
Text::String(s) => s.as_str(),
|
||||
};
|
||||
|
||||
write!(f, "{s}")?;
|
||||
for child in self.nodes.children(self.node) {
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
self.nodes.display_syntax(self.text, child)
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
DisplaySyntax {
|
||||
nodes: self,
|
||||
node,
|
||||
text,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl NodeStorage {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
}
|
||||
|
|
@ -24,7 +24,7 @@
|
|||
},
|
||||
"locked": {
|
||||
"lastModified": 1,
|
||||
"narHash": "sha256-PVtFcvxh3Aqgel46BBFzxN0IvEVDzw/n/hWJ76mVThQ=",
|
||||
"narHash": "sha256-YOJheOuchbi3vU4jlQ9hMcyDU+bK9tzi+4dskNeE6Ww=",
|
||||
"path": "./nix/deno-flake",
|
||||
"type": "path"
|
||||
},
|
||||
|
|
|
|||
228
klout/Cargo.lock
generated
228
klout/Cargo.lock
generated
|
|
@ -14,6 +14,12 @@ version = "1.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
|
||||
|
||||
[[package]]
|
||||
name = "eyre"
|
||||
version = "0.6.12"
|
||||
|
|
@ -35,18 +41,45 @@ dependencies = [
|
|||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
|
||||
|
||||
[[package]]
|
||||
name = "indenter"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "klout"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"eyre",
|
||||
"num_cpus",
|
||||
"rand",
|
||||
"rustc-hash",
|
||||
"serde",
|
||||
"toml",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -55,6 +88,22 @@ version = "0.2.169"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.20.2"
|
||||
|
|
@ -118,6 +167,50 @@ dependencies = [
|
|||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.216"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.216"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_spanned"
|
||||
version = "0.6.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.91"
|
||||
|
|
@ -129,18 +222,153 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.8.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"toml_edit",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_datetime"
|
||||
version = "0.6.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
version = "0.22.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"winnow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.6.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.7.35"
|
||||
|
|
|
|||
|
|
@ -7,6 +7,15 @@ edition = "2024"
|
|||
name = "klout"
|
||||
path = "src/klout.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "klout-gen-data"
|
||||
path = "src/gen_data.rs"
|
||||
|
||||
[dependencies]
|
||||
eyre = "0.6.12"
|
||||
rand = "0.8.5"
|
||||
rand = "0.8.5"
|
||||
num_cpus = "1.16.0"
|
||||
serde = { version = "1.0.216", features = ["derive"] }
|
||||
toml = "0.8.19"
|
||||
walkdir = "2.5.0"
|
||||
rustc-hash = "2.1.0"
|
||||
15851
klout/corpora/dracula.txt
Normal file
15851
klout/corpora/dracula.txt
Normal file
File diff suppressed because it is too large
Load diff
7737
klout/corpora/frankenstein.txt
Normal file
7737
klout/corpora/frankenstein.txt
Normal file
File diff suppressed because it is too large
Load diff
10591
klout/corpora/walden.txt
Normal file
10591
klout/corpora/walden.txt
Normal file
File diff suppressed because it is too large
Load diff
4006
klout/corpora/winnie-the-pooh.txt
Normal file
4006
klout/corpora/winnie-the-pooh.txt
Normal file
File diff suppressed because it is too large
Load diff
12726
klout/corpora/wuthering-heights.txt
Normal file
12726
klout/corpora/wuthering-heights.txt
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,3 +0,0 @@
|
|||
q w f p b j l u y '
|
||||
a r s t g m n e i o
|
||||
x c d v z k h , . /
|
||||
|
|
@ -1,6 +1,10 @@
|
|||
LPINKY LRING LMIDDLE LINDEX LINDEX RINDEX RINDEX RMIDDLE RRING RPINKY
|
||||
LPINKY LRING LMIDDLE LINDEX LINDEX RINDEX RINDEX RMIDDLE RRING RPINKY
|
||||
LRING LMIDDLE LINDEX LINDEX LINDEX RINDEX RINDEX RMIDDLE RRING RPINKY
|
||||
q w f p b j l u y '
|
||||
a r s t g m n e i o
|
||||
x c d v z k h , . /
|
||||
|
||||
LPinky LRing LMiddle LIndex LIndex RIndex RIndex RMiddle RRing RPinky
|
||||
LPinky LRing LMiddle LIndex LIndex RIndex RIndex RMiddle RRing RPinky
|
||||
LRing LMiddle LIndex LIndex LIndex RIndex RIndex RMiddle RRing RPinky
|
||||
|
||||
100 4 2 4 30 30 4 2 4 100
|
||||
50 1 0.1 0.1 5 5 0.1 0.1 1 50
|
||||
|
|
|
|||
10
klout/settings.toml
Normal file
10
klout/settings.toml
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
[paths]
|
||||
layout = "data/initial_layout.txt"
|
||||
matrices = "data/matrices.txt"
|
||||
|
||||
[workers]
|
||||
n_workers = 1
|
||||
prefer_numcpus = true
|
||||
|
||||
[parameters]
|
||||
|
||||
172
klout/src/gen_data.rs
Normal file
172
klout/src/gen_data.rs
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
use std::{
|
||||
cmp::Ordering,
|
||||
ops::{Add, Div, Mul, Sub},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use eyre::{eyre, Result};
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
type GramMap<const N: usize, T> = FxHashMap<[u8; N], T>;
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
struct Grams<T> {
|
||||
grams1: GramMap<1, T>,
|
||||
grams2: GramMap<2, T>,
|
||||
grams3: GramMap<3, T>,
|
||||
grams4: GramMap<4, T>,
|
||||
}
|
||||
impl<T> Grams<T>
|
||||
where
|
||||
T: Div<Output = T> + Copy,
|
||||
{
|
||||
fn divide_by(&mut self, n: T) {
|
||||
divide_by(&mut self.grams1, n);
|
||||
divide_by(&mut self.grams2, n);
|
||||
divide_by(&mut self.grams3, n);
|
||||
divide_by(&mut self.grams4, n);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Grams<T>
|
||||
where
|
||||
T: Copy
|
||||
+ PartialOrd
|
||||
+ Default
|
||||
+ Sub<Output = T>
|
||||
+ Add<Output = T>
|
||||
+ Div<Output = T>
|
||||
+ Mul<Output = T>,
|
||||
{
|
||||
fn normalize(&mut self, omin: T, omax: T) {
|
||||
normalize(&mut self.grams1, omin, omax);
|
||||
normalize(&mut self.grams2, omin, omax);
|
||||
normalize(&mut self.grams3, omin, omax);
|
||||
normalize(&mut self.grams4, omin, omax);
|
||||
}
|
||||
}
|
||||
|
||||
fn divide_by<const N: usize, T: Div<Output = T> + Copy>(
|
||||
grams: &mut GramMap<N, T>,
|
||||
n: T,
|
||||
) {
|
||||
for v in grams.values_mut() {
|
||||
*v = *v / n;
|
||||
}
|
||||
}
|
||||
|
||||
fn normalize<const N: usize, T>(grams: &mut GramMap<N, T>, omin: T, omax: T)
|
||||
where
|
||||
T: Copy
|
||||
+ PartialOrd
|
||||
+ Default
|
||||
+ Sub<Output = T>
|
||||
+ Add<Output = T>
|
||||
+ Div<Output = T>
|
||||
+ Mul<Output = T>,
|
||||
{
|
||||
let max = grams
|
||||
.values()
|
||||
.copied()
|
||||
.max_by(|&a, &b| {
|
||||
if a > b {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
Ordering::Less
|
||||
}
|
||||
})
|
||||
.unwrap_or(Default::default());
|
||||
let min = grams
|
||||
.values()
|
||||
.copied()
|
||||
.min_by(|&a, &b| {
|
||||
if a > b {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
Ordering::Less
|
||||
}
|
||||
})
|
||||
.unwrap_or(Default::default());
|
||||
|
||||
for v in grams.values_mut() {
|
||||
*v = map_to_range(*v, min, max, omin, omax);
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that `normalize` stretches a small table onto `[0, 100]`.
#[test]
fn test_normalize() {
    let mut grams = GramMap::<1, f64>::default();
    for (key, count) in [(b'a', 500.), (b'b', 300.), (b'c', 100.), (b'd', 125.)] {
        grams.insert([key], count);
    }

    normalize(&mut grams, 0., 100.);

    // Largest input lands on the upper bound, smallest on the lower bound,
    // everything else proportionally in between.
    for (key, expected) in [(b"a", 100.), (b"b", 50.), (b"c", 0.), (b"d", 6.25)] {
        assert_eq!(grams[key], expected);
    }
}
|
||||
|
||||
/// Linearly maps `v` from the range `[amin, amax]` onto the range
/// `[bmin, bmax]`.
///
/// The offset is multiplied by the target span before being divided by the
/// source span, so integer inputs are truncated only once, at the division.
/// The divisor is `amax - amin`: callers must not pass `amin == amax`, since
/// that is a division by zero for the primitive numeric types.
fn map_to_range<V>(v: V, amin: V, amax: V, bmin: V, bmax: V) -> V
where
    V: Copy + Add<Output = V> + Sub<Output = V> + Mul<Output = V> + Div<Output = V>,
{
    let scaled_offset = (v - amin) * (bmax - bmin);
    let source_span = amax - amin;

    bmin + scaled_offset / source_span
}
|
||||
|
||||
/// Exercises `map_to_range` with integer and floating-point inputs.
#[test]
fn test_map_to_range() {
    // (v, amin, amax, bmin, bmax) -> expected
    let int_cases = [((40, 0, 100, 0, 10), 4), ((60, 50, 100, 5, 10), 6)];
    for ((v, amin, amax, bmin, bmax), expected) in int_cases {
        assert_eq!(map_to_range(v, amin, amax, bmin, bmax), expected);
    }

    assert_eq!(map_to_range(55.5, 55., 56., 0., 1.), 0.5);
}
|
||||
|
||||
type GramsCounts = Grams<usize>;
|
||||
type GramsFreqs = Grams<usize>;
|
||||
|
||||
fn gen_data_file(path: &Path) -> Result<GramsCounts> {
|
||||
let data = std::fs::read_to_string(path)?;
|
||||
let mut grams = Grams::default();
|
||||
|
||||
for win in data.as_bytes().windows(4) {
|
||||
*grams.grams1.entry([win[0]]).or_insert(0) += 1;
|
||||
*grams.grams2.entry([win[0], win[1]]).or_insert(0) += 1;
|
||||
*grams.grams3.entry([win[0], win[1], win[2]]).or_insert(0) += 1;
|
||||
*grams
|
||||
.grams4
|
||||
.entry([win[0], win[1], win[2], win[3]])
|
||||
.or_insert(0) += 1;
|
||||
}
|
||||
|
||||
// TODO: We lose a few N<4 grams here, but it's probably not that big of a deal
|
||||
|
||||
Ok(grams)
|
||||
}
|
||||
|
||||
fn gen_data(inputs: Vec<String>) -> Result<GramsCounts> {
|
||||
let mut grams = Grams::default();
|
||||
|
||||
for dir in inputs {
|
||||
for de in walkdir::WalkDir::new(dir).into_iter() {
|
||||
let de = de?;
|
||||
if de.file_type().is_file() {
|
||||
grams = grams.combine(gen_data_file(de.path())?);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(grams)
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let mut dirs: Vec<String> = std::env::args().skip(1).collect();
|
||||
let grams = gen_data(dirs)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -1,14 +1,27 @@
|
|||
use std::{collections::HashMap, hash::Hash};
|
||||
use std::{collections::HashMap, hash::Hash, ops::Deref};
|
||||
|
||||
use eyre::{eyre, Result};
|
||||
use eyre::{eyre, Context, Result};
|
||||
use serde::Deserialize;
|
||||
|
||||
/// A `width` x `height` grid of `T` values kept in one flat vector.
#[derive(Debug)]
|
||||
struct Matrix<T> {
|
||||
/// Number of columns.
width: usize,
|
||||
/// Number of rows.
height: usize,
|
||||
/// Cell storage; the constructors keep its length equal to `width * height`.
data: Vec<T>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct BiMatrix<T> {
|
||||
m: Matrix<T>,
|
||||
to_coord: HashMap<T, MatrixCoord>,
|
||||
}
|
||||
impl<T> Deref for BiMatrix<T> {
|
||||
type Target = Matrix<T>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.m
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
|
||||
struct MatrixCoord {
|
||||
|
|
@ -21,24 +34,32 @@ impl MatrixCoord {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Copy + Clone + Default + Eq + Hash> Matrix<T> {
|
||||
fn new(width: usize, height: usize) -> Self {
|
||||
impl<T: Copy + Clone> Matrix<T> {
|
||||
fn new(width: usize, height: usize) -> Self
|
||||
where
|
||||
T: Default,
|
||||
{
|
||||
let data = vec![T::default(); width * height];
|
||||
|
||||
Self {
|
||||
width,
|
||||
height,
|
||||
data,
|
||||
to_coord: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn val_to_coord(&self, v: &T) -> MatrixCoord {
|
||||
*self.to_coord.get(v).unwrap()
|
||||
}
|
||||
fn from_vec(width: usize, height: usize, data: Vec<T>) -> Result<Self> {
|
||||
if width * height != data.len() {
|
||||
return Err(eyre!("Invalid data len"));
|
||||
}
|
||||
|
||||
fn val_to_index(&self, v: &T) -> usize {
|
||||
self.coord_to_index(self.val_to_coord(v))
|
||||
let out = Self {
|
||||
width,
|
||||
height,
|
||||
data,
|
||||
};
|
||||
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
fn val_at_coord(&self, c: MatrixCoord) -> &T {
|
||||
|
|
@ -63,15 +84,86 @@ impl<T: Copy + Clone + Default + Eq + Hash> Matrix<T> {
|
|||
fn set(&mut self, v: T, c: MatrixCoord) {
|
||||
let i = self.coord_to_index(c);
|
||||
self.data[i] = v;
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
self.width * self.height
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Hash + Eq + Copy + Clone> BiMatrix<T> {
|
||||
fn new(width: usize, height: usize) -> Self
|
||||
where
|
||||
T: Default,
|
||||
{
|
||||
Self {
|
||||
m: Matrix::new(width, height),
|
||||
to_coord: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn from_vec(width: usize, height: usize, data: Vec<T>) -> Result<Self> {
|
||||
let m = Matrix::from_vec(width, height, data)?;
|
||||
|
||||
let mut out = Self {
|
||||
m,
|
||||
to_coord: Default::default(),
|
||||
};
|
||||
|
||||
for (i, v) in out.m.data.iter().enumerate() {
|
||||
out.to_coord
|
||||
.insert(*v, MatrixCoord::new(i % width, i / width));
|
||||
}
|
||||
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
fn val_to_coord(&self, v: &T) -> MatrixCoord {
|
||||
*self.to_coord.get(v).unwrap()
|
||||
}
|
||||
|
||||
fn val_to_index(&self, v: &T) -> usize {
|
||||
self.coord_to_index(self.val_to_coord(v))
|
||||
}
|
||||
|
||||
fn set(&mut self, v: T, c: MatrixCoord) {
|
||||
self.m.set(v, c);
|
||||
self.to_coord.insert(v, c);
|
||||
}
|
||||
}
|
||||
|
||||
type Layout = Matrix<char>;
|
||||
type Layout = BiMatrix<char>;
|
||||
|
||||
fn load_initial_layout() -> Result<Layout> {
|
||||
let data = std::fs::read_to_string("data/initial_layout.txt")?;
|
||||
let data: Vec<Vec<_>> = data
|
||||
/// Which hand a finger belongs to.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
|
||||
enum Hand {
|
||||
Left,
|
||||
Right,
|
||||
}
|
||||
|
||||
/// Which digit of a hand a finger is. There is no thumb variant.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
|
||||
enum Digit {
|
||||
Pinky,
|
||||
Ring,
|
||||
Middle,
|
||||
Index,
|
||||
}
|
||||
|
||||
/// A specific finger: one digit on one hand.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
|
||||
struct Finger {
|
||||
/// The digit on that hand.
digit: Digit,
|
||||
/// The hand the digit is on.
hand: Hand,
|
||||
}
|
||||
|
||||
fn load_matrices(
|
||||
paths: &Paths,
|
||||
) -> Result<(Layout, Matrix<Finger>, Matrix<f64>)> {
|
||||
let data = std::fs::read_to_string(&paths.matrices)?;
|
||||
|
||||
let mut sections = data.split("\n\n");
|
||||
let section_layout =
|
||||
sections.next().ok_or(eyre!("Missing section: layout"))?;
|
||||
|
||||
let data: Vec<Vec<_>> = section_layout
|
||||
.lines()
|
||||
.map(|l| {
|
||||
l.split_whitespace()
|
||||
|
|
@ -95,12 +187,83 @@ fn load_initial_layout() -> Result<Layout> {
|
|||
}
|
||||
}
|
||||
|
||||
Ok(layout)
|
||||
let section_fingers =
|
||||
sections.next().ok_or(eyre!("Missing section: fingers"))?;
|
||||
|
||||
let lines_fingers = section_fingers.lines();
|
||||
|
||||
let fingers: Vec<Finger> = lines_fingers
|
||||
.flat_map(|l| {
|
||||
let words = l.split_whitespace();
|
||||
words
|
||||
})
|
||||
.map(|w| {
|
||||
let (l_or_r, digit) = w.split_at(1);
|
||||
let hand = match l_or_r.to_lowercase().as_str() {
|
||||
"l" => Hand::Left,
|
||||
"r" => Hand::Right,
|
||||
_ => return Err(eyre!("Invalid finger in data/matrices.txt")),
|
||||
};
|
||||
|
||||
let digit = match digit.to_lowercase().as_str() {
|
||||
"pinky" => Digit::Pinky,
|
||||
"ring" => Digit::Ring,
|
||||
"middle" => Digit::Middle,
|
||||
"index" => Digit::Index,
|
||||
_ => return Err(eyre!("Invalid finger in data/matrices.txt")),
|
||||
};
|
||||
|
||||
Ok(Finger { hand, digit })
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
let m_finger = Matrix::from_vec(layout.width, layout.height, fingers)
|
||||
.wrap_err("When loading fingers from data/matrices.txt")?;
|
||||
|
||||
let section_effort =
|
||||
sections.next().ok_or(eyre!("Missing section: effort"))?;
|
||||
let lines_effort = section_effort.lines();
|
||||
let efforts: Vec<f64> = lines_effort
|
||||
.flat_map(|l| l.split_whitespace())
|
||||
.map(|w| w.parse())
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.wrap_err("When loading efforts from data/matrices.txt")?;
|
||||
|
||||
let m_effort = Matrix::from_vec(layout.width, layout.height, efforts)
|
||||
.wrap_err("When loading efforts from data/matrices.txt")?;
|
||||
|
||||
Ok((layout, m_finger, m_effort))
|
||||
}
|
||||
|
||||
/// File locations read from the settings file.
#[derive(Deserialize, Debug)]
|
||||
struct Paths {
|
||||
/// Path of the matrices file that `load_matrices` reads.
matrices: String,
|
||||
}
|
||||
|
||||
/// Worker-count settings read from the settings file.
#[derive(Deserialize, Debug)]
|
||||
struct Workers {
|
||||
/// Configured worker count; `main` overwrites it with `num_cpus::get()`
/// when `prefer_numcpus` is true.
n_workers: usize,
|
||||
/// When true, `main` replaces `n_workers` with `num_cpus::get()`.
prefer_numcpus: bool,
|
||||
}
|
||||
|
||||
/// Settings section with no fields defined yet.
#[derive(Deserialize, Debug)]
|
||||
struct Parameters {}
|
||||
|
||||
/// Top-level settings; `main` deserializes this from `./settings.toml`.
#[derive(Deserialize, Debug)]
|
||||
struct Settings {
|
||||
/// File locations.
paths: Paths,
|
||||
/// Worker-count settings.
workers: Workers,
|
||||
/// Currently empty parameter section.
parameters: Parameters,
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let layout = load_initial_layout()?;
|
||||
// TODO: let (m_finger, m_effort) = load_matrices()?;
|
||||
let settings = std::fs::read_to_string("./settings.toml")?;
|
||||
let mut settings: Settings = toml::from_str(&settings)?;
|
||||
if settings.workers.prefer_numcpus {
|
||||
settings.workers.n_workers = num_cpus::get();
|
||||
}
|
||||
|
||||
let (m_layout, m_fingers, m_effort) = load_matrices(&settings.paths)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,36 +1,37 @@
|
|||
{
|
||||
inputs = {
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
nixpkgs.url = "nixpkgs";
|
||||
};
|
||||
outputs = inputs:
|
||||
let lib = import ./lib.nix;
|
||||
in
|
||||
inputs.flake-utils.lib.eachDefaultSystem (system:
|
||||
let
|
||||
nixpkgs = inputs.nixpkgs.legacyPackages.${system};
|
||||
versions = [
|
||||
["2.0.3" "sha256-++wvqD6TunG47jp2SKW+clGOJ6Sy9CnEu2e6AgKP1X0="]
|
||||
["2.0.0" "sha256-WQ4B0sT3qTVl4/Moj0FcFg5LDZIBPbnmcfUxwrmFyYY="]
|
||||
["1.46.3" "sha256-vnDzegjO7XFqBj3dZ1T4TZfuFr3Ur2f4/2zlFUQUwSI="]
|
||||
];
|
||||
|
||||
packages = builtins.listToAttrs (builtins.map (l:
|
||||
let
|
||||
version = builtins.elemAt l 0;
|
||||
zipHash = builtins.elemAt l 1;
|
||||
in {
|
||||
name = "deno-${builtins.replaceStrings ["."] ["_"] version}";
|
||||
value = lib.mkDeno { inherit version zipHash nixpkgs; };
|
||||
}
|
||||
) versions);
|
||||
inputs = {
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
nixpkgs.url = "nixpkgs";
|
||||
};
|
||||
outputs = inputs:
|
||||
let lib = import ./lib.nix;
|
||||
in
|
||||
inputs.flake-utils.lib.eachDefaultSystem (system:
|
||||
let
|
||||
nixpkgs = inputs.nixpkgs.legacyPackages.${system};
|
||||
versions = [
|
||||
["2.1.5" "sha256-xzQRtCwpksRA1XB2ILE3Gdc3r4ftT63M1WBmi6yXZzw="]
|
||||
["2.0.3" "sha256-++wvqD6TunG47jp2SKW+clGOJ6Sy9CnEu2e6AgKP1X0="]
|
||||
["2.0.0" "sha256-WQ4B0sT3qTVl4/Moj0FcFg5LDZIBPbnmcfUxwrmFyYY="]
|
||||
["1.46.3" "sha256-vnDzegjO7XFqBj3dZ1T4TZfuFr3Ur2f4/2zlFUQUwSI="]
|
||||
];
|
||||
|
||||
in { packages = packages // {
|
||||
deno-latest =
|
||||
let
|
||||
v = (builtins.elemAt (builtins.elemAt versions 0) 0);
|
||||
a = "deno-${builtins.replaceStrings ["."] ["_"] v}";
|
||||
in packages.${a};
|
||||
};
|
||||
});
|
||||
packages = builtins.listToAttrs (builtins.map (l:
|
||||
let
|
||||
version = builtins.elemAt l 0;
|
||||
zipHash = builtins.elemAt l 1;
|
||||
in {
|
||||
name = "deno-${builtins.replaceStrings ["."] ["_"] version}";
|
||||
value = lib.mkDeno { inherit version zipHash nixpkgs; };
|
||||
}
|
||||
) versions);
|
||||
|
||||
in { packages = packages // {
|
||||
deno-latest =
|
||||
let
|
||||
v = (builtins.elemAt (builtins.elemAt versions 0) 0);
|
||||
a = "deno-${builtins.replaceStrings ["."] ["_"] v}";
|
||||
in packages.${a};
|
||||
};
|
||||
});
|
||||
}
|
||||
|
|
|
|||
0
pritty/src/main.ts
Normal file
0
pritty/src/main.ts
Normal file
Loading…
Reference in a new issue