This commit is contained in:
soup 2025-01-16 20:05:09 -05:00
parent ce38e751ac
commit 4f589a07af
Signed by: soup
SSH key fingerprint: SHA256:GYxje8eQkJ6HZKzVWDdyOUF1TyDiprruGhE0Ym8qYDY
21 changed files with 52348 additions and 58 deletions

View file

@ -1,3 +1,6 @@
mod syn;
mod wald;
pub type Result<T> = core::result::Result<T, Error>;
#[derive(Debug)]

23
bake/lib/syn/ast.rs Normal file
View file

@ -0,0 +1,23 @@
use super::{
cst::{self, NodeKind},
tok::{self, TokenKind},
};
/// Typed view over a [`cst::Node`].
///
/// Handed out by `cst::Node::as_atom`, which only builds it for nodes whose
/// kind is `NodeKind::Atom`.
pub struct Atom<'a>(&'a cst::Node);
impl cst::Node {
    /// Returns an [`Atom`] view of this node when its kind is
    /// `NodeKind::Atom`, and `None` for every other kind.
    pub fn as_atom(&self) -> Option<Atom> {
        match self.kind() {
            NodeKind::Atom => Some(Atom(self)),
            _ => None,
        }
    }
}
impl Atom<'_> {
pub fn value(&self) -> &tok::Atom {
match &self.0.token().unwrap().kind {
TokenKind::Atom(a) => a,
_ => unreachable!(),
}
}
}

167
bake/lib/syn/cst.rs Normal file
View file

@ -0,0 +1,167 @@
use super::{
ast,
tok::{Token, TokenKind, Tokens},
};
/// Syntax tree stored as a flat list of nodes.
///
/// Nodes refer to each other through `NodeRef` indices into `nodes`.
/// `Tree::new` registers the root first, so the root lives at index 0.
#[derive(Debug)]
pub struct Tree {
// Every registered node; a node's `id` equals its position in this list.
nodes: Vec<Node>,
}
impl Tree {
    /// Creates a tree whose only node is the root.
    pub fn new() -> Self {
        let mut tree = Self { nodes: Vec::new() };
        let mut root = Node::unregistered();
        root.kind = NodeKind::Root;
        tree.register(root);
        tree
    }

    /// Stores `node` in the tree, assigning it the next free id, and returns
    /// a reference to it.
    pub fn register(&mut self, mut node: Node) -> NodeRef {
        let id = self.nodes.len();
        node.id = id;
        self.nodes.push(node);
        NodeRef(id)
    }

    /// Returns the reference of the root node (always the first node).
    pub fn root(&self) -> NodeRef {
        NodeRef(0)
    }

    /// Appends `child` to `parent`'s children and records `parent` as the
    /// child's parent.
    pub fn add_child(&mut self, parent: NodeRef, child: NodeRef) {
        let parent_node = parent.resolve_mut(self);
        parent_node.children.push(child);
        let child_node = child.resolve_mut(self);
        child_node.parent = Some(parent);
    }

    /// Returns the `child_index`-th child of `parent`, if there is one.
    pub fn nth_child(
        &self,
        parent: NodeRef,
        child_index: usize,
    ) -> Option<NodeRef> {
        let siblings = &parent.resolve(self).children;
        siblings.get(child_index).copied()
    }

    /// Iterates over the children of `parent` in insertion order.
    pub fn children_of(
        &self,
        parent: NodeRef,
    ) -> impl Iterator<Item = NodeRef> {
        let parent_node = parent.resolve(self);
        parent_node.children.iter().copied()
    }
}
/// Handle to a node of a `Tree`: the index of the node in `Tree::nodes`.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
struct NodeRef(usize);
impl NodeRef {
fn resolve<'a>(&self, tree: &'a Tree) -> &'a Node {
&tree.nodes[self.0]
}
fn resolve_mut<'a>(&self, tree: &'a mut Tree) -> &'a mut Node {
&mut tree.nodes[self.0]
}
}
/// A single node of the syntax tree.
#[derive(Default, Debug)]
pub struct Node {
// Index of this node inside its tree; assigned by `Tree::register`.
id: usize,
// Set by `Tree::add_child`; `None` until the node is attached.
parent: Option<NodeRef>,
// Children in the order they were added.
children: Vec<NodeRef>,
kind: NodeKind,
// Whitespace token that preceded `token` in the input, if any.
whitespace: Option<Token>,
// The token this node was built from, if any.
token: Option<Token>,
}
impl Node {
fn unregistered() -> Self {
Self {
id: usize::MAX,
parent: None,
children: vec![],
kind: NodeKind::Unknown,
whitespace: None,
token: None,
}
}
pub fn kind(&self) -> &NodeKind {
&self.kind
}
pub fn token(&self) -> Option<&Token> {
self.token.as_ref()
}
}
/// What a `Node` represents.
#[derive(Default, Debug, Eq, PartialEq)]
pub enum NodeKind {
/// Kind of a freshly created node that has not been classified yet.
#[default]
Unknown,
List,
/// Used by `parse_one` for a node that only carries trailing whitespace.
Token,
/// A node built from an atom token.
Atom,
/// The root of a tree; see `Tree::new`.
Root,
}
/// Parses `corpus` into a tree, attaching every parsed node directly to the
/// root.
pub fn parse(corpus: &str) -> Tree {
    let mut tree = Tree::new();
    let root = tree.root();
    let mut tokens = Tokens::new(corpus);
    loop {
        match parse_one(&mut tree, &mut tokens) {
            Some(child) => tree.add_child(root, child),
            None => break,
        }
    }
    tree
}
/// Consumes the tokens for one node and registers that node in `tree`.
///
/// A leading whitespace token is stored on the node. Returns `None` once the
/// token stream is exhausted. If only whitespace remains, a node of kind
/// `NodeKind::Token` carrying that whitespace is produced.
///
/// Only atom tokens are handled so far; any other token hits `todo!()`.
pub fn parse_one(tree: &mut Tree, tokens: &mut Tokens) -> Option<NodeRef> {
    let first = tokens.next()?;
    let mut node = Node::unregistered();

    let tok = if matches!(first.kind, TokenKind::Whitespace(_)) {
        node.whitespace = Some(first);
        match tokens.next() {
            Some(following) => following,
            None => {
                // Input ended right after the whitespace.
                node.kind = NodeKind::Token;
                return Some(tree.register(node));
            },
        }
    } else {
        first
    };

    node.kind = match &tok.kind {
        TokenKind::Atom(_) => NodeKind::Atom,
        _ => todo!(),
    };
    node.token = Some(tok);
    Some(tree.register(node))
}
#[cfg(test)]
mod test {
    use crate::syn::{
        cst::NodeKind,
        tok::{Atom, TokenKind},
    };
    use super::parse;

    /// A lone integer literal parses to exactly one atom node under the root.
    #[test]
    fn atom1() {
        let tree = parse("32");
        let mut children =
            tree.children_of(tree.root()).map(|n| n.resolve(&tree));

        // Bind the node and its view to locals so the borrowed atom value
        // does not outlive a temporary.
        let node = children.next().unwrap();
        assert_eq!(*node.kind(), NodeKind::Atom);
        assert!(matches!(
            node.token().map(|t| &t.kind),
            Some(TokenKind::Atom(_))
        ));

        let atom = node.as_atom().unwrap();
        assert_eq!(*atom.value(), Atom::Integer(32));

        assert!(children.next().is_none());
    }
}

238
bake/lib/syn/mod.rs Normal file
View file

@ -0,0 +1,238 @@
use crate::wald::{NodeRef, NodeStorage, Text};
/// Tag describing what a syntax node is.
///
/// Converted to and from the raw `u16` tag kept in node storage by the
/// `From` impls in this module.
#[derive(Copy, Clone, Eq, PartialEq)]
#[repr(u16)]
pub enum NodeKind {
// Tokens
// These are the "leaf" nodes
/// An opening parenthesis.
LPar,
/// A closing parenthesis.
RPar,
/// A run of ASCII whitespace.
Whitespace,
/// A run of bytes not interrupted by whitespace or parentheses.
Word,
// Composite
/// A generic node that wraps multiple tokens. This is useful for e.g.
/// attaching whitespace to a Word.
Multi,
/// A parenthesised list together with its parentheses and elements.
List,
/// The single top-level node created by `Parser::parse`.
Root,
/// Marker: error kinds are declared after this variant.
#[doc(hidden)]
_ErrFirst,
/// The input ended before a list was closed.
ErrUnexpectedEOF,
/// Marker: error kinds are declared before this variant.
#[doc(hidden)]
_ErrLast,
/// Marker: exclusive upper bound of the valid raw tag values.
#[doc(hidden)]
_Last,
}
/// Converts a raw tag back into a `NodeKind`.
///
/// Panics when `value` is not below `NodeKind::_Last`.
impl From<u16> for NodeKind {
fn from(value: u16) -> Self {
assert!(value < NodeKind::_Last as u16);
// SAFETY: `NodeKind` is `#[repr(u16)]` and declares no explicit
// discriminants, so every value in `0..=_Last` names a variant; the
// assertion above guarantees `value` lies in that range.
unsafe { core::mem::transmute::<u16, NodeKind>(value) }
}
}
/// Converts a `NodeKind` into the raw tag value used by node storage.
impl From<NodeKind> for u16 {
    fn from(val: NodeKind) -> Self {
        // `NodeKind` is `#[repr(u16)]`, so this is its discriminant.
        val as Self
    }
}
/// Returns `true` for bytes that terminate a word: ASCII whitespace and the
/// two parentheses.
pub fn breaks_word(b: u8) -> bool {
    matches!(b, b'(' | b')') || b.is_ascii_whitespace()
}
/// Parser that reads `input` and records the nodes it finds in a
/// `NodeStorage`.
pub struct Parser<'a> {
// The text being parsed.
input: &'a str,
// Byte offset of the next unread byte of `input`.
at: usize,
// Storage that receives every node the parser creates.
nodes: &'a mut NodeStorage,
}
impl<'a> Parser<'a> {
pub fn new(nodes: &'a mut NodeStorage, input: &'a str) -> Self {
Self {
nodes,
input,
at: 0,
}
}
pub fn head(&self) -> Option<u8> {
self.input[self.at..].bytes().next()
}
pub fn skip(&mut self, amt: usize) {
self.at += amt;
}
pub fn skip_while(&mut self, mut f: impl FnMut(u8) -> bool) {
while let Some(head) = self.head() {
if !f(head) {
break;
}
self.skip(1);
}
}
pub fn parse_whitespace(&mut self) -> Option<NodeRef> {
let start = self.at;
self.skip_while(|b| b.is_ascii_whitespace());
let end = self.at;
let span = start..end;
if span.is_empty() {
return None;
}
let node = self.nodes.new_node(NodeKind::Whitespace.into());
self.nodes.set_text(node, Text::Span(span));
Some(node)
}
pub fn parse_word(&mut self, whitespace: Option<NodeRef>) -> NodeRef {
let start = self.at;
self.skip_while(|b| !breaks_word(b));
let end = self.at;
let node_word = self.nodes.new_node(NodeKind::Word.into());
self.nodes.set_text(node_word, Text::Span(start..end));
let node_multi = self.nodes.new_node(NodeKind::Multi.into());
if let Some(node_whitespace) = whitespace {
self.nodes.append_child(node_multi, node_whitespace);
}
self.nodes.append_child(node_multi, node_word);
node_multi
}
pub fn parse_list(&mut self, whitespace: Option<NodeRef>) -> NodeRef {
assert!(self.head().unwrap() == b'(');
let node_list = self.nodes.new_node(NodeKind::List.into());
if let Some(node_whitespace) = whitespace {
self.nodes.append_child(node_list, node_whitespace);
}
// LPar
let start = self.at;
self.skip(1);
let end = self.at;
let node_lpar = self.nodes.new_node(NodeKind::LPar.into());
self.nodes.set_text(node_lpar, Text::Span(start..end));
self.nodes.append_child(node_list, node_lpar);
loop {
let head = match self.head() {
None => {
let node_err =
self.nodes.new_node(NodeKind::ErrUnexpectedEOF.into());
self.nodes.append_child(node_list, node_err);
break;
},
Some(h) => h,
};
if head == b')' {
let start = self.at;
self.skip(1);
let end = self.at;
let node_rpar = self.nodes.new_node(NodeKind::RPar.into());
self.nodes.set_text(node_rpar, Text::Span(start..end));
self.nodes.append_child(node_list, node_rpar);
break;
}
let node_child = self.parse_one().unwrap();
self.nodes.append_child(node_list, node_child);
}
node_list
}
pub fn parse_one(&mut self) -> Option<NodeRef> {
let whitespace = self.parse_whitespace();
let head = self.head()?;
let node = match head {
b'(' => self.parse_list(whitespace),
_ => self.parse_word(whitespace),
};
Some(node)
}
pub fn parse(&mut self) {
assert!(self.nodes.nodes().next().is_none());
let node_root = self.nodes.new_node(NodeKind::Root.into());
while let Some(node) = self.parse_one() {
self.nodes.append_child(node_root, node);
}
}
}
/// Parses `input` and records the resulting nodes in `storage`.
///
/// Convenience wrapper around [`Parser::new`] followed by [`Parser::parse`].
pub fn parse(storage: &mut NodeStorage, input: &str) {
    Parser::new(storage, input).parse();
}
#[cfg(test)]
mod test {
use crate::{
syn::{parse, NodeKind},
wald::NodeStorage,
};
// Parsing and then displaying the root must reproduce the input verbatim.
#[test]
fn simple_print_input_exactly() {
let input = r#"(+ 3 4)"#;
let mut storage = NodeStorage::new();
parse(&mut storage, input);
// No node may carry a tag in the error range `_ErrFirst.._ErrLast`.
assert!(!storage
.nodes()
.any(|n| (NodeKind::_ErrFirst.into()..NodeKind::_ErrLast.into())
.contains(&storage.tag(n))));
let root = storage
.nodes()
.find(|&n| storage.tag(n) == NodeKind::Root.into())
.unwrap();
let display = storage.display_syntax(input, root);
let output = format!("{display}");
assert_eq!(input, output);
}
// Walks from the word `3` to its sibling `4` through parent/sibling links.
#[test]
fn traversal() {
let input = r#"(+ 3 4)"#;
let mut nodes = NodeStorage::new();
parse(&mut nodes, input);
let three = nodes
.nodes()
.find(|&n| nodes.text(n).as_str(input) == "3")
.unwrap();
// Words are wrapped in a `Multi` node, so siblings are found one level up.
let three_multi = nodes.parent(three).unwrap();
let four_multi = nodes.sibling_next(three_multi).unwrap();
let four = nodes
.children(four_multi)
.find(|&n| nodes.tag(n) == NodeKind::Word.into())
.unwrap();
assert_eq!(nodes.text(four).as_str(input), "4");
// Both wrappers hang off the same list node.
let list = nodes.parent(three_multi).unwrap();
assert_eq!(list, nodes.parent(four_multi).unwrap());
}
}

200
bake/lib/syn/tok.rs Normal file
View file

@ -0,0 +1,200 @@
use std::ops::Range;
/// Byte range of a token within the tokenized text.
#[derive(Eq, PartialEq, Debug)]
pub struct Span(Range<usize>);
impl From<Range<usize>> for Span {
fn from(value: Range<usize>) -> Self {
Self(value)
}
}
/// Reasons the tokenizer can reject a piece of input.
#[derive(Eq, PartialEq, Debug)]
pub enum TokenError {
/// A numeric literal was directly followed by a byte that neither belongs
/// to the literal nor ends it.
InvalidByteInNumericLiteral,
}
/// The different kinds of token produced by `Tokens`.
#[derive(PartialEq, Debug)]
pub enum TokenKind {
/// A run of ASCII whitespace; holds the whitespace text.
Whitespace(String),
/// An opening parenthesis; holds the byte that was read.
LPar(u8),
/// A closing parenthesis; holds the byte that was read.
RPar(u8),
/// A literal, keyword or identifier.
Atom(Atom),
/// Input the tokenizer could not turn into a valid token.
Error(TokenError),
}
/// A token together with the byte range it was read from.
#[derive(PartialEq, Debug)]
pub struct Token {
pub kind: TokenKind,
pub span: Span,
}
// No inherent methods yet.
impl Token {
}
/// Payload of an atom token.
#[derive(PartialEq, Debug)]
pub enum Atom {
/// A word starting with `:`; the stored text includes the colon.
Keyword(String),
/// Any other word.
Identifier(String),
// NOTE(review): not produced by the tokenizer code visible in this file —
// presumably reserved for string literals; confirm.
String(String),
/// A numeric literal without a `.`.
Integer(i64),
/// A numeric literal containing a `.`.
Float(f64),
}
/// Returns `true` for bytes that may appear inside a numeric literal: ASCII
/// digits and the decimal point.
fn is_numlit(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'.')
}
/// Returns `true` for bytes that may continue an identifier or keyword,
/// i.e. every byte that does not end a literal.
fn is_identifier(b: u8) -> bool {
    let terminates = ends_literal(b);
    !terminates
}
/// Returns `true` for bytes that terminate a literal: either parenthesis or
/// any ASCII whitespace.
fn ends_literal(b: u8) -> bool {
    match b {
        b'(' | b')' => true,
        other => other.is_ascii_whitespace(),
    }
}
/// Tokenizer that walks over `corpus` one byte at a time.
pub struct Tokens<'a> {
// Byte offset of the next unread byte of `corpus`.
at: usize,
// The text being tokenized.
corpus: &'a str,
}
impl<'a> Tokens<'a> {
pub fn new(corpus: &'a str) -> Self {
Self { corpus, at: 0 }
}
fn head(&self) -> Option<u8> {
if self.at >= self.corpus.len() {
return None;
}
self.corpus[self.at..].bytes().next()
}
fn pop_head(&mut self) -> Option<u8> {
let out = self.head()?;
self.at += 1;
Some(out)
}
fn chomp_while(&mut self, mut f: impl FnMut(u8) -> bool) {
loop {
let ch = match self.pop_head() {
None => return,
Some(ch) => ch,
};
if !f(ch) {
self.at -= 1;
return;
}
}
}
fn next(&mut self) -> Option<Token> {
let start = self.at;
let ch = self.pop_head()?;
let tk = match ch {
b'(' => TokenKind::LPar(ch),
b')' => TokenKind::RPar(ch),
_ if ch.is_ascii_whitespace() => {
self.chomp_while(|b| b.is_ascii_whitespace());
TokenKind::Whitespace(self.corpus[start..self.at].to_string())
},
_ if ch.is_ascii_digit() => {
let mut is_float = false;
self.chomp_while(|b| {
if b == b'.' {
is_float = true;
}
is_numlit(b)
});
if !self.head().map(ends_literal).unwrap_or(true) {
TokenKind::Error(TokenError::InvalidByteInNumericLiteral)
} else {
TokenKind::Atom(if is_float {
Atom::Float(
self.corpus[start..self.at].parse().unwrap(),
)
} else {
Atom::Integer(
self.corpus[start..self.at].parse().unwrap(),
)
})
}
},
b':' => {
self.chomp_while(is_identifier);
TokenKind::Atom(Atom::Keyword(
self.corpus[start..self.at].to_string(),
))
},
_ => {
self.chomp_while(is_identifier);
TokenKind::Atom(Atom::Identifier(
self.corpus[start..self.at].to_string(),
))
},
};
Some(Token {
kind: tk,
span: Span::from(start..self.at),
})
}
}
// Exposes the tokenizer through the standard `Iterator` interface.
impl Iterator for Tokens<'_> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
// NOTE(review): `Self::next` appears to resolve to the inherent
// `Tokens::next` (inherent associated functions win over trait methods in
// path lookup) rather than recursing into this method — confirm before
// restructuring.
Self::next(self)
}
}
#[cfg(test)]
mod test_tokenize {
    use super::{Atom, TokenKind};
    use super::Tokens;

    /// Tokenizes `src` and keeps only the kind of every token.
    fn kinds(src: &str) -> Vec<TokenKind> {
        Tokens::new(src).map(|token| token.kind).collect()
    }

    /// A keyword wrapped in parentheses.
    #[test]
    fn simple1() {
        let expected = vec![
            TokenKind::LPar(b'('),
            TokenKind::Atom(Atom::Keyword(":hello".to_string())),
            TokenKind::RPar(b')'),
        ];
        assert_eq!(kinds("(:hello)"), expected);
    }

    /// An identifier, an integer and a float separated by whitespace.
    #[test]
    fn simple2() {
        let expected = vec![
            TokenKind::LPar(b'('),
            TokenKind::Atom(Atom::Identifier("->".to_string())),
            TokenKind::Whitespace(" ".to_string()),
            TokenKind::Atom(Atom::Integer(1)),
            TokenKind::Whitespace(" ".to_string()),
            TokenKind::Atom(Atom::Float(2.4)),
            TokenKind::RPar(b')'),
        ];
        assert_eq!(kinds("(-> 1 2.4)"), expected);
    }
}

164
bake/lib/wald/mod.rs Normal file
View file

@ -0,0 +1,164 @@
use std::{fmt::Display, ops::Range};
/// Text attached to a node.
pub enum Text {
/// A byte range into a source text that is supplied when the text is read.
Span(Range<usize>),
/// A string with `'static` lifetime.
Static(&'static str),
/// An owned string.
String(String),
}
impl Default for Text {
fn default() -> Self {
Self::Static("")
}
}
impl Text {
    /// Returns the default (empty) text.
    pub fn empty() -> Self {
        Default::default()
    }

    /// Returns `true` when the text contains no bytes.
    pub fn is_empty(&self) -> bool {
        let len = match self {
            Self::Span(range) => range.len(),
            Self::Static(literal) => literal.len(),
            Self::String(owned) => owned.len(),
        };
        len == 0
    }

    /// Resolves the text to a string slice.
    ///
    /// `text` is the source that `Span` ranges index into; the other
    /// variants ignore it.
    pub fn as_str<'a>(&'a self, text: &'a str) -> &'a str {
        match self {
            Self::Span(range) => &text[range.start..range.end],
            Self::Static(literal) => literal,
            Self::String(owned) => owned.as_str(),
        }
    }
}
/// Handle to a node of a `NodeStorage`: the node's index in the storage's
/// per-node lists.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub struct NodeRef(usize);
/// Tree storage that keeps each node attribute in its own list.
///
/// All lists have one entry per node and are indexed by `NodeRef`.
#[derive(Default)]
pub struct NodeStorage {
// Caller-defined tag of each node.
tags: Vec<u16>,
parents: Vec<Option<NodeRef>>,
// Children of each node in the order they were appended.
children: Vec<Vec<NodeRef>>,
siblings_prev: Vec<Option<NodeRef>>,
siblings_next: Vec<Option<NodeRef>>,
texts: Vec<Text>,
}
impl NodeStorage {
    /// Iterates over every node in creation order.
    pub fn nodes(&self) -> impl Iterator<Item = NodeRef> {
        (0..self.tags.len()).map(NodeRef)
    }

    /// Creates a node with the given tag, no parent, no children, no
    /// siblings and empty text.
    pub fn new_node(&mut self, tag: u16) -> NodeRef {
        let node = NodeRef(self.tags.len());
        self.tags.push(tag);
        self.parents.push(None);
        self.children.push(vec![]);
        self.siblings_prev.push(None);
        self.siblings_next.push(None);
        self.texts.push(Text::default());
        node
    }

    /// Returns the tag of `node`.
    pub fn tag(&self, node: NodeRef) -> u16 {
        self.tags[node.0]
    }

    /// Returns the parent of `node`, if it has one.
    pub fn parent(&self, node: NodeRef) -> Option<NodeRef> {
        self.parents[node.0]
    }

    /// Records `parent` as the parent of `child` without touching the
    /// parent's child list.
    pub fn set_parent(&mut self, child: NodeRef, parent: NodeRef) {
        self.parents[child.0] = Some(parent);
    }

    /// Iterates over the children of `node` in the order they were appended.
    pub fn children(&self, node: NodeRef) -> impl Iterator<Item = NodeRef> {
        self.children[node.0].iter().copied()
    }

    /// Appends `child` as the last child of `parent` and links it with the
    /// previous last child as siblings.
    pub fn append_child(&mut self, parent: NodeRef, child: NodeRef) {
        self.set_parent(child, parent);
        let children = &mut self.children[parent.0];
        let child_index = children.len();
        children.push(child);
        if child_index > 0 {
            let prev_index = child_index - 1;
            let prev_sibling = children[prev_index];
            self.set_sibling_next(prev_sibling, child);
            self.set_sibling_prev(child, prev_sibling);
        }
    }

    /// Returns the sibling that follows `node`, if any.
    ///
    /// This is a read-only lookup, so it only needs shared access.
    pub fn sibling_next(&self, node: NodeRef) -> Option<NodeRef> {
        self.siblings_next[node.0]
    }

    /// Records `next` as the sibling that follows `this`.
    pub fn set_sibling_next(&mut self, this: NodeRef, next: NodeRef) {
        self.siblings_next[this.0] = Some(next);
    }

    /// Returns the sibling that precedes `node`, if any.
    ///
    /// This is a read-only lookup, so it only needs shared access.
    pub fn sibling_prev(&self, node: NodeRef) -> Option<NodeRef> {
        self.siblings_prev[node.0]
    }

    /// Records `prev` as the sibling that precedes `this`.
    pub fn set_sibling_prev(&mut self, this: NodeRef, prev: NodeRef) {
        self.siblings_prev[this.0] = Some(prev);
    }

    /// Returns the text attached to `node`.
    pub fn text(&self, node: NodeRef) -> &Text {
        &self.texts[node.0]
    }

    /// Replaces the text attached to `node`.
    pub fn set_text(&mut self, node: NodeRef, text: Text) {
        self.texts[node.0] = text;
    }

    /// Returns a value whose `Display` output is the text of `node` followed
    /// by the output of each of its children, depth first.
    ///
    /// `text` is the source that `Text::Span` ranges index into.
    pub fn display_syntax<'a>(
        &'a self,
        text: &'a str,
        node: NodeRef,
    ) -> impl Display + 'a {
        struct DisplaySyntax<'a> {
            nodes: &'a NodeStorage,
            node: NodeRef,
            text: &'a str,
        }
        impl Display for DisplaySyntax<'_> {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                // Reuse `Text::as_str` instead of repeating its match here.
                let s = self.nodes.text(self.node).as_str(self.text);
                write!(f, "{s}")?;
                for child in self.nodes.children(self.node) {
                    write!(
                        f,
                        "{}",
                        self.nodes.display_syntax(self.text, child)
                    )?;
                }
                Ok(())
            }
        }
        DisplaySyntax {
            nodes: self,
            node,
            text,
        }
    }
}
impl NodeStorage {
    /// Creates an empty storage.
    pub fn new() -> Self {
        <Self as Default>::default()
    }
}

View file

@ -24,7 +24,7 @@
},
"locked": {
"lastModified": 1,
"narHash": "sha256-PVtFcvxh3Aqgel46BBFzxN0IvEVDzw/n/hWJ76mVThQ=",
"narHash": "sha256-YOJheOuchbi3vU4jlQ9hMcyDU+bK9tzi+4dskNeE6Ww=",
"path": "./nix/deno-flake",
"type": "path"
},

228
klout/Cargo.lock generated
View file

@ -14,6 +14,12 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "eyre"
version = "0.6.12"
@ -35,18 +41,45 @@ dependencies = [
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
[[package]]
name = "hermit-abi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
[[package]]
name = "indenter"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683"
[[package]]
name = "indexmap"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "klout"
version = "0.0.0"
dependencies = [
"eyre",
"num_cpus",
"rand",
"rustc-hash",
"serde",
"toml",
"walkdir",
]
[[package]]
@ -55,6 +88,22 @@ version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "num_cpus"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "once_cell"
version = "1.20.2"
@ -118,6 +167,50 @@ dependencies = [
"getrandom",
]
[[package]]
name = "rustc-hash"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.216"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.216"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_spanned"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1"
dependencies = [
"serde",
]
[[package]]
name = "syn"
version = "2.0.91"
@ -129,18 +222,153 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "toml"
version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5"
dependencies = [
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
"winnow",
]
[[package]]
name = "unicode-ident"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "winnow"
version = "0.6.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b"
dependencies = [
"memchr",
]
[[package]]
name = "zerocopy"
version = "0.7.35"

View file

@ -7,6 +7,15 @@ edition = "2024"
name = "klout"
path = "src/klout.rs"
[[bin]]
name = "klout-gen-data"
path = "src/gen_data.rs"
[dependencies]
eyre = "0.6.12"
rand = "0.8.5"
rand = "0.8.5"
num_cpus = "1.16.0"
serde = { version = "1.0.216", features = ["derive"] }
toml = "0.8.19"
walkdir = "2.5.0"
rustc-hash = "2.1.0"

15851
klout/corpora/dracula.txt Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

10591
klout/corpora/walden.txt Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,3 +0,0 @@
q w f p b j l u y '
a r s t g m n e i o
x c d v z k h , . /

View file

@ -1,6 +1,10 @@
LPINKY LRING LMIDDLE LINDEX LINDEX RINDEX RINDEX RMIDDLE RRING RPINKY
LPINKY LRING LMIDDLE LINDEX LINDEX RINDEX RINDEX RMIDDLE RRING RPINKY
LRING LMIDDLE LINDEX LINDEX LINDEX RINDEX RINDEX RMIDDLE RRING RPINKY
q w f p b j l u y '
a r s t g m n e i o
x c d v z k h , . /
LPinky LRing LMiddle LIndex LIndex RIndex RIndex RMiddle RRing RPinky
LPinky LRing LMiddle LIndex LIndex RIndex RIndex RMiddle RRing RPinky
LRing LMiddle LIndex LIndex LIndex RIndex RIndex RMiddle RRing RPinky
100 4 2 4 30 30 4 2 4 100
50 1 0.1 0.1 5 5 0.1 0.1 1 50

10
klout/settings.toml Normal file
View file

@ -0,0 +1,10 @@
[paths]
layout = "data/initial_layout.txt"
matrices = "data/matrices.txt"
[workers]
n_workers = 1
prefer_numcpus = true
[parameters]

172
klout/src/gen_data.rs Normal file
View file

@ -0,0 +1,172 @@
use std::{
cmp::Ordering,
ops::{Add, Div, Mul, Sub},
path::Path,
};
use eyre::{eyre, Result};
use rustc_hash::FxHashMap;
/// Map from an `N`-byte gram to a value of type `T`.
type GramMap<const N: usize, T> = FxHashMap<[u8; N], T>;
/// Per-gram values for grams of one to four bytes.
#[derive(Default, Debug)]
struct Grams<T> {
// Keyed by single bytes.
grams1: GramMap<1, T>,
// Keyed by byte pairs.
grams2: GramMap<2, T>,
// Keyed by byte triples.
grams3: GramMap<3, T>,
// Keyed by four-byte sequences.
grams4: GramMap<4, T>,
}
impl<T: Div<Output = T> + Copy> Grams<T> {
    /// Divides every stored value, in all four maps, by `n`.
    fn divide_by(&mut self, n: T) {
        let Self {
            grams1,
            grams2,
            grams3,
            grams4,
        } = self;
        divide_by(grams1, n);
        divide_by(grams2, n);
        divide_by(grams3, n);
        divide_by(grams4, n);
    }
}
impl<
        T: Copy
            + PartialOrd
            + Default
            + Sub<Output = T>
            + Add<Output = T>
            + Div<Output = T>
            + Mul<Output = T>,
    > Grams<T>
{
    /// Rescales each of the four maps, independently of the others, so that
    /// its values span `[omin, omax]`.
    fn normalize(&mut self, omin: T, omax: T) {
        let Self {
            grams1,
            grams2,
            grams3,
            grams4,
        } = self;
        normalize(grams1, omin, omax);
        normalize(grams2, omin, omax);
        normalize(grams3, omin, omax);
        normalize(grams4, omin, omax);
    }
}
/// Divides every value of `grams` by `n`, in place.
fn divide_by<const N: usize, T: Div<Output = T> + Copy>(
    grams: &mut GramMap<N, T>,
    n: T,
) {
    grams.values_mut().for_each(|value| *value = *value / n);
}
/// Rescales the values of `grams` in place so that the smallest becomes
/// `omin` and the largest becomes `omax`.
///
/// When every value is the same (this includes a map with a single entry)
/// there is no range to stretch; every value is then set to `omin` instead of
/// dividing by the zero-width input range. An empty map is left untouched.
fn normalize<const N: usize, T>(grams: &mut GramMap<N, T>, omin: T, omax: T)
where
    T: Copy
        + PartialOrd
        + Default
        + Sub<Output = T>
        + Add<Output = T>
        + Div<Output = T>
        + Mul<Output = T>,
{
    // `T` is only `PartialOrd`, so build an ordering by hand: anything that
    // is not strictly greater counts as less.
    let by_value = |a: &T, b: &T| {
        if a > b {
            Ordering::Greater
        } else {
            Ordering::Less
        }
    };
    let max = grams.values().copied().max_by(by_value).unwrap_or_default();
    let min = grams.values().copied().min_by(by_value).unwrap_or_default();

    if max == min {
        // Zero-width input range: mapping would divide by zero (a panic for
        // integer types, NaN for floats).
        for v in grams.values_mut() {
            *v = omin;
        }
        return;
    }

    for v in grams.values_mut() {
        *v = map_to_range(*v, min, max, omin, omax);
    }
}
/// Normalizing to `[0, 100]` maps the minimum to 0, the maximum to 100 and
/// everything else proportionally in between.
#[test]
fn test_normalize() {
    let mut input: GramMap<1, f64> = [
        ([b'a'], 500.),
        ([b'b'], 300.),
        ([b'c'], 100.),
        ([b'd'], 125.),
    ]
    .into_iter()
    .collect();

    normalize(&mut input, 0., 100.);

    let expected = [(b"a", 100.), (b"b", 50.), (b"c", 0.), (b"d", 6.25)];
    for (key, want) in expected {
        assert_eq!(input[key], want);
    }
}
/// Linearly maps `v` from the range `[amin, amax]` onto the range
/// `[bmin, bmax]`.
///
/// The multiplication happens before the division, so integer inputs keep as
/// much precision as integer arithmetic allows.
fn map_to_range<V>(v: V, amin: V, amax: V, bmin: V, bmax: V) -> V
where
    V: Sub<Output = V>
        + Add<Output = V>
        + Mul<Output = V>
        + Div<Output = V>
        + Copy,
{
    let offset = v - amin;
    let scaled = offset * (bmax - bmin);
    let input_width = amax - amin;
    bmin + scaled / input_width
}
/// Spot checks for integer and floating-point inputs.
#[test]
fn test_map_to_range() {
    // (value, amin, amax, bmin, bmax, expected)
    let integer_cases = [(40, 0, 100, 0, 10, 4), (60, 50, 100, 5, 10, 6)];
    for (v, amin, amax, bmin, bmax, want) in integer_cases {
        assert_eq!(map_to_range(v, amin, amax, bmin, bmax), want);
    }

    let halfway = map_to_range(55.5, 55., 56., 0., 1.);
    assert_eq!(halfway, 0.5);
}
/// Occurrence counts per gram.
type GramsCounts = Grams<usize>;
// NOTE(review): currently the same type as `GramsCounts`; if this is meant to
// hold relative frequencies a fractional element type may be intended —
// confirm.
type GramsFreqs = Grams<usize>;
fn gen_data_file(path: &Path) -> Result<GramsCounts> {
let data = std::fs::read_to_string(path)?;
let mut grams = Grams::default();
for win in data.as_bytes().windows(4) {
*grams.grams1.entry([win[0]]).or_insert(0) += 1;
*grams.grams2.entry([win[0], win[1]]).or_insert(0) += 1;
*grams.grams3.entry([win[0], win[1], win[2]]).or_insert(0) += 1;
*grams
.grams4
.entry([win[0], win[1], win[2], win[3]])
.or_insert(0) += 1;
}
// TODO: We lose a few N<4 grams here, but it's probably not that big of a deal
Ok(grams)
}
/// Walks every directory in `inputs` recursively and merges the gram counts
/// of all regular files found.
///
/// # Errors
///
/// Fails on the first directory entry that cannot be read or file that cannot
/// be processed.
// NOTE(review): `Grams::combine` is not defined in the part of this file that
// is visible here — confirm it exists.
fn gen_data(inputs: Vec<String>) -> Result<GramsCounts> {
    let mut total = Grams::default();
    for root in inputs {
        for entry in walkdir::WalkDir::new(root) {
            let entry = entry?;
            if !entry.file_type().is_file() {
                continue;
            }
            total = total.combine(gen_data_file(entry.path())?);
        }
    }
    Ok(total)
}
/// Entry point: treats every command-line argument as a directory to scan
/// and gathers gram counts from the files below them.
fn main() -> Result<()> {
    let dirs = std::env::args().skip(1).collect::<Vec<String>>();
    // The counts are computed but not used further yet.
    let _grams = gen_data(dirs)?;
    Ok(())
}

View file

@ -1,14 +1,27 @@
use std::{collections::HashMap, hash::Hash};
use std::{collections::HashMap, hash::Hash, ops::Deref};
use eyre::{eyre, Result};
use eyre::{eyre, Context, Result};
use serde::Deserialize;
#[derive(Debug)]
struct Matrix<T> {
width: usize,
height: usize,
data: Vec<T>,
}
/// A matrix that additionally remembers at which coordinate each value sits,
/// so values can be looked up in both directions.
#[derive(Debug)]
struct BiMatrix<T> {
// The underlying grid.
m: Matrix<T>,
// Reverse index: value -> coordinate of that value in `m`.
to_coord: HashMap<T, MatrixCoord>,
}
/// Gives read-only access to the methods and fields of the wrapped matrix.
impl<T> Deref for BiMatrix<T> {
    type Target = Matrix<T>;

    fn deref(&self) -> &Self::Target {
        let Self { m, .. } = self;
        m
    }
}
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
struct MatrixCoord {
@ -21,24 +34,32 @@ impl MatrixCoord {
}
}
impl<T: Copy + Clone + Default + Eq + Hash> Matrix<T> {
fn new(width: usize, height: usize) -> Self {
impl<T: Copy + Clone> Matrix<T> {
fn new(width: usize, height: usize) -> Self
where
T: Default,
{
let data = vec![T::default(); width * height];
Self {
width,
height,
data,
to_coord: Default::default(),
}
}
fn val_to_coord(&self, v: &T) -> MatrixCoord {
*self.to_coord.get(v).unwrap()
}
fn from_vec(width: usize, height: usize, data: Vec<T>) -> Result<Self> {
if width * height != data.len() {
return Err(eyre!("Invalid data len"));
}
fn val_to_index(&self, v: &T) -> usize {
self.coord_to_index(self.val_to_coord(v))
let out = Self {
width,
height,
data,
};
Ok(out)
}
fn val_at_coord(&self, c: MatrixCoord) -> &T {
@ -63,15 +84,86 @@ impl<T: Copy + Clone + Default + Eq + Hash> Matrix<T> {
fn set(&mut self, v: T, c: MatrixCoord) {
let i = self.coord_to_index(c);
self.data[i] = v;
}
fn size(&self) -> usize {
self.width * self.height
}
}
impl<T: Hash + Eq + Copy + Clone> BiMatrix<T> {
    /// Creates a `width` x `height` matrix filled with `T::default()` and an
    /// empty reverse index.
    fn new(width: usize, height: usize) -> Self
    where
        T: Default,
    {
        Self {
            m: Matrix::new(width, height),
            to_coord: Default::default(),
        }
    }

    /// Builds the matrix from `data` and indexes the coordinate of every
    /// value.
    ///
    /// When a value occurs more than once, the reverse index keeps its last
    /// position.
    ///
    /// # Errors
    ///
    /// Fails when `data` does not contain exactly `width * height` elements.
    fn from_vec(width: usize, height: usize, data: Vec<T>) -> Result<Self> {
        let m = Matrix::from_vec(width, height, data)?;
        let mut out = Self {
            m,
            to_coord: Default::default(),
        };
        for (i, v) in out.m.data.iter().enumerate() {
            out.to_coord
                .insert(*v, MatrixCoord::new(i % width, i / width));
        }
        Ok(out)
    }

    /// Returns the coordinate of `v`.
    ///
    /// Panics if `v` is not present in the reverse index.
    fn val_to_coord(&self, v: &T) -> MatrixCoord {
        *self.to_coord.get(v).unwrap()
    }

    /// Returns the flat index of `v`.
    ///
    /// Panics if `v` is not present in the reverse index.
    fn val_to_index(&self, v: &T) -> usize {
        self.coord_to_index(self.val_to_coord(v))
    }

    /// Stores `v` at `c` and keeps the reverse index consistent.
    fn set(&mut self, v: T, c: MatrixCoord) {
        // The value being overwritten no longer lives at `c`; drop its
        // reverse entry so lookups do not return a stale coordinate. Only do
        // so when the entry still points at `c`: during a swap the displaced
        // value may already have been re-indexed at its new position.
        let index = self.m.coord_to_index(c);
        let displaced = self.m.data[index];
        if self.to_coord.get(&displaced) == Some(&c) {
            self.to_coord.remove(&displaced);
        }
        self.m.set(v, c);
        self.to_coord.insert(v, c);
    }
}
type Layout = Matrix<char>;
type Layout = BiMatrix<char>;
fn load_initial_layout() -> Result<Layout> {
let data = std::fs::read_to_string("data/initial_layout.txt")?;
let data: Vec<Vec<_>> = data
/// Which hand a finger belongs to.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
enum Hand {
Left,
Right,
}
/// Which finger of a hand is meant.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
enum Digit {
Pinky,
Ring,
Middle,
Index,
}
/// One specific finger: a digit on a given hand.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
struct Finger {
digit: Digit,
hand: Hand,
}
fn load_matrices(
paths: &Paths,
) -> Result<(Layout, Matrix<Finger>, Matrix<f64>)> {
let data = std::fs::read_to_string(&paths.matrices)?;
let mut sections = data.split("\n\n");
let section_layout =
sections.next().ok_or(eyre!("Missing section: layout"))?;
let data: Vec<Vec<_>> = section_layout
.lines()
.map(|l| {
l.split_whitespace()
@ -95,12 +187,83 @@ fn load_initial_layout() -> Result<Layout> {
}
}
Ok(layout)
let section_fingers =
sections.next().ok_or(eyre!("Missing section: fingers"))?;
let lines_fingers = section_fingers.lines();
let fingers: Vec<Finger> = lines_fingers
.flat_map(|l| {
let words = l.split_whitespace();
words
})
.map(|w| {
let (l_or_r, digit) = w.split_at(1);
let hand = match l_or_r.to_lowercase().as_str() {
"l" => Hand::Left,
"r" => Hand::Right,
_ => return Err(eyre!("Invalid finger in data/matrices.txt")),
};
let digit = match digit.to_lowercase().as_str() {
"pinky" => Digit::Pinky,
"ring" => Digit::Ring,
"middle" => Digit::Middle,
"index" => Digit::Index,
_ => return Err(eyre!("Invalid finger in data/matrices.txt")),
};
Ok(Finger { hand, digit })
})
.collect::<Result<Vec<_>, _>>()?;
let m_finger = Matrix::from_vec(layout.width, layout.height, fingers)
.wrap_err("When loading fingers from data/matrices.txt")?;
let section_effort =
sections.next().ok_or(eyre!("Missing section: effort"))?;
let lines_effort = section_effort.lines();
let efforts: Vec<f64> = lines_effort
.flat_map(|l| l.split_whitespace())
.map(|w| w.parse())
.collect::<Result<Vec<_>, _>>()
.wrap_err("When loading efforts from data/matrices.txt")?;
let m_effort = Matrix::from_vec(layout.width, layout.height, efforts)
.wrap_err("When loading efforts from data/matrices.txt")?;
Ok((layout, m_finger, m_effort))
}
/// File locations read from the settings file.
#[derive(Deserialize, Debug)]
struct Paths {
// Path of the file that `load_matrices` reads.
matrices: String,
}
/// Worker settings read from the settings file.
#[derive(Deserialize, Debug)]
struct Workers {
n_workers: usize,
// When set, `main` replaces `n_workers` with `num_cpus::get()`.
prefer_numcpus: bool,
}
/// The `parameters` section of the settings file; it has no fields yet.
#[derive(Deserialize, Debug)]
struct Parameters {}
/// Top-level structure of the settings file, which `main` reads from
/// `./settings.toml`.
#[derive(Deserialize, Debug)]
struct Settings {
paths: Paths,
workers: Workers,
parameters: Parameters,
}
fn main() -> Result<()> {
let layout = load_initial_layout()?;
// TODO: let (m_finger, m_effort) = load_matrices()?;
let settings = std::fs::read_to_string("./settings.toml")?;
let mut settings: Settings = toml::from_str(&settings)?;
if settings.workers.prefer_numcpus {
settings.workers.n_workers = num_cpus::get();
}
let (m_layout, m_fingers, m_effort) = load_matrices(&settings.paths)?;
Ok(())
}

View file

@ -1,36 +1,37 @@
{
inputs = {
flake-utils.url = "github:numtide/flake-utils";
nixpkgs.url = "nixpkgs";
};
outputs = inputs:
let lib = import ./lib.nix;
in
inputs.flake-utils.lib.eachDefaultSystem (system:
let
nixpkgs = inputs.nixpkgs.legacyPackages.${system};
versions = [
["2.0.3" "sha256-++wvqD6TunG47jp2SKW+clGOJ6Sy9CnEu2e6AgKP1X0="]
["2.0.0" "sha256-WQ4B0sT3qTVl4/Moj0FcFg5LDZIBPbnmcfUxwrmFyYY="]
["1.46.3" "sha256-vnDzegjO7XFqBj3dZ1T4TZfuFr3Ur2f4/2zlFUQUwSI="]
];
packages = builtins.listToAttrs (builtins.map (l:
let
version = builtins.elemAt l 0;
zipHash = builtins.elemAt l 1;
in {
name = "deno-${builtins.replaceStrings ["."] ["_"] version}";
value = lib.mkDeno { inherit version zipHash nixpkgs; };
}
) versions);
inputs = {
flake-utils.url = "github:numtide/flake-utils";
nixpkgs.url = "nixpkgs";
};
outputs = inputs:
let lib = import ./lib.nix;
in
inputs.flake-utils.lib.eachDefaultSystem (system:
let
nixpkgs = inputs.nixpkgs.legacyPackages.${system};
versions = [
["2.1.5" "sha256-xzQRtCwpksRA1XB2ILE3Gdc3r4ftT63M1WBmi6yXZzw="]
["2.0.3" "sha256-++wvqD6TunG47jp2SKW+clGOJ6Sy9CnEu2e6AgKP1X0="]
["2.0.0" "sha256-WQ4B0sT3qTVl4/Moj0FcFg5LDZIBPbnmcfUxwrmFyYY="]
["1.46.3" "sha256-vnDzegjO7XFqBj3dZ1T4TZfuFr3Ur2f4/2zlFUQUwSI="]
];
in { packages = packages // {
deno-latest =
let
v = (builtins.elemAt (builtins.elemAt versions 0) 0);
a = "deno-${builtins.replaceStrings ["."] ["_"] v}";
in packages.${a};
};
});
packages = builtins.listToAttrs (builtins.map (l:
let
version = builtins.elemAt l 0;
zipHash = builtins.elemAt l 1;
in {
name = "deno-${builtins.replaceStrings ["."] ["_"] version}";
value = lib.mkDeno { inherit version zipHash nixpkgs; };
}
) versions);
in { packages = packages // {
deno-latest =
let
v = (builtins.elemAt (builtins.elemAt versions 0) 0);
a = "deno-${builtins.replaceStrings ["."] ["_"] v}";
in packages.${a};
};
});
}

0
pritty/src/main.ts Normal file
View file