reading the chumsky docs helps actually, wild /s
This commit is contained in:
parent
4a2ba05bd1
commit
2810913aae
3 changed files with 39 additions and 72 deletions
|
|
@ -1,5 +1,5 @@
|
|||
use chumsky::{
|
||||
combinator::Or, prelude::{choice, just, recursive}, recursive, select, select_ref, text::{self, ascii::ident, whitespace}, IterParser, Parser
|
||||
combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, just, recursive}, recursive, select, select_ref, span::SimpleSpan, text::{self, ascii::ident, whitespace}, IterParser, Parser
|
||||
};
|
||||
|
||||
use crate::{language_frontend::abstract_syntax_tree::ast::Expression, language_frontend::lexer::tokens::Token};
|
||||
|
|
@ -7,36 +7,24 @@ use crate::{language_frontend::abstract_syntax_tree::ast::Expression, language_f
|
|||
// goal of parsing is to construct an abstract syntax tree
|
||||
|
||||
#[allow(clippy::let_and_return)]
|
||||
pub fn parser<'src>() -> impl Parser<'src, &'src [Token<'src>], Expression<'src>> {
|
||||
let ident = select_ref! {
|
||||
Token::Ident(ident) => *ident
|
||||
};
|
||||
|
||||
let keyword = |kw: &'static str| {
|
||||
select! {
|
||||
Token::Keyword(k) if k == kw => ()
|
||||
}
|
||||
};
|
||||
|
||||
let eq = just(Token::Equals);
|
||||
|
||||
pub fn parser<'tokens, 'src: 'tokens, I>() -> impl Parser<'tokens, I, Expression<'src>, extra::Err<Rich<'tokens, Token<'src>>>>
|
||||
where
|
||||
I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>,
|
||||
{
|
||||
|
||||
let expr = recursive(|expr| {
|
||||
|
||||
let atom = select! {
|
||||
Token::Float(x) => Expression::Float(x),
|
||||
|
||||
Token::Integer(x) => Expression::Integer(x),
|
||||
};
|
||||
|
||||
let unary = just(Token::Substract)
|
||||
.repeated()
|
||||
.foldr(atom, |_op, rhs| Expression::Negatation(Box::new(rhs)));
|
||||
.foldr(atom.clone(), |_op, rhs| Expression::Negatation(Box::new(rhs)));
|
||||
|
||||
// "Punktrechnung vor Strichrechnung" :nerd: (multiplication/division bind tighter than addition/subtraction)
|
||||
|
||||
let binary_1 = unary.clone().foldl(
|
||||
just(Token::Multiply)
|
||||
.or(just(Token::Divide))
|
||||
.then(unary)
|
||||
.repeated(),
|
||||
let mul_div = unary.clone().foldl(
|
||||
just(Token::Multiply).or(just(Token::Divide)).then(unary).repeated(),
|
||||
|lhs, (op, rhs)| match op {
|
||||
Token::Multiply => Expression::Multiply(Box::new(lhs), Box::new(rhs)),
|
||||
Token::Divide => Expression::Divide(Box::new(lhs), Box::new(rhs)),
|
||||
|
|
@ -44,48 +32,16 @@ pub fn parser<'src>() -> impl Parser<'src, &'src [Token<'src>], Expression<'src>
|
|||
},
|
||||
);
|
||||
|
||||
let binary_2 = binary_1.clone().foldl(
|
||||
just(Token::Add)
|
||||
.or(just(Token::Substract))
|
||||
.then(binary_1)
|
||||
.repeated(),
|
||||
let add_sub = mul_div.clone().foldl(
|
||||
just(Token::Add).or(just(Token::Substract)).then(mul_div).repeated(),
|
||||
|lhs, (op, rhs)| match op {
|
||||
Token::Add => Expression::Add(Box::new(lhs), Box::new(rhs)),
|
||||
Token::Substract => Expression::Substract(Box::new(lhs), Box::new(rhs)),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
);
|
||||
|
||||
binary_2
|
||||
|
||||
add_sub
|
||||
});
|
||||
|
||||
let decl = recursive(|decl| {
|
||||
let r#var = keyword("var")
|
||||
.ignore_then(ident)
|
||||
.then_ignore(eq.clone())
|
||||
.then(expr.clone())
|
||||
.then(decl.clone())
|
||||
.map(|((name, rhs), then)| Expression::Var {
|
||||
name,
|
||||
rhs: Box::new(rhs),
|
||||
then: Box::new(then),
|
||||
});
|
||||
|
||||
let r#fun = keyword("fun")
|
||||
.ignore_then(ident.clone())
|
||||
.then(ident.repeated().collect::<Vec<_>>())
|
||||
.then_ignore(eq.clone())
|
||||
.then(expr.clone())
|
||||
.then(decl)
|
||||
.map(|(((name, args), body), then)| Expression::Function {
|
||||
name,
|
||||
args,
|
||||
body: Box::new(body),
|
||||
then: Box::new(then),
|
||||
});
|
||||
|
||||
var.or(r#fun).or(expr)
|
||||
});
|
||||
|
||||
decl
|
||||
expr
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ use logos::{Lexer, Logos};
|
|||
#[derive(Logos, Debug, Clone, PartialEq)]
|
||||
#[logos(skip r"[ \t\r\n\f]+")] // Skip whitespace
|
||||
pub enum Token<'src> {
|
||||
Error,
|
||||
|
||||
#[token("false", |_| false)]
|
||||
#[token("true", |_| true)]
|
||||
Bool(bool),
|
||||
|
|
@ -79,6 +81,7 @@ impl fmt::Display for Token<'_> {
|
|||
Token::Ident(s) => write!(f, "{s}"),
|
||||
Token::String(s) => write!(f, "{s}"),
|
||||
Token::Keyword(s) => write!(f, "{s}"),
|
||||
Token::Error => write!(f, "<error>")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
32
src/main.rs
32
src/main.rs
|
|
@ -1,3 +1,4 @@
|
|||
use chumsky::input::{Input, Stream};
|
||||
use chumsky::Parser;
|
||||
use logos::Logos;
|
||||
|
||||
|
|
@ -18,20 +19,27 @@ Simple Compiler -> 4 Stages:
|
|||
|
||||
fn main() {
|
||||
let sourcecode = std::fs::read_to_string("sample.akai").unwrap();
|
||||
let lexer = Token::lexer(&sourcecode);
|
||||
|
||||
let mut tokens = vec![];
|
||||
for (token, span) in lexer.spanned() {
|
||||
match token {
|
||||
Ok(token) => tokens.push(token),
|
||||
Err(e) => {
|
||||
println!("lexer error at {:?}: {:?}", span, e);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Create a logos lexer over the source code
|
||||
let token_iter = Token::lexer(&sourcecode)
|
||||
.spanned()
|
||||
// Convert logos errors into tokens. We want parsing to be recoverable and not fail at the lexing stage, so
|
||||
// we have a dedicated `Token::Error` variant that represents a token error that was previously encountered
|
||||
.map(|(tok, span)| match tok {
|
||||
// Turn the `Range<usize>` spans logos gives us into chumsky's `SimpleSpan` via `Into`, because it's easier
|
||||
// to work with
|
||||
Ok(tok) => (tok, span.into()),
|
||||
Err(()) => (Token::Error, span.into()),
|
||||
});
|
||||
|
||||
match parser().parse(&tokens).into_result() {
|
||||
// Turn the token iterator into a stream that chumsky can use for things like backtracking
|
||||
let token_stream = Stream::from_iter(token_iter)
|
||||
// Tell chumsky to split the (Token, SimpleSpan) stream into its parts so that it can handle the spans for us
|
||||
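// This involves giving chumsky an 'end of input' span.
// NOTE(review): the span passed below is `0..sourcecode.len()`, which covers the whole source rather than being a zero-width span at the end of the string (`len..len`) - confirm which is intended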
|
||||
.map((0..sourcecode.len()).into(), |(t, s): (_, _)| (t, s));
|
||||
|
||||
|
||||
match parser().parse(token_stream).into_result() {
|
||||
Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) {
|
||||
Ok(output) => println!("{output}"),
|
||||
Err(eval_err) => println!("Evaluation error: {eval_err}"),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue