diff --git a/src/language_frontend/abstract_syntax_tree/parser.rs b/src/language_frontend/abstract_syntax_tree/parser.rs
index e871bc8..4d92b3d 100644
--- a/src/language_frontend/abstract_syntax_tree/parser.rs
+++ b/src/language_frontend/abstract_syntax_tree/parser.rs
@@ -1,5 +1,5 @@
 use chumsky::{
-    combinator::Or, prelude::{choice, just, recursive}, recursive, select, select_ref, text::{self, ascii::ident, whitespace}, IterParser, Parser
+    combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, just, recursive}, recursive, select, select_ref, span::SimpleSpan, text::{self, ascii::ident, whitespace}, IterParser, Parser
 };
 
 use crate::{language_frontend::abstract_syntax_tree::ast::Expression, language_frontend::lexer::tokens::Token};
@@ -7,36 +7,24 @@ use crate::{language_frontend::abstract_syntax_tree::ast::Expression, language_f
 
 // goal of parsing is to construct an abstract syntax tree
 
 #[allow(clippy::let_and_return)]
-pub fn parser<'src>() -> impl Parser<'src, &'src [Token<'src>], Expression<'src>> {
-    let ident = select_ref! {
-        Token::Ident(ident) => *ident
-    };
-
-    let keyword = |kw: &'static str| {
-        select! {
-            Token::Keyword(k) if k == kw => ()
-        }
-    };
-
-    let eq = just(Token::Equals);
-
-    let expr = recursive(|expr| {
-        let atom = select! {
-            Token::Float(x) => Expression::Float(x),
+pub fn parser<'tokens, 'src: 'tokens, I>() -> impl Parser<'tokens, I, Expression<'src>, extra::Err<Rich<'tokens, Token<'src>>>>
+where
+    I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>,
+{
+    let expr = recursive(|expr| {
+        let atom = select! {
+            Token::Float(x) => Expression::Float(x),
+            Token::Integer(x) => Expression::Integer(x),
         };
 
         let unary = just(Token::Substract)
             .repeated()
-            .foldr(atom, |_op, rhs| Expression::Negatation(Box::new(rhs)));
+            .foldr(atom.clone(), |_op, rhs| Expression::Negatation(Box::new(rhs)));
 
-        // "Punktrechnung vor Strichrechnung :nerd:"
-
-        let binary_1 = unary.clone().foldl(
-            just(Token::Multiply)
-                .or(just(Token::Divide))
-                .then(unary)
-                .repeated(),
+        let mul_div = unary.clone().foldl(
+            just(Token::Multiply).or(just(Token::Divide)).then(unary).repeated(),
             |lhs, (op, rhs)| match op {
                 Token::Multiply => Expression::Multiply(Box::new(lhs), Box::new(rhs)),
                 Token::Divide => Expression::Divide(Box::new(lhs), Box::new(rhs)),
@@ -44,48 +32,16 @@ pub fn parser<'src>() -> impl Parser<'src, &'src [Token<'src>
             },
         );
 
-        let binary_2 = binary_1.clone().foldl(
-            just(Token::Add)
-                .or(just(Token::Substract))
-                .then(binary_1)
-                .repeated(),
+        let add_sub = mul_div.clone().foldl(
+            just(Token::Add).or(just(Token::Substract)).then(mul_div).repeated(),
             |lhs, (op, rhs)| match op {
                 Token::Add => Expression::Add(Box::new(lhs), Box::new(rhs)),
                 Token::Substract => Expression::Substract(Box::new(lhs), Box::new(rhs)),
                 _ => unreachable!(),
             },
        );
-
-        binary_2
+
+        add_sub
     });
-
-    let decl = recursive(|decl| {
-        let r#var = keyword("var")
-            .ignore_then(ident)
-            .then_ignore(eq.clone())
-            .then(expr.clone())
-            .then(decl.clone())
-            .map(|((name, rhs), then)| Expression::Var {
-                name,
-                rhs: Box::new(rhs),
-                then: Box::new(then),
-            });
-
-        let r#fun = keyword("fun")
-            .ignore_then(ident.clone())
-            .then(ident.repeated().collect::<Vec<_>>())
-            .then_ignore(eq.clone())
-            .then(expr.clone())
-            .then(decl)
-            .map(|(((name, args), body), then)| Expression::Function {
-                name,
-                args,
-                body: Box::new(body),
-                then: Box::new(then),
-            });
-
-        var.or(r#fun).or(expr)
-    });
-
-    decl
+    expr
 }
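For reference, the rework keeps the old precedence behaviour ("Punktrechnung vor Strichrechnung"): `mul_div` folds before `add_sub`, and folding with `repeated()` instead of recursing per operator keeps the stack depth flat for long chains. A minimal sketch of what that means for the resulting tree — not part of the patch, and assuming `Expression`/`Token` derive `PartialEq` and `Debug`, `Token::Integer` wraps a plain integer type, and the crate paths below match the repo layout (chumsky implements `ValueInput` with `SimpleSpan` spans for `&[T]`, so a bare token slice can drive the parser in a test):

```rust
use chumsky::Parser;

use crate::language_frontend::abstract_syntax_tree::ast::Expression;
use crate::language_frontend::abstract_syntax_tree::parser::parser;
use crate::language_frontend::lexer::tokens::Token;

#[test]
fn mul_binds_tighter_than_add() {
    // 1 + 2 * 3
    let tokens = [
        Token::Integer(1),
        Token::Add,
        Token::Integer(2),
        Token::Multiply,
        Token::Integer(3),
    ];
    let ast = parser().parse(&tokens[..]).into_result().unwrap();
    // `mul_div` is folded before `add_sub`, so `Add` ends up at the root:
    assert_eq!(
        ast,
        Expression::Add(
            Box::new(Expression::Integer(1)),
            Box::new(Expression::Multiply(
                Box::new(Expression::Integer(2)),
                Box::new(Expression::Integer(3)),
            )),
        )
    );
}
```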
diff --git a/src/language_frontend/lexer/tokens.rs b/src/language_frontend/lexer/tokens.rs
index 6d2a994..67ba1ac 100644
--- a/src/language_frontend/lexer/tokens.rs
+++ b/src/language_frontend/lexer/tokens.rs
@@ -5,6 +5,8 @@ use logos::{Lexer, Logos};
 #[derive(Logos, Debug, Clone, PartialEq)]
 #[logos(skip r"[ \t\r\n\f]+")] // Skip whitespace
 pub enum Token<'src> {
+    Error,
+
     #[token("false", |_| false)]
     #[token("true", |_| true)]
     Bool(bool),
@@ -79,6 +81,7 @@ impl fmt::Display for Token<'_> {
             Token::Ident(s) => write!(f, "{s}"),
             Token::String(s) => write!(f, "{s}"),
             Token::Keyword(s) => write!(f, "{s}"),
+            Token::Error => write!(f, "<error>")
         }
     }
 }
diff --git a/src/main.rs b/src/main.rs
index d8f741c..e3f4d21 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+use chumsky::input::{Input, Stream};
 use chumsky::Parser;
 use logos::Logos;
 
@@ -18,20 +19,27 @@ Simple Compiler -> 4 Stages:
 fn main() {
     let sourcecode = std::fs::read_to_string("sample.akai").unwrap();
 
-    let lexer = Token::lexer(&sourcecode);
-
-    let mut tokens = vec![];
-    for (token, span) in lexer.spanned() {
-        match token {
-            Ok(token) => tokens.push(token),
-            Err(e) => {
-                println!("lexer error at {:?}: {:?}", span, e);
-                return;
-            }
-        }
-    }
+    // Create a logos lexer over the source code
+    let token_iter = Token::lexer(&sourcecode)
+        .spanned()
+        // Convert logos errors into tokens. We want parsing to be recoverable and not fail at the lexing stage, so
+        // we have a dedicated `Token::Error` variant that represents a token error that was previously encountered
+        .map(|(tok, span)| match tok {
+            // Turn the `Range<usize>` spans logos gives us into chumsky's `SimpleSpan` via `Into`, because it's easier
+            // to work with
+            Ok(tok) => (tok, span.into()),
+            Err(()) => (Token::Error, span.into()),
+        });
 
-    match parser().parse(&tokens).into_result() {
+    // Turn the token iterator into a stream that chumsky can use for things like backtracking
+    let token_stream = Stream::from_iter(token_iter)
+        // Tell chumsky to split the (Token, SimpleSpan) stream into its parts so that it can handle the spans for us
+        // This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string
+        .map((0..sourcecode.len()).into(), |(t, s): (_, _)| (t, s));
+
+    match parser().parse(token_stream).into_result() {
         Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) {
             Ok(output) => println!("{output}"),
             Err(eval_err) => println!("Evaluation error: {eval_err}"),
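The last hunk is cut off before the `Err` arm of the final `match`. For illustration only — this is not part of the patch — one way that arm could surface the new `Rich` errors, assuming chumsky 1.x's `Rich::span()` accessor and its `Display` impl (which is also why `Token` gained a `Display` arm for `Token::Error` above):

```rust
match parser().parse(token_stream).into_result() {
    Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) {
        Ok(output) => println!("{output}"),
        Err(eval_err) => println!("Evaluation error: {eval_err}"),
    },
    // Hypothetical arm: lexer garbage now reaches the parser as `Token::Error`
    // instead of aborting during lexing, so it is reported here with a span
    // like any other parse error.
    Err(parse_errs) => {
        for err in parse_errs {
            println!("parse error at {:?}: {}", err.span(), err);
        }
    }
}
```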