reading the chumsky docs helps actually, wild /s

This commit is contained in:
LunarAkai 2025-08-07 02:29:37 +02:00
commit 2810913aae
3 changed files with 39 additions and 72 deletions

View file

@@ -1,5 +1,5 @@
use chumsky::{
combinator::Or, prelude::{choice, just, recursive}, recursive, select, select_ref, text::{self, ascii::ident, whitespace}, IterParser, Parser
combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, just, recursive}, recursive, select, select_ref, span::SimpleSpan, text::{self, ascii::ident, whitespace}, IterParser, Parser
};
use crate::{language_frontend::abstract_syntax_tree::ast::Expression, language_frontend::lexer::tokens::Token};
@@ -7,36 +7,24 @@ use crate::{language_frontend::abstract_syntax_tree::ast::Expression, language_f
// goal of parsing is to construct an abstract syntax tree
#[allow(clippy::let_and_return)]
pub fn parser<'src>() -> impl Parser<'src, &'src [Token<'src>], Expression<'src>> {
let ident = select_ref! {
Token::Ident(ident) => *ident
};
let keyword = |kw: &'static str| {
select! {
Token::Keyword(k) if k == kw => ()
}
};
let eq = just(Token::Equals);
pub fn parser<'tokens, 'src: 'tokens, I>() -> impl Parser<'tokens, I, Expression<'src>, extra::Err<Rich<'tokens, Token<'src>>>>
where
I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>,
{
let expr = recursive(|expr| {
let atom = select! {
Token::Float(x) => Expression::Float(x),
Token::Integer(x) => Expression::Integer(x),
};
let unary = just(Token::Substract)
.repeated()
.foldr(atom, |_op, rhs| Expression::Negatation(Box::new(rhs)));
.foldr(atom.clone(), |_op, rhs| Expression::Negatation(Box::new(rhs)));
// "Punktrechnung vor Strichrechnung :nerd:"
let binary_1 = unary.clone().foldl(
just(Token::Multiply)
.or(just(Token::Divide))
.then(unary)
.repeated(),
let mul_div = unary.clone().foldl(
just(Token::Multiply).or(just(Token::Divide)).then(unary).repeated(),
|lhs, (op, rhs)| match op {
Token::Multiply => Expression::Multiply(Box::new(lhs), Box::new(rhs)),
Token::Divide => Expression::Divide(Box::new(lhs), Box::new(rhs)),
@@ -44,48 +32,16 @@ pub fn parser<'src>() -> impl Parser<'src, &'src [Token<'src>], Expression<'src>
},
);
let binary_2 = binary_1.clone().foldl(
just(Token::Add)
.or(just(Token::Substract))
.then(binary_1)
.repeated(),
let add_sub = mul_div.clone().foldl(
just(Token::Add).or(just(Token::Substract)).then(mul_div).repeated(),
|lhs, (op, rhs)| match op {
Token::Add => Expression::Add(Box::new(lhs), Box::new(rhs)),
Token::Substract => Expression::Substract(Box::new(lhs), Box::new(rhs)),
_ => unreachable!(),
},
);
binary_2
add_sub
});
let decl = recursive(|decl| {
let r#var = keyword("var")
.ignore_then(ident)
.then_ignore(eq.clone())
.then(expr.clone())
.then(decl.clone())
.map(|((name, rhs), then)| Expression::Var {
name,
rhs: Box::new(rhs),
then: Box::new(then),
});
let r#fun = keyword("fun")
.ignore_then(ident.clone())
.then(ident.repeated().collect::<Vec<_>>())
.then_ignore(eq.clone())
.then(expr.clone())
.then(decl)
.map(|(((name, args), body), then)| Expression::Function {
name,
args,
body: Box::new(body),
then: Box::new(then),
});
var.or(r#fun).or(expr)
});
decl
expr
}

View file

@ -5,6 +5,8 @@ use logos::{Lexer, Logos};
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t\r\n\f]+")] // Skip whitespace
pub enum Token<'src> {
Error,
#[token("false", |_| false)]
#[token("true", |_| true)]
Bool(bool),
@@ -79,6 +81,7 @@ impl fmt::Display for Token<'_> {
Token::Ident(s) => write!(f, "{s}"),
Token::String(s) => write!(f, "{s}"),
Token::Keyword(s) => write!(f, "{s}"),
Token::Error => write!(f, "<error>")
}
}
}

View file

@@ -1,3 +1,4 @@
use chumsky::input::{Input, Stream};
use chumsky::Parser;
use logos::Logos;
@@ -18,20 +19,27 @@ Simple Compiler -> 4 Stages:
fn main() {
let sourcecode = std::fs::read_to_string("sample.akai").unwrap();
let lexer = Token::lexer(&sourcecode);
let mut tokens = vec![];
for (token, span) in lexer.spanned() {
match token {
Ok(token) => tokens.push(token),
Err(e) => {
println!("lexer error at {:?}: {:?}", span, e);
return;
}
}
}
// Create a logos lexer over the source code
let token_iter = Token::lexer(&sourcecode)
.spanned()
// Convert logos errors into tokens. We want parsing to be recoverable and not fail at the lexing stage, so
// we have a dedicated `Token::Error` variant that represents a token error that was previously encountered
.map(|(tok, span)| match tok {
// Turn the `Range<usize>` spans logos gives us into chumsky's `SimpleSpan` via `Into`, because it's easier
// to work with
Ok(tok) => (tok, span.into()),
Err(()) => (Token::Error, span.into()),
});
match parser().parse(&tokens).into_result() {
// Turn the token iterator into a stream that chumsky can use for things like backtracking
let token_stream = Stream::from_iter(token_iter)
// Tell chumsky to split the (Token, SimpleSpan) stream into its parts so that it can handle the spans for us
// This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string
.map((0..sourcecode.len()).into(), |(t, s): (_, _)| (t, s));
match parser().parse(token_stream).into_result() {
Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) {
Ok(output) => println!("{output}"),
Err(eval_err) => println!("Evaluation error: {eval_err}"),