From 47cad9fb160372a2b2112ccb7b5cb26a610c3391 Mon Sep 17 00:00:00 2001 From: LunarAkai Date: Thu, 7 Aug 2025 20:02:43 +0200 Subject: [PATCH] error on whitespaces / newlines :( --- .../abstract_syntax_tree/ast.rs | 94 ++++++++------ .../abstract_syntax_tree/mod.rs | 1 - .../abstract_syntax_tree/op.rs | 0 .../abstract_syntax_tree/parser.rs | 115 +++++++----------- src/language_frontend/lexer/tokens.rs | 25 +++- src/main.rs | 22 ++-- 6 files changed, 129 insertions(+), 128 deletions(-) delete mode 100644 src/language_frontend/abstract_syntax_tree/op.rs diff --git a/src/language_frontend/abstract_syntax_tree/ast.rs b/src/language_frontend/abstract_syntax_tree/ast.rs index 9920a97..6242f67 100644 --- a/src/language_frontend/abstract_syntax_tree/ast.rs +++ b/src/language_frontend/abstract_syntax_tree/ast.rs @@ -1,47 +1,60 @@ use std::rc::Rc; +use chumsky::span::Span; + /// Abstract Syntax Tree pub type BlockStatement = Vec; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum Expr { - Ident(Ident), - Literal(Literal), - Call(FunctionCall), + Ident(String), + + IntLiteral(i64), + + FloatLiteral(f64), + + StringLiteral(String), + + BoolLiteral(bool), + + CharLiteral(char), + + Null, + + + Call { + callee: Box, + arguments: Vec, + }, + + Unary { + operator: UnaryOp, + operand: Box, + }, + + Binary { + lhs: Box, + operator: BinaryOp, + rhs: Box, + }, + + Assignment { + target: Box, + value: Box, + }, + Error, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct FunctionCall { - pub parameters: Vec, pub name: Rc, + pub parameters: Vec, + pub(crate) args: Vec, } -#[derive(Debug, Clone)] -pub enum ExprResult { - Bool(bool), - UnsignedInteger(usize), - SignedInteger(isize), - Char(char), - Return(Box), - Void, -} - -impl std::fmt::Display for ExprResult { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ExprResult::Bool(b) => write!(f, "{b}"), - ExprResult::UnsignedInteger(i) => write!(f, "{i}"), - ExprResult::SignedInteger(i) => write!(f, "{i}"), - ExprResult::Char(c) => write!(f, "{c}"), - ExprResult::Return(v) => write!(f, "{}", *v), - ExprResult::Void => write!(f, ""), - } - } -} - -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum BinaryOp { Multiply, Divide, @@ -59,43 +72,45 @@ pub enum BinaryOp { Or, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum UnaryOp { Not, Minus, Plus, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum Literal { - UnsignedInteger(usize), + UnsignedInteger(u64), Bool(bool), Char(char), String(Rc), + Int(i64), + Float(f64), } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct Ident(pub Rc); -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum Statement { Var(Ident, Option) } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct While { pub condition: Expr, pub body: BlockStatement, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct Condition { pub condition: Expr, pub if_body: BlockStatement, pub else_body: Option, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum Type { UnsignedInteger, SignedInteger, @@ -104,6 +119,7 @@ pub enum Type { String, } +#[derive(Clone, Debug, PartialEq)] pub enum Value { UnsignedInteger(u32), SignedInteger(i32), @@ -125,7 +141,7 @@ impl Value { } } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct Function { pub name: Rc, pub params: Vec<(Ident, Type)>, diff --git a/src/language_frontend/abstract_syntax_tree/mod.rs b/src/language_frontend/abstract_syntax_tree/mod.rs index 3c19eab..a310c76 100644 --- a/src/language_frontend/abstract_syntax_tree/mod.rs +++ b/src/language_frontend/abstract_syntax_tree/mod.rs @@ -1,3 +1,2 @@ pub mod ast; -pub mod op; pub mod parser; diff --git a/src/language_frontend/abstract_syntax_tree/op.rs b/src/language_frontend/abstract_syntax_tree/op.rs deleted file mode 100644 index e69de29..0000000 diff --git a/src/language_frontend/abstract_syntax_tree/parser.rs b/src/language_frontend/abstract_syntax_tree/parser.rs index 1579832..eb4ef84 100644 --- a/src/language_frontend/abstract_syntax_tree/parser.rs +++ b/src/language_frontend/abstract_syntax_tree/parser.rs @@ -1,32 +1,20 @@ use chumsky::{ - combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, just, nested_delimiters, recursive, via_parser}, primitive::select, recursive, select, select_ref, span::{self, SimpleSpan}, text::{self, ascii::{ident, keyword}, whitespace}, Boxed, ConfigIterParser, IterParser, Parser + combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, end, just, nested_delimiters, recursive, via_parser}, primitive::select, recursive, select, select_ref, span::{self, SimpleSpan}, text::{self, ascii::{ident, keyword}, whitespace}, Boxed, ConfigIterParser, IterParser, Parser }; -use crate::{language_frontend::abstract_syntax_tree::ast::Expression, language_frontend::lexer::tokens::Token}; +use crate::language_frontend::{abstract_syntax_tree::ast::{BinaryOp, Expr}, lexer::tokens::Token}; // goal of parsing is to construct an abstract syntax tree #[allow(clippy::let_and_return)] pub fn parser<'tokens, 'src: 'tokens, I>() - -> impl Parser<'tokens, I, Expression<'src>, extra::Err>>> + -> impl Parser<'tokens, I, Expr, extra::Err>> where - I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>, + I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>, { let ident = select! { - Token::Ident(s) => s, + Token::Identifier(s) => s, }; - - - let keyword = |kw: &'static str| { - select! { - Token::Keyword(k) if k == kw => (), - } - }; - - - - let eq = just(Token::Equals).labelled("="); - /* let block = recursive(|block| { let indent = just(Token::NewLine) @@ -40,76 +28,55 @@ where */ let expr = recursive(|expr| { - let block = expr - .clone() - .delimited_by(just(Token::BraceBegin), just(Token::BraceEnd)); - - - - // 'Atoms' are expressions that contain no ambiguity let atom = select! { - Token::Float(x) => Expression::Float(x), - Token::Integer(x) => Expression::Integer(x), - }.or( - expr.clone().delimited_by(just(Token::ParenBegin), - just(Token::ParenEnd)) - ).or(block); + Token::FloatLiteral(x) => Expr::FloatLiteral(x), + Token::IntLiteral(x) => Expr::IntLiteral(x), + } + .or(expr.clone().delimited_by(just(Token::LParen), just(Token::RParen))); - - let unary = just(Token::Substract) - .repeated() - .foldr(atom.clone(), |_op, rhs| Expression::Negatation(Box::new(rhs))); - - let mul_div = unary.clone().foldl( - just(Token::Multiply).or(just(Token::Divide)).then(unary).repeated(), - |lhs, (op, rhs)| match op { - Token::Multiply => Expression::Multiply(Box::new(lhs), Box::new(rhs)), - Token::Divide => Expression::Divide(Box::new(lhs), Box::new(rhs)), - _ => unreachable!(), + let mul_div = atom.clone().foldl( + choice(( + just(Token::Multiply).to(BinaryOp::Multiply), + just(Token::Divide).to(BinaryOp::Divide), + )) + .then(atom) + .repeated(), + |lhs, (op, rhs)| Expr::Binary { + lhs: Box::new(lhs), + operator: op, + rhs: Box::new(rhs), }, ); let add_sub = mul_div.clone().foldl( - just(Token::Add).or(just(Token::Substract)).then(mul_div).repeated(), - |lhs, (op, rhs)| match op { - Token::Add => Expression::Add(Box::new(lhs), Box::new(rhs)), - Token::Substract => Expression::Substract(Box::new(lhs), Box::new(rhs)), - _ => unreachable!(), + choice(( + just(Token::Add).to(BinaryOp::Add), + just(Token::Substract).to(BinaryOp::Substract), + )) + .then(mul_div) + .repeated(), + |lhs, (op, rhs)| Expr::Binary { + lhs: Box::new(lhs), + operator: op, + rhs: Box::new(rhs), }, ); add_sub }); - + let var = just(Token::Var) + .ignore_then(ident) + .then_ignore(just(Token::Assign)) + .then(expr.clone()) + .then_ignore(just(Token::NewLine).or_not()) + .map(|(name, rhs)| Expr::Assignment { + target: Box::new(Expr::Ident(name)), + value: Box::new(rhs), + }); - let decl = recursive(|decl| { - let var = keyword("var") - .ignore_then(ident) - .then_ignore(eq.clone()) - .then(expr.clone()) - .then(decl.clone()) - .map(|((name, rhs), then)| Expression::Var { - name, - rhs: Box::new(rhs), - then: Box::new(then), - }); - let fun = keyword("fun") - .ignore_then(ident.clone()) - .then(ident.repeated().collect::>()) - .then_ignore(eq.clone()) - .then(expr.clone()) - .then(decl) - .map(|(((name, args), body), then)| Expression::Function { - name, - args, - body: Box::new(body), - then: Box::new(then), - }); + var.or(expr) + - var.or(fun).or(expr) - }); - - decl } diff --git a/src/language_frontend/lexer/tokens.rs b/src/language_frontend/lexer/tokens.rs index 159b1a3..192ecf7 100644 --- a/src/language_frontend/lexer/tokens.rs +++ b/src/language_frontend/lexer/tokens.rs @@ -4,8 +4,8 @@ use logos::{Logos}; #[derive(Logos, Debug, Clone, PartialEq)] pub enum Token { - Error, - + #[token(r"\n")] + NewLine, // Identifier #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())] Identifier(String), @@ -133,20 +133,33 @@ pub enum Token { #[token(":")] Colon, + #[token(",")] + Comma, + + #[token(".")] + Dot, + // Special - #[regex(r"//[^\n\r]*", logos::skip)] + + + #[regex(r"//[^\r]*", logos::skip)] #[regex(r"/\*([^*]|\*[^/])*\*/", logos::skip)] Comment, - #[regex(r"[ \t\n\f]+", logos::skip)] + + + #[regex(r"[\t\r\f]+", logos::skip)] Whitespace, - + Eof, + + Error, } impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { + Token::NewLine => write!(f, ""), Token::Identifier(ident) => write!(f, "{ident}"), Token::Fun => write!(f, "fun"), Token::Class => write!(f, "class"), @@ -186,6 +199,8 @@ impl fmt::Display for Token { Token::LBracket => write!(f, "["), Token::RBracket => write!(f, "]"), Token::Colon => write!(f, ":"), + Token::Comma => write!(f, ","), + Token::Dot => write!(f, "."), Token::Comment => write!(f, ""), Token::Whitespace => write!(f, ""), Token::Eof => write!(f, ""), diff --git a/src/main.rs b/src/main.rs index 35a6165..5eb2c87 100644 --- a/src/main.rs +++ b/src/main.rs @@ -38,16 +38,20 @@ fn main() { // This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string .map((0..sourcecode.len()).into(), |(t, s): (_, _)| (t, s)); + println!("{:?}", sourcecode); + + let lexer = Token::lexer(&sourcecode) + .spanned() + .collect::>(); + + for token in lexer { + println!("{:?}", token); + } match parser().parse(token_stream).into_result() { - Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) { - Ok(output) => println!("{output}"), - Err(eval_err) => println!("Evaluation error: {eval_err}"), - }, - Err(parse_errs) => parse_errs - .into_iter() - .for_each(|err| println!("Parse error: {err}")), + Ok(res) => println!("{:?}", res), + Err(e) => { + panic!("{:#?}", e) + } }; - - //println!("\n[result]\n{}", abstract_syntax_tree::ast::eval(absyntr, vars, funcs)); }