From 55605d2cd13ddbba480b2b2f09522720e6ee6fe3 Mon Sep 17 00:00:00 2001 From: LunarAkai Date: Thu, 7 Aug 2025 14:19:26 +0200 Subject: [PATCH] better ast --- .../abstract_syntax_tree/ast.rs | 213 +++++++++------- .../abstract_syntax_tree/op.rs | 19 -- src/language_frontend/lexer/tokens.rs | 232 +++++++++++++----- src/main.rs | 2 +- 4 files changed, 292 insertions(+), 174 deletions(-) diff --git a/src/language_frontend/abstract_syntax_tree/ast.rs b/src/language_frontend/abstract_syntax_tree/ast.rs index eac5c97..9920a97 100644 --- a/src/language_frontend/abstract_syntax_tree/ast.rs +++ b/src/language_frontend/abstract_syntax_tree/ast.rs @@ -1,95 +1,134 @@ +use std::rc::Rc; + + + /// Abstract Syntax Tree +pub type BlockStatement = Vec; + +#[derive(Clone, Debug)] +pub enum Expr { + Ident(Ident), + Literal(Literal), + Call(FunctionCall), +} + +#[derive(Clone, Debug)] +pub struct FunctionCall { + pub parameters: Vec, + pub name: Rc, +} #[derive(Debug, Clone)] -pub enum Expression<'src> { - // Identifier - Ident(&'src str), - - // Types - Integer(i64), - Float(f64), - String(String), +pub enum ExprResult { Bool(bool), - - // Operations - Negatation(Box>), - Add(Box>, Box>), - Substract(Box>, Box>), - Multiply(Box>, Box>), - Divide(Box>, Box>), - - // Keywords - Var { - name: &'src str, - rhs: Box>, - then: Box>, - }, - - Function { - name: &'src str, - args: Vec<&'src str>, - body: Box>, - then: Box>, - }, - - Unit, + UnsignedInteger(usize), + SignedInteger(isize), + Char(char), + Return(Box), + Void, } -pub fn eval<'src>( - expr: &'src Expression<'src>, - vars: &mut Vec<(&'src str, f64)>, - funcs: &mut Vec<(&'src str, &'src [&'src str], &'src Expression<'src>)>, -) -> Result { - match expr { - Expression::Ident(name) => { - if let Some((_, val)) = vars.iter().rev().find(|(var, _)| var == name) { - Ok(*val) - } else { - Err(format!("Cannot find variable `{name}` in scope")) - } - }, - - // Types - Expression::Integer(x) => Ok((*x) as f64), // todo - - Expression::Float(x) => Ok(*x), - - Expression::String(_) => todo!(), - - Expression::Bool(_) => todo!(), - - // Operations - Expression::Negatation(lhs) => todo!(), - - Expression::Add(lhs, rhs) => Ok(eval(lhs, vars, funcs)? + eval(rhs, vars, funcs)?), - - Expression::Substract(lhs, rhs) => Ok(eval(lhs, vars, funcs)? - eval(rhs, vars, funcs)?), - - Expression::Multiply(lhs, rhs) => Ok(eval(lhs, vars, funcs)? * eval(rhs, vars, funcs)?), - - Expression::Divide(lhs, rhs) => Ok(eval(lhs, vars, funcs)? / eval(rhs, vars, funcs)?), - - // Keywords - Expression::Var { name, rhs, then } => { - let rhs = eval(rhs, vars, funcs)?; - vars.push((*name, rhs)); - let output = eval(then, vars, funcs); - vars.pop(); - output - }, - - Expression::Function { - name, - args, - body, - then, - } => { - funcs.push((name, args, body)); - let output = eval(then, vars, funcs); - funcs.pop(); - output - }, - - Expression::Unit => todo!(), +impl std::fmt::Display for ExprResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ExprResult::Bool(b) => write!(f, "{b}"), + ExprResult::UnsignedInteger(i) => write!(f, "{i}"), + ExprResult::SignedInteger(i) => write!(f, "{i}"), + ExprResult::Char(c) => write!(f, "{c}"), + ExprResult::Return(v) => write!(f, "{}", *v), + ExprResult::Void => write!(f, ""), + } } } + +#[derive(Clone, Debug)] +pub enum BinaryOp { + Multiply, + Divide, + Add, + Substract, + + Equals, + NotEquals, + Less, + LessEquals, + Greater, + GreaterEquals, + + And, + Or, +} + +#[derive(Clone, Debug)] +pub enum UnaryOp { + Not, + Minus, + Plus, +} + +#[derive(Clone, Debug)] +pub enum Literal { + UnsignedInteger(usize), + Bool(bool), + Char(char), + String(Rc), +} + +#[derive(Clone, Debug)] +pub struct Ident(pub Rc); + +#[derive(Clone, Debug)] +pub enum Statement { + Var(Ident, Option) +} + +#[derive(Clone, Debug)] +pub struct While { + pub condition: Expr, + pub body: BlockStatement, +} + +#[derive(Clone, Debug)] +pub struct Condition { + pub condition: Expr, + pub if_body: BlockStatement, + pub else_body: Option, +} + +#[derive(Clone, Debug)] +pub enum Type { + UnsignedInteger, + SignedInteger, + Bool, + Char, + String, +} + +pub enum Value { + UnsignedInteger(u32), + SignedInteger(i32), + Bool(bool), + Char(char), + String(String), +} + +impl Value { + pub fn is_type(&self, ty: &Type) -> bool { + match (ty, self) { + (Type::Bool, Value::Bool(_)) => true, + (Type::Char, Value::Char(_)) => true, + (Type::SignedInteger, Value::SignedInteger(_)) => true, + (Type::UnsignedInteger, Value::UnsignedInteger(_)) => true, + (Type::String, Value::String(_)) => true, + _ => false, + } + } +} + +#[derive(Clone, Debug)] +pub struct Function { + pub name: Rc, + pub params: Vec<(Ident, Type)>, + pub return_type: Option, + pub body: Vec, +} \ No newline at end of file diff --git a/src/language_frontend/abstract_syntax_tree/op.rs b/src/language_frontend/abstract_syntax_tree/op.rs index 075022d..e69de29 100644 --- a/src/language_frontend/abstract_syntax_tree/op.rs +++ b/src/language_frontend/abstract_syntax_tree/op.rs @@ -1,19 +0,0 @@ -#[derive(Debug, Clone)] -pub enum Op { - Add, - Subtract, - Multiply, - Divide, -} - -impl Op { - pub fn eval(&self) -> String { - let text: &str = match self { - Op::Add => "+", - Op::Subtract => "-", - Op::Multiply => "*", - Op::Divide => "/", - }; - text.to_string() - } -} diff --git a/src/language_frontend/lexer/tokens.rs b/src/language_frontend/lexer/tokens.rs index 7a428af..159b1a3 100644 --- a/src/language_frontend/lexer/tokens.rs +++ b/src/language_frontend/lexer/tokens.rs @@ -1,26 +1,96 @@ use std::fmt; -use logos::{Lexer, Logos}; +use logos::{Logos}; #[derive(Logos, Debug, Clone, PartialEq)] -#[logos(skip r"[ \r\f]+")] // Skip whitespace -pub enum Token<'src> { +pub enum Token { Error, - Null, - Indent, - NewLine, - Dedent, + // Identifier + #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())] + Identifier(String), + + // Keywords + #[token("fun")] + Fun, + + #[token("class")] + Class, + + #[token("var")] + Var, + + #[token("interface")] + Interface, + + #[token("derive")] + Derive, + + #[token("impl")] + Impl, + + #[token("if")] + If, + + #[token("else")] + Else, + + #[token("->")] + Return, + + #[token("enum")] + Enum, + + + // Types + #[token("int")] + IntType, + + #[token("float")] + FloatType, + + #[token("bool")] + BoolType, + + #[token("String")] + StringType, + + + // Literals + #[regex(r#""([^"\\]|\\.)*""#, |lex| lex.slice().to_owned())] + StringLiteral(String), + + #[regex(r"[0-9]+", |lex| lex.slice().parse::().ok())] + IntLiteral(i64), + + #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().parse::().ok())] + FloatLiteral(f64), - #[token("false", |_| false)] #[token("true", |_| true)] - Bool(bool), + #[token("false", |_| false)] + BoolLiteral(bool), - #[token("+")] - Add, + // Operators + #[token("=")] + Assign, + + #[token("==")] + Equals, - #[token("-")] - Substract, + #[token("!=")] + NotEquals, + + #[token("<", priority = 2)] + Less, + + #[token("<=")] + LessEquals, + + #[token(">", priority = 2)] + Greater, + + #[token(">=")] + GreaterEquals, #[token("*")] Multiply, @@ -28,70 +98,98 @@ pub enum Token<'src> { #[token("/")] Divide, - #[token("=")] - Equals, + #[token("+")] + Add, + + #[token("-")] + Substract, + + #[token("&&")] + And, + + #[token("||")] + Or, + + + // Punctiuation + #[token("(")] + LParen, + + #[token(")")] + RParen, + + #[token("{")] + LBrace, + + #[token("}")] + RBrace, + + #[token("[")] + LBracket, + + #[token("]")] + RBracket, #[token(":")] Colon, - #[token("(")] - ParenBegin, - - #[token(")")] - ParenEnd, - - #[token("{")] - BraceBegin, - - #[token("}")] - BraceEnd, - - #[regex(r"[+-]?[0-9]+", |lex| lex.slice().parse::().unwrap(), priority = 3)] - Integer(i64), - - #[regex(r"[+-]?([0-9]*[.])?[0-9]+", |lex| lex.slice().parse::().unwrap())] - Float(f64), - - #[regex(r"[_a-zA-Z][_0-9a-zA-Z]*")] - Ident(&'src str), - - #[regex(r#""([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*""#, |lex| lex.slice().to_owned())] - String(String), - - #[token("class")] - #[token("fun")] - #[token("var")] - #[token("if")] - #[token("else")] - Keyword(&'src str), + // Special + #[regex(r"//[^\n\r]*", logos::skip)] + #[regex(r"/\*([^*]|\*[^/])*\*/", logos::skip)] + Comment, + + #[regex(r"[ \t\n\f]+", logos::skip)] + Whitespace, + + Eof, } - -impl fmt::Display for Token<'_> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl fmt::Display for Token { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Token::Float(s) => write!(f, "{s}"), - Token::Null => write!(f, ""), - Token::Indent => write!(f, ""), - Token::NewLine => write!(f, ""), - Token::Dedent => write!(f, ""), - Token::Add => write!(f, "+"), - Token::Bool(_) => write!(f, "+"), - Token::Substract => write!(f, "-"), + Token::Identifier(ident) => write!(f, "{ident}"), + Token::Fun => write!(f, "fun"), + Token::Class => write!(f, "class"), + Token::Var => write!(f, "var"), + Token::Interface => write!(f, "interface"), + Token::Derive => write!(f, "derive"), + Token::Impl => write!(f, "impl"), + Token::If => write!(f, "if"), + Token::Else => write!(f, "else"), + Token::Return => write!(f, "->"), + Token::Enum => write!(f, "enum"), + Token::IntType => write!(f, "int"), + Token::FloatType => write!(f, "float"), + Token::BoolType => write!(f, "bool"), + Token::StringType => write!(f, "String"), + Token::StringLiteral(s) => write!(f, "{s}"), + Token::IntLiteral(i) => write!(f, "{i}"), + Token::FloatLiteral(fl) => write!(f, "{fl}"), + Token::BoolLiteral(b) => write!(f, "{b}"), + Token::Assign => write!(f, "="), + Token::Equals => write!(f, "=="), + Token::NotEquals => write!(f, "!="), + Token::Less => write!(f, "<"), + Token::LessEquals => write!(f, "<="), + Token::Greater => write!(f, ">"), + Token::GreaterEquals => write!(f, ">="), Token::Multiply => write!(f, "*"), Token::Divide => write!(f, "/"), - Token::Equals => write!(f, "="), + Token::Add => write!(f, "+"), + Token::Substract => write!(f, "-"), + Token::And => write!(f, "&&"), + Token::Or => write!(f, "||"), + Token::LParen => write!(f, "("), + Token::RParen => write!(f, ")"), + Token::LBrace => write!(f, "{{"), + Token::RBrace => write!(f, "}}"), + Token::LBracket => write!(f, "["), + Token::RBracket => write!(f, "]"), Token::Colon => write!(f, ":"), - Token::ParenBegin => write!(f, "("), - Token::ParenEnd => write!(f, ")"), - Token::BraceBegin => write!(f, "{{"), - Token::BraceEnd => write!(f, "}}"), - Token::Integer(s) => write!(f, "{s}"), - Token::Ident(s) => write!(f, "{s}"), - Token::String(s) => write!(f, "{s}"), - Token::Keyword(s) => write!(f, "{s}"), + Token::Comment => write!(f, ""), + Token::Whitespace => write!(f, ""), + Token::Eof => write!(f, ""), Token::Error => write!(f, ""), - } } -} +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index e3f4d21..35a6165 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,7 +5,7 @@ use logos::Logos; use crate::{ language_frontend::lexer::tokens::Token, language_frontend::abstract_syntax_tree::parser::parser}; -use crate::language_frontend::abstract_syntax_tree::ast::{eval, Expression}; +use crate::language_frontend::abstract_syntax_tree::ast::{Expr}; mod language_frontend;