From 8ce2b5aad2b7ad3f5a995648cf203eddcaced1ab Mon Sep 17 00:00:00 2001 From: LunarAkai Date: Fri, 8 Aug 2025 20:02:54 +0200 Subject: [PATCH] parser works for var decl with assignment --- .../abstract_syntax_tree/ast.rs | 111 +----------------- .../abstract_syntax_tree/definitions.rs | 110 +++++++++++++++++ .../abstract_syntax_tree/mod.rs | 1 + .../abstract_syntax_tree/parser.rs | 14 ++- src/language_frontend/lexer/tokens.rs | 2 +- src/main.rs | 25 ++-- 6 files changed, 139 insertions(+), 124 deletions(-) create mode 100644 src/language_frontend/abstract_syntax_tree/definitions.rs diff --git a/src/language_frontend/abstract_syntax_tree/ast.rs b/src/language_frontend/abstract_syntax_tree/ast.rs index 868598b..74463da 100644 --- a/src/language_frontend/abstract_syntax_tree/ast.rs +++ b/src/language_frontend/abstract_syntax_tree/ast.rs @@ -1,11 +1,4 @@ -use std::rc::Rc; - -use chumsky::span::Span; - - - -/// Abstract Syntax Tree -pub type BlockStatement = Vec; +use crate::language_frontend::abstract_syntax_tree::definitions::*; #[derive(Clone, Debug, PartialEq)] pub enum Expr { @@ -50,106 +43,4 @@ pub enum Expr { value: Box, }, Error, -} - -#[derive(Clone, Debug, PartialEq)] -pub struct FunctionCall { - pub name: Rc, - pub parameters: Vec, - pub(crate) args: Vec, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum BinaryOp { - Multiply, - Divide, - Add, - Substract, - - Equals, - NotEquals, - Less, - LessEquals, - Greater, - GreaterEquals, - - And, - Or, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum UnaryOp { - Not, - Minus, - Plus, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum Literal { - UnsignedInteger(u64), - Bool(bool), - Char(char), - String(Rc), - Int(i64), - Float(f64), -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Ident(pub Rc); - -#[derive(Clone, Debug, PartialEq)] -pub enum Statement { - Var(Ident, Option) -} - -#[derive(Clone, Debug, PartialEq)] -pub struct While { - pub condition: Expr, - pub body: BlockStatement, -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Condition { - pub condition: Expr, - pub if_body: BlockStatement, - pub else_body: Option, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum Type { - UnsignedInteger, - SignedInteger, - Bool, - Char, - String, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum Value { - UnsignedInteger(u32), - SignedInteger(i32), - Bool(bool), - Char(char), - String(String), -} - -impl Value { - pub fn is_type(&self, ty: &Type) -> bool { - match (ty, self) { - (Type::Bool, Value::Bool(_)) => true, - (Type::Char, Value::Char(_)) => true, - (Type::SignedInteger, Value::SignedInteger(_)) => true, - (Type::UnsignedInteger, Value::UnsignedInteger(_)) => true, - (Type::String, Value::String(_)) => true, - _ => false, - } - } -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Function { - pub name: Rc, - pub params: Vec<(Ident, Type)>, - pub return_type: Option, - pub body: Vec, } \ No newline at end of file diff --git a/src/language_frontend/abstract_syntax_tree/definitions.rs b/src/language_frontend/abstract_syntax_tree/definitions.rs new file mode 100644 index 0000000..21e8861 --- /dev/null +++ b/src/language_frontend/abstract_syntax_tree/definitions.rs @@ -0,0 +1,110 @@ +use std::{ops::Range, rc::Rc}; + +use crate::language_frontend::abstract_syntax_tree::ast::Expr; + +/// Abstract Syntax Tree +pub type BlockStatement = Vec; + +pub type Span = Range; + + +#[derive(Clone, Debug, PartialEq)] +pub enum Statement { + Var(Ident, Option) +} + +#[derive(Clone, Debug, PartialEq)] +pub enum BinaryOp { + Multiply, + Divide, + Add, + Substract, + + Equals, + NotEquals, + Less, + LessEquals, + Greater, + GreaterEquals, + + And, + Or, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum UnaryOp { + Not, + Minus, + Plus, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Literal { + UnsignedInteger(u64), + Bool(bool), + Char(char), + String(Rc), + Int(i64), + Float(f64), +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Type { + UnsignedInteger, + SignedInteger, + Bool, + Char, + String, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Value { + UnsignedInteger(u32), + SignedInteger(i32), + Bool(bool), + Char(char), + String(String), +} + +impl Value { + pub fn is_type(&self, ty: &Type) -> bool { + match (ty, self) { + (Type::Bool, Value::Bool(_)) => true, + (Type::Char, Value::Char(_)) => true, + (Type::SignedInteger, Value::SignedInteger(_)) => true, + (Type::UnsignedInteger, Value::UnsignedInteger(_)) => true, + (Type::String, Value::String(_)) => true, + _ => false, + } + } +} + + +//--------------------------------------- +// Structs +//--------------------------------------- +#[derive(Clone, Debug, PartialEq)] +pub struct Ident(pub Rc); + + +#[derive(Clone, Debug, PartialEq)] +pub struct While { + pub condition: Expr, + pub body: BlockStatement, +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Condition { + pub condition: Expr, + pub if_body: BlockStatement, + pub else_body: Option, +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Function { + pub name: Rc, + pub params: Vec<(Ident, Type)>, + pub return_type: Option, + pub body: Vec, +} + diff --git a/src/language_frontend/abstract_syntax_tree/mod.rs b/src/language_frontend/abstract_syntax_tree/mod.rs index a310c76..1687eca 100644 --- a/src/language_frontend/abstract_syntax_tree/mod.rs +++ b/src/language_frontend/abstract_syntax_tree/mod.rs @@ -1,2 +1,3 @@ pub mod ast; +pub mod definitions; pub mod parser; diff --git a/src/language_frontend/abstract_syntax_tree/parser.rs b/src/language_frontend/abstract_syntax_tree/parser.rs index d97eab2..6453999 100644 --- a/src/language_frontend/abstract_syntax_tree/parser.rs +++ b/src/language_frontend/abstract_syntax_tree/parser.rs @@ -3,16 +3,14 @@ use chumsky::{ }; use logos::{source, Logos}; -use crate::language_frontend::{abstract_syntax_tree::ast::{BinaryOp, Expr, UnaryOp}, lexer::tokens::{self, Token}}; +use crate::language_frontend::{abstract_syntax_tree::{ast::Expr, definitions::*}, lexer::tokens::Token}; // goal of parsing is to construct an abstract syntax tree - - pub fn parse(source: &str) ->Result, Vec>> { let token_iter = Token::lexer(source).spanned().map(|(token, span)| (token.unwrap_or(Token::Error), span.into())); - let end_of_input: SimpleSpan = (source.len()..source.len()).into(); + let end_of_input: SimpleSpan = (0..source.len()).into(); let token_stream = Stream::from_iter(token_iter) // Tell chumsky to split the (Token, SimpleSpan) stream into its parts so that it can handle the spans for us // This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string @@ -37,7 +35,6 @@ where block.with_ctx(0) }); - */ let expr = recursive(|expr| { @@ -93,3 +90,10 @@ where decl.repeated().collect() } + +#[cfg(test)] +mod tests { + use super::*; + + +} \ No newline at end of file diff --git a/src/language_frontend/lexer/tokens.rs b/src/language_frontend/lexer/tokens.rs index ee0fad9..fe4711c 100644 --- a/src/language_frontend/lexer/tokens.rs +++ b/src/language_frontend/lexer/tokens.rs @@ -3,7 +3,7 @@ use std::fmt; use logos::{Logos}; #[derive(Logos, Debug, Clone, PartialEq)] -#[regex(r"[ \t\f]+", logos::skip)] +#[regex(r"[\t\f]+", logos::skip)] pub enum Token { // Identifier #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())] diff --git a/src/main.rs b/src/main.rs index ad6afc7..d2cbcad 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,14 @@ use chumsky::input::{Input, Stream}; -use chumsky::prelude::end; use chumsky::Parser; use logos::Logos; -use crate::language_frontend::abstract_syntax_tree::parser::parse; - mod language_frontend; +use crate::{ + language_frontend::lexer::tokens::Token, language_frontend::abstract_syntax_tree::parser::parse}; + +use crate::language_frontend::abstract_syntax_tree::ast::{Expr}; + /* Simple Compiler -> 4 Stages: - lex @@ -24,17 +26,24 @@ fn main() { println!("{:?}", sourcecode); - /* + let lexer = Token::lexer(&sourcecode) - .spanned(); - //.collect::>(); + .spanned() + .collect::>(); for token in lexer { println!("{:?}", token); } - */ + + let token_iter = Token::lexer(&sourcecode).spanned().map(|(tok, span)| tok.map(|t| (t, span))).filter_map(Result::ok); - match parse(&sourcecode) { + let token_stream = Stream::from_iter(token_iter) + // Tell chumsky to split the (Token, SimpleSpan) stream into its parts so that it can handle the spans for us + // This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string + .map((0..sourcecode.len()).into(), |(t, s): (_, _)| (t, s)); + + + match parse(&sourcecode) { Ok(res) => println!("{:?}", res), Err(e) => { panic!("{:#?}", e)