error on whitespaces / newlines :(

This commit is contained in:
LunarAkai 2025-08-07 20:02:43 +02:00
commit 47cad9fb16
6 changed files with 129 additions and 128 deletions

View file

@ -1,47 +1,60 @@
use std::rc::Rc; use std::rc::Rc;
use chumsky::span::Span;
/// Abstract Syntax Tree /// Abstract Syntax Tree
pub type BlockStatement = Vec<Statement>; pub type BlockStatement = Vec<Statement>;
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub enum Expr { pub enum Expr {
Ident(Ident), Ident(String),
Literal(Literal),
Call(FunctionCall), IntLiteral(i64),
FloatLiteral(f64),
StringLiteral(String),
BoolLiteral(bool),
CharLiteral(char),
Null,
Call {
callee: Box<Expr>,
arguments: Vec<Expr>,
},
Unary {
operator: UnaryOp,
operand: Box<Expr>,
},
Binary {
lhs: Box<Expr>,
operator: BinaryOp,
rhs: Box<Expr>,
},
Assignment {
target: Box<Expr>,
value: Box<Expr>,
},
Error,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub struct FunctionCall { pub struct FunctionCall {
pub parameters: Vec<Expr>,
pub name: Rc<str>, pub name: Rc<str>,
pub parameters: Vec<Expr>,
pub(crate) args: Vec<String>,
} }
#[derive(Debug, Clone)] #[derive(Clone, Debug, PartialEq)]
/// A result value with one variant per supported kind.
// NOTE(review): presumably produced by evaluating an `Expr` — confirm against the evaluator.
pub enum ExprResult {
    /// A boolean value.
    Bool(bool),
    /// An unsigned, pointer-sized integer.
    UnsignedInteger(usize),
    /// A signed, pointer-sized integer.
    SignedInteger(isize),
    /// A single character.
    Char(char),
    /// A boxed inner result; it is displayed exactly like the value it wraps.
    Return(Box<ExprResult>),
    /// No value; it is displayed as the empty string.
    Void,
}

/// Renders the carried value as plain text.
///
/// Scalars print through their own `Display` impl, `Return` prints the value
/// it wraps, and `Void` prints nothing.
impl std::fmt::Display for ExprResult {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Bool(flag) => write!(f, "{flag}"),
            Self::UnsignedInteger(number) => write!(f, "{number}"),
            Self::SignedInteger(number) => write!(f, "{number}"),
            Self::Char(ch) => write!(f, "{ch}"),
            Self::Return(inner) => write!(f, "{inner}"),
            // Nothing to emit for `Void`.
            Self::Void => Ok(()),
        }
    }
}
#[derive(Clone, Debug)]
pub enum BinaryOp { pub enum BinaryOp {
Multiply, Multiply,
Divide, Divide,
@ -59,43 +72,45 @@ pub enum BinaryOp {
Or, Or,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub enum UnaryOp { pub enum UnaryOp {
Not, Not,
Minus, Minus,
Plus, Plus,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub enum Literal { pub enum Literal {
UnsignedInteger(usize), UnsignedInteger(u64),
Bool(bool), Bool(bool),
Char(char), Char(char),
String(Rc<str>), String(Rc<str>),
Int(i64),
Float(f64),
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub struct Ident(pub Rc<str>); pub struct Ident(pub Rc<str>);
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub enum Statement { pub enum Statement {
Var(Ident, Option<Type>) Var(Ident, Option<Type>)
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub struct While { pub struct While {
pub condition: Expr, pub condition: Expr,
pub body: BlockStatement, pub body: BlockStatement,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub struct Condition { pub struct Condition {
pub condition: Expr, pub condition: Expr,
pub if_body: BlockStatement, pub if_body: BlockStatement,
pub else_body: Option<BlockStatement>, pub else_body: Option<BlockStatement>,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub enum Type { pub enum Type {
UnsignedInteger, UnsignedInteger,
SignedInteger, SignedInteger,
@ -104,6 +119,7 @@ pub enum Type {
String, String,
} }
#[derive(Clone, Debug, PartialEq)]
pub enum Value { pub enum Value {
UnsignedInteger(u32), UnsignedInteger(u32),
SignedInteger(i32), SignedInteger(i32),
@ -125,7 +141,7 @@ impl Value {
} }
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub struct Function { pub struct Function {
pub name: Rc<str>, pub name: Rc<str>,
pub params: Vec<(Ident, Type)>, pub params: Vec<(Ident, Type)>,

View file

@ -1,3 +1,2 @@
pub mod ast; pub mod ast;
pub mod op;
pub mod parser; pub mod parser;

View file

@ -1,32 +1,20 @@
use chumsky::{ use chumsky::{
combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, just, nested_delimiters, recursive, via_parser}, primitive::select, recursive, select, select_ref, span::{self, SimpleSpan}, text::{self, ascii::{ident, keyword}, whitespace}, Boxed, ConfigIterParser, IterParser, Parser combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, end, just, nested_delimiters, recursive, via_parser}, primitive::select, recursive, select, select_ref, span::{self, SimpleSpan}, text::{self, ascii::{ident, keyword}, whitespace}, Boxed, ConfigIterParser, IterParser, Parser
}; };
use crate::{language_frontend::abstract_syntax_tree::ast::Expression, language_frontend::lexer::tokens::Token}; use crate::language_frontend::{abstract_syntax_tree::ast::{BinaryOp, Expr}, lexer::tokens::Token};
// goal of parsing is to construct an abstract syntax tree // goal of parsing is to construct an abstract syntax tree
#[allow(clippy::let_and_return)] #[allow(clippy::let_and_return)]
pub fn parser<'tokens, 'src: 'tokens, I>() pub fn parser<'tokens, 'src: 'tokens, I>()
-> impl Parser<'tokens, I, Expression<'src>, extra::Err<Rich<'tokens, Token<'src>>>> -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token>>>
where where
I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>, I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{ {
let ident = select! { let ident = select! {
Token::Ident(s) => s, Token::Identifier(s) => s,
}; };
let keyword = |kw: &'static str| {
select! {
Token::Keyword(k) if k == kw => (),
}
};
let eq = just(Token::Equals).labelled("=");
/* /*
let block = recursive(|block| { let block = recursive(|block| {
let indent = just(Token::NewLine) let indent = just(Token::NewLine)
@ -40,76 +28,55 @@ where
*/ */
let expr = recursive(|expr| { let expr = recursive(|expr| {
let block = expr
.clone()
.delimited_by(just(Token::BraceBegin), just(Token::BraceEnd));
// 'Atoms' are expressions that contain no ambiguity
let atom = select! { let atom = select! {
Token::Float(x) => Expression::Float(x), Token::FloatLiteral(x) => Expr::FloatLiteral(x),
Token::Integer(x) => Expression::Integer(x), Token::IntLiteral(x) => Expr::IntLiteral(x),
}.or( }
expr.clone().delimited_by(just(Token::ParenBegin), .or(expr.clone().delimited_by(just(Token::LParen), just(Token::RParen)));
just(Token::ParenEnd))
).or(block);
let mul_div = atom.clone().foldl(
let unary = just(Token::Substract) choice((
.repeated() just(Token::Multiply).to(BinaryOp::Multiply),
.foldr(atom.clone(), |_op, rhs| Expression::Negatation(Box::new(rhs))); just(Token::Divide).to(BinaryOp::Divide),
))
let mul_div = unary.clone().foldl( .then(atom)
just(Token::Multiply).or(just(Token::Divide)).then(unary).repeated(), .repeated(),
|lhs, (op, rhs)| match op { |lhs, (op, rhs)| Expr::Binary {
Token::Multiply => Expression::Multiply(Box::new(lhs), Box::new(rhs)), lhs: Box::new(lhs),
Token::Divide => Expression::Divide(Box::new(lhs), Box::new(rhs)), operator: op,
_ => unreachable!(), rhs: Box::new(rhs),
}, },
); );
let add_sub = mul_div.clone().foldl( let add_sub = mul_div.clone().foldl(
just(Token::Add).or(just(Token::Substract)).then(mul_div).repeated(), choice((
|lhs, (op, rhs)| match op { just(Token::Add).to(BinaryOp::Add),
Token::Add => Expression::Add(Box::new(lhs), Box::new(rhs)), just(Token::Substract).to(BinaryOp::Substract),
Token::Substract => Expression::Substract(Box::new(lhs), Box::new(rhs)), ))
_ => unreachable!(), .then(mul_div)
.repeated(),
|lhs, (op, rhs)| Expr::Binary {
lhs: Box::new(lhs),
operator: op,
rhs: Box::new(rhs),
}, },
); );
add_sub add_sub
}); });
let var = just(Token::Var)
.ignore_then(ident)
.then_ignore(just(Token::Assign))
.then(expr.clone())
.then_ignore(just(Token::NewLine).or_not())
.map(|(name, rhs)| Expr::Assignment {
target: Box::new(Expr::Ident(name)),
value: Box::new(rhs),
});
let decl = recursive(|decl| {
let var = keyword("var")
.ignore_then(ident)
.then_ignore(eq.clone())
.then(expr.clone())
.then(decl.clone())
.map(|((name, rhs), then)| Expression::Var {
name,
rhs: Box::new(rhs),
then: Box::new(then),
});
let fun = keyword("fun") var.or(expr)
.ignore_then(ident.clone())
.then(ident.repeated().collect::<Vec<_>>())
.then_ignore(eq.clone())
.then(expr.clone())
.then(decl)
.map(|(((name, args), body), then)| Expression::Function {
name,
args,
body: Box::new(body),
then: Box::new(then),
});
var.or(fun).or(expr)
});
decl
} }

View file

@ -4,8 +4,8 @@ use logos::{Logos};
#[derive(Logos, Debug, Clone, PartialEq)] #[derive(Logos, Debug, Clone, PartialEq)]
pub enum Token { pub enum Token {
Error, #[token(r"\n")]
NewLine,
// Identifier // Identifier
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())] #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())]
Identifier(String), Identifier(String),
@ -133,20 +133,33 @@ pub enum Token {
#[token(":")] #[token(":")]
Colon, Colon,
#[token(",")]
Comma,
#[token(".")]
Dot,
// Special // Special
#[regex(r"//[^\n\r]*", logos::skip)]
#[regex(r"//[^\r]*", logos::skip)]
#[regex(r"/\*([^*]|\*[^/])*\*/", logos::skip)] #[regex(r"/\*([^*]|\*[^/])*\*/", logos::skip)]
Comment, Comment,
#[regex(r"[ \t\n\f]+", logos::skip)]
#[regex(r"[\t\r\f]+", logos::skip)]
Whitespace, Whitespace,
Eof, Eof,
Error,
} }
impl fmt::Display for Token { impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
Token::NewLine => write!(f, ""),
Token::Identifier(ident) => write!(f, "{ident}"), Token::Identifier(ident) => write!(f, "{ident}"),
Token::Fun => write!(f, "fun"), Token::Fun => write!(f, "fun"),
Token::Class => write!(f, "class"), Token::Class => write!(f, "class"),
@ -186,6 +199,8 @@ impl fmt::Display for Token {
Token::LBracket => write!(f, "["), Token::LBracket => write!(f, "["),
Token::RBracket => write!(f, "]"), Token::RBracket => write!(f, "]"),
Token::Colon => write!(f, ":"), Token::Colon => write!(f, ":"),
Token::Comma => write!(f, ","),
Token::Dot => write!(f, "."),
Token::Comment => write!(f, ""), Token::Comment => write!(f, ""),
Token::Whitespace => write!(f, ""), Token::Whitespace => write!(f, ""),
Token::Eof => write!(f, ""), Token::Eof => write!(f, ""),

View file

@ -38,16 +38,20 @@ fn main() {
// This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string // This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string
.map((0..sourcecode.len()).into(), |(t, s): (_, _)| (t, s)); .map((0..sourcecode.len()).into(), |(t, s): (_, _)| (t, s));
println!("{:?}", sourcecode);
let lexer = Token::lexer(&sourcecode)
.spanned()
.collect::<Vec<_>>();
for token in lexer {
println!("{:?}", token);
}
match parser().parse(token_stream).into_result() { match parser().parse(token_stream).into_result() {
Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) { Ok(res) => println!("{:?}", res),
Ok(output) => println!("{output}"), Err(e) => {
Err(eval_err) => println!("Evaluation error: {eval_err}"), panic!("{:#?}", e)
}, }
Err(parse_errs) => parse_errs
.into_iter()
.for_each(|err| println!("Parse error: {err}")),
}; };
//println!("\n[result]\n{}", abstract_syntax_tree::ast::eval(absyntr, vars, funcs));
} }