error on whitespaces / newlines :(

This commit is contained in:
LunarAkai 2025-08-07 20:02:43 +02:00
commit 47cad9fb16
6 changed files with 129 additions and 128 deletions

View file

@ -1,47 +1,60 @@
use std::rc::Rc;
use chumsky::span::Span;
/// Abstract Syntax Tree
///
/// A block is represented as an ordered list of [`Statement`]s.
pub type BlockStatement = Vec<Statement>;
/// Expression node of the abstract syntax tree.
///
/// NOTE(review): this listing carries two `derive` attributes and defines the
/// `Ident` and `Call` variants twice each (a tuple form wrapping `Ident` /
/// `FunctionCall`, and a `String` / struct form). As written that does not
/// compile; confirm which set is intended and drop the other.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub enum Expr {
Ident(Ident),
Literal(Literal),
Call(FunctionCall),
/// Identifier, stored as an owned string.
Ident(String),
/// Integer literal.
IntLiteral(i64),
/// Floating-point literal.
FloatLiteral(f64),
/// String literal.
StringLiteral(String),
/// Boolean literal.
BoolLiteral(bool),
/// Character literal.
CharLiteral(char),
Null,
/// Call of `callee` with a list of argument expressions.
Call {
callee: Box<Expr>,
arguments: Vec<Expr>,
},
/// A unary operator together with its single operand.
Unary {
operator: UnaryOp,
operand: Box<Expr>,
},
/// A binary operator together with its left- and right-hand operands.
Binary {
lhs: Box<Expr>,
operator: BinaryOp,
rhs: Box<Expr>,
},
/// Assignment pairing a `target` expression with a `value` expression.
Assignment {
target: Box<Expr>,
value: Box<Expr>,
},
// NOTE(review): presumably a placeholder for input that failed to parse — confirm.
Error,
}
/// A function call: a name plus the expressions passed to it.
///
/// NOTE(review): `parameters` is declared twice below and the struct carries
/// two `derive` attributes; neither compiles as written. Confirm the intended
/// field list (and whether `args` replaces or complements `parameters`).
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub struct FunctionCall {
pub parameters: Vec<Expr>,
/// Name stored for the call (shared, immutable string).
pub name: Rc<str>,
pub parameters: Vec<Expr>,
pub(crate) args: Vec<String>,
}
/// Result value of an expression.
///
/// `Return` wraps another result; `Void` carries no value.
#[derive(Clone, Debug)]
pub enum ExprResult {
    Bool(bool),
    UnsignedInteger(usize),
    SignedInteger(isize),
    Char(char),
    Return(Box<ExprResult>),
    Void,
}

/// Textual rendering of a result.
///
/// Scalars print as their plain `Display` form, `Return` prints the wrapped
/// result, and `Void` prints nothing. Width/fill flags on the outer formatter
/// are not forwarded to the inner value.
impl std::fmt::Display for ExprResult {
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Nothing to print for a void result.
            Self::Void => Ok(()),
            Self::Return(inner) => write!(out, "{inner}"),
            Self::Char(ch) => write!(out, "{ch}"),
            Self::SignedInteger(n) => write!(out, "{n}"),
            Self::UnsignedInteger(n) => write!(out, "{n}"),
            Self::Bool(flag) => write!(out, "{flag}"),
        }
    }
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub enum BinaryOp {
Multiply,
Divide,
@ -59,43 +72,45 @@ pub enum BinaryOp {
Or,
}
/// Operator of a unary expression (the `operator` of `Expr::Unary`).
// A single derive list: two attributes that both derive `Clone` and `Debug`
// generate conflicting trait implementations.
#[derive(Clone, Debug, PartialEq)]
pub enum UnaryOp {
    Not,
    Minus,
    Plus,
}
/// Literal value appearing in the syntax tree.
// A single derive list (duplicate derives conflict), and `UnsignedInteger` is
// defined exactly once, with the fixed-width `u64` payload.
#[derive(Clone, Debug, PartialEq)]
pub enum Literal {
    /// Unsigned integer literal.
    UnsignedInteger(u64),
    /// Boolean literal.
    Bool(bool),
    /// Character literal.
    Char(char),
    /// String literal, stored as a shared immutable string.
    String(Rc<str>),
    /// Signed integer literal.
    Int(i64),
    /// Floating-point literal.
    Float(f64),
}
/// Identifier, wrapping a shared immutable string.
// A single derive list: deriving `Clone`/`Debug` twice yields conflicting impls.
#[derive(Clone, Debug, PartialEq)]
pub struct Ident(pub Rc<str>);
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub enum Statement {
Var(Ident, Option<Type>)
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub struct While {
pub condition: Expr,
pub body: BlockStatement,
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub struct Condition {
pub condition: Expr,
pub if_body: BlockStatement,
pub else_body: Option<BlockStatement>,
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub enum Type {
UnsignedInteger,
SignedInteger,
@ -104,6 +119,7 @@ pub enum Type {
String,
}
#[derive(Clone, Debug, PartialEq)]
pub enum Value {
UnsignedInteger(u32),
SignedInteger(i32),
@ -125,7 +141,7 @@ impl Value {
}
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub struct Function {
pub name: Rc<str>,
pub params: Vec<(Ident, Type)>,

View file

@ -1,3 +1,2 @@
pub mod ast;
pub mod op;
pub mod parser;

View file

@ -1,32 +1,20 @@
use chumsky::{
combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, just, nested_delimiters, recursive, via_parser}, primitive::select, recursive, select, select_ref, span::{self, SimpleSpan}, text::{self, ascii::{ident, keyword}, whitespace}, Boxed, ConfigIterParser, IterParser, Parser
combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, end, just, nested_delimiters, recursive, via_parser}, primitive::select, recursive, select, select_ref, span::{self, SimpleSpan}, text::{self, ascii::{ident, keyword}, whitespace}, Boxed, ConfigIterParser, IterParser, Parser
};
use crate::{language_frontend::abstract_syntax_tree::ast::Expression, language_frontend::lexer::tokens::Token};
use crate::language_frontend::{abstract_syntax_tree::ast::{BinaryOp, Expr}, lexer::tokens::Token};
// goal of parsing is to construct an abstract syntax tree
#[allow(clippy::let_and_return)]
pub fn parser<'tokens, 'src: 'tokens, I>()
-> impl Parser<'tokens, I, Expression<'src>, extra::Err<Rich<'tokens, Token<'src>>>>
-> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token>>>
where
I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>,
I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
let ident = select! {
Token::Ident(s) => s,
Token::Identifier(s) => s,
};
let keyword = |kw: &'static str| {
select! {
Token::Keyword(k) if k == kw => (),
}
};
let eq = just(Token::Equals).labelled("=");
/*
let block = recursive(|block| {
let indent = just(Token::NewLine)
@ -40,76 +28,55 @@ where
*/
let expr = recursive(|expr| {
let block = expr
.clone()
.delimited_by(just(Token::BraceBegin), just(Token::BraceEnd));
// 'Atoms' are expressions that contain no ambiguity
let atom = select! {
Token::Float(x) => Expression::Float(x),
Token::Integer(x) => Expression::Integer(x),
}.or(
expr.clone().delimited_by(just(Token::ParenBegin),
just(Token::ParenEnd))
).or(block);
Token::FloatLiteral(x) => Expr::FloatLiteral(x),
Token::IntLiteral(x) => Expr::IntLiteral(x),
}
.or(expr.clone().delimited_by(just(Token::LParen), just(Token::RParen)));
let unary = just(Token::Substract)
.repeated()
.foldr(atom.clone(), |_op, rhs| Expression::Negatation(Box::new(rhs)));
let mul_div = unary.clone().foldl(
just(Token::Multiply).or(just(Token::Divide)).then(unary).repeated(),
|lhs, (op, rhs)| match op {
Token::Multiply => Expression::Multiply(Box::new(lhs), Box::new(rhs)),
Token::Divide => Expression::Divide(Box::new(lhs), Box::new(rhs)),
_ => unreachable!(),
let mul_div = atom.clone().foldl(
choice((
just(Token::Multiply).to(BinaryOp::Multiply),
just(Token::Divide).to(BinaryOp::Divide),
))
.then(atom)
.repeated(),
|lhs, (op, rhs)| Expr::Binary {
lhs: Box::new(lhs),
operator: op,
rhs: Box::new(rhs),
},
);
let add_sub = mul_div.clone().foldl(
just(Token::Add).or(just(Token::Substract)).then(mul_div).repeated(),
|lhs, (op, rhs)| match op {
Token::Add => Expression::Add(Box::new(lhs), Box::new(rhs)),
Token::Substract => Expression::Substract(Box::new(lhs), Box::new(rhs)),
_ => unreachable!(),
choice((
just(Token::Add).to(BinaryOp::Add),
just(Token::Substract).to(BinaryOp::Substract),
))
.then(mul_div)
.repeated(),
|lhs, (op, rhs)| Expr::Binary {
lhs: Box::new(lhs),
operator: op,
rhs: Box::new(rhs),
},
);
add_sub
});
let var = just(Token::Var)
.ignore_then(ident)
.then_ignore(just(Token::Assign))
.then(expr.clone())
.then_ignore(just(Token::NewLine).or_not())
.map(|(name, rhs)| Expr::Assignment {
target: Box::new(Expr::Ident(name)),
value: Box::new(rhs),
});
let decl = recursive(|decl| {
let var = keyword("var")
.ignore_then(ident)
.then_ignore(eq.clone())
.then(expr.clone())
.then(decl.clone())
.map(|((name, rhs), then)| Expression::Var {
name,
rhs: Box::new(rhs),
then: Box::new(then),
});
let fun = keyword("fun")
.ignore_then(ident.clone())
.then(ident.repeated().collect::<Vec<_>>())
.then_ignore(eq.clone())
.then(expr.clone())
.then(decl)
.map(|(((name, args), body), then)| Expression::Function {
name,
args,
body: Box::new(body),
then: Box::new(then),
});
var.or(expr)
var.or(fun).or(expr)
});
decl
}

View file

@ -4,8 +4,8 @@ use logos::{Logos};
#[derive(Logos, Debug, Clone, PartialEq)]
pub enum Token {
Error,
#[token(r"\n")]
NewLine,
// Identifier
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())]
Identifier(String),
@ -133,20 +133,33 @@ pub enum Token {
#[token(":")]
Colon,
#[token(",")]
Comma,
#[token(".")]
Dot,
// Special
#[regex(r"//[^\n\r]*", logos::skip)]
#[regex(r"//[^\r]*", logos::skip)]
#[regex(r"/\*([^*]|\*[^/])*\*/", logos::skip)]
Comment,
#[regex(r"[ \t\n\f]+", logos::skip)]
#[regex(r"[\t\r\f]+", logos::skip)]
Whitespace,
Eof,
Error,
}
impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Token::NewLine => write!(f, ""),
Token::Identifier(ident) => write!(f, "{ident}"),
Token::Fun => write!(f, "fun"),
Token::Class => write!(f, "class"),
@ -186,6 +199,8 @@ impl fmt::Display for Token {
Token::LBracket => write!(f, "["),
Token::RBracket => write!(f, "]"),
Token::Colon => write!(f, ":"),
Token::Comma => write!(f, ","),
Token::Dot => write!(f, "."),
Token::Comment => write!(f, ""),
Token::Whitespace => write!(f, ""),
Token::Eof => write!(f, ""),

View file

@ -38,16 +38,20 @@ fn main() {
// This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string
.map((0..sourcecode.len()).into(), |(t, s): (_, _)| (t, s));
println!("{:?}", sourcecode);
let lexer = Token::lexer(&sourcecode)
.spanned()
.collect::<Vec<_>>();
for token in lexer {
println!("{:?}", token);
}
match parser().parse(token_stream).into_result() {
Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) {
Ok(output) => println!("{output}"),
Err(eval_err) => println!("Evaluation error: {eval_err}"),
},
Err(parse_errs) => parse_errs
.into_iter()
.for_each(|err| println!("Parse error: {err}")),
Ok(res) => println!("{:?}", res),
Err(e) => {
panic!("{:#?}", e)
}
};
//println!("\n[result]\n{}", abstract_syntax_tree::ast::eval(absyntr, vars, funcs));
}