error on whitespaces / newlines :(
This commit is contained in:
parent
55605d2cd1
commit
47cad9fb16
6 changed files with 129 additions and 128 deletions
|
|
@ -1,47 +1,60 @@
|
|||
use std::rc::Rc;
|
||||
|
||||
use chumsky::span::Span;
|
||||
|
||||
|
||||
|
||||
/// Abstract Syntax Tree
|
||||
pub type BlockStatement = Vec<Statement>;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
/// Expression node of the abstract syntax tree.
///
/// Compound variants hold their sub-expressions behind `Box` (or in a `Vec`)
/// so the enum can refer to itself recursively.
pub enum Expr {
|
||||
/// An identifier, wrapped in the `Ident` newtype.
Ident(Ident),
|
||||
/// A literal value, carried as a `Literal`.
Literal(Literal),
|
||||
/// A function call, carried as a `FunctionCall`.
Call(FunctionCall),
|
||||
/// An identifier, stored as an owned name.
Ident(String),
|
||||
|
||||
/// A literal carrying an `i64`.
IntLiteral(i64),
|
||||
|
||||
/// A literal carrying an `f64`.
FloatLiteral(f64),
|
||||
|
||||
/// A literal carrying an owned string.
StringLiteral(String),
|
||||
|
||||
/// A literal carrying a `bool`.
BoolLiteral(bool),
|
||||
|
||||
/// A literal carrying a single `char`.
CharLiteral(char),
|
||||
|
||||
/// NOTE(review): presumably the language's null literal — confirm against the parser.
Null,
|
||||
|
||||
|
||||
/// A call of `callee` with the given argument expressions.
Call {
|
||||
// The callee is itself an expression, not just a name.
callee: Box<Expr>,
|
||||
arguments: Vec<Expr>,
|
||||
},
|
||||
|
||||
/// A unary operator applied to a single operand.
Unary {
|
||||
operator: UnaryOp,
|
||||
operand: Box<Expr>,
|
||||
},
|
||||
|
||||
/// A binary operator applied to a left-hand and a right-hand operand.
Binary {
|
||||
lhs: Box<Expr>,
|
||||
operator: BinaryOp,
|
||||
rhs: Box<Expr>,
|
||||
},
|
||||
|
||||
/// An assignment of `value` to `target`.
Assignment {
|
||||
// The target is a general expression; this type does not restrict it to identifiers.
target: Box<Expr>,
|
||||
value: Box<Expr>,
|
||||
},
|
||||
/// NOTE(review): presumably a placeholder for input that failed to parse — confirm.
Error,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
/// A call to a function identified by name.
pub struct FunctionCall {
|
||||
/// Expressions supplied to the call.
pub parameters: Vec<Expr>,
|
||||
/// Name of the function, held as a reference-counted string slice.
pub name: Rc<str>,
|
||||
/// Expressions supplied to the call.
pub parameters: Vec<Expr>,
|
||||
/// NOTE(review): role relative to `parameters` is not evident from this view — confirm.
pub(crate) args: Vec<String>,
|
||||
}
|
||||
|
||||
/// Result value with a plain-text rendering through its `Display` impl.
///
/// NOTE(review): presumably produced by evaluating an `Expr`; the evaluator
/// is not visible here — confirm.
#[derive(Debug, Clone)]
|
||||
pub enum ExprResult {
|
||||
/// A boolean value.
Bool(bool),
|
||||
/// An unsigned, pointer-sized integer value.
UnsignedInteger(usize),
|
||||
/// A signed, pointer-sized integer value.
SignedInteger(isize),
|
||||
/// A single character value.
Char(char),
|
||||
/// Wraps another result; boxed because the type is recursive.
/// NOTE(review): presumably marks a value propagated by a `return` — confirm.
Return(Box<ExprResult>),
|
||||
/// No value; rendered as an empty string by `Display`.
Void,
|
||||
}
|
||||
|
||||
/// Plain-text rendering of an `ExprResult`.
impl std::fmt::Display for ExprResult {
|
||||
/// Writes the contained value using its own `Display` formatting.
///
/// `Return` is transparent (the wrapped result is written as-is) and
/// `Void` writes nothing.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
ExprResult::Bool(b) => write!(f, "{b}"),
|
||||
ExprResult::UnsignedInteger(i) => write!(f, "{i}"),
|
||||
ExprResult::SignedInteger(i) => write!(f, "{i}"),
|
||||
ExprResult::Char(c) => write!(f, "{c}"),
|
||||
// Dereference the box so the inner result is formatted by this same impl.
ExprResult::Return(v) => write!(f, "{}", *v),
|
||||
// Empty format string: nothing is emitted for `Void`.
ExprResult::Void => write!(f, ""),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum BinaryOp {
|
||||
Multiply,
|
||||
Divide,
|
||||
|
|
@ -59,43 +72,45 @@ pub enum BinaryOp {
|
|||
Or,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
/// Operator kinds usable in `Expr::Unary`, which applies one to a single operand.
pub enum UnaryOp {
|
||||
/// NOTE(review): presumably logical negation — confirm against the parser.
Not,
|
||||
/// NOTE(review): presumably arithmetic negation (prefix `-`) — confirm.
Minus,
|
||||
/// NOTE(review): presumably the prefix `+` operator — confirm.
Plus,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
/// Literal values that can appear in the syntax tree.
pub enum Literal {
|
||||
/// An unsigned integer literal, stored as `usize`.
UnsignedInteger(usize),
|
||||
/// An unsigned integer literal, stored as `u64`.
UnsignedInteger(u64),
|
||||
/// A boolean literal.
Bool(bool),
|
||||
/// A character literal.
Char(char),
|
||||
/// A string literal, held as a reference-counted string slice.
String(Rc<str>),
|
||||
/// A signed integer literal, stored as `i64`.
Int(i64),
|
||||
/// A floating-point literal, stored as `f64`.
Float(f64),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
/// Identifier name, held as a reference-counted string slice.
pub struct Ident(pub Rc<str>);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
/// Statement node of the abstract syntax tree.
pub enum Statement {
|
||||
/// A variable named by the `Ident`, with an optional `Type`.
/// NOTE(review): presumably a declaration with an optional type annotation — confirm.
Var(Ident, Option<Type>)
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
/// A `while` construct: a condition expression paired with a body block.
pub struct While {
|
||||
/// Expression controlling the loop.
pub condition: Expr,
|
||||
/// Statements making up the loop body (`BlockStatement` is `Vec<Statement>`).
pub body: BlockStatement,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
/// A conditional construct with a mandatory `if` body and an optional `else` body.
pub struct Condition {
|
||||
/// Expression that selects between the two bodies.
pub condition: Expr,
|
||||
/// Statements of the `if` branch.
pub if_body: BlockStatement,
|
||||
/// Statements of the `else` branch; `None` when there is no `else`.
pub else_body: Option<BlockStatement>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Type {
|
||||
UnsignedInteger,
|
||||
SignedInteger,
|
||||
|
|
@ -104,6 +119,7 @@ pub enum Type {
|
|||
String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Value {
|
||||
UnsignedInteger(u32),
|
||||
SignedInteger(i32),
|
||||
|
|
@ -125,7 +141,7 @@ impl Value {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Function {
|
||||
pub name: Rc<str>,
|
||||
pub params: Vec<(Ident, Type)>,
|
||||
|
|
|
|||
|
|
@ -1,3 +1,2 @@
|
|||
pub mod ast;
|
||||
pub mod op;
|
||||
pub mod parser;
|
||||
|
|
|
|||
|
|
@ -1,32 +1,20 @@
|
|||
use chumsky::{
|
||||
combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, just, nested_delimiters, recursive, via_parser}, primitive::select, recursive, select, select_ref, span::{self, SimpleSpan}, text::{self, ascii::{ident, keyword}, whitespace}, Boxed, ConfigIterParser, IterParser, Parser
|
||||
combinator::Or, error::Rich, extra, input::ValueInput, prelude::{choice, end, just, nested_delimiters, recursive, via_parser}, primitive::select, recursive, select, select_ref, span::{self, SimpleSpan}, text::{self, ascii::{ident, keyword}, whitespace}, Boxed, ConfigIterParser, IterParser, Parser
|
||||
};
|
||||
|
||||
use crate::{language_frontend::abstract_syntax_tree::ast::Expression, language_frontend::lexer::tokens::Token};
|
||||
use crate::language_frontend::{abstract_syntax_tree::ast::{BinaryOp, Expr}, lexer::tokens::Token};
|
||||
|
||||
// goal of parsing is to construct an abstract syntax tree
|
||||
|
||||
#[allow(clippy::let_and_return)]
|
||||
pub fn parser<'tokens, 'src: 'tokens, I>()
|
||||
-> impl Parser<'tokens, I, Expression<'src>, extra::Err<Rich<'tokens, Token<'src>>>>
|
||||
-> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token>>>
|
||||
where
|
||||
I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>,
|
||||
I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
|
||||
{
|
||||
let ident = select! {
|
||||
Token::Ident(s) => s,
|
||||
Token::Identifier(s) => s,
|
||||
};
|
||||
|
||||
|
||||
let keyword = |kw: &'static str| {
|
||||
select! {
|
||||
Token::Keyword(k) if k == kw => (),
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
let eq = just(Token::Equals).labelled("=");
|
||||
|
||||
/*
|
||||
let block = recursive(|block| {
|
||||
let indent = just(Token::NewLine)
|
||||
|
|
@ -40,76 +28,55 @@ where
|
|||
*/
|
||||
|
||||
let expr = recursive(|expr| {
|
||||
let block = expr
|
||||
.clone()
|
||||
.delimited_by(just(Token::BraceBegin), just(Token::BraceEnd));
|
||||
|
||||
|
||||
|
||||
// 'Atoms' are expressions that contain no ambiguity
|
||||
let atom = select! {
|
||||
Token::Float(x) => Expression::Float(x),
|
||||
Token::Integer(x) => Expression::Integer(x),
|
||||
}.or(
|
||||
expr.clone().delimited_by(just(Token::ParenBegin),
|
||||
just(Token::ParenEnd))
|
||||
).or(block);
|
||||
Token::FloatLiteral(x) => Expr::FloatLiteral(x),
|
||||
Token::IntLiteral(x) => Expr::IntLiteral(x),
|
||||
}
|
||||
.or(expr.clone().delimited_by(just(Token::LParen), just(Token::RParen)));
|
||||
|
||||
|
||||
let unary = just(Token::Substract)
|
||||
.repeated()
|
||||
.foldr(atom.clone(), |_op, rhs| Expression::Negatation(Box::new(rhs)));
|
||||
|
||||
let mul_div = unary.clone().foldl(
|
||||
just(Token::Multiply).or(just(Token::Divide)).then(unary).repeated(),
|
||||
|lhs, (op, rhs)| match op {
|
||||
Token::Multiply => Expression::Multiply(Box::new(lhs), Box::new(rhs)),
|
||||
Token::Divide => Expression::Divide(Box::new(lhs), Box::new(rhs)),
|
||||
_ => unreachable!(),
|
||||
let mul_div = atom.clone().foldl(
|
||||
choice((
|
||||
just(Token::Multiply).to(BinaryOp::Multiply),
|
||||
just(Token::Divide).to(BinaryOp::Divide),
|
||||
))
|
||||
.then(atom)
|
||||
.repeated(),
|
||||
|lhs, (op, rhs)| Expr::Binary {
|
||||
lhs: Box::new(lhs),
|
||||
operator: op,
|
||||
rhs: Box::new(rhs),
|
||||
},
|
||||
);
|
||||
|
||||
let add_sub = mul_div.clone().foldl(
|
||||
just(Token::Add).or(just(Token::Substract)).then(mul_div).repeated(),
|
||||
|lhs, (op, rhs)| match op {
|
||||
Token::Add => Expression::Add(Box::new(lhs), Box::new(rhs)),
|
||||
Token::Substract => Expression::Substract(Box::new(lhs), Box::new(rhs)),
|
||||
_ => unreachable!(),
|
||||
choice((
|
||||
just(Token::Add).to(BinaryOp::Add),
|
||||
just(Token::Substract).to(BinaryOp::Substract),
|
||||
))
|
||||
.then(mul_div)
|
||||
.repeated(),
|
||||
|lhs, (op, rhs)| Expr::Binary {
|
||||
lhs: Box::new(lhs),
|
||||
operator: op,
|
||||
rhs: Box::new(rhs),
|
||||
},
|
||||
);
|
||||
|
||||
add_sub
|
||||
});
|
||||
|
||||
|
||||
let var = just(Token::Var)
|
||||
.ignore_then(ident)
|
||||
.then_ignore(just(Token::Assign))
|
||||
.then(expr.clone())
|
||||
.then_ignore(just(Token::NewLine).or_not())
|
||||
.map(|(name, rhs)| Expr::Assignment {
|
||||
target: Box::new(Expr::Ident(name)),
|
||||
value: Box::new(rhs),
|
||||
});
|
||||
|
||||
let decl = recursive(|decl| {
|
||||
let var = keyword("var")
|
||||
.ignore_then(ident)
|
||||
.then_ignore(eq.clone())
|
||||
.then(expr.clone())
|
||||
.then(decl.clone())
|
||||
.map(|((name, rhs), then)| Expression::Var {
|
||||
name,
|
||||
rhs: Box::new(rhs),
|
||||
then: Box::new(then),
|
||||
});
|
||||
|
||||
let fun = keyword("fun")
|
||||
.ignore_then(ident.clone())
|
||||
.then(ident.repeated().collect::<Vec<_>>())
|
||||
.then_ignore(eq.clone())
|
||||
.then(expr.clone())
|
||||
.then(decl)
|
||||
.map(|(((name, args), body), then)| Expression::Function {
|
||||
name,
|
||||
args,
|
||||
body: Box::new(body),
|
||||
then: Box::new(then),
|
||||
});
|
||||
var.or(expr)
|
||||
|
||||
|
||||
var.or(fun).or(expr)
|
||||
});
|
||||
|
||||
decl
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ use logos::{Logos};
|
|||
|
||||
#[derive(Logos, Debug, Clone, PartialEq)]
|
||||
pub enum Token {
|
||||
Error,
|
||||
|
||||
#[token(r"\n")]
|
||||
NewLine,
|
||||
// Identifier
|
||||
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())]
|
||||
Identifier(String),
|
||||
|
|
@ -133,20 +133,33 @@ pub enum Token {
|
|||
#[token(":")]
|
||||
Colon,
|
||||
|
||||
#[token(",")]
|
||||
Comma,
|
||||
|
||||
#[token(".")]
|
||||
Dot,
|
||||
|
||||
// Special
|
||||
#[regex(r"//[^\n\r]*", logos::skip)]
|
||||
|
||||
|
||||
#[regex(r"//[^\r]*", logos::skip)]
|
||||
#[regex(r"/\*([^*]|\*[^/])*\*/", logos::skip)]
|
||||
Comment,
|
||||
|
||||
#[regex(r"[ \t\n\f]+", logos::skip)]
|
||||
|
||||
|
||||
#[regex(r"[\t\r\f]+", logos::skip)]
|
||||
Whitespace,
|
||||
|
||||
|
||||
Eof,
|
||||
|
||||
Error,
|
||||
}
|
||||
|
||||
impl fmt::Display for Token {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Token::NewLine => write!(f, ""),
|
||||
Token::Identifier(ident) => write!(f, "{ident}"),
|
||||
Token::Fun => write!(f, "fun"),
|
||||
Token::Class => write!(f, "class"),
|
||||
|
|
@ -186,6 +199,8 @@ impl fmt::Display for Token {
|
|||
Token::LBracket => write!(f, "["),
|
||||
Token::RBracket => write!(f, "]"),
|
||||
Token::Colon => write!(f, ":"),
|
||||
Token::Comma => write!(f, ","),
|
||||
Token::Dot => write!(f, "."),
|
||||
Token::Comment => write!(f, ""),
|
||||
Token::Whitespace => write!(f, ""),
|
||||
Token::Eof => write!(f, ""),
|
||||
|
|
|
|||
22
src/main.rs
22
src/main.rs
|
|
@ -38,16 +38,20 @@ fn main() {
|
|||
// This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string
|
||||
.map((0..sourcecode.len()).into(), |(t, s): (_, _)| (t, s));
|
||||
|
||||
println!("{:?}", sourcecode);
|
||||
|
||||
let lexer = Token::lexer(&sourcecode)
|
||||
.spanned()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for token in lexer {
|
||||
println!("{:?}", token);
|
||||
}
|
||||
|
||||
match parser().parse(token_stream).into_result() {
|
||||
Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) {
|
||||
Ok(output) => println!("{output}"),
|
||||
Err(eval_err) => println!("Evaluation error: {eval_err}"),
|
||||
},
|
||||
Err(parse_errs) => parse_errs
|
||||
.into_iter()
|
||||
.for_each(|err| println!("Parse error: {err}")),
|
||||
Ok(res) => println!("{:?}", res),
|
||||
Err(e) => {
|
||||
panic!("{:#?}", e)
|
||||
}
|
||||
};
|
||||
|
||||
//println!("\n[result]\n{}", abstract_syntax_tree::ast::eval(absyntr, vars, funcs));
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue