diff --git a/Cargo.lock b/Cargo.lock index 4411f41..4368f31 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + [[package]] name = "beef" version = "0.5.2" @@ -52,6 +58,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "equivalent" version = "1.0.2" @@ -81,11 +93,37 @@ dependencies = [ "foldhash", ] +[[package]] +name = "inkwell" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67349bd7578d4afebbe15eaa642a80b884e8623db74b1716611b131feb1deef" +dependencies = [ + "either", + "inkwell_internals", + "libc", + "llvm-sys", + "once_cell", + "thiserror", +] + +[[package]] +name = "inkwell_internals" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f365c8de536236cfdebd0ba2130de22acefed18b1fb99c32783b3840aec5fb46" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "lang" version = "0.1.0" dependencies = [ "chumsky", + "inkwell", "logos", ] @@ -101,6 +139,20 @@ version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +[[package]] +name = "llvm-sys" +version = "181.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d320f9d2723c97d4b78f9190a61ed25cc7cfbe456668c08e6e7dd8e50ceb8500" +dependencies = [ + "anyhow", + "cc", + "lazy_static", + "libc", + "regex-lite", + "semver", +] + [[package]] name = "logos" version = "0.15.0" @@ -141,6 +193,12 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + [[package]] name = "proc-macro2" version = "1.0.95" @@ -179,6 +237,12 @@ dependencies = [ "regex-syntax 0.7.5", ] +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + [[package]] name = "regex-syntax" version = "0.7.5" @@ -256,6 +320,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.18" diff --git a/Cargo.toml b/Cargo.toml index f6c6272..681fc8e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,4 @@ edition = "2024" [dependencies] logos = "0.15.0" chumsky = "0.10.1" +inkwell = { version = "0.6.0", features = ["llvm18-1"] } diff --git a/src/ast/evaluator.rs b/src/ast/evaluator.rs index 69a52da..0765dd3 100644 --- a/src/ast/evaluator.rs +++ b/src/ast/evaluator.rs @@ -1,18 +1,33 @@ use crate::ast::Expression; -impl Expression { - pub fn eval(&self) -> isize { + +impl<'src> Expression<'src> { + pub fn eval(&self) -> String { match self { - Expression::Integer(n) => *n, + Expression::VariableName(_) => todo!(), + Expression::Integer(_) => todo!(), + + Expression::Float(_) => todo!(), + + Expression::String(_) => todo!(), + + Expression::Bool(_) => todo!(), + + Expression::Negatation(expression) => todo!(), + + Expression::Add(expression, expression1) => todo!(), + + Expression::Substract(expression, expression1) => todo!(), + + Expression::Multiply(expression, expression1) => todo!(), + + Expression::Divide(expression, expression1) => todo!(), + + Expression::Var { name, rhs, then } => todo!(), - Expression::Negate(rhs) => -rhs.eval(), - - Expression::Add(lhs, rhs) => lhs.eval() + rhs.eval(), - Expression::Substract(lhs, rhs) => lhs.eval() - rhs.eval(), - Expression::Multiply(lhs, rhs) => lhs.eval() * rhs.eval(), - Expression::Divide(lhs, rhs) => lhs.eval() / rhs.eval(), + Expression::Function { name, args, body, then } => todo!(), } } } \ No newline at end of file diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 24d7150..c37a3b2 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1,12 +1,36 @@ -pub mod evaluator; +use std::ops::Range; +use crate::ast::op::Op; + + +pub mod evaluator; +pub mod op; + + +/// Abstract Syntax Tree #[derive(Debug)] -pub enum Expression { - Integer(isize), - Negate(Box), - // Binary operators, - Add(Box, Box), - Substract(Box, Box), - Multiply(Box, Box), - Divide(Box, Box), +pub enum Expression<'src> { + VariableName(&'src str), + Integer(i64), + Float(f64), + String(String), + Bool(bool), + + Negatation(Box>), + Add(Box>, Box>), + Substract(Box>, Box>), + Multiply(Box>, Box>), + Divide(Box>, Box>), + + Var { + name: &'src str, + rhs: Box>, + then: Box>, + }, + Function { + name: &'src str, + args: Vec<&'src str>, + body: Box>, + then: Box>, + } } diff --git a/src/ast/op.rs b/src/ast/op.rs new file mode 100644 index 0000000..fce3a58 --- /dev/null +++ b/src/ast/op.rs @@ -0,0 +1,19 @@ +#[derive(Debug, Clone)] +pub enum Op { + Add, + Subtract, + Multiply, + Divide, +} + +impl Op { + pub fn eval(&self) -> String { + let text: &str = match self { + Op::Add => "+", + Op::Subtract => "-", + Op::Multiply => "*", + Op::Divide => "/", + }; + text.to_string() + } +} \ No newline at end of file diff --git a/src/code_generation/compiler.rs b/src/code_generation/compiler.rs new file mode 100644 index 0000000..c5fa592 --- /dev/null +++ b/src/code_generation/compiler.rs @@ -0,0 +1,29 @@ +use std::collections::HashMap; + +use inkwell::{builder::Builder, context::Context, module::Module, values::{FunctionValue, PointerValue}}; + +use crate::code_generation::Function; + +pub struct Compiler<'a, 'ctx> { + pub context: &'ctx Context, + pub builder: &'a Builder<'ctx>, + pub module: &'a Module<'ctx>, + pub function: &'a Function, + + variables: HashMap>, + fn_value_opt: Option> +} + +impl<'a, 'ctx> Compiler<'a, 'ctx> { + /// Gets a defined function given its name. + #[inline] + fn get_function(&self, name: &str) -> Option> { + self.module.get_function(name) + } + + /// Returns the `FunctionValue` representing the function being compiled. + #[inline] + fn fn_value(&self) -> FunctionValue<'ctx> { + self.fn_value_opt.unwrap() + } +} \ No newline at end of file diff --git a/src/code_generation/mod.rs b/src/code_generation/mod.rs new file mode 100644 index 0000000..6f66e7a --- /dev/null +++ b/src/code_generation/mod.rs @@ -0,0 +1,71 @@ +pub mod compiler; + +// LLVM Codegen + + +//-------------------------- +// Parser +//------------------------- + +use std::collections::HashMap; + +use logos::Lexer; + +use crate::tokens::Token; + +/// Defines a primitive expression +#[derive(Debug)] +pub enum Expr { + Binary { + op: char, + left: Box, + right: Box, + }, + Call { + fn_name: String, + args: Vec, + }, + Conditional { + cond: Box, + consequence: Box, + alternative: Box, + }, + For { + var_name: String, + start: Box, + end: Box, + step: Option>, + body: Box, + }, + Number(f64), + Variable(String), + VarIn { + variables: Vec<(String, Option)>, + body: Box, + } + +} + + +/// Defines the prototype (name and parameters) of a function +#[derive(Debug)] +pub struct Prototype { + pub name: String, + pub args: Vec, + pub is_op: bool, + pub prec: usize, +} + +/// Defines a user-defined or external function +#[derive(Debug)] +pub struct Function { + pub prototype: Prototype, + pub body: Option, + pub is_anon: bool, +} + +pub struct Parser<'a> { + tokens: Vec>, + pos: usize, + prec: &'a mut HashMap, +} diff --git a/src/main.rs b/src/main.rs index 96a31ae..d6c1873 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,6 +6,7 @@ use crate::{parser::parser, tokens::Token}; mod tokens; mod ast; mod parser; +mod code_generation; fn main() { let lexer = Token::lexer("(1 + 1) * 3"); diff --git a/src/parser.rs b/src/parser.rs index e7114ef..f033150 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,18 +1,17 @@ -use chumsky::{prelude::{just, recursive}, recursive, select, IterParser, Parser}; +use chumsky::{combinator::Or, prelude::{choice, just, recursive}, recursive, select, text::{self, ascii::ident}, IterParser, Parser}; use crate::{ast::Expression, tokens::Token}; #[allow(clippy::let_and_return)] -/* ANCHOR: parser */ pub fn parser<'src>( -) -> impl Parser<'src, &'src [Token<'src>], Expression, chumsky::extra::Err>>> +) -> impl Parser<'src, &'src [Token<'src>], Expression<'src>, chumsky::extra::Err>>> { - recursive( - |p| + let expr = recursive( + |expr| { let atom = { - let parenthesized = p + let parenthesized = expr .clone() .delimited_by(just(Token::ParenBegin), just(Token::ParenEnd)); @@ -25,7 +24,7 @@ pub fn parser<'src>( let unary = just(Token::Substract) .repeated() - .foldr(atom, |_op, rhs| Expression::Negate(Box::new(rhs))); + .foldr(atom, |_op, rhs| Expression::Negatation(Box::new(rhs))); let binary_1 = unary.clone().foldl( just(Token::Multiply) @@ -52,5 +51,7 @@ pub fn parser<'src>( ); binary_2 - }) + }); + + expr } \ No newline at end of file diff --git a/src/tokens.rs b/src/tokens.rs index 30a5b41..ec77d9e 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -2,7 +2,7 @@ use logos::{Lexer, Logos}; #[derive(Logos, Debug, Clone, PartialEq)] #[logos(skip r"[ \t\r\n\f]+")] // Skips whitespace -pub enum Token<'source> { +pub enum Token<'src> { #[token("false", |_| false)] #[token("true", |_| true)] Bool(bool), @@ -37,11 +37,11 @@ pub enum Token<'source> { #[token("}")] BraceEnd, - #[regex("[0-9]+", |lex| lex.slice().parse::().unwrap())] - Integer(isize), + #[regex("[0-9]+", |lex| lex.slice().parse::().unwrap())] + Integer(i64), #[regex(r"[_a-zA-Z][_0-9a-zA-Z]*")] - Ident(&'source str), + Ident(&'src str), #[regex(r#""([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*""#, |lex| lex.slice().to_owned())] String(String), @@ -51,7 +51,7 @@ pub enum Token<'source> { #[token("var")] #[token("if")] #[token("else")] - Keyword(&'source str), + Keyword(&'src str), }