diff --git a/Cargo.lock b/Cargo.lock index c877797..bf3e5fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -350,6 +350,7 @@ dependencies = [ "num-traits", "regex", "rlox2-frontend", + "static_assertions", "thiserror", ] @@ -379,6 +380,12 @@ version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.10.0" diff --git a/frontend/src/lexer/_lexer.rs b/frontend/src/lexer/_lexer.rs index 8c03a5e..190b545 100644 --- a/frontend/src/lexer/_lexer.rs +++ b/frontend/src/lexer/_lexer.rs @@ -85,43 +85,43 @@ impl Lexer { let c = self.advance(); - let token_type = match c { - '(' => Some(LeftParen), - ')' => Some(RightParen), - '{' => Some(LeftBrace), - '}' => Some(RightBrace), - ',' => Some(Comma), - '.' => Some(Dot), - '+' => Some(Plus), - '-' => Some(Minus), - ';' => Some(Semicolon), - '*' => Some(Star), + match c { + '(' => self.push_token(LeftParen), + ')' => self.push_token(RightParen), + '{' => self.push_token(LeftBrace), + '}' => self.push_token(RightBrace), + ',' => self.push_token(Comma), + '.' => self.push_token(Dot), + '+' => self.push_token(Plus), + '-' => self.push_token(Minus), + ';' => self.push_token(Semicolon), + '*' => self.push_token(Star), '!' => { if self.consume('=') { - Some(BangEqual) + self.push_token(BangEqual) } else { - Some(Bang) + self.push_token(Bang) } } '=' => { if self.consume('=') { - Some(EqualEqual) + self.push_token(EqualEqual) } else { - Some(Equal) + self.push_token(Equal) } } '<' => { if self.consume('=') { - Some(LessEqual) + self.push_token(LessEqual) } else { - Some(Less) + self.push_token(Less) } } '>' => { if self.consume('=') { - Some(GreaterEqual) + self.push_token(GreaterEqual) } else { - Some(Greater) + self.push_token(Greater) } } '/' => { @@ -129,8 +129,6 @@ impl Lexer { // line comment // advance until either source is empty or newline if found while !self.source_is_empty() && self.advance() != '\n' {} - - None } else if self.consume('*') { // block comment @@ -166,28 +164,21 @@ impl Lexer { self.advance(); } - - None } else { - Some(Slash) + self.push_token(Slash) } } '"' => self.try_parse_string(), '0'..='9' => self.try_parse_number(), - ' ' | '\r' | '\n' | '\t' => None, // handled automatically in advance() + ' ' | '\r' | '\n' | '\t' => {} // handled automatically in advance() c @ '_' | c if c.is_ascii_alphabetic() => self.try_parse_identifier(), _ => { self.errors.push(LexerError::UnexpectedCharacter { c, code_pos: self.code_pos, }); - None } }; - - if let Some(token_type) = token_type { - self.push_token(token_type); - } } fn source_is_empty(&self) -> bool { @@ -235,23 +226,24 @@ impl Lexer { self.tokens.push(Token::new(token_type, self.code_pos)); } - fn try_parse_string(&mut self) -> Option { + fn try_parse_string(&mut self) { // advance until second " while self.advance() != '"' { if self.source_is_empty() { self.errors.push(LexerError::UnterminatedStringLiteral { code_pos: self.code_pos, }); - return None; + return; } } let string_literal = self.source[self.start + 1..self.current - 1].iter().collect(); - Some(TokenType::String(string_literal)) + // Some(TokenType::String(Box::new(string_literal))) + self.tokens.push(Token::new_string(string_literal, self.code_pos)); } - fn try_parse_number(&mut self) -> Option { + fn try_parse_number(&mut self) { let is_some_digit = |c: Option| c.map_or(false, |c| c.is_ascii_digit()); // eat all digits @@ -289,14 +281,15 @@ impl Lexer { msg: err.to_string(), code_pos: self.code_pos, }); - return None; + return; } }; - Some(TokenType::Number(num)) + // Some(TokenType::Number(num)) + self.tokens.push(Token::new_number(num, self.code_pos)); } - fn try_parse_identifier(&mut self) -> Option { + fn try_parse_identifier(&mut self) { let is_alpha_num_underscore = |c: Option| c.map_or(false, |c| matches!(c, '0'..='9' | 'A'..='Z' | '_' | 'a'..='z')); @@ -306,8 +299,18 @@ impl Lexer { let lexeme: String = self.source[self.start..self.current].iter().collect(); - let token_type = KEYWORDS.get(&lexeme).cloned().unwrap_or(TokenType::Identifier(lexeme)); + /* let token_type = KEYWORDS + .get(&lexeme) + .cloned() + .unwrap_or(TokenType::Identifier(Box::new(lexeme))); */ - Some(token_type) + if let Some(&token_type) = KEYWORDS.get(&lexeme) { + // Token::new(token_type, self.code_pos) + self.push_token(token_type); + } else { + self.tokens.push(Token::new_identifier(lexeme, self.code_pos)); + } + + // Some(token_type) } } diff --git a/frontend/src/lexer/token.rs b/frontend/src/lexer/token.rs index 6956e07..4005ccd 100644 --- a/frontend/src/lexer/token.rs +++ b/frontend/src/lexer/token.rs @@ -1,7 +1,10 @@ +use std::fmt::{Debug, Display}; +use std::mem::ManuallyDrop; + use super::CodePos; -#[allow(dead_code, clippy::upper_case_acronyms)] -#[derive(Debug, Clone, PartialEq)] +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq)] #[rustfmt::skip] pub enum TokenType { // Single-character tokens @@ -14,44 +17,139 @@ pub enum TokenType { Greater, GreaterEqual, Less, LessEqual, - // Literals - Identifier(String), - String(String), - Number(f64), + // Identifier and literals + Identifier, String, Number, // Keywords And, Break, Class, Else, False, Fun, For, If, Nil, Or, Print, Return, Super, This, True, Var, While, + #[allow(dead_code, clippy::upper_case_acronyms)] EOF } -#[derive(Clone)] +union TokenData { + none: (), + #[allow(clippy::box_collection)] + s: ManuallyDrop>, + num: f64, +} + +impl TokenData { + fn none() -> Self { + TokenData { none: () } + } + + fn string(s: String) -> Self { + let s = ManuallyDrop::new(Box::new(s)); + TokenData { s } + } + + fn num(num: f64) -> Self { + TokenData { num } + } +} + pub struct Token { pub token_type: TokenType, // pub lexeme: String, + data: TokenData, pub code_pos: CodePos, } impl Token { - pub fn new(token_type: TokenType, pos: CodePos) -> Self { + pub fn new(token_type: TokenType, code_pos: CodePos) -> Self { Token { token_type, // lexeme, - code_pos: pos, + data: TokenData::none(), + code_pos, + } + } + + pub fn new_string(s: String, code_pos: CodePos) -> Self { + Token { + token_type: TokenType::String, + data: TokenData::string(s), + code_pos, + } + } + + pub fn new_identifier(name: String, code_pos: CodePos) -> Self { + Token { + token_type: TokenType::Identifier, + data: TokenData::string(name), + code_pos, + } + } + + pub fn new_number(num: f64, code_pos: CodePos) -> Self { + Token { + token_type: TokenType::Number, + data: TokenData::num(num), + code_pos, + } + } + + pub fn string_data(self) -> String { + assert!(self.token_type == TokenType::String || self.token_type == TokenType::Identifier); + + // std::mem::take(&mut self.data.s) + unsafe { + let mut me = self; + + let s = std::mem::take(&mut me.data.s); + + *ManuallyDrop::into_inner(s) + } + } + + pub fn num_data(self) -> f64 { + assert_eq!(self.token_type, TokenType::Number); + + unsafe { self.data.num } + } +} + +impl Debug for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // write!(f, "<{:?}>", self.token_type) + match self.token_type { + TokenType::Number => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.num) }, + TokenType::String => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) }, + TokenType::Identifier => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) }, + _ => write!(f, "<{:?}>", self.token_type), } } } -impl std::fmt::Debug for Token { +impl Display for Token { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "<{:?}>", self.token_type) - // write!(f, "<{:?}> (\"{}\")", self.token_type, self.lexeme) + // write!(f, "<{:?}>", self.token_type) + match self.token_type { + TokenType::Number => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.num) }, + TokenType::String => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) }, + TokenType::Identifier => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) }, + _ => write!(f, "<{:?}>", self.token_type), + } } } -impl std::fmt::Display for Token { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "<{:?}>", self.token_type) +/* impl Clone for Token { + fn clone(&self) -> Self { + let code_pos = self.code_pos; + + match self.token_type { + TokenType::Number => Token::new_number(self.num_data(), code_pos), + TokenType::String => unsafe { Token::new_string(self.data.s.as_ref().clone(), code_pos) }, + TokenType::Identifier => unsafe { Token::new_identifier(self.data.s.as_ref().clone(), code_pos) }, + token_type => Token::new(token_type, code_pos), + } + } +} */ + +impl Drop for Token { + fn drop(&mut self) { + if self.token_type == TokenType::String {} } } diff --git a/frontend/src/parser/parse.rs b/frontend/src/parser/_parser.rs similarity index 96% rename from frontend/src/parser/parse.rs rename to frontend/src/parser/_parser.rs index f814f7f..ce8f2db 100644 --- a/frontend/src/parser/parse.rs +++ b/frontend/src/parser/_parser.rs @@ -101,7 +101,7 @@ impl Parser { } } - // me.consume_token(TokenType::EOF).unwrap(); + assert_eq!(me.next_token().token_type, TokenType::EOF); if !me.parse_errors.is_empty() { Err(me.parse_errors) @@ -324,12 +324,10 @@ impl Parser { // self.consume_token(TokenType::Var)?; assert_eq!(self.next_token().token_type, TokenType::Var); - let name = match self.next_token() { - Token { - token_type: TokenType::Identifier(name), - .. - } => name, - token => return Err(ParserError::ExpectedVarName { token }), + let token = self.next_token(); + let name = match token.token_type { + TokenType::Identifier => token.string_data(), + _ => return Err(ParserError::ExpectedVarName { token }), }; let initializer = if self.peek_token().token_type == TokenType::Equal { @@ -738,8 +736,8 @@ impl Parser { match token.token_type { TokenType::Fun => Ok(self.fun_params_and_body("")?), - TokenType::Number(num) => Ok(Expr::number(num)), - TokenType::String(s) => Ok(Expr::string(s)), + TokenType::Number => Ok(Expr::number(token.num_data())), + TokenType::String => Ok(Expr::string(token.string_data())), TokenType::False => Ok(Expr::bool(false)), TokenType::True => Ok(Expr::bool(true)), TokenType::Nil => Ok(Expr::nil()), @@ -768,7 +766,9 @@ impl Parser { Ok(Expr::grouping(expr)) } - TokenType::Identifier(name) => Ok(Expr::Variable { name }), + TokenType::Identifier => Ok(Expr::Variable { + name: token.string_data(), + }), _ => Err(ParserError::ExpectedPrimary { token }), } } @@ -781,13 +781,7 @@ impl Parser { fn identifier(&mut self, msg: &str) -> ParserResult { match self.peek_token().token_type { - TokenType::Identifier(_) => { - if let TokenType::Identifier(name) = self.next_token().token_type { - Ok(name) - } else { - unreachable!() - } - } + TokenType::Identifier => Ok(self.next_token().string_data()), _ => Err(ParserError::MissingIdentifier { msg: msg.to_owned(), code_pos: self.peek_token().code_pos, @@ -834,7 +828,8 @@ impl Parser { let _ = self.next_token(); Ok(()) } - TokenType::EOF => Err(err_fn(self.peek_token().clone())), + // call err_fn with dummy token so we don't have to eat the EOF token + TokenType::EOF => Err(err_fn(Token::new(TokenType::EOF, self.peek_token().code_pos))), _ => Err(err_fn(self.next_token())), } } diff --git a/frontend/src/parser/mod.rs b/frontend/src/parser/mod.rs index 4a93b66..5d46246 100644 --- a/frontend/src/parser/mod.rs +++ b/frontend/src/parser/mod.rs @@ -1,10 +1,10 @@ +mod _parser; mod error; mod expr; mod misc; -mod parse; mod stmt; +pub use _parser::parse_tokens; pub use error::ParserError; pub use expr::{BinaryOp, Expr, Literal, LogicalOp, UnaryOp}; -pub use parse::parse_tokens; pub use stmt::Stmt; diff --git a/interpreter/src/resolver/resolve.rs b/interpreter/src/resolver/_resolver.rs similarity index 100% rename from interpreter/src/resolver/resolve.rs rename to interpreter/src/resolver/_resolver.rs diff --git a/interpreter/src/resolver/mod.rs b/interpreter/src/resolver/mod.rs index 86fe52f..82139e3 100644 --- a/interpreter/src/resolver/mod.rs +++ b/interpreter/src/resolver/mod.rs @@ -1,5 +1,5 @@ +mod _resolver; mod error; -mod resolve; +pub use _resolver::resolve; pub use error::ResolverError; -pub use resolve::resolve; diff --git a/interpreter/src/run.rs b/interpreter/src/run.rs index f804841..5397de3 100644 --- a/interpreter/src/run.rs +++ b/interpreter/src/run.rs @@ -64,12 +64,9 @@ pub fn run_repl(runtime: &mut Runtime) { pub fn run(source: &str, runtime: &mut Runtime) -> Result<(), LoxError> { let tokens: Vec = scan_tokens(source)?; - /* let token_str = tokens - .iter() - .map(|token| token.to_string()) - .join(" "); + let token_str = itertools::Itertools::join(&mut tokens.iter().map(|token| token.to_string()), " "); - println!("{token_str}"); */ + println!("{token_str}"); let statements = parse_tokens(tokens)?; diff --git a/src/main.rs b/src/main.rs index 1bb5db7..1c6782b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,7 +22,7 @@ fn main() { let cli_args = CliArgs::parse(); if cli_args.vm { - use rlox2_vm::InterpretError; + use rlox2_vm::LoxError; let mut vm = rlox2_vm::VM::default(); @@ -35,9 +35,9 @@ fn main() { if let Err(err) = rlox2_vm::run(&source, &mut vm) { eprintln!("{err}"); match err { - InterpretError::LexerError { .. } | InterpretError::CompileError { .. } => std::process::exit(65), - InterpretError::RuntimeError { .. } => std::process::exit(70), - InterpretError::Exit { exit_code } => std::process::exit(exit_code), + LoxError::LexerError { .. } | LoxError::CompileError { .. } => std::process::exit(65), + LoxError::RuntimeError { .. } => std::process::exit(70), + LoxError::Exit { exit_code } => std::process::exit(exit_code), } } diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 884ad15..09cd212 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -15,3 +15,4 @@ num-derive = "0.3.3" num-traits = "0.2.15" regex = "1.7.1" thiserror = "1.0.38" +static_assertions = "1.1.0" diff --git a/vm/src/compile.rs b/vm/src/compile.rs deleted file mode 100644 index 561de7c..0000000 --- a/vm/src/compile.rs +++ /dev/null @@ -1,6 +0,0 @@ -use itertools::Itertools; -use rlox2_frontend::lexer::Token; - -pub fn compile(tokens: Vec) { - println!("{}", tokens.iter().map(|token| token.to_string()).join(" ")); -} diff --git a/vm/src/compiler.rs b/vm/src/compiler.rs new file mode 100644 index 0000000..9937964 --- /dev/null +++ b/vm/src/compiler.rs @@ -0,0 +1,433 @@ +use std::vec::IntoIter; + +use static_assertions::const_assert_eq; + +use num_derive::FromPrimitive; +use num_traits::FromPrimitive; +use rlox2_frontend::lexer::{Token, TokenType}; + +use crate::debug::DEBUG_PRINT_CODE; +use crate::error::CompilerError; +use crate::misc::u16_to_bytes; +use crate::{gen_rules_table, single_rule, Chunk, Opcode, Value}; + +/*====================================================================================================================*/ + +pub fn compile(tokens: Vec) -> Result { + Compiler::new(tokens).compile() +} + +/*====================================================================================================================*/ + +struct TokenIter { + token_iter: IntoIter, + + peek_token: Option, +} + +impl TokenIter { + pub fn new(tokens: Vec) -> Self { + TokenIter { + token_iter: tokens.into_iter(), + peek_token: None, + } + } + + fn peek(&mut self) -> Option<&Token> { + // if peek_token is empty: fill with next token from token_iter + if self.peek_token.is_none() && self.token_iter.len() != 0 { + self.peek_token = self.token_iter.next(); + } + + self.peek_token.as_ref() + } + + /* fn is_empty(&self) -> bool { + // peek_token is None and there are no more tokens to take from token_iter + self.peek_token.is_none() && self.token_iter.len() == 0 + } */ +} + +impl Iterator for TokenIter { + type Item = Token; + + fn next(&mut self) -> Option { + // return the peeked token if any, or else next token from token_iter + self.peek_token.take().or_else(|| self.token_iter.next()) + } +} + +/*====================================================================================================================*/ + +type CompilerResult = Result<(), CompilerError>; + +#[repr(u8)] +#[derive(Debug, FromPrimitive, Copy, Clone, PartialOrd, PartialEq)] +enum Precedence { + Null = 0, + Assignment = 1, + Or = 2, + And = 3, + Equality = 4, + Comparison = 5, + Term = 6, + Factor = 7, + Unary = 8, + Call = 9, + Primary = 10, +} + +impl Precedence { + fn inc(self) -> Precedence { + FromPrimitive::from_u8(self as u8 + 1).unwrap() + } +} + +/* impl Add for Precedence { + type Output = Precedence; + + fn add(self, rhs: u8) -> Self::Output { + FromPrimitive::from_u8(self as u8 + rhs).unwrap() + } +} */ + +struct Compiler { + token_iter: TokenIter, + + chunk: Chunk, + + current_line: u32, +} + +impl Compiler { + fn new(tokens: Vec) -> Self { + Compiler { + token_iter: TokenIter::new(tokens), + chunk: Chunk::default(), + current_line: u32::MAX, + } + } + + fn current_chunk(&mut self) -> &mut Chunk { + &mut self.chunk + } + + fn compile(self) -> Result { + let mut compiler = self; + + compiler.expression()?; + + compiler.emit_opcode(Opcode::Return); + + // assert_eq!(compiler.token_iter.next().unwrap().token_type, TokenType::EOF); + + if compiler.peek_token().token_type != TokenType::EOF { + return Err(CompilerError::Todo { + msg: format!("Expected EOF, found {}", compiler.next_token()), + }); + } + + if DEBUG_PRINT_CODE { + compiler.chunk.disassemble("code"); + println!() + } + + Ok(compiler.chunk) + } + + fn parse_precedence(&mut self, precedence: Precedence) -> CompilerResult { + let prefix_rule = get_rule(self.peek_token().token_type).prefix; + + // hacky, but can't be compared as raw types + if prefix_rule as usize == Compiler::null as usize { + return Err(CompilerError::Todo { + msg: "Expect expression".to_owned(), + }); + } + + prefix_rule(self)?; + + loop { + let rule = get_rule(self.peek_token().token_type); + + if precedence > rule.precedence { + break; + } + + let infix_rule = get_rule(self.peek_token().token_type).infix; + + infix_rule(self)?; + } + + Ok(()) + } + + /* fn synchronise(&mut self) { + loop { + if self.token_iter.is_empty() { + return; + } + + // when synchronising: assume all false + /* self.is_in_loop = false; + self.is_in_class = false; + self.is_in_function = false; + self.is_in_init = false; */ + + let peek_token = self.peek_token(); + + // if we match a synchronisation point: return + match peek_token.token_type { + TokenType::Class + | TokenType::Fun + | TokenType::Var + | TokenType::For + | TokenType::If + | TokenType::While + | TokenType::Print + | TokenType::Return + | TokenType::EOF => return, + TokenType::Semicolon => { + // discard semicolon first, then return + assert_eq!(self.next_token().token_type, TokenType::Semicolon); + return; + } + _ => {} + } + + // no sync point: discard token + let _ = self.next_token(); + // println!("Discarding {} token", self.next_token()); + } + } */ + + fn expression(&mut self) -> CompilerResult { + self.parse_precedence(Precedence::Assignment) + } + + fn binary(&mut self) -> CompilerResult { + let token = self.next_token(); + + let rule = get_rule(token.token_type); + + // increment precedence to make binary operators left-associative + self.parse_precedence(rule.precedence.inc())?; + + match token.token_type { + TokenType::Plus => self.emit_opcode(Opcode::Add), + TokenType::Minus => self.emit_opcode(Opcode::Subtract), + TokenType::Star => self.emit_opcode(Opcode::Multiply), + TokenType::Slash => self.emit_opcode(Opcode::Divide), + tt => unreachable!("Called binary() on token type {tt:?}"), + } + + Ok(()) + } + + fn unary(&mut self) -> CompilerResult { + let token = self.next_token(); + + self.parse_precedence(Precedence::Unary)?; + + self.set_line(&token); + + match self.next_token().token_type { + TokenType::Minus => { + self.emit_opcode(Opcode::Negate); + } + TokenType::Bang => todo!(), + _ => unreachable!("Called unary, but next token had token_type {:?}", token.token_type), + } + + Ok(()) + } + + fn grouping(&mut self) -> CompilerResult { + assert_eq!(self.next_token().token_type, TokenType::LeftParen); + + self.expression()?; + + self.consume_token(TokenType::RightParen, |token| CompilerError::MissingRightParen { + code_pos: token.code_pos, + })?; + + Ok(()) + } + + fn number(&mut self) -> CompilerResult { + let token = self.next_token(); + + assert_eq!(token.token_type, TokenType::Number); + + let num = token.num_data(); + let value = Value::Number(num); + self.emit_constant(value); + + Ok(()) + } + + fn string(&mut self) -> CompilerResult { + todo!() + } + + fn literal(&mut self) -> CompilerResult { + todo!() + } + + fn null(&mut self) -> CompilerResult { + panic!("Called null on token {}", self.peek_token()); + } + + fn next_token(&mut self) -> Token { + let token = self.token_iter.next().unwrap(); + + // println!("Next token: {next:?}"); + + if token.token_type == TokenType::EOF { + unreachable!("Someone ate a EOF token"); + } + + self.set_line(&token); + + token + + // self.token_iter.next().unwrap() + } + + fn peek_token(&mut self) -> &Token { + self.token_iter.peek().unwrap() + } + + fn consume_token(&mut self, token_type: TokenType, err_fn: F) -> CompilerResult + where + F: Fn(Token) -> CompilerError, + { + match &self.peek_token().token_type { + tt if tt == &token_type => { + let _ = self.next_token(); + Ok(()) + } + // call err_fn with dummy token so we don't have to eat the EOF token + TokenType::EOF => Err(err_fn(Token::new(TokenType::EOF, self.peek_token().code_pos))), + _ => Err(err_fn(self.next_token())), + } + } + + fn emit_opcode(&mut self, opcode: Opcode) { + let line = self.current_line; + self.current_chunk().write_opcode(opcode, line); + } + + /* fn emit_byte(&mut self, byte: u8) { + let line = self.current_line; + self.current_chunk().write_byte(byte, line); + } + + fn emit_bytes(&mut self, bytes: &[u8]) { + let line = self.current_line; + self.current_chunk().write_bytes(bytes, line); + } */ + + fn emit_opcode_byte(&mut self, opcode: Opcode, byte: u8) { + let line = self.current_line; + self.current_chunk().write_opcode(opcode, line); + self.current_chunk().write_byte(byte, line); + } + + fn emit_opcode_bytes(&mut self, opcode: Opcode, bytes: &[u8]) { + let line = self.current_line; + self.current_chunk().write_opcode(opcode, line); + self.current_chunk().write_bytes(bytes, line); + } + + fn emit_constant(&mut self, value: Value) { + let const_idx = self.current_chunk().add_constant(value); + + if const_idx <= u8::MAX as usize { + self.emit_opcode_byte(Opcode::LoadConst, const_idx.try_into().unwrap()); + } else if const_idx <= u16::MAX as usize { + self.emit_opcode_bytes(Opcode::LoadConstLong, &u16_to_bytes(const_idx.try_into().unwrap())) + } else { + panic!("Tried to add more than {} constants to current chunk", u16::MAX); + } + } + + fn set_line(&mut self, token: &Token) { + self.current_line = token.code_pos.line; + } +} + +/*====================================================================================================================*/ + +type ParseFn = fn(&mut Compiler) -> CompilerResult; + +struct ParseRule { + #[allow(dead_code)] // supress unused warning + token_type: TokenType, + prefix: ParseFn, + infix: ParseFn, + precedence: Precedence, +} + +fn get_rule(token_type: TokenType) -> &'static ParseRule { + let idx = token_type as usize; + &RULES_TABLE[idx] +} + +const RULES_TABLE: [ParseRule; 40] = gen_rules_table![ + LeftParen => (grouping, null, Null), + RightParen => (null, null, Null), + LeftBrace => (null, null, Null), + RightBrace => (null, null, Null), + Comma => (null, null, Null), + Dot => (null, null, Null), + Minus => (unary, binary, Term), + Plus => (null, binary, Term), + Semicolon => (null, null, Null), + Slash => (null, binary, Factor), + Star => (null, binary, Factor), + Bang => (unary, null, Unary), + BangEqual => (null, binary, Equality), + Equal => (null, null, Null), + EqualEqual => (null, binary, Equality), + Greater => (null, binary, Comparison), + GreaterEqual => (null, binary, Comparison), + Less => (null, binary, Comparison), + LessEqual => (null, binary, Comparison), + Identifier => (null, null, Null), + String => (string, null, Null), + Number => (number, null, Null), + And => (null, null, Null), + Break => (null, null, Null), + Class => (null, null, Null), + Else => (null, null, Null), + False => (literal, null, Null), + Fun => (null, null, Null), + For => (null, null, Null), + If => (null, null, Null), + Nil => (literal, null, Null), + Or => (null, null, Null), + Print => (null, null, Null), + Return => (null, null, Null), + Super => (null, null, Null), + This => (null, null, Null), + True => (literal, null, Null), + Var => (null, null, Null), + While => (null, null, Null), + EOF => (null, null, Null) +]; + +macro_rules! rules_table_tests { + () => { + rules_table_tests!(@counter); + }; + (@counter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) => { + const_assert_eq!(40, RULES_TABLE.len()); + }; + (@counter $($counter:tt)*) => { + const_assert_eq!(RULES_TABLE[$($counter +)* 0].token_type as u8, $($counter +)* 0); + rules_table_tests!(@counter $($counter)* 1); + }; +} + +rules_table_tests!(); diff --git a/vm/src/debug.rs b/vm/src/debug.rs index e8f47f1..2d142bf 100644 --- a/vm/src/debug.rs +++ b/vm/src/debug.rs @@ -1,3 +1,8 @@ +pub const DEBUG_PRINT_CODE: bool = true; +pub const DEBUG_TRACE_EXECUTION: bool = true; + +/*====================================================================================================================*/ + #[derive(Debug)] struct LineInfo { start_offset: usize, diff --git a/vm/src/disassemble.rs b/vm/src/disassembler.rs similarity index 88% rename from vm/src/disassemble.rs rename to vm/src/disassembler.rs index 104f628..c184578 100644 --- a/vm/src/disassemble.rs +++ b/vm/src/disassembler.rs @@ -1,5 +1,6 @@ use num_traits::FromPrimitive; +use crate::misc::u16_from_bytes; use crate::Chunk; use crate::Opcode; @@ -45,11 +46,11 @@ impl Chunk { offset += 1; } LoadConstLong => { - let bytes = &self.code()[offset..offset + 3]; - let constant_idx = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], 0]); + let bytes = &self.code()[offset..offset + 2]; + let constant_idx = u16_from_bytes([bytes[0], bytes[1]]); let value = self.get_constant(constant_idx as usize); print!("{constant_idx:4} '{value}'"); - offset += 3; + offset += 2; } Add | Subtract | Multiply | Divide | Negate | Return => {} } diff --git a/vm/src/error.rs b/vm/src/error.rs index d19e63a..58b203b 100644 --- a/vm/src/error.rs +++ b/vm/src/error.rs @@ -1,11 +1,16 @@ use itertools::Itertools; -use rlox2_frontend::lexer::LexerError; +use rlox2_frontend::lexer::{CodePos, LexerError}; use thiserror::Error; use crate::{Opcode, Value}; #[derive(Error, Debug)] -pub enum CompileError {} +pub enum CompilerError { + #[error("Missing closing parenthesis at {code_pos}")] + MissingRightParen { code_pos: CodePos }, + #[error("{msg}")] + Todo { msg: String }, +} #[derive(Error, Debug)] pub enum RuntimeError { @@ -18,32 +23,32 @@ pub enum RuntimeError { } #[derive(Error, Debug)] -pub enum InterpretError { +pub enum LoxError { #[error("{0}", format_multiple_errors(inner))] LexerError { inner: Vec }, #[error("{inner}")] - CompileError { inner: CompileError }, + CompileError { inner: CompilerError }, #[error("{inner}")] RuntimeError { inner: RuntimeError }, #[error("Called exit() with exit code {exit_code}")] Exit { exit_code: i32 }, } -impl From> for InterpretError { +impl From> for LoxError { fn from(lexer_errs: Vec) -> Self { - InterpretError::LexerError { inner: lexer_errs } + LoxError::LexerError { inner: lexer_errs } } } -impl From for InterpretError { - fn from(compile_err: CompileError) -> Self { - InterpretError::CompileError { inner: compile_err } +impl From for LoxError { + fn from(compile_err: CompilerError) -> Self { + LoxError::CompileError { inner: compile_err } } } -impl From for InterpretError { +impl From for LoxError { fn from(runtime_err: RuntimeError) -> Self { - InterpretError::RuntimeError { inner: runtime_err } + LoxError::RuntimeError { inner: runtime_err } } } diff --git a/vm/src/lib.rs b/vm/src/lib.rs index 58f44e2..d18c470 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -1,16 +1,18 @@ mod chunk; -mod compile; +mod compiler; mod debug; -mod disassemble; +mod disassembler; mod error; +mod macros; +mod misc; mod opcode; mod run; mod value; mod vm; pub use chunk::Chunk; -pub use compile::compile; -pub use error::InterpretError; +pub use compiler::compile; +pub use error::LoxError; pub use opcode::Opcode; pub use run::{run, run_repl}; pub use value::Value; diff --git a/vm/src/macros.rs b/vm/src/macros.rs new file mode 100644 index 0000000..ad920b7 --- /dev/null +++ b/vm/src/macros.rs @@ -0,0 +1,34 @@ +#[macro_export] +macro_rules! debug_println { + ($($arg:tt)*) => { + if cfg!(debug_assertions) { + println!($($arg)*); + } + }; +} + +#[macro_export] +macro_rules! single_rule { + ( $token_type:tt, ( $prefix:tt, $infix:tt, $prec:ident ) ) => { + ParseRule { + token_type: rlox2_frontend::lexer::TokenType::$token_type, + prefix: Compiler::$prefix, + infix: Compiler::$infix, + precedence: Precedence::$prec, + } + }; +} + +#[macro_export] +macro_rules! gen_rules_table { + ( $( $token_type:tt => $args:tt ),* ) => { + { + [ + $( + single_rule!($token_type, $args), + )* + ] + } + + }; +} diff --git a/vm/src/misc.rs b/vm/src/misc.rs new file mode 100644 index 0000000..6c1f6bf --- /dev/null +++ b/vm/src/misc.rs @@ -0,0 +1,7 @@ +pub(crate) fn u16_to_bytes(short: u16) -> [u8; 2] { + short.to_le_bytes() +} + +pub(crate) fn u16_from_bytes(bytes: [u8; 2]) -> u16 { + u16::from_le_bytes(bytes) +} diff --git a/vm/src/run.rs b/vm/src/run.rs index 98c7093..14591dc 100644 --- a/vm/src/run.rs +++ b/vm/src/run.rs @@ -2,7 +2,7 @@ use std::io::Write; use rlox2_frontend::lexer::{scan_tokens, Token}; -use crate::{compile, InterpretError, VM}; +use crate::{compile, LoxError, VM}; /* pub fn vm_main() { let mut chunk = Chunk::new(); @@ -82,16 +82,18 @@ pub fn run_repl(vm: &mut VM) { match run(input_buf, vm) { Ok(()) => {} - Err(InterpretError::Exit { exit_code }) => std::process::exit(exit_code), + Err(LoxError::Exit { exit_code }) => std::process::exit(exit_code), Err(err) => eprintln!("{err}"), } } } -pub fn run(source: &str, _vm: &mut VM) -> Result<(), InterpretError> { +pub fn run(source: &str, vm: &mut VM) -> Result<(), LoxError> { let tokens: Vec = scan_tokens(source)?; - compile(tokens); + let chunk = compile(tokens)?; + + vm.interpret(&chunk)?; Ok(()) } diff --git a/vm/src/vm.rs b/vm/src/vm.rs index 215ed5a..c487e1a 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -2,14 +2,14 @@ use std::ptr; use num_traits::FromPrimitive; +use crate::debug::DEBUG_TRACE_EXECUTION; use crate::error::RuntimeError; use crate::{Chunk, Opcode}; -use crate::{InterpretError, Value}; +use crate::{LoxError, Value}; /*====================================================================================================================*/ const STACK_MAX: usize = 256; -const DEBUG_TRACE_EXECUTION: bool = true; /*====================================================================================================================*/ @@ -23,14 +23,6 @@ pub struct VM { stack_top: *mut Value, } -/* macro_rules! debug_println { - ($($arg:tt)*) => { - if cfg!(debug_assertions) { - println!($($arg)*); - } - }; -} */ - impl VM { pub fn new() -> Self { const NIL: Value = Value::Nil; @@ -84,7 +76,7 @@ impl VM { std::mem::take(&mut *self.stack_top) } - pub fn interpret(&mut self, chunk: &Chunk) -> Result<(), InterpretError> { + pub fn interpret(&mut self, chunk: &Chunk) -> Result<(), LoxError> { self.chunk_ptr = chunk; self.ip = chunk.code().as_ptr(); @@ -108,7 +100,7 @@ impl VM { } unsafe fn read_constant_long(&mut self) -> &Value { - let bytes = [self.read_byte(), self.read_byte(), self.read_byte(), 0]; + let bytes = [self.read_byte(), self.read_byte(), 0, 0]; let constant_idx = u32::from_le_bytes(bytes) as usize; self.chunk().get_constant(constant_idx) @@ -117,7 +109,6 @@ impl VM { unsafe fn run(&mut self) -> Result<(), RuntimeError> { loop { if DEBUG_TRACE_EXECUTION { - println!(); self.print_stack(); self.chunk().disassemble_instruction(self.offset()); println!(); @@ -128,12 +119,10 @@ impl VM { match opcode { Opcode::LoadConst => { let value = self.read_constant().clone(); - println!("Constant: {value}"); self.push_value(value); } Opcode::LoadConstLong => { let value = self.read_constant_long().clone(); - println!("LongConstant: {value}"); self.push_value(value); } @@ -193,7 +182,7 @@ impl VM { Opcode::Return => { let value = self.pop_value(); debug_assert_eq!(self.stack_len(), 0); - println!("Return: {value}"); + println!("{value}"); return Ok(()); } }