Chapter 17: Compiling Expressions done

Moritz Gmeiner 2023-01-31 22:54:12 +01:00
commit 1cca1494a4
20 changed files with 702 additions and 129 deletions

@@ -15,3 +15,4 @@ num-derive = "0.3.3"
num-traits = "0.2.15"
regex = "1.7.1"
thiserror = "1.0.38"
+static_assertions = "1.1.0"

@@ -1,6 +0,0 @@
-use itertools::Itertools;
-use rlox2_frontend::lexer::Token;
-pub fn compile(tokens: Vec<Token>) {
-println!("{}", tokens.iter().map(|token| token.to_string()).join(" "));
-}

vm/src/compiler.rs (new file, 433 lines)

@@ -0,0 +1,433 @@
use std::vec::IntoIter;
use static_assertions::const_assert_eq;
use num_derive::FromPrimitive;
use num_traits::FromPrimitive;
use rlox2_frontend::lexer::{Token, TokenType};
use crate::debug::DEBUG_PRINT_CODE;
use crate::error::CompilerError;
use crate::misc::u16_to_bytes;
use crate::{gen_rules_table, single_rule, Chunk, Opcode, Value};
/*====================================================================================================================*/
pub fn compile(tokens: Vec<Token>) -> Result<Chunk, CompilerError> {
Compiler::new(tokens).compile()
}
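// Typical pipeline (see the run() diff further down): scan_tokens(source)? -> compile(tokens)? -> vm.interpret(&chunk)?.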
/*====================================================================================================================*/
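// Thin wrapper around the token vector's IntoIter that adds single-token lookahead:
// peek() buffers the next token in `peek_token`, and next() drains that buffer first.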
struct TokenIter {
token_iter: IntoIter<Token>,
peek_token: Option<Token>,
}
impl TokenIter {
pub fn new(tokens: Vec<Token>) -> Self {
TokenIter {
token_iter: tokens.into_iter(),
peek_token: None,
}
}
fn peek(&mut self) -> Option<&Token> {
// if peek_token is empty: fill with next token from token_iter
if self.peek_token.is_none() && self.token_iter.len() != 0 {
self.peek_token = self.token_iter.next();
}
self.peek_token.as_ref()
}
/* fn is_empty(&self) -> bool {
// peek_token is None and there are no more tokens to take from token_iter
self.peek_token.is_none() && self.token_iter.len() == 0
} */
}
impl Iterator for TokenIter {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
// return the peeked token if any, or else next token from token_iter
self.peek_token.take().or_else(|| self.token_iter.next())
}
}
/*====================================================================================================================*/
type CompilerResult = Result<(), CompilerError>;
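// Precedence levels for the Pratt parser, ordered from lowest (Null) to highest (Primary).
// parse_precedence(p) keeps consuming infix operators whose rule precedence is >= p.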
#[repr(u8)]
#[derive(Debug, FromPrimitive, Copy, Clone, PartialOrd, PartialEq)]
enum Precedence {
Null = 0,
Assignment = 1,
Or = 2,
And = 3,
Equality = 4,
Comparison = 5,
Term = 6,
Factor = 7,
Unary = 8,
Call = 9,
Primary = 10,
}
impl Precedence {
fn inc(self) -> Precedence {
FromPrimitive::from_u8(self as u8 + 1).unwrap()
}
}
/* impl Add<u8> for Precedence {
type Output = Precedence;
fn add(self, rhs: u8) -> Self::Output {
FromPrimitive::from_u8(self as u8 + rhs).unwrap()
}
} */
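// Single-pass compiler: pulls tokens from `token_iter` and emits bytecode straight into `chunk`,
// tracking `current_line` so emitted opcodes carry source line information.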
struct Compiler {
token_iter: TokenIter,
chunk: Chunk,
current_line: u32,
}
impl Compiler {
fn new(tokens: Vec<Token>) -> Self {
Compiler {
token_iter: TokenIter::new(tokens),
chunk: Chunk::default(),
current_line: u32::MAX,
}
}
fn current_chunk(&mut self) -> &mut Chunk {
&mut self.chunk
}
fn compile(self) -> Result<Chunk, CompilerError> {
let mut compiler = self;
compiler.expression()?;
compiler.emit_opcode(Opcode::Return);
// assert_eq!(compiler.token_iter.next().unwrap().token_type, TokenType::EOF);
if compiler.peek_token().token_type != TokenType::EOF {
return Err(CompilerError::Todo {
msg: format!("Expected EOF, found {}", compiler.next_token()),
});
}
if DEBUG_PRINT_CODE {
compiler.chunk.disassemble("code");
println!()
}
Ok(compiler.chunk)
}
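// Core Pratt-parsing routine: compile one prefix expression, then keep folding in infix
// operators as long as their rule precedence is at least `precedence`.
// E.g. for `1 + 2 * 3` at Assignment: number(1), then `+` (Term) triggers binary(), which
// parses its right-hand side at Factor and therefore consumes `2 * 3` before Add is emitted.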
fn parse_precedence(&mut self, precedence: Precedence) -> CompilerResult {
let prefix_rule = get_rule(self.peek_token().token_type).prefix;
// hacky: fn pointers can't be compared directly, so compare their addresses instead
if prefix_rule as usize == Compiler::null as usize {
return Err(CompilerError::Todo {
msg: "Expect expression".to_owned(),
});
}
prefix_rule(self)?;
loop {
let rule = get_rule(self.peek_token().token_type);
if precedence > rule.precedence {
break;
}
let infix_rule = get_rule(self.peek_token().token_type).infix;
infix_rule(self)?;
}
Ok(())
}
/* fn synchronise(&mut self) {
loop {
if self.token_iter.is_empty() {
return;
}
// when synchronising: assume all false
/* self.is_in_loop = false;
self.is_in_class = false;
self.is_in_function = false;
self.is_in_init = false; */
let peek_token = self.peek_token();
// if we match a synchronisation point: return
match peek_token.token_type {
TokenType::Class
| TokenType::Fun
| TokenType::Var
| TokenType::For
| TokenType::If
| TokenType::While
| TokenType::Print
| TokenType::Return
| TokenType::EOF => return,
TokenType::Semicolon => {
// discard semicolon first, then return
assert_eq!(self.next_token().token_type, TokenType::Semicolon);
return;
}
_ => {}
}
// no sync point: discard token
let _ = self.next_token();
// println!("Discarding {} token", self.next_token());
}
} */
fn expression(&mut self) -> CompilerResult {
self.parse_precedence(Precedence::Assignment)
}
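// Infix rule for the arithmetic operators. The left operand has already been compiled;
// parsing the right operand one precedence level higher makes these operators
// left-associative, e.g. `1 - 2 - 3` compiles as `(1 - 2) - 3`:
// LoadConst 1, LoadConst 2, Subtract, LoadConst 3, Subtract.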
fn binary(&mut self) -> CompilerResult {
let token = self.next_token();
let rule = get_rule(token.token_type);
// increment precedence to make binary operators left-associative
self.parse_precedence(rule.precedence.inc())?;
match token.token_type {
TokenType::Plus => self.emit_opcode(Opcode::Add),
TokenType::Minus => self.emit_opcode(Opcode::Subtract),
TokenType::Star => self.emit_opcode(Opcode::Multiply),
TokenType::Slash => self.emit_opcode(Opcode::Divide),
tt => unreachable!("Called binary() on token type {tt:?}"),
}
Ok(())
}
fn unary(&mut self) -> CompilerResult {
let token = self.next_token();
self.parse_precedence(Precedence::Unary)?;
self.set_line(&token);
// the operator token was already consumed above; match on it instead of eating another token
match token.token_type {
TokenType::Minus => {
self.emit_opcode(Opcode::Negate);
}
TokenType::Bang => todo!(),
tt => unreachable!("Called unary() on token type {tt:?}"),
}
Ok(())
}
fn grouping(&mut self) -> CompilerResult {
assert_eq!(self.next_token().token_type, TokenType::LeftParen);
self.expression()?;
self.consume_token(TokenType::RightParen, |token| CompilerError::MissingRightParen {
code_pos: token.code_pos,
})?;
Ok(())
}
fn number(&mut self) -> CompilerResult {
let token = self.next_token();
assert_eq!(token.token_type, TokenType::Number);
let num = token.num_data();
let value = Value::Number(num);
self.emit_constant(value);
Ok(())
}
fn string(&mut self) -> CompilerResult {
todo!()
}
fn literal(&mut self) -> CompilerResult {
todo!()
}
fn null(&mut self) -> CompilerResult {
panic!("Called null on token {}", self.peek_token());
}
fn next_token(&mut self) -> Token {
let token = self.token_iter.next().unwrap();
// println!("Next token: {next:?}");
if token.token_type == TokenType::EOF {
unreachable!("Someone ate a EOF token");
}
self.set_line(&token);
token
// self.token_iter.next().unwrap()
}
fn peek_token(&mut self) -> &Token {
self.token_iter.peek().unwrap()
}
fn consume_token<F>(&mut self, token_type: TokenType, err_fn: F) -> CompilerResult
where
F: Fn(Token) -> CompilerError,
{
match &self.peek_token().token_type {
tt if tt == &token_type => {
let _ = self.next_token();
Ok(())
}
// call err_fn with dummy token so we don't have to eat the EOF token
TokenType::EOF => Err(err_fn(Token::new(TokenType::EOF, self.peek_token().code_pos))),
_ => Err(err_fn(self.next_token())),
}
}
fn emit_opcode(&mut self, opcode: Opcode) {
let line = self.current_line;
self.current_chunk().write_opcode(opcode, line);
}
/* fn emit_byte(&mut self, byte: u8) {
let line = self.current_line;
self.current_chunk().write_byte(byte, line);
}
fn emit_bytes(&mut self, bytes: &[u8]) {
let line = self.current_line;
self.current_chunk().write_bytes(bytes, line);
} */
fn emit_opcode_byte(&mut self, opcode: Opcode, byte: u8) {
let line = self.current_line;
self.current_chunk().write_opcode(opcode, line);
self.current_chunk().write_byte(byte, line);
}
fn emit_opcode_bytes(&mut self, opcode: Opcode, bytes: &[u8]) {
let line = self.current_line;
self.current_chunk().write_opcode(opcode, line);
self.current_chunk().write_bytes(bytes, line);
}
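// Constant indices 0..=255 fit the single-byte LoadConst operand; larger indices (up to
// u16::MAX) are emitted as LoadConstLong with a two-byte little-endian operand (see misc.rs).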
fn emit_constant(&mut self, value: Value) {
let const_idx = self.current_chunk().add_constant(value);
if const_idx <= u8::MAX as usize {
self.emit_opcode_byte(Opcode::LoadConst, const_idx.try_into().unwrap());
} else if const_idx <= u16::MAX as usize {
self.emit_opcode_bytes(Opcode::LoadConstLong, &u16_to_bytes(const_idx.try_into().unwrap()))
} else {
panic!("Tried to add more than {} constants to current chunk", u16::MAX);
}
}
fn set_line(&mut self, token: &Token) {
self.current_line = token.code_pos.line;
}
}
/*====================================================================================================================*/
type ParseFn = fn(&mut Compiler) -> CompilerResult;
struct ParseRule {
#[allow(dead_code)] // suppress unused warning
token_type: TokenType,
prefix: ParseFn,
infix: ParseFn,
precedence: Precedence,
}
fn get_rule(token_type: TokenType) -> &'static ParseRule {
let idx = token_type as usize;
&RULES_TABLE[idx]
}
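// The table is indexed by `TokenType as usize`, so the entries below must stay in the same
// order as the TokenType declaration; the rules_table_tests! macro checks this at compile time.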
const RULES_TABLE: [ParseRule; 40] = gen_rules_table![
LeftParen => (grouping, null, Null),
RightParen => (null, null, Null),
LeftBrace => (null, null, Null),
RightBrace => (null, null, Null),
Comma => (null, null, Null),
Dot => (null, null, Null),
Minus => (unary, binary, Term),
Plus => (null, binary, Term),
Semicolon => (null, null, Null),
Slash => (null, binary, Factor),
Star => (null, binary, Factor),
Bang => (unary, null, Unary),
BangEqual => (null, binary, Equality),
Equal => (null, null, Null),
EqualEqual => (null, binary, Equality),
Greater => (null, binary, Comparison),
GreaterEqual => (null, binary, Comparison),
Less => (null, binary, Comparison),
LessEqual => (null, binary, Comparison),
Identifier => (null, null, Null),
String => (string, null, Null),
Number => (number, null, Null),
And => (null, null, Null),
Break => (null, null, Null),
Class => (null, null, Null),
Else => (null, null, Null),
False => (literal, null, Null),
Fun => (null, null, Null),
For => (null, null, Null),
If => (null, null, Null),
Nil => (literal, null, Null),
Or => (null, null, Null),
Print => (null, null, Null),
Return => (null, null, Null),
Super => (null, null, Null),
This => (null, null, Null),
True => (literal, null, Null),
Var => (null, null, Null),
While => (null, null, Null),
EOF => (null, null, Null)
];
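// Compile-time sanity check: the recursive @counter arm expands to one
// const_assert_eq!(RULES_TABLE[i].token_type as u8, i) per entry (i = number of `1`s so far),
// and the 40-token base case asserts the table length, so a reordered or missing rule
// fails the build instead of misparsing at runtime.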
macro_rules! rules_table_tests {
() => {
rules_table_tests!(@counter);
};
(@counter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) => {
const_assert_eq!(40, RULES_TABLE.len());
};
(@counter $($counter:tt)*) => {
const_assert_eq!(RULES_TABLE[$($counter +)* 0].token_type as u8, $($counter +)* 0);
rules_table_tests!(@counter $($counter)* 1);
};
}
rules_table_tests!();

@@ -1,3 +1,8 @@
+pub const DEBUG_PRINT_CODE: bool = true;
+pub const DEBUG_TRACE_EXECUTION: bool = true;
+/*====================================================================================================================*/
#[derive(Debug)]
struct LineInfo {
start_offset: usize,

@@ -1,5 +1,6 @@
use num_traits::FromPrimitive;
+use crate::misc::u16_from_bytes;
use crate::Chunk;
use crate::Opcode;
@@ -45,11 +46,11 @@ impl Chunk {
offset += 1;
}
LoadConstLong => {
-let bytes = &self.code()[offset..offset + 3];
-let constant_idx = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], 0]);
+let bytes = &self.code()[offset..offset + 2];
+let constant_idx = u16_from_bytes([bytes[0], bytes[1]]);
let value = self.get_constant(constant_idx as usize);
print!("{constant_idx:4} '{value}'");
-offset += 3;
+offset += 2;
}
Add | Subtract | Multiply | Divide | Negate | Return => {}
}

@@ -1,11 +1,16 @@
use itertools::Itertools;
-use rlox2_frontend::lexer::LexerError;
+use rlox2_frontend::lexer::{CodePos, LexerError};
use thiserror::Error;
use crate::{Opcode, Value};
#[derive(Error, Debug)]
-pub enum CompileError {}
+pub enum CompilerError {
+#[error("Missing closing parenthesis at {code_pos}")]
+MissingRightParen { code_pos: CodePos },
+#[error("{msg}")]
+Todo { msg: String },
+}
#[derive(Error, Debug)]
pub enum RuntimeError {
@@ -18,32 +23,32 @@ pub enum RuntimeError {
}
#[derive(Error, Debug)]
-pub enum InterpretError {
+pub enum LoxError {
#[error("{0}", format_multiple_errors(inner))]
LexerError { inner: Vec<LexerError> },
#[error("{inner}")]
-CompileError { inner: CompileError },
+CompileError { inner: CompilerError },
#[error("{inner}")]
RuntimeError { inner: RuntimeError },
#[error("Called exit() with exit code {exit_code}")]
Exit { exit_code: i32 },
}
-impl From<Vec<LexerError>> for InterpretError {
+impl From<Vec<LexerError>> for LoxError {
fn from(lexer_errs: Vec<LexerError>) -> Self {
-InterpretError::LexerError { inner: lexer_errs }
+LoxError::LexerError { inner: lexer_errs }
}
}
-impl From<CompileError> for InterpretError {
-fn from(compile_err: CompileError) -> Self {
-InterpretError::CompileError { inner: compile_err }
+impl From<CompilerError> for LoxError {
+fn from(compile_err: CompilerError) -> Self {
+LoxError::CompileError { inner: compile_err }
}
}
-impl From<RuntimeError> for InterpretError {
+impl From<RuntimeError> for LoxError {
fn from(runtime_err: RuntimeError) -> Self {
-InterpretError::RuntimeError { inner: runtime_err }
+LoxError::RuntimeError { inner: runtime_err }
}
}

@@ -1,16 +1,18 @@
mod chunk;
-mod compile;
+mod compiler;
mod debug;
-mod disassemble;
+mod disassembler;
mod error;
+mod macros;
+mod misc;
mod opcode;
mod run;
mod value;
mod vm;
pub use chunk::Chunk;
-pub use compile::compile;
-pub use error::InterpretError;
+pub use compiler::compile;
+pub use error::LoxError;
pub use opcode::Opcode;
pub use run::{run, run_repl};
pub use value::Value;

vm/src/macros.rs (new file, 34 lines)

@@ -0,0 +1,34 @@
#[macro_export]
macro_rules! debug_println {
($($arg:tt)*) => {
if cfg!(debug_assertions) {
println!($($arg)*);
}
};
}
#[macro_export]
macro_rules! single_rule {
( $token_type:tt, ( $prefix:tt, $infix:tt, $prec:ident ) ) => {
ParseRule {
token_type: rlox2_frontend::lexer::TokenType::$token_type,
prefix: Compiler::$prefix,
infix: Compiler::$infix,
precedence: Precedence::$prec,
}
};
}
#[macro_export]
macro_rules! gen_rules_table {
( $( $token_type:tt => $args:tt ),* ) => {
{
[
$(
single_rule!($token_type, $args),
)*
]
}
};
}
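// For illustration, `single_rule!(Minus, (unary, binary, Term))` expands to roughly
// `ParseRule { token_type: TokenType::Minus, prefix: Compiler::unary, infix: Compiler::binary, precedence: Precedence::Term }`,
// and gen_rules_table! wraps one such entry per `Token => (prefix, infix, Prec)` pair in an array literal.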

vm/src/misc.rs (new file, 7 lines)

@@ -0,0 +1,7 @@
pub(crate) fn u16_to_bytes(short: u16) -> [u8; 2] {
short.to_le_bytes()
}
pub(crate) fn u16_from_bytes(bytes: [u8; 2]) -> u16 {
u16::from_le_bytes(bytes)
}
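// Both helpers use the same little-endian layout, so they round-trip: for example
// u16_to_bytes(0x0102) == [0x02, 0x01] and u16_from_bytes([0x02, 0x01]) == 0x0102.
// The compiler writes LoadConstLong operands with u16_to_bytes and the disassembler
// reads them back with u16_from_bytes.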

@@ -2,7 +2,7 @@ use std::io::Write;
use rlox2_frontend::lexer::{scan_tokens, Token};
-use crate::{compile, InterpretError, VM};
+use crate::{compile, LoxError, VM};
/* pub fn vm_main() {
let mut chunk = Chunk::new();
@@ -82,16 +82,18 @@ pub fn run_repl(vm: &mut VM) {
match run(input_buf, vm) {
Ok(()) => {}
-Err(InterpretError::Exit { exit_code }) => std::process::exit(exit_code),
+Err(LoxError::Exit { exit_code }) => std::process::exit(exit_code),
Err(err) => eprintln!("{err}"),
}
}
}
-pub fn run(source: &str, _vm: &mut VM) -> Result<(), InterpretError> {
+pub fn run(source: &str, vm: &mut VM) -> Result<(), LoxError> {
let tokens: Vec<Token> = scan_tokens(source)?;
-compile(tokens);
+let chunk = compile(tokens)?;
+vm.interpret(&chunk)?;
Ok(())
}

@@ -2,14 +2,14 @@ use std::ptr;
use num_traits::FromPrimitive;
+use crate::debug::DEBUG_TRACE_EXECUTION;
use crate::error::RuntimeError;
use crate::{Chunk, Opcode};
-use crate::{InterpretError, Value};
+use crate::{LoxError, Value};
/*====================================================================================================================*/
const STACK_MAX: usize = 256;
-const DEBUG_TRACE_EXECUTION: bool = true;
/*====================================================================================================================*/
@@ -23,14 +23,6 @@ pub struct VM {
stack_top: *mut Value,
}
-/* macro_rules! debug_println {
-($($arg:tt)*) => {
-if cfg!(debug_assertions) {
-println!($($arg)*);
-}
-};
-} */
impl VM {
pub fn new() -> Self {
const NIL: Value = Value::Nil;
@@ -84,7 +76,7 @@ impl VM {
std::mem::take(&mut *self.stack_top)
}
-pub fn interpret(&mut self, chunk: &Chunk) -> Result<(), InterpretError> {
+pub fn interpret(&mut self, chunk: &Chunk) -> Result<(), LoxError> {
self.chunk_ptr = chunk;
self.ip = chunk.code().as_ptr();
@@ -108,7 +100,7 @@ impl VM {
}
unsafe fn read_constant_long(&mut self) -> &Value {
-let bytes = [self.read_byte(), self.read_byte(), self.read_byte(), 0];
+let bytes = [self.read_byte(), self.read_byte(), 0, 0];
let constant_idx = u32::from_le_bytes(bytes) as usize;
self.chunk().get_constant(constant_idx)
@@ -117,7 +109,6 @@ impl VM {
unsafe fn run(&mut self) -> Result<(), RuntimeError> {
loop {
if DEBUG_TRACE_EXECUTION {
-println!();
self.print_stack();
self.chunk().disassemble_instruction(self.offset());
println!();
@@ -128,12 +119,10 @@ impl VM {
match opcode {
Opcode::LoadConst => {
let value = self.read_constant().clone();
-println!("Constant: {value}");
self.push_value(value);
}
Opcode::LoadConstLong => {
let value = self.read_constant_long().clone();
-println!("LongConstant: {value}");
self.push_value(value);
}
@@ -193,7 +182,7 @@ impl VM {
Opcode::Return => {
let value = self.pop_value();
debug_assert_eq!(self.stack_len(), 0);
-println!("Return: {value}");
+println!("{value}");
return Ok(());
}
}