Finished up to and including chapter 16

This commit is contained in:
Moritz Gmeiner 2023-01-30 17:41:48 +01:00
commit b86985deaf
24 changed files with 1051 additions and 198 deletions

17
vm/Cargo.toml Normal file
View file

@ -0,0 +1,17 @@
[package]
name = "rlox2-vm"
version = "0.1.0"
edition = "2021"
[dependencies.rlox2-frontend]
path = "../frontend"
[dependencies]
itertools = "0.10.5"
lazy_static = "1.4.0"
num-derive = "0.3.3"
num-traits = "0.2.15"
regex = "1.7.1"
thiserror = "1.0.38"

68
vm/src/chunk.rs Normal file
View file

@ -0,0 +1,68 @@
use crate::debug::ChunkDebugInfo;
use crate::{Opcode, Value};
/// A unit of bytecode: the raw instruction bytes, the constants they refer to, and optional line information.
///
/// NOTE(review): the derived `Default` leaves `debug_info` as `None`, whereas `Chunk::new` starts with an empty
/// `ChunkDebugInfo` - confirm that this difference is intended.
#[derive(Debug, Default)]
pub struct Chunk {
// Instruction stream (opcodes and their operand bytes) in the order they were written.
code: Vec<u8>,
// Constant pool; entries are addressed by the index returned from `add_constant`.
constants: Vec<Value>,
// Line bookkeeping for `code`; `write_byte` only updates it when it is present.
debug_info: Option<ChunkDebugInfo>,
}
impl Chunk {
pub fn new() -> Self {
Chunk {
code: Vec::new(),
constants: Vec::new(),
debug_info: Some(ChunkDebugInfo::new()),
}
}
pub fn code(&self) -> &[u8] {
&self.code
}
pub fn debug_info(&self) -> Option<&ChunkDebugInfo> {
self.debug_info.as_ref()
}
pub fn debug_info_mut(&mut self) -> Option<&mut ChunkDebugInfo> {
self.debug_info.as_mut()
}
pub fn write_byte<T>(&mut self, byte: T, line: u32)
where
T: TryInto<u8> + std::fmt::Debug,
<T as std::convert::TryInto<u8>>::Error: std::fmt::Debug,
{
let byte = byte.try_into().unwrap();
self.code.push(byte);
if let Some(ref mut debug_info) = self.debug_info {
debug_info.write_line(line, self.code.len() - 1);
}
}
pub fn write_bytes(&mut self, bytes: &[u8], line: u32) {
for byte in bytes {
self.write_byte(*byte, line);
}
}
pub fn write_opcode(&mut self, opcode: Opcode, line: u32) {
let byte = opcode as u8;
self.write_byte(byte, line);
}
pub fn add_constant(&mut self, value: Value) -> usize {
self.constants.push(value);
self.constants.len() - 1
}
pub fn get_constant(&self, offset: usize) -> &Value {
&self.constants[offset]
}
}

6
vm/src/compile.rs Normal file
View file

@ -0,0 +1,6 @@
use itertools::Itertools;
use rlox2_frontend::lexer::Token;
/// Placeholder compiler: prints the textual form of every token, separated by single spaces, on one line.
pub fn compile(tokens: Vec<Token>) {
    let rendered = tokens.iter().map(|tok| tok.to_string()).join(" ");
    println!("{}", rendered);
}

76
vm/src/debug.rs Normal file
View file

@ -0,0 +1,76 @@
/// Start of a run of consecutive code bytes that all belong to the same source line.
#[derive(Debug)]
struct LineInfo {
    /// Offset of the first byte of the run.
    start_offset: usize,
    /// Source line the run belongs to.
    line: u32,
}

/// Run-length encoded mapping from code offsets to source line numbers.
///
/// Only the first offset of each line is stored; an offset belongs to the last run that starts at or before it.
#[derive(Debug, Default)]
pub struct ChunkDebugInfo {
    /// Runs ordered by increasing line (enforced by `write_line`).
    line_infos: Vec<LineInfo>,
}

impl ChunkDebugInfo {
    /// Creates an empty mapping.
    pub fn new() -> Self {
        ChunkDebugInfo { line_infos: Vec::new() }
    }

    /// Records that the byte at `offset` was generated from source line `line`.
    ///
    /// A new run is started only when `line` is greater than the line of the current last run (or when nothing has
    /// been recorded yet); a byte on the same line as the previous one needs no new entry.
    ///
    /// # Panics
    /// Panics if `line` is smaller than the most recently recorded line.
    pub fn write_line(&mut self, line: u32, offset: usize) {
        let last_line = self.line_infos.last().map(|info| info.line);

        match last_line {
            // line numbers must never decrease
            Some(last) if line < last => {
                panic!("Tried to write_byte for line {line}, but line {last} is already written")
            }
            // same line as the previous byte -> already covered by the current run
            Some(last) if line == last => {}
            // first byte ever, or a later line -> start a new run at this offset
            _ => self.line_infos.push(LineInfo {
                start_offset: offset,
                line,
            }),
        }
    }

    /// Returns the source line of the byte at `offset`.
    ///
    /// The answer is the line of the last run whose `start_offset` is `<= offset`. An offset that lies before the
    /// first recorded run is attributed to that first run.
    ///
    /// # Panics
    /// Panics if no line information has been recorded at all.
    pub fn line_number(&self, offset: usize) -> u32 {
        // Number of runs starting at or before `offset`; the run we want is the last of those.
        let runs_before = self
            .line_infos
            .partition_point(|info| info.start_offset <= offset);

        let idx = runs_before.saturating_sub(1);

        self.line_infos
            .get(idx)
            .expect("line_number called on a ChunkDebugInfo without any line information")
            .line
    }
}

61
vm/src/disassemble.rs Normal file
View file

@ -0,0 +1,61 @@
use num_traits::FromPrimitive;
use crate::Chunk;
use crate::Opcode;
impl Chunk {
pub fn disassemble(&self, name: &str) {
println!("==== begin {name} ====\n");
let mut offset = 0;
while offset < self.code().len() {
offset = self.disassemble_instruction(offset)
}
println!("\n==== end {name} ====");
}
pub fn disassemble_instruction(&self, offset: usize) -> usize {
use Opcode::*;
let mut offset = offset;
print!("{offset:04} ");
if let Some(debug_info) = self.debug_info() {
if offset == 0 || debug_info.line_number(offset) != debug_info.line_number(offset - 1) {
let line = debug_info.line_number(offset);
print!("{line:4} ");
} else {
print!(" | ");
};
}
let opcode: Opcode = FromPrimitive::from_u8(self.code()[offset]).unwrap();
offset += 1;
print!("{opcode:<16} ");
match opcode {
LoadConst => {
let constant_idx = self.code()[offset];
let value = self.get_constant(constant_idx as usize);
print!("{constant_idx:4} '{value}'");
offset += 1;
}
LoadConstLong => {
let bytes = &self.code()[offset..offset + 3];
let constant_idx = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], 0]);
let value = self.get_constant(constant_idx as usize);
print!("{constant_idx:4} '{value}'");
offset += 3;
}
Add | Subtract | Multiply | Divide | Negate | Return => {}
}
println!();
offset
}
}

58
vm/src/error.rs Normal file
View file

@ -0,0 +1,58 @@
use itertools::Itertools;
use rlox2_frontend::lexer::LexerError;
use thiserror::Error;
use crate::{Opcode, Value};
/// Errors reported by the compiler; the enum currently has no variants, so no value of it can exist.
#[derive(Error, Debug)]
pub enum CompileError {}
#[derive(Error, Debug)]
pub enum RuntimeError {
#[error("Opcopde {opcode} had invalid operand {operand}")]
UnaryInvalidOperand { opcode: Opcode, operand: Value },
#[error("Opcopde {opcode} had invalid operands {left} and {right}")]
BinaryInvalidOperand { opcode: Opcode, left: Value, right: Value },
#[error("Division by zero")]
DivisionByZero,
}
/// Top-level error type covering every way running a piece of source can stop early.
#[derive(Error, Debug)]
pub enum InterpretError {
// All errors collected by the lexer; displayed via `format_multiple_errors`.
#[error("{0}", format_multiple_errors(inner))]
LexerError { inner: Vec<LexerError> },
// Displayed exactly like the wrapped compile error.
#[error("{inner}")]
CompileError { inner: CompileError },
// Displayed exactly like the wrapped runtime error.
#[error("{inner}")]
RuntimeError { inner: RuntimeError },
// Request to terminate with `exit_code`; `run_repl` turns this into `std::process::exit`.
#[error("Called exit() with exit code {exit_code}")]
Exit { exit_code: i32 },
}
impl From<Vec<LexerError>> for InterpretError {
fn from(lexer_errs: Vec<LexerError>) -> Self {
InterpretError::LexerError { inner: lexer_errs }
}
}
impl From<CompileError> for InterpretError {
fn from(compile_err: CompileError) -> Self {
InterpretError::CompileError { inner: compile_err }
}
}
impl From<RuntimeError> for InterpretError {
fn from(runtime_err: RuntimeError) -> Self {
InterpretError::RuntimeError { inner: runtime_err }
}
}
/// Renders every error with its `Display` text and joins the results with newlines.
///
/// A single error is rendered without any separator and an empty slice yields an empty string, so no special
/// casing by length is needed.
fn format_multiple_errors(errs: &[impl std::error::Error]) -> String {
    errs.iter().map(|err| err.to_string()).join("\n")
}

17
vm/src/lib.rs Normal file
View file

@ -0,0 +1,17 @@
mod chunk;
mod compile;
mod debug;
mod disassemble;
mod error;
mod opcode;
mod run;
mod value;
mod vm;
pub use chunk::Chunk;
pub use compile::compile;
pub use error::InterpretError;
pub use opcode::Opcode;
pub use run::{run, run_repl};
pub use value::Value;
pub use vm::VM;

35
vm/src/opcode.rs Normal file
View file

@ -0,0 +1,35 @@
use std::fmt::Display;
use itertools::Itertools;
use lazy_static::lazy_static;
use num_derive::FromPrimitive;
use regex::Regex;
lazy_static! {
// Matches one word of a CamelCase identifier: an uppercase ASCII letter followed by any number of lowercase
// ASCII letters. Used by the `Display` impl of `Opcode` to split variant names into words.
static ref CAMEL_CASE_REGEX: Regex = Regex::new(r"[A-Z][a-z]*").unwrap();
}
/// Instruction set of the bytecode VM.
///
/// Every variant is encoded as its `u8` discriminant (see `Chunk::write_opcode`) and decoded again through
/// `FromPrimitive`, so the declaration order below defines the byte encoding.
#[repr(u8)]
#[derive(Debug, FromPrimitive, Clone, Copy)]
pub enum Opcode {
    /// Push a constant; followed by a 1-byte constant index.
    LoadConst,
    /// Push a constant; followed by a 3-byte little-endian constant index.
    LoadConstLong,
    /// Pop two values and push their sum.
    Add,
    /// Pop two values and push their difference.
    Subtract,
    /// Pop two values and push their product.
    Multiply,
    /// Pop two values and push their quotient.
    Divide,
    /// Pop one value and push its negation.
    Negate,
    /// Pop one value and stop execution.
    Return,
}
impl Display for Opcode {
    /// Writes the variant name in upper case with its CamelCase words joined by underscores
    /// (e.g. `LoadConst` becomes `LOAD_CONST`), honouring width and alignment flags of the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let variant = format!("{self:?}");

        let screaming = CAMEL_CASE_REGEX
            .find_iter(&variant)
            .map(|word| word.as_str().to_ascii_uppercase())
            .join("_");

        f.pad(&screaming)
    }
}

97
vm/src/run.rs Normal file
View file

@ -0,0 +1,97 @@
use std::io::Write;
use rlox2_frontend::lexer::{scan_tokens, Token};
use crate::{compile, InterpretError, VM};
/* pub fn vm_main() {
let mut chunk = Chunk::new();
let constant1 = chunk.add_constant(Value::Number(1.2));
let constant2 = chunk.add_constant(Value::Number(3.4));
let constant3 = chunk.add_constant(Value::Number(5.6));
chunk.write_opcode(Opcode::LoadConst, 123);
chunk.write_byte(constant1, 123);
chunk.write_opcode(Opcode::LoadConst, 123);
chunk.write_byte(constant2, 123);
chunk.write_opcode(Opcode::Add, 123);
chunk.write_opcode(Opcode::LoadConst, 124);
chunk.write_byte(constant3, 124);
chunk.write_opcode(Opcode::Divide, 124);
chunk.write_opcode(Opcode::Negate, 124);
chunk.write_opcode(Opcode::Return, 125);
println!();
chunk.disassemble("test chunk");
println!();
let mut vm = VM::new();
vm.interpret(&chunk).unwrap();
} */
/// Runs an interactive read-eval-print loop on stdin/stdout until the user quits.
///
/// Input is read line by line. As long as more `{`, `(` or `[` have been opened than closed, further lines are
/// requested with a `< ` continuation prompt and appended to the same input. The collected input is then passed
/// to [`run`]; errors are printed to stderr and the loop continues.
///
/// The process exits with status 0 on empty input, `exit` or `quit`, with the requested code on
/// `InterpretError::Exit`, and with status 66 if stdin cannot be read.
pub fn run_repl(vm: &mut VM) {
    /// How many more `open` than `close` characters `text` contains (negative if `close` is in excess).
    fn unclosed(text: &str, open: char, close: char) -> i64 {
        (text.matches(open).count() as i64) - (text.matches(close).count() as i64)
    }

    let stdin = std::io::stdin();

    loop {
        let mut input_buf = String::new();

        print!("> ");
        std::io::stdout().flush().unwrap();

        'inner: loop {
            let bytes_read = stdin.read_line(&mut input_buf).unwrap_or_else(|err| {
                eprintln!("Could not read from stdin: {err}");
                std::process::exit(66);
            });

            // End of input: no further lines will ever arrive, so stop waiting for closing brackets.
            // Without this check an EOF in the middle of an unbalanced input would loop forever.
            if bytes_read == 0 {
                break 'inner;
            }

            let num_open_braces = unclosed(&input_buf, '{', '}');
            let num_open_parens = unclosed(&input_buf, '(', ')');
            let num_open_brackets = unclosed(&input_buf, '[', ']');

            // all braces/parens/brackets closed => break
            if num_open_braces == 0 && num_open_parens == 0 && num_open_brackets == 0 {
                break 'inner;
            }

            // any braces/parens/brackets more closing than opening => break (will be parse error)
            if num_open_braces < 0 || num_open_parens < 0 || num_open_brackets < 0 {
                break 'inner;
            }

            print!("< ");
            std::io::stdout().flush().unwrap();
        }

        let input_buf = input_buf.trim();

        if input_buf.is_empty() || input_buf == "exit" || input_buf == "quit" {
            std::process::exit(0);
        }

        match run(input_buf, vm) {
            Ok(()) => {}
            Err(InterpretError::Exit { exit_code }) => std::process::exit(exit_code),
            Err(err) => eprintln!("{err}"),
        }
    }
}
/// Scans `source` into tokens and hands them to `compile`.
///
/// The VM argument is not used yet.
///
/// # Errors
/// Returns the lexer's errors, converted into an `InterpretError`, if scanning fails.
pub fn run(source: &str, _vm: &mut VM) -> Result<(), InterpretError> {
    let scanned: Result<Vec<Token>, _> = scan_tokens(source);

    match scanned {
        Ok(tokens) => {
            compile(tokens);
            Ok(())
        }
        Err(lexer_errs) => Err(lexer_errs.into()),
    }
}

22
vm/src/value.rs Normal file
View file

@ -0,0 +1,22 @@
use std::fmt::Display;
/// A runtime value of the VM.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// The absence of a value; also the default.
    Nil,
    /// A double-precision floating point number.
    Number(f64),
}

impl Default for Value {
    /// The default value is `Nil`.
    fn default() -> Self {
        Self::Nil
    }
}

impl Display for Value {
    /// Writes `nil` for `Nil` and the number's own `Display` text for `Number`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Nil => f.write_str("nil"),
            Self::Number(number) => write!(f, "{}", number),
        }
    }
}

225
vm/src/vm.rs Normal file
View file

@ -0,0 +1,225 @@
use std::ptr;
use num_traits::FromPrimitive;
use crate::error::RuntimeError;
use crate::{Chunk, Opcode};
use crate::{InterpretError, Value};
/*====================================================================================================================*/
const STACK_MAX: usize = 256;
const DEBUG_TRACE_EXECUTION: bool = true;
/*====================================================================================================================*/
/// State of the bytecode interpreter.
///
/// NOTE(review): `stack_top` points into `stack`, which is stored inline in this struct, so the struct refers to
/// its own memory. Moving a `VM` (which includes returning it from `VM::new`) does not update that pointer -
/// confirm that this is accounted for.
#[derive(Debug)]
pub struct VM {
// Chunk being executed; null until `interpret` stores one.
chunk_ptr: *const Chunk,
// Next code byte to read; set by `interpret` and advanced by `read_byte`.
ip: *const u8,
// Fixed-capacity value stack; slots that are not in use hold `Value::Nil`.
stack: [Value; STACK_MAX],
// Slot the next pushed value will be written to (one past the most recently pushed value).
stack_top: *mut Value,
}
/* macro_rules! debug_println {
($($arg:tt)*) => {
if cfg!(debug_assertions) {
println!($($arg)*);
}
};
} */
impl VM {
/// Creates a VM with no chunk loaded and an empty stack in which every slot is `Value::Nil`.
///
/// NOTE(review): `stack_top` is set to the address of the `stack` array inside the local `vm`, and `vm` is then
/// returned by value. If the return moves the struct, the stored pointer no longer refers to the returned
/// VM's own stack - confirm.
pub fn new() -> Self {
// `Value` is not `Copy`, so the array repeat expression below needs a named constant.
const NIL: Value = Value::Nil;
let stack = [NIL; STACK_MAX];
let mut vm = VM {
chunk_ptr: ptr::null(),
ip: ptr::null(),
stack,
// placeholder; pointed at the first stack slot right below
stack_top: ptr::null_mut(),
};
vm.stack_top = vm.stack.as_mut_ptr();
vm
}
/// Returns the chunk that is currently being interpreted.
///
/// # Safety
/// `chunk_ptr` must point to a live `Chunk`; it is null until `interpret` has stored one.
unsafe fn chunk(&self) -> &Chunk {
&*self.chunk_ptr
}
/// Returns the position of the instruction pointer as a byte offset into the current chunk's code.
///
/// # Safety
/// `chunk_ptr` must point to a live `Chunk` and `ip` must point into (or one past the end of) that chunk's code.
unsafe fn offset(&self) -> usize {
    let code_start = self.chunk().code().as_ptr();
    let distance = self.ip.offset_from(code_start);

    debug_assert!(distance >= 0);

    distance as usize
}
/// Returns the number of values currently on the stack.
///
/// # Safety
/// `stack_top` must point into (or one past the end of) this VM's `stack` array.
unsafe fn stack_len(&self) -> usize {
    let stack_base = self.stack.as_ptr();
    let depth = self.stack_top.offset_from(stack_base);

    debug_assert!(depth >= 0);

    depth as usize
}
/// Pushes `value` onto the stack.
///
/// # Panics
/// Panics if the stack already holds `STACK_MAX` values. The check is performed in release builds as well,
/// because writing past the end of the array would be undefined behaviour.
///
/// # Safety
/// `stack_top` must point into (or one past the end of) this VM's `stack` array.
unsafe fn push_value(&mut self, value: Value) {
    assert!(
        self.stack_len() < STACK_MAX,
        "value stack overflow: more than {} values",
        STACK_MAX
    );

    // Slots above the top are expected to hold `Nil` (see `pop_value`), so the replaced value carries no data.
    let old_value = std::mem::replace(&mut *self.stack_top, value);
    debug_assert_eq!(old_value, Value::Nil);

    self.stack_top = self.stack_top.add(1);
}
unsafe fn pop_value(&mut self) -> Value {
assert!(self.stack_len() < STACK_MAX);
self.stack_top = self.stack_top.sub(1);
// ptr::read(self.stack_top)
std::mem::take(&mut *self.stack_top)
}
pub fn interpret(&mut self, chunk: &Chunk) -> Result<(), InterpretError> {
self.chunk_ptr = chunk;
self.ip = chunk.code().as_ptr();
unsafe { self.run()? }
Ok(())
}
/// Returns the code byte under the instruction pointer and advances the pointer by one.
///
/// # Safety
/// `ip` must point at a byte inside the current chunk's code; this is only checked in debug builds.
unsafe fn read_byte(&mut self) -> u8 {
    debug_assert!(self.offset() < self.chunk().code().len());

    let current = self.ip;
    self.ip = current.add(1);

    *current
}
/// Reads a one-byte constant index from the code and returns the constant it refers to.
///
/// # Safety
/// Same requirements as `read_byte` and `chunk`.
unsafe fn read_constant(&mut self) -> &Value {
    let index = usize::from(self.read_byte());

    self.chunk().get_constant(index)
}
/// Reads a three-byte little-endian constant index from the code and returns the constant it refers to.
///
/// # Safety
/// Same requirements as `read_byte` (for each of the three bytes) and `chunk`.
unsafe fn read_constant_long(&mut self) -> &Value {
    // The bytes are read in code order: least significant first.
    let low = self.read_byte();
    let middle = self.read_byte();
    let high = self.read_byte();

    let index = u32::from_le_bytes([low, middle, high, 0]) as usize;

    self.chunk().get_constant(index)
}
unsafe fn run(&mut self) -> Result<(), RuntimeError> {
loop {
if DEBUG_TRACE_EXECUTION {
println!();
self.print_stack();
self.chunk().disassemble_instruction(self.offset());
println!();
}
let opcode: Opcode = FromPrimitive::from_u8(self.read_byte()).unwrap();
match opcode {
Opcode::LoadConst => {
let value = self.read_constant().clone();
println!("Constant: {value}");
self.push_value(value);
}
Opcode::LoadConstLong => {
let value = self.read_constant_long().clone();
println!("LongConstant: {value}");
self.push_value(value);
}
Opcode::Add => {
let right = self.pop_value();
let left = self.pop_value();
match (left, right) {
(Value::Number(left), Value::Number(right)) => {
self.push_value(Value::Number(left + right));
}
// (Value::String(left), Value::String(right)) => todo!(),
(left, right) => return Err(RuntimeError::BinaryInvalidOperand { opcode, left, right }),
}
}
Opcode::Subtract => {
let right = self.pop_value();
let left = self.pop_value();
match (left, right) {
(Value::Number(left), Value::Number(right)) => {
self.push_value(Value::Number(left - right));
}
(left, right) => return Err(RuntimeError::BinaryInvalidOperand { opcode, left, right }),
}
}
Opcode::Multiply => {
let right = self.pop_value();
let left = self.pop_value();
match (left, right) {
(Value::Number(left), Value::Number(right)) => {
self.push_value(Value::Number(left * right));
}
(left, right) => return Err(RuntimeError::BinaryInvalidOperand { opcode, left, right }),
}
}
Opcode::Divide => {
let right = self.pop_value();
let left = self.pop_value();
match (left, right) {
(Value::Number(left), Value::Number(right)) => {
if right == 0.0 {
return Err(RuntimeError::DivisionByZero);
}
self.push_value(Value::Number(left / right));
}
(left, right) => return Err(RuntimeError::BinaryInvalidOperand { opcode, left, right }),
}
}
Opcode::Negate => {
let value = self.pop_value();
println!("Negate: {value}");
if let Value::Number(num) = value {
self.push_value(Value::Number(-num));
} else {
return Err(RuntimeError::UnaryInvalidOperand { opcode, operand: value });
}
}
Opcode::Return => {
let value = self.pop_value();
debug_assert_eq!(self.stack_len(), 0);
println!("Return: {value}");
return Ok(());
}
}
}
}
/// Prints the values currently on the stack, bottom first, on a single line to stdout.
///
/// # Safety
/// Same requirements as `stack_len`.
unsafe fn print_stack(&self) {
    print!("Stack: ");

    let live = &self.stack[0..self.stack_len()];
    for slot in live {
        print!("[ {slot} ]");
    }

    println!()
}
}
impl Default for VM {
fn default() -> Self {
VM::new()
}
}