Mirror of https://github.com/MorizzG/rlox.git, synced 2025-12-06 04:12:42 +00:00
Chapter 7

commit 42dbe531ad
15 changed files with 1112 additions and 0 deletions

.gitignore (vendored, new file, 1 line)
@@ -0,0 +1 @@
/target

.vscode/launch.json (vendored, new file, 26 lines)
@@ -0,0 +1,26 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug executable 'rlox2'",
            "cargo": {
                "args": [
                    "build",
                    "--bin=rlox2",
                    "--package=rlox2"
                ],
                "filter": {
                    "name": "rlox2",
                    "kind": "bin"
                }
            },
            "args": [],
            "cwd": "${workspaceFolder}"
        }
    ]
}

Cargo.lock (generated, new file, 129 lines)
@@ -0,0 +1,129 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "phf"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c"
dependencies = [
 "phf_macros",
 "phf_shared",
]

[[package]]
name = "phf_generator"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf"
dependencies = [
 "phf_shared",
 "rand",
]

[[package]]
name = "phf_macros"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92aacdc5f16768709a569e913f7451034034178b05bdc8acda226659a3dccc66"
dependencies = [
 "phf_generator",
 "phf_shared",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "phf_shared"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676"
dependencies = [
 "siphasher",
]

[[package]]
name = "proc-macro2"
version = "1.0.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "quote"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
 "rand_core",
]

[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"

[[package]]
name = "rlox2"
version = "0.1.0"
dependencies = [
 "phf",
 "thiserror",
]

[[package]]
name = "siphasher"
version = "0.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"

[[package]]
name = "syn"
version = "1.0.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "thiserror"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
dependencies = [
 "thiserror-impl",
]

[[package]]
name = "thiserror-impl"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "unicode-ident"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"

Cargo.toml (new file, 10 lines)
@@ -0,0 +1,10 @@
[package]
name = "rlox2"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
phf = { version = "0.11.1", features = ["macros"] }
thiserror = "1.0"

src/error.rs (new file, 57 lines)
@@ -0,0 +1,57 @@
use thiserror::Error;

use crate::lexer::Token;
use crate::misc::CodePos;

#[derive(Error, Debug)]
pub enum LexerError {
    #[error("Unexpected character '{c}' at {code_pos}.")]
    UnexpectedCharacter { c: char, code_pos: CodePos },
    #[error("Unterminated string literal starting at {code_pos}.")]
    UnterminatedStringLiteral { code_pos: CodePos },
    #[error("Unterminated block comment starting at {code_pos}.")]
    UnterminatedBlockComment { code_pos: CodePos },
    #[error("Invalid number literal {lexeme} at {code_pos}: {msg}")]
    InvalidNumberLiteral {
        lexeme: String,
        msg: String,
        code_pos: CodePos,
    },
}

#[derive(Error, Debug)]
pub enum ParserError {
    #[error("Token stream ended unexpectedly.")]
    TokenStreamEnded,
    #[error("Unexpected token {token} at {}.", .token.code_pos())]
    UnexpectedToken { token: Token },
}

#[derive(Error, Debug)]
pub enum LoxError {
    #[error("{msg}")]
    LexerError { msg: String },
    #[error("{msg}")]
    ParserError { msg: String },
}

impl From<Vec<LexerError>> for LoxError {
    fn from(lexer_errs: Vec<LexerError>) -> Self {
        let msg = if lexer_errs.len() == 1 {
            format!("{}", lexer_errs[0])
        } else {
            let msgs: Vec<String> = lexer_errs.iter().map(|err| format!("{}", err)).collect();
            msgs.join("\n")
        };

        LoxError::LexerError { msg }
    }
}

impl From<ParserError> for LoxError {
    fn from(parser_error: ParserError) -> Self {
        LoxError::ParserError {
            msg: format!("{parser_error}"),
        }
    }
}
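
The #[error] attributes derive each variant's Display impl, interpolating fields by name. As a minimal crate-internal sketch of how one of these errors renders (the demo function below is hypothetical, not part of the diff):

use crate::error::LexerError;
use crate::misc::CodePos;

fn demo() {
    let err = LexerError::UnexpectedCharacter {
        c: '#',
        code_pos: CodePos { line: 3, col: 7 },
    };
    // CodePos's Display impl produces "line 3, col 7", so this prints:
    // Unexpected character '#' at line 3, col 7.
    println!("{err}");
}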

src/interpreter.rs (new file, 94 lines)
@@ -0,0 +1,94 @@
use std::io::Write;

use crate::error::LoxError;
use crate::lexer::{scan_tokens, Token};
use crate::parser::parser::parse_tokens;

pub fn interpreter_main() {
    let args: Vec<String> = std::env::args().collect();

    match args.len() {
        1 => run_repl(),
        2 => run_file(&args[1]),
        _ => {
            eprintln!("Usage: rlox [script]");
            std::process::exit(64);
        }
    }
}

fn run_file(script_path: &str) {
    let source_code = std::fs::read_to_string(script_path).unwrap_or_else(|err| {
        eprintln!("Reading script file {} failed: {}", script_path, err);
        std::process::exit(66);
    });

    if let Err(err) = run(&source_code) {
        eprintln!("{}", err);
        std::process::exit(65);
    }
}

fn run_repl() {
    let stdin = std::io::stdin();

    'outer: loop {
        let mut input_buf = String::new();

        print!("> ");
        std::io::stdout().flush().unwrap();

        'inner: loop {
            stdin.read_line(&mut input_buf).unwrap_or_else(|err| {
                eprintln!("Could not read from stdin: {}", err);
                std::process::exit(66);
            });

            let num_open_braces = (input_buf.matches('{').count() as i64) - (input_buf.matches('}').count() as i64);
            let num_open_parens = (input_buf.matches('(').count() as i64) - (input_buf.matches(')').count() as i64);
            let num_open_brackets = (input_buf.matches('[').count() as i64) - (input_buf.matches(']').count() as i64);

            // all braces/parens/brackets closed => break
            if num_open_braces == 0 && num_open_parens == 0 && num_open_brackets == 0 {
                break 'inner;
            }

            // more closing than opening braces/parens/brackets => break (will be a parse error)
            if num_open_braces < 0 || num_open_parens < 0 || num_open_brackets < 0 {
                break 'inner;
            }

            print!("< ");
            std::io::stdout().flush().unwrap();
        }

        input_buf = input_buf.trim().to_owned();

        if input_buf.is_empty() || input_buf == "exit" || input_buf == "quit" {
            break 'outer;
        }

        match run(&input_buf) {
            Ok(()) => {}
            Err(err) => eprintln!("{}", err),
        }
    }
}

fn run(code_string: &str) -> Result<(), LoxError> {
    let tokens: Vec<Token> = scan_tokens(code_string)?;

    /* let token_str = tokens
        .iter()
        .map(|token| format!("{token}"))
        .collect::<Vec<String>>()
        .join(" ");

    println!("{token_str}"); */

    let expr = parse_tokens(tokens)?;

    println!("{expr}");

    Ok(())
}
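
The REPL keeps reading lines until braces, parens, and brackets balance, so a multi-line expression can be entered directly. A hypothetical session, with the `>` and `<` prompts printed by run_repl above and the parenthesized prefix form printed by run():

> (1 +
< 2)
(group (+ 1 2))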

src/lexer/lexer.rs (new file, 315 lines)
@@ -0,0 +1,315 @@
use phf::phf_map;

use crate::error::LexerError;
use crate::misc::CodePos;

use super::{Token, TokenType};

/*====================================================================================================================*/

static KEYWORDS: phf::Map<&'static str, TokenType> = phf_map! {
    "and" => TokenType::And,
    "class" => TokenType::Class,
    "else" => TokenType::Else,
    "false" => TokenType::False,
    "for" => TokenType::For,
    "fun" => TokenType::Fun,
    "if" => TokenType::If,
    "nil" => TokenType::Nil,
    "or" => TokenType::Or,
    "print" => TokenType::Print,
    "return" => TokenType::Return,
    "super" => TokenType::Super,
    "this" => TokenType::This,
    "true" => TokenType::True,
    "var" => TokenType::Var,
    "while" => TokenType::While
};

/*====================================================================================================================*/

pub fn scan_tokens(source_code: &str) -> Result<Vec<Token>, Vec<LexerError>> {
    let lexer = Lexer::new(source_code);

    lexer.scan_tokens()
}

/*====================================================================================================================*/

#[derive(Debug)]
struct Lexer {
    source: Vec<char>,

    tokens: Vec<Token>,

    start: usize,
    current: usize,

    code_pos: CodePos,

    errors: Vec<LexerError>,
}

impl Lexer {
    fn new(source_code: &str) -> Self {
        let source = source_code.chars().collect();

        Lexer {
            source,
            tokens: Vec::new(),
            start: 0,
            current: 0,
            code_pos: CodePos::default(),
            errors: Vec::new(),
        }
    }

    fn scan_tokens(self) -> Result<Vec<Token>, Vec<LexerError>> {
        let mut me = self;

        while !me.source_is_empty() {
            me.scan_token();
        }

        me.tokens.push(Token::new(TokenType::EOF, "".to_owned(), me.code_pos));

        if me.errors.is_empty() {
            Ok(me.tokens)
        } else {
            Err(me.errors)
        }
    }

    fn scan_token(&mut self) {
        use TokenType::*;

        self.start = self.current;

        let c = self.advance();

        let token_type = match c {
            '(' => Some(LeftParen),
            ')' => Some(RightParen),
            '{' => Some(LeftBrace),
            '}' => Some(RightBrace),
            ',' => Some(Comma),
            '.' => Some(Dot),
            '+' => Some(Plus),
            '-' => Some(Minus),
            ';' => Some(Semicolon),
            '*' => Some(Star),
            '!' => {
                if self.consume('=') {
                    Some(BangEqual)
                } else {
                    Some(Bang)
                }
            }
            '=' => {
                if self.consume('=') {
                    Some(EqualEqual)
                } else {
                    Some(Equal)
                }
            }
            '<' => {
                if self.consume('=') {
                    Some(LessEqual)
                } else {
                    Some(Less)
                }
            }
            '>' => {
                if self.consume('=') {
                    Some(GreaterEqual)
                } else {
                    Some(Greater)
                }
            }
            '/' => {
                if self.consume('/') {
                    // line comment
                    // advance until either source is empty or a newline is found
                    while !self.source_is_empty() && self.advance() != '\n' {}

                    None
                } else if self.consume('*') {
                    // block comment

                    let mut depth = 1;
                    loop {
                        if depth == 0 {
                            break;
                        }

                        if self.source_is_empty() {
                            self.errors.push(LexerError::UnterminatedBlockComment {
                                code_pos: self.code_pos,
                            });
                            break;
                        }

                        if self.peek() == Some('/') && self.peek_two() == Some('*') {
                            // nested block comment
                            // consume '/' and '*'
                            self.advance();
                            self.advance();
                            depth += 1;
                            continue;
                        }

                        if self.peek() == Some('*') && self.peek_two() == Some('/') {
                            // consume '*' and '/'
                            self.advance();
                            self.advance();
                            depth -= 1;
                            continue;
                        }

                        self.advance();
                    }

                    None
                } else {
                    Some(Slash)
                }
            }
            '"' => self.try_parse_string(),
            '0'..='9' => self.try_parse_number(),
            ' ' | '\r' | '\n' | '\t' => None, // handled automatically in advance()
            // identifiers may start with a letter or '_'
            c if c == '_' || c.is_ascii_alphabetic() => self.try_parse_identifier(),
            _ => {
                self.errors.push(LexerError::UnexpectedCharacter {
                    c,
                    code_pos: self.code_pos,
                });
                None
            }
        };

        if let Some(token_type) = token_type {
            self.push_token(token_type);
        }
    }

    fn source_is_empty(&self) -> bool {
        self.current >= self.source.len()
    }

    fn advance(&mut self) -> char {
        assert!(!self.source_is_empty());

        let c = self.source[self.current];

        self.current += 1;
        self.code_pos.col += 1;

        if c == '\t' {
            self.code_pos.col += 3;
        } else if c == '\n' {
            self.code_pos.col = 0;
            self.code_pos.line += 1;
        }

        c
    }

    fn peek(&self) -> Option<char> {
        self.source.get(self.current).copied()
    }

    fn peek_two(&self) -> Option<char> {
        self.source.get(self.current + 1).copied()
    }

    fn consume(&mut self, c: char) -> bool {
        if self.peek() == Some(c) {
            self.advance();
            true
        } else {
            false
        }
    }

    fn push_token(&mut self, token_type: TokenType) {
        let lexeme: String = self.source[self.start..self.current].iter().collect();

        self.tokens.push(Token::new(token_type, lexeme, self.code_pos));
    }

    fn try_parse_string(&mut self) -> Option<TokenType> {
        // advance until the closing '"'; error out if the source ends first
        loop {
            if self.source_is_empty() {
                self.errors.push(LexerError::UnterminatedStringLiteral {
                    code_pos: self.code_pos,
                });
                return None;
            }

            if self.advance() == '"' {
                break;
            }
        }

        let string_literal = self.source[self.start + 1..self.current - 1].iter().collect();

        Some(TokenType::String(string_literal))
    }

    fn try_parse_number(&mut self) -> Option<TokenType> {
        let is_some_digit = |c: Option<char>| c.map_or(false, |c| c.is_ascii_digit());

        // eat all digits
        while is_some_digit(self.peek()) {
            self.advance();
        }

        // consume separator dot and continue eating digits
        if self.peek() == Some('.') && is_some_digit(self.peek_two()) {
            // consume the '.'
            self.advance();

            while is_some_digit(self.peek()) {
                self.advance();
            }
        }

        // consume exponential e and continue eating digits
        if self.peek() == Some('e') && is_some_digit(self.peek_two()) {
            // consume the 'e'
            self.advance();

            while is_some_digit(self.peek()) {
                self.advance();
            }
        }

        let lexeme: String = self.source[self.start..self.current].iter().collect();

        let num: f64 = match lexeme.parse() {
            Ok(num) => num,
            Err(err) => {
                self.errors.push(LexerError::InvalidNumberLiteral {
                    lexeme,
                    msg: format!("{err}"),
                    code_pos: self.code_pos,
                });
                return None;
            }
        };

        Some(TokenType::Number(num))
    }

    fn try_parse_identifier(&mut self) -> Option<TokenType> {
        let is_alpha_num_underscore =
            |c: Option<char>| c.map_or(false, |c| matches!(c, '0'..='9' | 'A'..='Z' | '_' | 'a'..='z'));

        while is_alpha_num_underscore(self.peek()) {
            self.advance();
        }

        let lexeme: String = self.source[self.start..self.current].iter().collect();

        let token_type = KEYWORDS.get(&lexeme).cloned().unwrap_or(TokenType::Identifier(lexeme));

        Some(token_type)
    }
}
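
The scanner collects every error it encounters instead of stopping at the first one, which is why scan_tokens returns Result<Vec<Token>, Vec<LexerError>>. A minimal crate-internal sketch of driving it (demo is a hypothetical helper):

use crate::lexer::scan_tokens;

fn demo() {
    match scan_tokens("1 <= 2") {
        Ok(tokens) => {
            // Token's Display impl prints the token type, so this yields:
            // <Number(1.0)> <LessEqual> <Number(2.0)> <EOF>
            for token in &tokens {
                print!("{token} ");
            }
            println!();
        }
        Err(errors) => {
            // every lexer error accumulated over the whole input
            for err in &errors {
                eprintln!("{err}");
            }
        }
    }
}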

src/lexer/mod.rs (new file, 5 lines)
@@ -0,0 +1,5 @@
mod lexer;
mod token;

pub use lexer::scan_tokens;
pub use token::{Token, TokenType};

src/lexer/token.rs (new file, 64 lines)
@@ -0,0 +1,64 @@
use crate::misc::CodePos;

#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
#[rustfmt::skip]
pub enum TokenType {
    // Single-character tokens
    LeftParen, RightParen, LeftBrace, RightBrace,
    Comma, Dot, Minus, Plus, Semicolon, Slash, Star,

    // One or two character tokens
    Bang, BangEqual,
    Equal, EqualEqual,
    Greater, GreaterEqual,
    Less, LessEqual,

    // Literals
    Identifier(String),
    String(String),
    Number(f64),

    // Keywords
    And, Class, Else, False, Fun, For, If, Nil, Or,
    Print, Return, Super, This, True, Var, While,

    EOF
}

pub struct Token {
    token_type: TokenType,
    lexeme: String,

    code_pos: CodePos,
}

impl Token {
    pub fn new(token_type: TokenType, lexeme: String, code_pos: CodePos) -> Self {
        Token {
            token_type,
            lexeme,
            code_pos,
        }
    }

    pub fn token_type(&self) -> &TokenType {
        &self.token_type
    }

    pub fn code_pos(&self) -> CodePos {
        self.code_pos
    }
}

impl std::fmt::Debug for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "<{:?}> (\"{}\")", self.token_type, self.lexeme)
    }
}

impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "<{:?}>", self.token_type)
    }
}
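
Token deliberately splits the two formatting traits: Debug includes the raw lexeme, Display shows only the token type. A small crate-internal sketch (demo is hypothetical):

use crate::lexer::{Token, TokenType};
use crate::misc::CodePos;

fn demo() {
    let token = Token::new(TokenType::Number(42.0), "42".to_owned(), CodePos::default());
    println!("{token}");   // Display: <Number(42.0)>
    println!("{token:?}"); // Debug:   <Number(42.0)> ("42")
}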

src/main.rs (new file, 9 lines)
@@ -0,0 +1,9 @@
mod error;
mod interpreter;
mod lexer;
mod misc;
mod parser;

fn main() {
    interpreter::interpreter_main();
}

src/misc.rs (new file, 25 lines)
@@ -0,0 +1,25 @@
use std::fmt::{Debug, Display};

#[derive(Copy, Clone)]
pub struct CodePos {
    pub line: u32,
    pub col: u32,
}

impl Default for CodePos {
    fn default() -> Self {
        Self { line: 1, col: 0 }
    }
}

impl Display for CodePos {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "line {}, col {}", self.line, self.col)
    }
}

impl Debug for CodePos {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}:{}", self.line, self.col)
    }
}

src/parser/GRAMMAR (new file, 8 lines)
@@ -0,0 +1,8 @@
expression -> equality ;

equality   -> comparison ( ( "==" | "!=" ) comparison )* ;
comparison -> term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
term       -> factor ( ( "+" | "-" ) factor )* ;
factor     -> unary ( ( "*" | "/" ) unary )* ;
unary      -> ( "!" | "-" ) unary | primary ;
primary    -> "(" expression ")" | NUMBER | STRING | "true" | "false" | "nil" ;
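
Each rule defers to the next-higher-precedence rule, so `*` and `/` bind tighter than `+` and `-`: parsing `1 + 2 * 3`, term reads `1`, sees `+`, then calls factor, which consumes all of `2 * 3` as the right operand. A sketch of the resulting tree built with the Expr constructors from src/parser/expr.rs below (demo is a hypothetical helper):

use crate::parser::expr::{BinaryOp, Expr};

fn demo() {
    // the tree the grammar produces for: 1 + 2 * 3
    let expr = Expr::new_binary(
        Expr::new_number(1.0),
        BinaryOp::Add,
        Expr::new_binary(Expr::new_number(2.0), BinaryOp::Multiply, Expr::new_number(3.0)),
    );
    // Expr's Display impl prints the prefix form: (+ 1 (* 2 3))
    println!("{expr}");
}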

src/parser/expr.rs (new file, 121 lines)
@@ -0,0 +1,121 @@
use std::fmt::Display;

#[derive(Debug)]
pub enum Expr {
    Literal(Literal),
    Unary(UnaryOp, Box<Expr>),
    Binary(Box<Expr>, BinaryOp, Box<Expr>),
    Grouping(Box<Expr>),
}

impl Expr {
    pub fn new_string(s: String) -> Self {
        Expr::Literal(Literal::String(s))
    }

    pub fn new_number(num: f64) -> Self {
        Expr::Literal(Literal::Number(num))
    }

    pub fn new_bool(b: bool) -> Self {
        Expr::Literal(Literal::Bool(b))
    }

    pub fn new_nil() -> Self {
        Expr::Literal(Literal::Nil)
    }

    pub fn new_unary(operator: UnaryOp, expr: Expr) -> Self {
        Expr::Unary(operator, Box::new(expr))
    }

    pub fn new_binary(left: Expr, operator: BinaryOp, right: Expr) -> Self {
        Expr::Binary(Box::new(left), operator, Box::new(right))
    }

    pub fn new_grouping(expr: Expr) -> Self {
        Expr::Grouping(Box::new(expr))
    }
}

impl Display for Expr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Expr::Literal(literal) => write!(f, "{literal}"),
            Expr::Unary(op, expr) => write!(f, "({op} {expr})"),
            Expr::Binary(left, op, right) => write!(f, "({op} {left} {right})"),
            Expr::Grouping(expr) => write!(f, "(group {expr})"),
        }
    }
}

/*====================================================================================================================*/

#[derive(Debug)]
pub enum Literal {
    String(String),
    Number(f64),
    Bool(bool),
    Nil,
}

impl Display for Literal {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Literal::String(s) => write!(f, "\"{s}\""),
            Literal::Number(num) => write!(f, "{num}"),
            Literal::Bool(b) => write!(f, "{b}"),
            Literal::Nil => write!(f, "nil"),
        }
    }
}

/*====================================================================================================================*/

#[derive(Debug)]
pub enum UnaryOp {
    Negate,
    Not,
}

impl Display for UnaryOp {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            UnaryOp::Negate => write!(f, "-"),
            UnaryOp::Not => write!(f, "!"),
        }
    }
}

/*====================================================================================================================*/

#[derive(Debug)]
#[rustfmt::skip]
pub enum BinaryOp {
    // arithmetic
    Add, Subtract, Multiply, Divide,

    // equality
    Equal, NotEqual,

    // comparison
    Less, LessEqual,
    Greater, GreaterEqual,
}

impl Display for BinaryOp {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            BinaryOp::Add => write!(f, "+"),
            BinaryOp::Subtract => write!(f, "-"),
            BinaryOp::Multiply => write!(f, "*"),
            BinaryOp::Divide => write!(f, "/"),
            BinaryOp::Equal => write!(f, "=="),
            BinaryOp::NotEqual => write!(f, "!="),
            BinaryOp::Less => write!(f, "<"),
            BinaryOp::LessEqual => write!(f, "<="),
            BinaryOp::Greater => write!(f, ">"),
            BinaryOp::GreaterEqual => write!(f, ">="),
        }
    }
}

src/parser/mod.rs (new file, 2 lines)
@@ -0,0 +1,2 @@
pub mod expr;
pub mod parser;

src/parser/parser.rs (new file, 246 lines)
@@ -0,0 +1,246 @@
use std::vec::IntoIter;

use crate::error::ParserError;
use crate::lexer::{Token, TokenType};
use crate::parser::expr::BinaryOp;

use super::expr::{Expr, UnaryOp};

/*====================================================================================================================*/

type ParserResult<T> = Result<T, ParserError>;

pub fn parse_tokens(tokens: Vec<Token>) -> ParserResult<Expr> {
    Parser::new(tokens).parse()
}

/*====================================================================================================================*/

// takes care of token iteration
struct TokenIter {
    token_iter: IntoIter<Token>,

    peek_token: Option<Token>,
}

impl TokenIter {
    pub fn new(tokens: Vec<Token>) -> Self {
        TokenIter {
            token_iter: tokens.into_iter(),
            peek_token: None,
        }
    }

    fn peek(&mut self) -> Option<&Token> {
        // if peek_token is empty: fill with next token from token_iter
        if self.peek_token.is_none() && self.token_iter.len() != 0 {
            self.peek_token = self.token_iter.next();
        }

        self.peek_token.as_ref()
    }
}

impl Iterator for TokenIter {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        // return the peeked token if any, or else the next token from token_iter
        self.peek_token.take().or_else(|| self.token_iter.next())
    }
}

/*====================================================================================================================*/

struct Parser {
    token_iter: TokenIter,
}

impl Parser {
    pub fn new(tokens: Vec<Token>) -> Self {
        Parser {
            token_iter: TokenIter::new(tokens),
        }
    }

    pub fn parse(self) -> ParserResult<Expr> {
        let mut me = self;

        me.expression()
    }

    fn synchronise(&mut self) {
        loop {
            let peek_token = self
                .peek_token()
                .unwrap_or_else(|err| panic!("peek_token returned error in synchronise: {err}"));

            // if we match a synchronisation point: return
            match peek_token.token_type() {
                TokenType::Class
                | TokenType::Fun
                | TokenType::Var
                | TokenType::For
                | TokenType::If
                | TokenType::While
                | TokenType::Print
                | TokenType::Return
                | TokenType::EOF => return,
                TokenType::Semicolon => {
                    // discard semicolon first, then return
                    let _ = self
                        .next_token()
                        .unwrap_or_else(|err| panic!("next_token returned error in synchronise: {err}"));
                    return;
                }
                _ => {}
            }

            // no sync point: discard token
            let _ = self
                .next_token()
                .unwrap_or_else(|err| panic!("next_token returned error in synchronise: {err}"));
        }
    }

    fn expression(&mut self) -> ParserResult<Expr> {
        self.equality()
    }

    fn equality(&mut self) -> ParserResult<Expr> {
        let mut expr = self.comparison()?;

        loop {
            // get the equality operator as a BinaryOp; otherwise break out of the loop
            let operator = match self.peek_token()?.token_type() {
                TokenType::EqualEqual => BinaryOp::Equal,
                TokenType::BangEqual => BinaryOp::NotEqual,
                _ => break,
            };

            // consume operator token
            let _ = self.next_token().unwrap();

            let right = self.comparison()?;

            expr = Expr::new_binary(expr, operator, right);
        }

        Ok(expr)
    }

    fn comparison(&mut self) -> ParserResult<Expr> {
        let mut expr = self.term()?;

        loop {
            let operator = match self.peek_token()?.token_type() {
                TokenType::Less => BinaryOp::Less,
                TokenType::LessEqual => BinaryOp::LessEqual,
                TokenType::Greater => BinaryOp::Greater,
                TokenType::GreaterEqual => BinaryOp::GreaterEqual,
                _ => break,
            };

            // consume operator token
            let _ = self.next_token().unwrap();

            let right = self.term()?;

            expr = Expr::new_binary(expr, operator, right);
        }

        Ok(expr)
    }

    fn term(&mut self) -> ParserResult<Expr> {
        let mut expr = self.factor()?;

        loop {
            let operator = match self.peek_token()?.token_type() {
                TokenType::Plus => BinaryOp::Add,
                TokenType::Minus => BinaryOp::Subtract,
                _ => break,
            };

            // consume operator token
            let _ = self.next_token().unwrap();

            let right = self.factor()?;

            expr = Expr::new_binary(expr, operator, right);
        }

        Ok(expr)
    }

    fn factor(&mut self) -> ParserResult<Expr> {
        let mut expr = self.unary()?;

        loop {
            let operator = match self.peek_token()?.token_type() {
                TokenType::Star => BinaryOp::Multiply,
                TokenType::Slash => BinaryOp::Divide,
                _ => break,
            };

            // consume operator token
            let _ = self.next_token().unwrap();

            let right = self.unary()?;

            expr = Expr::new_binary(expr, operator, right);
        }

        Ok(expr)
    }

    fn unary(&mut self) -> ParserResult<Expr> {
        match self.peek_token()?.token_type() {
            TokenType::Bang => {
                let _ = self.next_token().unwrap();
                Ok(Expr::new_unary(UnaryOp::Not, self.unary()?))
            }
            TokenType::Minus => {
                let _ = self.next_token().unwrap();
                Ok(Expr::new_unary(UnaryOp::Negate, self.unary()?))
            }
            _ => self.primary(),
        }
    }

    fn primary(&mut self) -> ParserResult<Expr> {
        let token = self.next_token()?;

        match token.token_type() {
            TokenType::Number(num) => Ok(Expr::new_number(*num)),
            TokenType::String(s) => Ok(Expr::new_string(s.clone())),
            TokenType::False => Ok(Expr::new_bool(false)),
            TokenType::True => Ok(Expr::new_bool(true)),
            TokenType::Nil => Ok(Expr::new_nil()),
            TokenType::LeftParen => {
                let expr = self.expression()?;
                self.consume_token(TokenType::RightParen)?;
                Ok(Expr::new_grouping(expr))
            }
            _ => Err(ParserError::UnexpectedToken { token }),
        }
    }

    fn next_token(&mut self) -> ParserResult<Token> {
        self.token_iter.next().ok_or(ParserError::TokenStreamEnded)
    }

    fn peek_token(&mut self) -> ParserResult<&Token> {
        self.token_iter.peek().ok_or(ParserError::TokenStreamEnded)
    }

    fn consume_token(&mut self, token_type: TokenType) -> ParserResult<Token> {
        self.next_token().and_then(|token| {
            if token.token_type() == &token_type {
                Ok(token)
            } else {
                Err(ParserError::UnexpectedToken { token })
            }
        })
    }
}
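
End to end, the pipeline mirrors run() in src/interpreter.rs: scan, parse, pretty-print. A minimal crate-internal sketch (demo is a hypothetical helper):

use crate::lexer::scan_tokens;
use crate::parser::parser::parse_tokens;

fn demo() {
    // lex, then parse; unary minus binds tighter than '*', grouping is explicit
    let tokens = scan_tokens("-(1 + 2) * 3").expect("lexing failed");
    let expr = parse_tokens(tokens).expect("parsing failed");

    // prints the parenthesized prefix form: (* (- (group (+ 1 2))) 3)
    println!("{expr}");
}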