From 42dbe531ade41ade96133e5ce3105ed7c394f3f6 Mon Sep 17 00:00:00 2001 From: Moritz Gmeiner Date: Fri, 20 Jan 2023 16:10:03 +0100 Subject: [PATCH] Chapter 7 --- .gitignore | 1 + .vscode/launch.json | 26 ++++ Cargo.lock | 129 ++++++++++++++++++ Cargo.toml | 10 ++ src/error.rs | 57 ++++++++ src/interpreter.rs | 94 +++++++++++++ src/lexer/lexer.rs | 315 +++++++++++++++++++++++++++++++++++++++++++ src/lexer/mod.rs | 5 + src/lexer/token.rs | 64 +++++++++ src/main.rs | 9 ++ src/misc.rs | 25 ++++ src/parser/GRAMMAR | 8 ++ src/parser/expr.rs | 121 +++++++++++++++++ src/parser/mod.rs | 2 + src/parser/parser.rs | 246 +++++++++++++++++++++++++++++++++ 15 files changed, 1112 insertions(+) create mode 100644 .gitignore create mode 100644 .vscode/launch.json create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/error.rs create mode 100644 src/interpreter.rs create mode 100644 src/lexer/lexer.rs create mode 100644 src/lexer/mod.rs create mode 100644 src/lexer/token.rs create mode 100644 src/main.rs create mode 100644 src/misc.rs create mode 100644 src/parser/GRAMMAR create mode 100644 src/parser/expr.rs create mode 100644 src/parser/mod.rs create mode 100644 src/parser/parser.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..cfb04d4 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,26 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Debug executable 'rlox2'", + "cargo": { + "args": [ + "build", + "--bin=rlox2", + "--package=rlox2" + ], + "filter": { + "name": "rlox2", + "kind": "bin" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + } + ] +} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..dc62d3b --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,129 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "phf" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" +dependencies = [ + "phf_macros", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92aacdc5f16768709a569e913f7451034034178b05bdc8acda226659a3dccc66" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" +dependencies = [ + "siphasher", +] + +[[package]] +name = "proc-macro2" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "rlox2" +version = "0.1.0" +dependencies = [ + "phf", + "thiserror", +] + +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + +[[package]] +name = "syn" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..c8df177 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ 
+[package] +name = "rlox2" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +phf = { version = "0.11.1", features = ["macros"] } +thiserror = "1.0" diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..ae5f689 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,57 @@ +use thiserror::Error; + +use crate::lexer::Token; +use crate::misc::CodePos; + +#[derive(Error, Debug)] +pub enum LexerError { + #[error("Unexpected character '{c}' at {code_pos}.")] + UnexpectedCharacter { c: char, code_pos: CodePos }, + #[error("Unterminated string literal starting at {code_pos}.")] + UnterminatedStringLiteral { code_pos: CodePos }, + #[error("Unterminated block comment starting at {code_pos}.")] + UnterminatedBlockComment { code_pos: CodePos }, + #[error("Invalid number literal {lexeme} at {code_pos}: {msg}")] + InvalidNumberLiteral { + lexeme: String, + msg: String, + code_pos: CodePos, + }, +} + +#[derive(Error, Debug)] +pub enum ParserError { + #[error("Token stream ended unexpectedly.")] + TokenStreamEnded, + #[error("Unexpected token {token} at {0}.", token.code_pos())] + UnexpectedToken { token: Token }, +} + +#[derive(Error, Debug)] +pub enum LoxError { + #[error("{msg}")] + LexerError { msg: String }, + #[error("{msg}")] + ParserError { msg: String }, +} + +impl From> for LoxError { + fn from(lexer_errs: Vec) -> Self { + let msg = if lexer_errs.len() == 1 { + format!("{}", lexer_errs[0]) + } else { + let msgs: Vec = lexer_errs.iter().map(|err| format!("{}", err)).collect(); + msgs.join("\n") + }; + + LoxError::LexerError { msg } + } +} + +impl From for LoxError { + fn from(parser_error: ParserError) -> Self { + LoxError::ParserError { + msg: format!("{parser_error}"), + } + } +} diff --git a/src/interpreter.rs b/src/interpreter.rs new file mode 100644 index 0000000..0b8437f --- /dev/null +++ b/src/interpreter.rs @@ -0,0 +1,94 @@ +use std::io::Write; + 
+use crate::error::LoxError; +use crate::lexer::{scan_tokens, Token}; +use crate::parser::parser::parse_tokens; + +pub fn interpreter_main() { + let args: Vec = std::env::args().collect(); + + match args.len() { + 1 => run_repl(), + 2 => run_file(&args[1]), + _ => { + eprintln!("Usage: rlox [script]"); + std::process::exit(64); + } + } +} + +fn run_file(script_path: &str) { + let source_code = std::fs::read_to_string(script_path).unwrap_or_else(|err| { + eprintln!("Reading script file {} failed: {}", script_path, err); + std::process::exit(66); + }); + + if let Err(err) = run(&source_code) { + eprintln!("{}", err); + std::process::exit(65); + } +} + +fn run_repl() { + let stdin = std::io::stdin(); + + 'outer: loop { + let mut input_buf = String::new(); + + print!("> "); + std::io::stdout().flush().unwrap(); + + 'inner: loop { + stdin.read_line(&mut input_buf).unwrap_or_else(|err| { + eprintln!("Could not read from stdin: {}", err); + std::process::exit(66); + }); + + let num_open_braces = (input_buf.matches('{').count() as i64) - (input_buf.matches('}').count() as i64); + let num_open_parens = (input_buf.matches('(').count() as i64) - (input_buf.matches(')').count() as i64); + let num_open_brackets = (input_buf.matches('[').count() as i64) - (input_buf.matches(']').count() as i64); + + // all braces/parens/brackets closed => break + if num_open_braces == 0 && num_open_parens == 0 && num_open_brackets == 0 { + break 'inner; + } + + // any braces/parens/brackets more closing than opening => break (will be parse error) + if num_open_braces < 0 || num_open_parens < 0 || num_open_brackets < 0 { + break 'inner; + } + + print!("< "); + std::io::stdout().flush().unwrap(); + } + + input_buf = input_buf.trim().to_owned(); + + if input_buf.is_empty() || input_buf == "exit" || input_buf == "quit" { + break 'outer; + } + + match run(&input_buf) { + Ok(()) => {} + Err(err) => eprintln!("{}", err), + } + } +} + +fn run(code_string: &str) -> Result<(), LoxError> { + let tokens: 
Vec = scan_tokens(code_string)?; + + /* let token_str = tokens + .iter() + .map(|token| format!("{token}")) + .collect::>() + .join(" "); + + println!("{token_str}"); */ + + let expr = parse_tokens(tokens)?; + + println!("{expr}"); + + Ok(()) +} diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs new file mode 100644 index 0000000..c25af25 --- /dev/null +++ b/src/lexer/lexer.rs @@ -0,0 +1,315 @@ +use phf::phf_map; + +use crate::error::LexerError; +use crate::misc::CodePos; + +use super::{Token, TokenType}; + +/*====================================================================================================================*/ + +static KEYWORDS: phf::Map<&'static str, TokenType> = phf_map! { + "and" => TokenType::And, + "class" => TokenType::Class, + "else" => TokenType::Else, + "false" => TokenType::False, + "for" => TokenType::For, + "fun" => TokenType::Fun, + "if" => TokenType::If, + "nil" => TokenType::Nil, + "or" => TokenType::Or, + "print" => TokenType::Print, + "return" => TokenType::Return, + "super" => TokenType::Super, + "this" => TokenType::This, + "true" => TokenType::True, + "var" => TokenType::Var, + "while" => TokenType::While +}; + +/*====================================================================================================================*/ + +pub fn scan_tokens(source_code: &str) -> Result, Vec> { + let lexer = Lexer::new(source_code); + + lexer.scan_tokens() +} + +/*====================================================================================================================*/ + +#[derive(Debug)] +struct Lexer { + source: Vec, + + tokens: Vec, + + start: usize, + current: usize, + + code_pos: CodePos, + + errors: Vec, +} + +impl Lexer { + fn new(source_code: &str) -> Self { + let source = source_code.chars().collect(); + + Lexer { + source, + tokens: Vec::new(), + start: 0, + current: 0, + code_pos: CodePos::default(), + errors: Vec::new(), + } + } + + fn scan_tokens(self) -> Result, Vec> { + let mut me = self; + + while
!me.source_is_empty() { + me.scan_token(); + } + + me.tokens.push(Token::new(TokenType::EOF, "".to_owned(), me.code_pos)); + + if me.errors.is_empty() { + Ok(me.tokens) + } else { + Err(me.errors) + } + } + + fn scan_token(&mut self) { + use TokenType::*; + + self.start = self.current; + + let c = self.advance(); + + let token_type = match c { + '(' => Some(LeftParen), + ')' => Some(RightParen), + '{' => Some(LeftBrace), + '}' => Some(RightBrace), + ',' => Some(Comma), + '.' => Some(Dot), + '+' => Some(Plus), + '-' => Some(Minus), + ';' => Some(Semicolon), + '*' => Some(Star), + '!' => { + if self.consume('=') { + Some(BangEqual) + } else { + Some(Bang) + } + } + '=' => { + if self.consume('=') { + Some(EqualEqual) + } else { + Some(Equal) + } + } + '<' => { + if self.consume('=') { + Some(LessEqual) + } else { + Some(Less) + } + } + '>' => { + if self.consume('=') { + Some(GreaterEqual) + } else { + Some(Greater) + } + } + '/' => { + if self.consume('/') { + // line comment + // advance until either source is empty or newline if found + while !self.source_is_empty() && self.advance() != '\n' {} + + None + } else if self.consume('*') { + // block comment + + let mut depth = 1; + loop { + if depth == 0 { + break; + } + + if self.source_is_empty() { + self.errors.push(LexerError::UnterminatedBlockComment { + code_pos: self.code_pos, + }); + break; + } + + if self.peek() == Some('/') && self.peek_two() == Some('*') { + // nested block comment + // consume '/' and '*' + self.advance(); + self.advance(); + depth += 1; + continue; + } + + if self.peek() == Some('*') && self.peek_two() == Some('/') { + // consume '*' and '/' + self.advance(); + self.advance(); + depth -= 1; + continue; + } + + self.advance(); + } + + None + } else { + Some(Slash) + } + } + '"' => self.try_parse_string(), + '0'..='9' => self.try_parse_number(), + ' ' | '\r' | '\n' | '\t' => None, // handled automatically in advance() + c if c == '_' || c.is_ascii_alphabetic() => self.try_parse_identifier(), 
+ _ => { + self.errors.push(LexerError::UnexpectedCharacter { + c, + code_pos: self.code_pos, + }); + None + } + }; + + if let Some(token_type) = token_type { + self.push_token(token_type); + } + } + + fn source_is_empty(&self) -> bool { + self.current >= self.source.len() + } + + fn advance(&mut self) -> char { + assert!(!self.source_is_empty()); + + let c = self.source[self.current]; + + self.current += 1; + self.code_pos.col += 1; + + if c == '\t' { + self.code_pos.col += 3; + } else if c == '\n' { + self.code_pos.col = 0; + self.code_pos.line += 1; + } + + c + } + + fn peek(&self) -> Option { + self.source.get(self.current).copied() + } + + fn peek_two(&self) -> Option { + self.source.get(self.current + 1).copied() + } + + fn consume(&mut self, c: char) -> bool { + if self.peek() == Some(c) { + self.advance(); + true + } else { + false + } + } + + fn push_token(&mut self, token_type: TokenType) { + let lexeme: String = self.source[self.start..self.current].iter().collect(); + + self.tokens.push(Token::new(token_type, lexeme, self.code_pos)); + } + + fn try_parse_string(&mut self) -> Option { + // advance until second " + while self.advance() != '"' { + if self.source_is_empty() { + self.errors.push(LexerError::UnterminatedStringLiteral { + code_pos: self.code_pos, + }); + return None; + } + } + + let string_literal = self.source[self.start + 1..self.current - 1].iter().collect(); + + Some(TokenType::String(string_literal)) + } + + fn try_parse_number(&mut self) -> Option { + let is_some_digit = |c: Option| c.map_or(false, |c| c.is_ascii_digit()); + + // eat all digits + while is_some_digit(self.peek()) { + self.advance(); + } + + // consume separator dot and continue eating digits + if self.peek() == Some('.') && is_some_digit(self.peek_two()) { + // consume the '.' 
+ self.advance(); + + while is_some_digit(self.peek()) { + self.advance(); + } + } + + // consume exponential e and continue eating digits + if self.peek() == Some('e') && is_some_digit(self.peek_two()) { + // consume the 'e' + self.advance(); + + while is_some_digit(self.peek()) { + self.advance(); + } + } + + let lexeme: String = self.source[self.start..self.current].iter().collect(); + + let num: f64 = match lexeme.parse() { + Ok(num) => num, + Err(err) => { + self.errors.push(LexerError::InvalidNumberLiteral { + lexeme, + msg: format!("{err}"), + code_pos: self.code_pos, + }); + return None; + } + }; + + Some(TokenType::Number(num)) + } + + fn try_parse_identifier(&mut self) -> Option { + let is_alpha_num_underscore = + |c: Option| c.map_or(false, |c| matches!(c, '0'..='9' | 'A'..='Z' | '_' | 'a'..='z')); + + while is_alpha_num_underscore(self.peek()) { + self.advance(); + } + + let lexeme: String = self.source[self.start..self.current].iter().collect(); + + let token_type = KEYWORDS.get(&lexeme).cloned().unwrap_or(TokenType::Identifier(lexeme)); + + Some(token_type) + } +} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs new file mode 100644 index 0000000..ad2abcf --- /dev/null +++ b/src/lexer/mod.rs @@ -0,0 +1,5 @@ +mod lexer; +mod token; + +pub use lexer::scan_tokens; +pub use token::{Token, TokenType}; diff --git a/src/lexer/token.rs b/src/lexer/token.rs new file mode 100644 index 0000000..aa8e941 --- /dev/null +++ b/src/lexer/token.rs @@ -0,0 +1,64 @@ +use crate::misc::CodePos; + +#[allow(dead_code)] +#[derive(Debug, Clone, PartialEq)] +#[rustfmt::skip] +pub enum TokenType { + // Single-character tokens + LeftParen, RightParen, LeftBrace, RightBrace, + Comma, Dot, Minus, Plus, Semicolon, Slash, Star, + + // One or two character tokens + Bang, BangEqual, + Equal, EqualEqual, + Greater, GreaterEqual, + Less, LessEqual, + + // Literals + Identifier(String), + String(String), + Number(f64), + + // Keywords + And, Class, Else, False, Fun, For, If, Nil, Or, + 
Print, Return, Super, This, True, Var, While, + + EOF +} + +pub struct Token { + token_type: TokenType, + lexeme: String, + + code_pos: CodePos, +} + +impl Token { + pub fn new(token_type: TokenType, lexeme: String, pos: CodePos) -> Self { + Token { + token_type, + lexeme: lexeme, + code_pos: pos, + } + } + + pub fn token_type(&self) -> &TokenType { + &self.token_type + } + + pub fn code_pos(&self) -> CodePos { + self.code_pos + } +} + +impl std::fmt::Debug for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<{:?}> (\"{}\")", self.token_type, self.lexeme) + } +} + +impl std::fmt::Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<{:?}>", self.token_type) + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..636da3c --- /dev/null +++ b/src/main.rs @@ -0,0 +1,9 @@ +mod error; +mod interpreter; +mod lexer; +mod misc; +mod parser; + +fn main() { + interpreter::interpreter_main(); +} diff --git a/src/misc.rs b/src/misc.rs new file mode 100644 index 0000000..955df68 --- /dev/null +++ b/src/misc.rs @@ -0,0 +1,25 @@ +use std::fmt::{Debug, Display}; + +#[derive(Copy, Clone)] +pub struct CodePos { + pub line: u32, + pub col: u32, +} + +impl Default for CodePos { + fn default() -> Self { + Self { line: 1, col: 0 } + } +} + +impl Display for CodePos { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "line {}, col {}", self.line, self.col) + } +} + +impl Debug for CodePos { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}", self.line, self.col) + } +} diff --git a/src/parser/GRAMMAR b/src/parser/GRAMMAR new file mode 100644 index 0000000..68b3cf3 --- /dev/null +++ b/src/parser/GRAMMAR @@ -0,0 +1,8 @@ +expression -> equality ; + +equality -> comparison ( ( "==" | "!=" ) comparison )* ; +comparison -> term ( ( ">" | ">=" | "<" | "<=" ) term )* ; +term -> factor ( ( "+" | "-" ) factor )* ;
+factor -> unary ( ( "*" | "/" ) unary )* ; +unary -> ( "!" | "-" ) unary | primary ; +primary -> "(" expression ")" | NUMBER | STRING | "true" | "false" | "nil" ; \ No newline at end of file diff --git a/src/parser/expr.rs b/src/parser/expr.rs new file mode 100644 index 0000000..32a4e7a --- /dev/null +++ b/src/parser/expr.rs @@ -0,0 +1,121 @@ +use std::fmt::Display; + +#[derive(Debug)] +pub enum Expr { + Literal(Literal), + Unary(UnaryOp, Box), + Binary(Box, BinaryOp, Box), + Grouping(Box), +} + +impl Expr { + pub fn new_string(s: String) -> Self { + Expr::Literal(Literal::String(s)) + } + + pub fn new_number(num: f64) -> Self { + Expr::Literal(Literal::Number(num)) + } + + pub fn new_bool(b: bool) -> Self { + Expr::Literal(Literal::Bool(b)) + } + + pub fn new_nil() -> Self { + Expr::Literal(Literal::Nil) + } + + pub fn new_unary(operator: UnaryOp, expr: Expr) -> Self { + Expr::Unary(operator, Box::new(expr)) + } + + pub fn new_binary(left: Expr, operator: BinaryOp, right: Expr) -> Self { + Expr::Binary(Box::new(left), operator, Box::new(right)) + } + + pub fn new_grouping(expr: Expr) -> Self { + Expr::Grouping(Box::new(expr)) + } +} + +impl Display for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Expr::Literal(literal) => write!(f, "{literal}"), + Expr::Unary(op, expr) => write!(f, "({op} {expr})"), + Expr::Binary(left, op, right) => write!(f, "({op} {left} {right})"), + Expr::Grouping(expr) => write!(f, "(group {expr})"), + } + } +} + +/*====================================================================================================================*/ + +#[derive(Debug)] +pub enum Literal { + String(String), + Number(f64), + Bool(bool), + Nil, +} + +impl Display for Literal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Literal::String(s) => write!(f, "\"{s}\""), + Literal::Number(num) => write!(f, "{num}"), + Literal::Bool(b) => write!(f, "{b}"), + Literal::Nil => 
write!(f, "nil"), + } + } +} + +/*====================================================================================================================*/ + +#[derive(Debug)] +pub enum UnaryOp { + Negate, + Not, +} + +impl Display for UnaryOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + UnaryOp::Negate => write!(f, "-"), + UnaryOp::Not => write!(f, "!"), + } + } +} + +/*====================================================================================================================*/ + +#[derive(Debug)] +#[rustfmt::skip] +pub enum BinaryOp { + // arithmetic + Add, Subtract, Multiply, Divide, + + // equality + Equal, NotEqual, + + // comparison + Less, LessEqual, + Greater, GreaterEqual, +} + +impl Display for BinaryOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + BinaryOp::Add => write!(f, "+"), + BinaryOp::Subtract => write!(f, "-"), + BinaryOp::Multiply => write!(f, "*"), + BinaryOp::Divide => write!(f, "/"), + BinaryOp::Equal => write!(f, "=="), + BinaryOp::NotEqual => write!(f, "!="), + BinaryOp::Less => write!(f, "<"), + BinaryOp::LessEqual => write!(f, "<="), + BinaryOp::Greater => write!(f, ">"), + BinaryOp::GreaterEqual => write!(f, ">="), + } + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..79b536a --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,2 @@ +pub mod expr; +pub mod parser; diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 0000000..1204e52 --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,246 @@ +use std::vec::IntoIter; + +use crate::error::ParserError; +use crate::lexer::{Token, TokenType}; +use crate::parser::expr::BinaryOp; + +use super::expr::{Expr, UnaryOp}; + +/*====================================================================================================================*/ + +type ParserResult = Result; + +pub fn parse_tokens(tokens: Vec) -> ParserResult { + 
Parser::new(tokens).parse() +} + +/*====================================================================================================================*/ + +// takes care of token iteration +struct TokenIter { + token_iter: IntoIter, + + peek_token: Option, +} + +impl TokenIter { + pub fn new(tokens: Vec) -> Self { + TokenIter { + token_iter: tokens.into_iter(), + peek_token: None, + } + } + + fn peek(&mut self) -> Option<&Token> { + // if peek_token is empty: fill with next token from token_iter + if self.peek_token.is_none() && self.token_iter.len() != 0 { + self.peek_token = self.token_iter.next(); + } + + self.peek_token.as_ref() + } +} + +impl Iterator for TokenIter { + type Item = Token; + + fn next(&mut self) -> Option { + // return the peeked token if any, or else next token from token_iter + self.peek_token.take().or_else(|| self.token_iter.next()) + } +} + +/*====================================================================================================================*/ + +struct Parser { + token_iter: TokenIter, +} + +impl Parser { + pub fn new(tokens: Vec) -> Self { + Parser { + token_iter: TokenIter::new(tokens), + } + } + + pub fn parse(self) -> ParserResult { + let mut me = self; + + me.expression() + } + + fn synchronise(&mut self) { + loop { + let peek_token = self + .peek_token() + .unwrap_or_else(|err| panic!("peek_token returned error in synchronise: {err}")); + + // if we match a synchronisation point: return + match peek_token.token_type() { + TokenType::Class + | TokenType::Fun + | TokenType::Var + | TokenType::For + | TokenType::If + | TokenType::While + | TokenType::Print + | TokenType::Return + | TokenType::EOF => return, + TokenType::Semicolon => { + // discard semicolon first, then return + let _ = self + .next_token() + .unwrap_or_else(|err| panic!("next_token returned error in synchronise: {err}")); + return; + } + _ => {} + } + + // no sync point: discard token + let _ = self + .next_token() + .unwrap_or_else(|err| 
panic!("next_token returned error in synchronise: {err}")); + } + } + + fn expression(&mut self) -> ParserResult { + self.equality() + } + + fn equality(&mut self) -> ParserResult { + let mut expr = self.comparison()?; + + loop { + // get comparison operator as BinaryOp; otherwise break out of loop + let operator = match self.peek_token()?.token_type() { + TokenType::EqualEqual => BinaryOp::Equal, + TokenType::BangEqual => BinaryOp::NotEqual, + _ => break, + }; + + // consume operator token + let _ = self.next_token().unwrap(); + + let right = self.comparison()?; + + expr = Expr::new_binary(expr, operator, right); + } + + Ok(expr) + } + + fn comparison(&mut self) -> ParserResult { + let mut expr = self.term()?; + + loop { + let operator = match self.peek_token()?.token_type() { + TokenType::Less => BinaryOp::Less, + TokenType::LessEqual => BinaryOp::LessEqual, + TokenType::Greater => BinaryOp::Greater, + TokenType::GreaterEqual => BinaryOp::GreaterEqual, + _ => break, + }; + + // consume operator token + let _ = self.next_token().unwrap(); + + let right = self.term()?; + + expr = Expr::new_binary(expr, operator, right); + } + + Ok(expr) + } + + fn term(&mut self) -> ParserResult { + let mut expr = self.factor()?; + + loop { + let operator = match self.peek_token()?.token_type() { + TokenType::Plus => BinaryOp::Add, + TokenType::Minus => BinaryOp::Subtract, + _ => break, + }; + + // consume operator token + let _ = self.next_token().unwrap(); + + let right = self.factor()?; + + expr = Expr::new_binary(expr, operator, right); + } + + Ok(expr) + } + + fn factor(&mut self) -> ParserResult { + let mut expr = self.unary()?; + + loop { + let operator = match self.peek_token()?.token_type() { + TokenType::Star => BinaryOp::Multiply, + TokenType::Slash => BinaryOp::Divide, + _ => break, + }; + + // consume operator token + let _ = self.next_token().unwrap(); + + let right = self.unary()?; + + expr = Expr::new_binary(expr, operator, right); + } + + Ok(expr) + } + + fn 
unary(&mut self) -> ParserResult { + match self.peek_token()?.token_type() { + TokenType::Bang => { + let _ = self.next_token().unwrap(); + Ok(Expr::new_unary(UnaryOp::Not, self.unary()?)) + } + TokenType::Minus => { + let _ = self.next_token().unwrap(); + Ok(Expr::new_unary(UnaryOp::Negate, self.unary()?)) + } + _ => self.primary(), + } + } + + fn primary(&mut self) -> ParserResult { + let token = self.next_token()?; + + match token.token_type() { + TokenType::Number(num) => Ok(Expr::new_number(*num)), + TokenType::String(s) => Ok(Expr::new_string(s.clone())), + TokenType::False => Ok(Expr::new_bool(false)), + TokenType::True => Ok(Expr::new_bool(true)), + TokenType::Nil => Ok(Expr::new_nil()), + TokenType::LeftParen => { + let expr = self.expression()?; + self.consume_token(TokenType::RightParen)?; + Ok(Expr::new_grouping(expr)) + } + _ => Err(ParserError::UnexpectedToken { token }), + } + } + + fn next_token(&mut self) -> ParserResult { + self.token_iter.next().ok_or(ParserError::TokenStreamEnded) + } + + fn peek_token(&mut self) -> ParserResult<&Token> { + self.token_iter.peek().ok_or(ParserError::TokenStreamEnded) + } + + fn consume_token(&mut self, token_type: TokenType) -> ParserResult { + self.next_token().and_then(|token| { + if token.token_type() == &token_type { + Ok(token) + } else { + Err(ParserError::UnexpectedToken { token }) + } + }) + } +}