finished resolver (chapter 11) and started classes (chapter 12)

This commit is contained in:
Moritz Gmeiner 2023-01-28 01:11:55 +01:00
commit 10540708d4
34 changed files with 1449 additions and 439 deletions

11
frontend/Cargo.toml Normal file
View file

@ -0,0 +1,11 @@
[package]
name = "rlox2-frontend"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
phf = { version = "0.11.1", features = ["macros"] }
thiserror = "1.0.38"
itertools = "0.10.5"

View file

@ -0,0 +1,313 @@
use phf::phf_map;
use super::{CodePos, LexerError, Token, TokenType};
/*====================================================================================================================*/
/// Static map, built with `phf_map!`, from reserved words to their keyword token types.
///
/// `try_parse_identifier` looks every scanned word up here; a word that is not
/// listed becomes a `TokenType::Identifier` instead.
static KEYWORDS: phf::Map<&'static str, TokenType> = phf_map! {
"and" => TokenType::And,
"break" => TokenType::Break,
"class" => TokenType::Class,
"else" => TokenType::Else,
"false" => TokenType::False,
"for" => TokenType::For,
"fun" => TokenType::Fun,
"if" => TokenType::If,
"nil" => TokenType::Nil,
"or" => TokenType::Or,
"print" => TokenType::Print,
"return" => TokenType::Return,
"super" => TokenType::Super,
"this" => TokenType::This,
"true" => TokenType::True,
"var" => TokenType::Var,
"while" => TokenType::While
};
/*====================================================================================================================*/
/// Lexes `source_code` into a token list.
///
/// On success the returned tokens end with an `EOF` token. If any lexer error
/// was recorded, all collected errors are returned instead of the tokens.
pub fn scan_tokens(source_code: &str) -> Result<Vec<Token>, Vec<LexerError>> {
    Lexer::new(source_code).scan_tokens()
}
/*====================================================================================================================*/
/// Scanner state: the source text, the cursor into it, and everything produced so far.
#[derive(Debug)]
struct Lexer {
// the complete source, one `char` per element, so it can be indexed by character
source: Vec<char>,
// tokens emitted so far
tokens: Vec<Token>,
// index into `source` of the first char of the lexeme being scanned (set at the top of `scan_token`)
start: usize,
// index into `source` of the next char that has not been consumed yet
current: usize,
// line/column bookkeeping, updated by `advance()` for every consumed char
code_pos: CodePos,
// errors recorded so far; scanning continues after an error
errors: Vec<LexerError>,
}
impl Lexer {
fn new(source_code: &str) -> Self {
let source = source_code.chars().collect();
Lexer {
source,
tokens: Vec::new(),
start: 0,
current: 0,
code_pos: CodePos::default(),
errors: Vec::new(),
}
}
/// Consumes the lexer and scans the whole source.
///
/// An `EOF` token is always appended once the input is exhausted. Returns the
/// tokens if no error was recorded, otherwise the list of recorded errors.
fn scan_tokens(mut self) -> Result<Vec<Token>, Vec<LexerError>> {
    while !self.source_is_empty() {
        self.scan_token();
    }

    let eof = Token::new(TokenType::EOF, self.code_pos);
    self.tokens.push(eof);

    if !self.errors.is_empty() {
        return Err(self.errors);
    }
    Ok(self.tokens)
}
/// Scans a single lexeme starting at `self.current` and, if it forms a token,
/// appends that token to `self.tokens`.
///
/// Whitespace and comments produce no token. A character that cannot start
/// any lexeme is recorded as `LexerError::UnexpectedCharacter` and skipped, so
/// scanning can continue.
///
/// # Panics
/// Panics (through `advance`) if called when the source is already exhausted.
fn scan_token(&mut self) {
    use TokenType::*;

    self.start = self.current;
    let c = self.advance();

    let token_type = match c {
        '(' => Some(LeftParen),
        ')' => Some(RightParen),
        '{' => Some(LeftBrace),
        '}' => Some(RightBrace),
        ',' => Some(Comma),
        '.' => Some(Dot),
        '+' => Some(Plus),
        '-' => Some(Minus),
        ';' => Some(Semicolon),
        '*' => Some(Star),
        // one- or two-character operators: look for a trailing '='
        '!' => Some(if self.consume('=') { BangEqual } else { Bang }),
        '=' => Some(if self.consume('=') { EqualEqual } else { Equal }),
        '<' => Some(if self.consume('=') { LessEqual } else { Less }),
        '>' => Some(if self.consume('=') { GreaterEqual } else { Greater }),
        '/' => {
            if self.consume('/') {
                // line comment: discard everything up to and including the
                // newline, or up to the end of the input
                while !self.source_is_empty() && self.advance() != '\n' {}
                None
            } else if self.consume('*') {
                self.skip_block_comment();
                None
            } else {
                Some(Slash)
            }
        }
        '"' => self.try_parse_string(),
        '0'..='9' => self.try_parse_number(),
        ' ' | '\r' | '\n' | '\t' => None, // position bookkeeping already happened in advance()
        // A match guard applies to every alternative of an or-pattern, so '_'
        // has to be accepted inside the guard itself; writing
        // `c @ '_' | c if c.is_ascii_alphabetic()` rejects a leading underscore.
        c if c == '_' || c.is_ascii_alphabetic() => self.try_parse_identifier(),
        _ => {
            self.errors.push(LexerError::UnexpectedCharacter {
                c,
                code_pos: self.code_pos,
            });
            None
        }
    };

    if let Some(token_type) = token_type {
        self.push_token(token_type);
    }
}

/// Skips the rest of a block comment whose opening `/*` has already been
/// consumed. Block comments nest: every inner `/*` needs its own `*/`.
///
/// Records `LexerError::UnterminatedBlockComment` if the input ends first.
fn skip_block_comment(&mut self) {
    // The error message says "starting at", so remember where the comment
    // began (just past its opening `/*`) instead of reporting end of input.
    let comment_start = self.code_pos;
    let mut depth = 1_usize;

    while depth > 0 {
        match (self.peek(), self.peek_two()) {
            (None, _) => {
                self.errors.push(LexerError::UnterminatedBlockComment {
                    code_pos: comment_start,
                });
                return;
            }
            (Some('/'), Some('*')) => {
                // nested block comment: consume '/' and '*'
                self.advance();
                self.advance();
                depth += 1;
            }
            (Some('*'), Some('/')) => {
                // consume '*' and '/'
                self.advance();
                self.advance();
                depth -= 1;
            }
            _ => {
                self.advance();
            }
        }
    }
}
/// Returns `true` once every character of the source has been consumed.
fn source_is_empty(&self) -> bool {
    self.source.len() <= self.current
}
/// Consumes and returns the next character, updating the line/column position.
///
/// A newline starts a new line at column 0, a tab counts as four columns and
/// every other character as one.
///
/// # Panics
/// Panics if the source is already exhausted.
fn advance(&mut self) -> char {
    assert!(!self.source_is_empty());

    let consumed = self.source[self.current];
    self.current += 1;

    let pos = &mut self.code_pos;
    match consumed {
        '\n' => {
            pos.line += 1;
            pos.col = 0;
        }
        '\t' => pos.col += 4,
        _ => pos.col += 1,
    }

    consumed
}
/// Returns the next unconsumed character without consuming it, or `None` at end of input.
fn peek(&self) -> Option<char> {
    let next_index = self.current;
    self.source.get(next_index).copied()
}
/// Returns the character after the next one without consuming anything, or
/// `None` if the input does not extend that far.
fn peek_two(&self) -> Option<char> {
    let after_next = self.current + 1;
    self.source.get(after_next).copied()
}
/// Consumes the next character only if it equals `c`; reports whether it did.
fn consume(&mut self, c: char) -> bool {
    let is_match = self.peek() == Some(c);
    if is_match {
        self.advance();
    }
    is_match
}
/// Appends a token of the given type, stamped with the lexer's current position.
///
/// `scan_token` calls this after the lexeme has been consumed, so the recorded
/// position is the one reached after the lexeme's last character.
fn push_token(&mut self, token_type: TokenType) {
    let token = Token::new(token_type, self.code_pos);
    self.tokens.push(token);
}
/// Scans the rest of a string literal whose opening `"` has already been consumed.
///
/// Returns `TokenType::String` holding the text between the quotes. Newlines
/// inside the literal are allowed. If the input ends before a closing `"`,
/// records `LexerError::UnterminatedStringLiteral` and returns `None`.
fn try_parse_string(&mut self) -> Option<TokenType> {
    // The error message says "starting at", so remember where the literal
    // began (just past its opening quote) instead of reporting end of input.
    let literal_start = self.code_pos;

    loop {
        // Check for end of input *before* advancing: `advance` asserts that
        // input remains, so a lone `"` at the very end of the source would
        // otherwise panic instead of producing an error.
        if self.source_is_empty() {
            self.errors.push(LexerError::UnterminatedStringLiteral {
                code_pos: literal_start,
            });
            return None;
        }
        if self.advance() == '"' {
            break;
        }
    }

    // strip the surrounding quotes
    let string_literal = self.source[self.start + 1..self.current - 1].iter().collect();
    Some(TokenType::String(string_literal))
}
/// Scans the rest of a number literal whose first digit has already been consumed.
///
/// Accepts digits, an optional `.` followed by digits and an optional `e`
/// followed by digits. The `.` and `e` are only consumed when a digit comes
/// directly after them. The lexeme is then parsed as `f64`; on failure
/// `LexerError::InvalidNumberLiteral` is recorded and `None` is returned.
fn try_parse_number(&mut self) -> Option<TokenType> {
    fn is_digit(c: Option<char>) -> bool {
        matches!(c, Some(d) if d.is_ascii_digit())
    }

    // integer part
    while is_digit(self.peek()) {
        self.advance();
    }

    // fractional part
    let has_fraction = self.peek() == Some('.') && is_digit(self.peek_two());
    if has_fraction {
        // the '.'
        self.advance();
        while is_digit(self.peek()) {
            self.advance();
        }
    }

    // exponent
    let has_exponent = self.peek() == Some('e') && is_digit(self.peek_two());
    if has_exponent {
        // the 'e'
        self.advance();
        while is_digit(self.peek()) {
            self.advance();
        }
    }

    let lexeme = self.source[self.start..self.current].iter().collect::<String>();

    match lexeme.parse::<f64>() {
        Ok(num) => Some(TokenType::Number(num)),
        Err(err) => {
            let msg = err.to_string();
            self.errors.push(LexerError::InvalidNumberLiteral {
                lexeme,
                msg,
                code_pos: self.code_pos,
            });
            None
        }
    }
}
/// Scans the rest of a word whose first character has already been consumed.
///
/// The word continues over ASCII letters, ASCII digits and `_`. Returns the
/// matching keyword token if the word is listed in `KEYWORDS`, otherwise
/// `TokenType::Identifier` carrying the word.
fn try_parse_identifier(&mut self) -> Option<TokenType> {
    while let Some(c) = self.peek() {
        if c != '_' && !c.is_ascii_alphanumeric() {
            break;
        }
        self.advance();
    }

    let lexeme: String = self.source[self.start..self.current].iter().collect();

    let token_type = match KEYWORDS.get(&lexeme) {
        Some(keyword) => keyword.clone(),
        None => TokenType::Identifier(lexeme),
    };

    Some(token_type)
}
}

View file

@ -0,0 +1,23 @@
/// A position in the source text, given as a line and a column.
#[derive(Copy, Clone)]
pub struct CodePos {
    pub line: u32,
    pub col: u32,
}

impl Default for CodePos {
    /// The starting position: line 1, column 0.
    fn default() -> Self {
        CodePos { line: 1, col: 0 }
    }
}

/// Long form, e.g. `line 3, col 7`.
impl std::fmt::Display for CodePos {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let CodePos { line, col } = self;
        write!(f, "line {line}, col {col}")
    }
}

/// Compact form, e.g. `3:7`.
impl std::fmt::Debug for CodePos {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let CodePos { line, col } = self;
        write!(f, "{line}:{col}")
    }
}

View file

@ -0,0 +1,19 @@
use thiserror::Error;
use super::CodePos;
/// Errors the lexer can record while scanning.
///
/// Every variant carries a `CodePos` that is interpolated into its message.
#[derive(Error, Debug)]
pub enum LexerError {
/// A character was read that does not start any lexeme.
#[error("Unexpected character '{c}' at {code_pos}.")]
UnexpectedCharacter { c: char, code_pos: CodePos },
/// The input ended before a string literal was closed.
#[error("Unterminated string literal starting at {code_pos}.")]
UnterminatedStringLiteral { code_pos: CodePos },
/// The input ended before a block comment was closed.
#[error("Unterminated block comment starting at {code_pos}.")]
UnterminatedBlockComment { code_pos: CodePos },
/// A number lexeme could not be parsed; `msg` holds the text of the parse error.
#[error("Invalid number literal {lexeme} at {code_pos}: {msg}")]
InvalidNumberLiteral {
lexeme: String,
msg: String,
code_pos: CodePos,
},
}

View file

@ -0,0 +1,9 @@
mod _lexer;
mod code_pos;
mod error;
mod token;
pub use _lexer::scan_tokens;
pub use code_pos::CodePos;
pub use error::LexerError;
pub use token::{Token, TokenType};

View file

@ -0,0 +1,57 @@
use super::CodePos;
/// The kind of a token, including the payload for identifiers and literals.
///
/// `rustfmt::skip` keeps the hand-made grouping of the variants below.
#[allow(dead_code, clippy::upper_case_acronyms)]
#[derive(Debug, Clone, PartialEq)]
#[rustfmt::skip]
pub enum TokenType {
// Single-character tokens
LeftParen, RightParen, LeftBrace, RightBrace,
Comma, Dot, Minus, Plus, Semicolon, Slash, Star,
// One or two character tokens
Bang, BangEqual,
Equal, EqualEqual,
Greater, GreaterEqual,
Less, LessEqual,
// Literals (each carries the scanned value)
Identifier(String),
String(String),
Number(f64),
// Keywords (the words listed in the lexer's `KEYWORDS` map)
And, Break, Class, Else, False, Fun, For, If, Nil, Or,
Print, Return, Super, This, True, Var, While,
// End-of-input marker; the lexer appends exactly one after the last real token
EOF
}
/// A lexed token: its type (with any literal payload) and a source position.
#[derive(Clone)]
pub struct Token {
    pub token_type: TokenType,
    pub code_pos: CodePos,
}

impl Token {
    /// Builds a token of type `token_type` located at `pos`.
    pub fn new(token_type: TokenType, pos: CodePos) -> Self {
        Self {
            token_type,
            code_pos: pos,
        }
    }
}

/// Shows only the token type, wrapped in angle brackets; the position is omitted.
impl std::fmt::Debug for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { token_type, .. } = self;
        write!(f, "<{:?}>", token_type)
    }
}

/// Same rendering as the `Debug` implementation.
impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { token_type, .. } = self;
        write!(f, "<{:?}>", token_type)
    }
}

2
frontend/src/lib.rs Normal file
View file

@ -0,0 +1,2 @@
pub mod lexer;
pub mod parser;

View file

@ -0,0 +1,45 @@
use thiserror::Error;
use crate::lexer::{CodePos, Token};
use super::Expr;
#[derive(Error, Debug)]
pub enum ParserError {
#[error("Token stream ended unexpectedly.")]
TokenStreamEnded,
#[error("Expected a primary expression, but found a {token} token instead at {0}.", token.code_pos)]
ExpectedPrimary { token: Token },
#[error("Missing semicolon at {code_pos}")]
MissingSemicolon { code_pos: CodePos },
#[error("Expected variable name at {0}, got {token} instead", token.code_pos)]
ExpectedVarName { token: Token },
#[error("Can't assign to {expr} at {code_pos}")]
InvalidAssignment { expr: Expr, code_pos: CodePos },
#[error("Missing closing curly brace at {code_pos}")]
MissingRightBrace { code_pos: CodePos },
#[error("Missing closing parenthesis at {code_pos}")]
MissingRightParen { code_pos: CodePos },
#[error("Missing parenthesis after if at {code_pos}")]
MissingParenAfterIf { code_pos: CodePos },
#[error("Missing parenthesis after while at {code_pos}")]
MissingParenAfterWhile { code_pos: CodePos },
#[error("Missing parenthesis after for at {code_pos}")]
MissingParenAfterFor { code_pos: CodePos },
#[error("Call at {code_pos} has too many arguments")]
TooManyArguments { code_pos: CodePos },
#[error("{msg} at {code_pos}")]
MissingIdentifier { msg: String, code_pos: CodePos },
#[error("Missing arguments to function declaration at {code_pos}")]
MissingFunctionArgs { code_pos: CodePos },
#[error("Missing body to function declaration at {code_pos}")]
MissingFunctionBody { code_pos: CodePos },
#[error("Function declaration at {code_pos} has too many parameters")]
TooManyParams { code_pos: CodePos },
#[error("Return statement outside of function definition")]
InvalidReturn { code_pos: CodePos },
#[error("Break statement outside of loop")]
InvalidBreak { code_pos: CodePos },
#[error("Missing class body at {code_pos}")]
MissingClassBody { code_pos: CodePos },
}

241
frontend/src/parser/expr.rs Normal file
View file

@ -0,0 +1,241 @@
use std::fmt::Display;
use std::rc::Rc;
use itertools::Itertools;
use super::Stmt;
/// Expression nodes of the syntax tree.
#[derive(Debug, Clone)]
pub enum Expr {
// a literal value (string, number, bool or nil)
Literal {
literal: Literal,
},
// a prefix operator applied to one operand
Unary {
op: UnaryOp,
expr: Box<Expr>,
},
// an arithmetic, equality or comparison operator with two operands
Binary {
left: Box<Expr>,
op: BinaryOp,
right: Box<Expr>,
},
// `and` / `or`; kept separate from `Binary`
Logical {
left: Box<Expr>,
op: LogicalOp,
right: Box<Expr>,
},
// a parenthesised expression
Grouping {
expr: Box<Expr>,
},
// a variable referenced by name; this is the variant the parser creates for identifiers
Variable {
name: String,
},
// NOTE(review): not created by the parser in this crate; presumably produced by the
// resolver, with `level` being a scope distance — confirm against the resolver
LocalVariable {
name: String,
level: usize,
},
// NOTE(review): not created by the parser in this crate; presumably produced by the
// resolver for names found in the global scope — confirm against the resolver
GlobalVariable {
name: String,
},
// `target = value`; the parser only accepts a `Variable` as target
Assignment {
target: Box<Expr>,
value: Box<Expr>,
},
// a call of `callee` with the given arguments
Call {
callee: Box<Expr>,
args: Vec<Expr>,
},
// a function value; `closure_vars` starts out empty in `Expr::function`
// (presumably filled in by a later pass — confirm)
Function {
name: String,
param_names: Vec<String>,
closure_vars: Vec<(String, usize)>,
body: Box<Stmt>,
},
}
/// Convenience constructors that take care of boxing the child nodes.
impl Expr {
    /// A string literal; the text is stored behind an `Rc`.
    pub fn string(s: impl Into<String>) -> Self {
        Self::Literal {
            literal: Literal::String(Rc::new(s.into())),
        }
    }

    /// A number literal.
    pub fn number(num: f64) -> Self {
        let literal = Literal::Number(num);
        Self::Literal { literal }
    }

    /// A boolean literal.
    pub fn bool(b: bool) -> Self {
        let literal = Literal::Bool(b);
        Self::Literal { literal }
    }

    /// The `nil` literal.
    pub fn nil() -> Self {
        let literal = Literal::Nil;
        Self::Literal { literal }
    }

    /// `op` applied to `expr`.
    pub fn unary(op: UnaryOp, expr: Expr) -> Self {
        Self::Unary {
            op,
            expr: Box::new(expr),
        }
    }

    /// `left op right` for arithmetic, equality and comparison operators.
    pub fn binary(left: Expr, op: BinaryOp, right: Expr) -> Self {
        Self::Binary {
            left: Box::new(left),
            op,
            right: Box::new(right),
        }
    }

    /// `left op right` for `and` / `or`.
    pub fn logical(left: Expr, op: LogicalOp, right: Expr) -> Self {
        Self::Logical {
            left: Box::new(left),
            op,
            right: Box::new(right),
        }
    }

    /// A parenthesised `expr`.
    pub fn grouping(expr: Expr) -> Self {
        Self::Grouping { expr: Box::new(expr) }
    }

    /// `target = value`.
    pub fn assignment(target: Expr, value: Expr) -> Self {
        Self::Assignment {
            target: Box::new(target),
            value: Box::new(value),
        }
    }

    /// A call of `callee` with `args`.
    pub fn call(callee: Expr, args: Vec<Expr>) -> Self {
        Self::Call {
            callee: Box::new(callee),
            args,
        }
    }

    /// A function value; its list of closure variables starts out empty.
    pub fn function(name: String, param_names: Vec<String>, body: Stmt) -> Self {
        Self::Function {
            name,
            param_names,
            closure_vars: Vec::new(),
            body: Box::new(body),
        }
    }
}
impl Display for Expr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expr::Literal { literal } => write!(f, "{literal}"),
Expr::Unary { op, expr } => write!(f, "({op} {expr})"),
Expr::Binary { left, op, right } => write!(f, "({op} {left} {right})"),
Expr::Logical { left, op, right } => {
write!(f, "({op} {left} {right})")
}
Expr::Grouping { expr } => write!(f, "(group {expr})"),
Expr::Variable { name } => write!(f, "(var {name})"),
Expr::LocalVariable { name, level } => write!(f, "(var {name} local({level}))"),
Expr::GlobalVariable { name } => write!(f, "(var {name} global)"),
Expr::Assignment { target, value } => write!(f, "{target} = {value}"),
Expr::Call { callee, args } => write!(f, "({callee} {})", args.iter().map(|arg| arg.to_string()).join(" ")),
Expr::Function {
name,
param_names,
closure_vars,
body,
} => {
if !closure_vars.is_empty() {
let closure_fmt = closure_vars.iter().map(|(name, _level)| name).join(", ");
write!(f, "fun [{closure_fmt}] {name}({}) {body}", param_names.join(", "))
} else {
write!(f, "fun {name}({}) {body}", param_names.join(", "))
}
}
}
}
}
/*====================================================================================================================*/
/// A literal value appearing in the source.
#[derive(Debug, Clone)]
pub enum Literal {
    String(Rc<String>),
    Number(f64),
    Bool(bool),
    Nil,
}

/// Strings are shown in double quotes (without escaping), the rest as plain text.
impl Display for Literal {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::String(text) => write!(f, "\"{text}\""),
            Self::Number(value) => write!(f, "{value}"),
            Self::Bool(flag) => write!(f, "{flag}"),
            Self::Nil => f.write_str("nil"),
        }
    }
}
/*====================================================================================================================*/
/// Prefix operators.
#[derive(Debug, Clone, Copy)]
pub enum UnaryOp {
    Negate,
    Not,
}

/// Shows the operator's source symbol.
impl Display for UnaryOp {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let symbol = match self {
            Self::Negate => "-",
            Self::Not => "!",
        };
        f.write_str(symbol)
    }
}
/*====================================================================================================================*/
/// Infix operators other than `and` / `or`.
#[derive(Debug, Clone, Copy)]
#[rustfmt::skip]
pub enum BinaryOp {
    // arithmetic
    Add, Subtract, Multiply, Divide,

    // equality
    Equal, NotEqual,

    // comparison
    Less, LessEqual,
    Greater, GreaterEqual,
}

/// Shows the operator's source symbol.
impl Display for BinaryOp {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let symbol = match self {
            Self::Add => "+",
            Self::Subtract => "-",
            Self::Multiply => "*",
            Self::Divide => "/",
            Self::Equal => "==",
            Self::NotEqual => "!=",
            Self::Less => "<",
            Self::LessEqual => "<=",
            Self::Greater => ">",
            Self::GreaterEqual => ">=",
        };
        f.write_str(symbol)
    }
}
/// The logical operators `or` and `and`.
#[derive(Debug, Clone, Copy)]
pub enum LogicalOp {
    Or,
    And,
}

/// Shows the operator's keyword.
impl Display for LogicalOp {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let keyword = match self {
            Self::Or => "or",
            Self::And => "and",
        };
        f.write_str(keyword)
    }
}

View file

@ -0,0 +1,5 @@
use itertools::Itertools;
/// Prefixes every `\n`-separated line of `s` with one tab character.
///
/// An empty string counts as a single empty line, so the result is `"\t"`; a
/// trailing newline is followed by a tab as well.
pub fn indent(s: String) -> String {
    // Every line gains exactly one byte (the tab), so this capacity is exact
    // and the result is built with a single allocation instead of one
    // temporary `String` per line.
    let mut indented = String::with_capacity(s.len() + s.split('\n').count());

    for (index, line) in s.split('\n').enumerate() {
        if index > 0 {
            indented.push('\n');
        }
        indented.push('\t');
        indented.push_str(line);
    }

    indented
}

View file

@ -0,0 +1,10 @@
mod error;
mod expr;
mod misc;
mod parse;
mod stmt;
pub use error::ParserError;
pub use expr::{BinaryOp, Expr, Literal, LogicalOp, UnaryOp};
pub use parse::parse_tokens;
pub use stmt::Stmt;

View file

@ -0,0 +1,777 @@
use std::vec::IntoIter;
use crate::lexer::{Token, TokenType};
use crate::parser::expr::BinaryOp;
use super::expr::{Expr, UnaryOp};
use super::{LogicalOp, ParserError, Stmt};
/*====================================================================================================================*/
type ParserResult<T> = Result<T, ParserError>;
/// Parses `tokens` into a list of statements.
///
/// Returns every parser error that was collected if at least one occurred.
pub fn parse_tokens(tokens: Vec<Token>) -> Result<Vec<Stmt>, Vec<ParserError>> {
    let parser = Parser::new(tokens);
    parser.parse()
}
/*====================================================================================================================*/
/// Owns the token stream and adds one token of lookahead on top of it.
struct TokenIter {
    token_iter: IntoIter<Token>,
    // a token that has been peeked at but not handed out yet
    peek_token: Option<Token>,
}

impl TokenIter {
    /// Wraps `tokens`; nothing has been peeked at yet.
    pub fn new(tokens: Vec<Token>) -> Self {
        Self {
            token_iter: tokens.into_iter(),
            peek_token: None,
        }
    }

    /// Returns the next token without consuming it, or `None` if none is left.
    fn peek(&mut self) -> Option<&Token> {
        // fill the lookahead slot on demand; an exhausted stream leaves it empty
        if self.peek_token.is_none() {
            self.peek_token = self.token_iter.next();
        }

        self.peek_token.as_ref()
    }

    /// Returns `true` if neither the lookahead slot nor the stream holds a token.
    fn is_empty(&self) -> bool {
        let nothing_buffered = self.peek_token.is_none();
        nothing_buffered && self.token_iter.len() == 0
    }
}

impl Iterator for TokenIter {
    type Item = Token;

    /// Hands out the peeked token if there is one, otherwise the next token of the stream.
    fn next(&mut self) -> Option<Self::Item> {
        match self.peek_token.take() {
            Some(buffered) => Some(buffered),
            None => self.token_iter.next(),
        }
    }
}
/*====================================================================================================================*/
/// Recursive-descent parser state.
struct Parser {
// the token stream with one token of lookahead
token_iter: TokenIter,
// all errors found so far; besides failed statements this also receives
// the "too many parameters/arguments" errors, which do not abort the statement
parse_errors: Vec<ParserError>,
// true while a function body is being parsed; `return` is rejected otherwise
is_in_function: bool,
// true while a loop body is being parsed (reset inside function bodies); `break` is rejected otherwise
is_in_loop: bool,
}
impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
Parser {
token_iter: TokenIter::new(tokens),
parse_errors: Vec::new(),
is_in_function: false,
is_in_loop: false,
}
}
/// Parses statements until the `EOF` token (or the end of the stream) is reached.
///
/// A statement that fails to parse is recorded as an error and the parser
/// skips ahead to the next synchronisation point, so several errors can be
/// reported in one run. Returns the statements only if no error was recorded.
pub fn parse(mut self) -> Result<Vec<Stmt>, Vec<ParserError>> {
    let mut statements = Vec::new();

    loop {
        // the emptiness check must come first: peek_token panics on an empty stream
        if self.token_iter.is_empty() || self.peek_token().token_type == TokenType::EOF {
            break;
        }

        match self.statement() {
            Ok(stmt) => statements.push(stmt),
            Err(err) => {
                self.parse_errors.push(err);
                self.synchronise();
            }
        }
    }

    if self.parse_errors.is_empty() {
        Ok(statements)
    } else {
        Err(self.parse_errors)
    }
}
/// Error recovery: discards tokens until a point where a new statement can start.
///
/// Stops in front of a statement keyword or `EOF` (leaving that token in
/// place), right after a semicolon, or when the stream runs out.
fn synchronise(&mut self) {
    while !self.token_iter.is_empty() {
        let at_semicolon = match self.peek_token().token_type {
            // synchronisation points that must stay in the stream
            TokenType::Class
            | TokenType::Fun
            | TokenType::Var
            | TokenType::For
            | TokenType::If
            | TokenType::While
            | TokenType::Print
            | TokenType::Return
            | TokenType::EOF => return,
            TokenType::Semicolon => true,
            _ => false,
        };

        // discard the token; a semicolon is discarded too, but ends the skipping
        let _ = self.next_token();
        if at_semicolon {
            return;
        }
    }
}
/// Parses one statement or declaration, chosen by the next token.
/// Anything that does not start with a statement keyword or `{` is parsed as
/// an expression statement.
fn statement(&mut self) -> ParserResult<Stmt> {
    match self.peek_token().token_type {
        TokenType::Print => self.print_statement(),
        TokenType::If => self.if_statement(),
        TokenType::While => self.while_statement(),
        TokenType::For => self.for_statement(),
        TokenType::Var => self.var_declaration(),
        TokenType::Class => self.class_declaration(),
        TokenType::Fun => self.fun_declaration(),
        TokenType::LeftBrace => self.block(),
        TokenType::Break => self.break_statement(),
        TokenType::Return => self.return_statement(),
        _ => self.expression_statement(),
    }
}

/// Parses `break;`. The whole statement is consumed before a `break`
/// outside of a loop is reported.
fn break_statement(&mut self) -> ParserResult<Stmt> {
    let code_pos = self.peek_token().code_pos;
    assert_eq!(self.next_token().token_type, TokenType::Break);

    self.semicolon()?;

    if self.is_in_loop {
        Ok(Stmt::Break)
    } else {
        Err(ParserError::InvalidBreak { code_pos })
    }
}

/// Parses `return;` or `return <expr>;`; a missing value means `nil`. The
/// whole statement is consumed before a `return` outside of a function is reported.
fn return_statement(&mut self) -> ParserResult<Stmt> {
    let code_pos = self.peek_token().code_pos;
    assert_eq!(self.next_token().token_type, TokenType::Return);

    let expr = if self.peek_token().token_type == TokenType::Semicolon {
        Expr::nil()
    } else {
        self.expression()?
    };

    self.semicolon()?;

    if !self.is_in_function {
        return Err(ParserError::InvalidReturn { code_pos });
    }

    Ok(Stmt::return_stmt(expr))
}
/// Parses `if (<condition>) <statement>` with an optional `else <statement>`.
fn if_statement(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::If);

    self.consume_token(TokenType::LeftParen, |tok| ParserError::MissingParenAfterIf {
        code_pos: tok.code_pos,
    })?;
    let condition = self.expression()?;
    self.consume_token(TokenType::RightParen, |tok| ParserError::MissingRightParen {
        code_pos: tok.code_pos,
    })?;

    let then_branch = self.statement()?;

    let mut else_branch = None;
    if self.peek_token().token_type == TokenType::Else {
        // the `else` keyword itself
        let _ = self.next_token();
        else_branch = Some(self.statement()?);
    }

    Ok(Stmt::if_stmt(condition, then_branch, else_branch))
}
/// Parses `while (<condition>) <statement>`.
///
/// The body is parsed with `is_in_loop` set; the previous value is restored
/// afterwards whether or not the body parsed successfully.
fn while_statement(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::While);

    self.consume_token(TokenType::LeftParen, |tok| ParserError::MissingParenAfterWhile {
        code_pos: tok.code_pos,
    })?;
    let condition = self.expression()?;
    self.consume_token(TokenType::RightParen, |tok| ParserError::MissingRightParen {
        code_pos: tok.code_pos,
    })?;

    let outer_in_loop = std::mem::replace(&mut self.is_in_loop, true);
    let body = self.statement();
    self.is_in_loop = outer_in_loop;

    Ok(Stmt::while_stmt(condition, body?))
}
/// Parses `for (<initializer>; <condition>; <increment>) <statement>` and
/// rewrites it in terms of a while loop:
///
/// * a missing condition becomes `true`;
/// * an increment is appended to the body inside a block;
/// * an initializer is put in front of the loop inside an enclosing block.
fn for_statement(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::For);

    self.consume_token(TokenType::LeftParen, |tok| ParserError::MissingParenAfterFor {
        code_pos: tok.code_pos,
    })?;

    // both kinds of initializer statement consume their own semicolon
    let initializer = match self.peek_token().token_type {
        TokenType::Semicolon => {
            assert_eq!(self.next_token().token_type, TokenType::Semicolon);
            None
        }
        TokenType::Var => Some(self.var_declaration()?),
        _ => Some(self.expression_statement()?),
    };

    let condition = if self.peek_token().token_type == TokenType::Semicolon {
        Expr::bool(true)
    } else {
        self.expression()?
    };
    self.semicolon()?;

    let increment = if self.peek_token().token_type == TokenType::RightParen {
        None
    } else {
        Some(self.expression()?)
    };
    self.consume_token(TokenType::RightParen, |tok| ParserError::MissingRightParen {
        code_pos: tok.code_pos,
    })?;

    // parse the body as a loop body, restoring the flag even on failure
    let outer_in_loop = std::mem::replace(&mut self.is_in_loop, true);
    let body = self.statement();
    self.is_in_loop = outer_in_loop;
    let body = body?;

    let loop_body = match increment {
        Some(increment) => Stmt::Block {
            statements: vec![body, Stmt::expr_stmt(increment)],
        },
        None => body,
    };

    let while_loop = Stmt::while_stmt(condition, loop_body);

    Ok(match initializer {
        Some(initializer) => Stmt::Block {
            statements: vec![initializer, while_loop],
        },
        None => while_loop,
    })
}
/// Parses `print <expr>;`.
fn print_statement(&mut self) -> ParserResult<Stmt> {
    let print_token = self.next_token();
    assert_eq!(print_token.token_type, TokenType::Print);

    let expr = self.expression()?;
    self.semicolon()?;

    Ok(Stmt::print_stmt(expr))
}
/// Parses `var <name>;` or `var <name> = <expr>;`. A declaration without an
/// initializer is given `nil` as its value.
///
/// Returns `ParserError::ExpectedVarName` if `var` is not followed by an
/// identifier.
fn var_declaration(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::Var);

    let name = match self.peek_token().token_type {
        TokenType::Identifier(_) => match self.next_token().token_type {
            TokenType::Identifier(name) => name,
            _ => unreachable!("the peeked token was an identifier"),
        },
        // Never consume the EOF token: error handlers further up (e.g. in
        // `block`) peek at the stream again and would panic on an empty one,
        // as happened for input like `{ var`.
        TokenType::EOF => {
            return Err(ParserError::ExpectedVarName {
                token: self.peek_token().clone(),
            })
        }
        _ => {
            return Err(ParserError::ExpectedVarName {
                token: self.next_token(),
            })
        }
    };

    let initializer = if self.peek_token().token_type == TokenType::Equal {
        assert_eq!(self.next_token().token_type, TokenType::Equal);
        self.expression()?
    } else {
        Expr::nil()
    };

    self.semicolon()?;

    Ok(Stmt::var_decl(name, initializer))
}
/// Parses `class <name> { <method>* }`, where each method is a name followed
/// by a parameter list and a body (no `fun` keyword).
///
/// If a method name is missing because the input ended, the error is
/// reported as a missing closing brace instead.
fn class_declaration(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::Class);

    let name = self.identifier("Missing class name")?;

    self.consume_token(TokenType::LeftBrace, |tok| ParserError::MissingClassBody {
        code_pos: tok.code_pos,
    })?;

    let mut methods = Vec::new();

    while self.peek_token().token_type != TokenType::RightBrace {
        let method_name = match self.identifier("Expected method name") {
            Ok(method_name) => method_name,
            Err(err) => {
                let next = self.peek_token();
                return Err(if next.token_type == TokenType::EOF {
                    ParserError::MissingRightBrace {
                        code_pos: next.code_pos,
                    }
                } else {
                    err
                });
            }
        };

        methods.push(self.fun_params_and_body(method_name)?);
    }

    assert_eq!(self.next_token().token_type, TokenType::RightBrace);

    Ok(Stmt::Class { name, methods })
}
/// Parses `fun <name>(<params>) { ... }` and turns it into a variable
/// declaration named `<name>` whose initializer is the function expression.
fn fun_declaration(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::Fun);

    let name = self.identifier("Missing function name")?;
    let function = self.fun_params_and_body(name.as_str())?;

    Ok(Stmt::var_decl(name, function))
}
/// Parses a parameter list followed by a block body and builds the function
/// expression called `name`. A leading `fun` keyword, if any, has already
/// been consumed by the caller.
///
/// More than 255 parameters is recorded as an error without aborting the
/// parse. The body is parsed with `is_in_function` set and `is_in_loop`
/// cleared; both flags are restored afterwards, also on failure.
fn fun_params_and_body(&mut self, name: impl Into<String>) -> ParserResult<Expr> {
    let next = self.peek_token();
    let params_code_pos = next.code_pos;
    if next.token_type != TokenType::LeftParen {
        return Err(ParserError::MissingFunctionArgs {
            code_pos: params_code_pos,
        });
    }

    let param_names = self.collect_params()?;
    if param_names.len() > 255 {
        self.parse_errors.push(ParserError::TooManyParams {
            code_pos: params_code_pos,
        });
    }

    let next = self.peek_token();
    if next.token_type != TokenType::LeftBrace {
        return Err(ParserError::MissingFunctionBody {
            code_pos: next.code_pos,
        });
    }

    let outer_in_function = std::mem::replace(&mut self.is_in_function, true);
    let outer_in_loop = std::mem::replace(&mut self.is_in_loop, false);

    let body = self.block();

    self.is_in_function = outer_in_function;
    self.is_in_loop = outer_in_loop;

    let body = body?;

    Ok(Expr::function(name.into(), param_names, body))
}
/// Parses a parenthesised, comma-separated list of parameter names,
/// including both parentheses. The list may be empty.
fn collect_params(&mut self) -> ParserResult<Vec<String>> {
    assert_eq!(self.next_token().token_type, TokenType::LeftParen);

    let mut param_names = Vec::new();

    if self.peek_token().token_type == TokenType::RightParen {
        assert_eq!(self.next_token().token_type, TokenType::RightParen);
        return Ok(param_names);
    }

    loop {
        param_names.push(self.identifier("Expected parameter name")?);

        if self.peek_token().token_type != TokenType::Comma {
            break;
        }
        assert_eq!(self.next_token().token_type, TokenType::Comma);
    }

    self.consume_token(TokenType::RightParen, |tok| ParserError::MissingRightParen {
        code_pos: tok.code_pos,
    })?;

    Ok(param_names)
}
/// Parses `{ <statement>* }`.
///
/// If a statement fails and the parser is left at the end of the input, the
/// error is reported as a missing closing brace instead.
fn block(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::LeftBrace);

    let mut statements = Vec::new();

    while self.peek_token().token_type != TokenType::RightBrace {
        match self.statement() {
            Ok(statement) => statements.push(statement),
            Err(err) => {
                let next = self.peek_token();
                return Err(if next.token_type == TokenType::EOF {
                    ParserError::MissingRightBrace {
                        code_pos: next.code_pos,
                    }
                } else {
                    err
                });
            }
        }
    }

    assert_eq!(self.next_token().token_type, TokenType::RightBrace);

    Ok(Stmt::Block { statements })
}
/// Parses `<expr>;`.
fn expression_statement(&mut self) -> ParserResult<Stmt> {
    let expr = self.expression()?;
    self.semicolon().map(|()| Stmt::expr_stmt(expr))
}
/// Parses an expression. Assignment is the lowest-precedence form, so
/// parsing starts there.
fn expression(&mut self) -> ParserResult<Expr> {
    self.assignment()
}
/// Parses `<target> = <value>` (right-associative) or, if no `=` follows,
/// just the expression that was read.
///
/// Only a plain variable is accepted as target; anything else yields
/// `ParserError::InvalidAssignment`, positioned at the start of the target.
fn assignment(&mut self) -> ParserResult<Expr> {
    let code_pos = self.peek_token().code_pos;

    let target = self.logical_or()?;

    if self.peek_token().token_type == TokenType::Equal {
        assert_eq!(self.next_token().token_type, TokenType::Equal);

        let value = self.assignment()?;

        return if matches!(target, Expr::Variable { .. }) {
            Ok(Expr::assignment(target, value))
        } else {
            Err(ParserError::InvalidAssignment {
                expr: target,
                code_pos,
            })
        };
    }

    Ok(target)
}
/// Parses `<and-expr> (or <or-expr>)?`; the recursion on the right-hand side
/// makes a chain of `or`s nest to the right.
fn logical_or(&mut self) -> ParserResult<Expr> {
    let left = self.logical_and()?;

    if self.peek_token().token_type != TokenType::Or {
        return Ok(left);
    }

    // the `or` keyword itself
    let _ = self.next_token();
    let right = self.logical_or()?;

    Ok(Expr::logical(left, LogicalOp::Or, right))
}
/// Parses `<equality> (and <and-expr>)?`; the recursion on the right-hand
/// side makes a chain of `and`s nest to the right.
fn logical_and(&mut self) -> ParserResult<Expr> {
    let left = self.equality()?;

    if self.peek_token().token_type != TokenType::And {
        return Ok(left);
    }

    // the `and` keyword itself
    let _ = self.next_token();
    let right = self.logical_and()?;

    Ok(Expr::logical(left, LogicalOp::And, right))
}
/// Parses a left-associative chain of `==` / `!=` over comparison expressions.
fn equality(&mut self) -> ParserResult<Expr> {
    let mut expr = self.comparison()?;

    loop {
        // anything that is not an equality operator ends the chain
        let operator = match self.peek_token().token_type {
            TokenType::BangEqual => BinaryOp::NotEqual,
            TokenType::EqualEqual => BinaryOp::Equal,
            _ => return Ok(expr),
        };

        // the operator token
        self.next_token();

        let rhs = self.comparison()?;
        expr = Expr::binary(expr, operator, rhs);
    }
}
/// Parses a left-associative chain of `<`, `<=`, `>`, `>=` over terms.
fn comparison(&mut self) -> ParserResult<Expr> {
    let mut expr = self.term()?;

    loop {
        // anything that is not a comparison operator ends the chain
        let operator = match self.peek_token().token_type {
            TokenType::Greater => BinaryOp::Greater,
            TokenType::GreaterEqual => BinaryOp::GreaterEqual,
            TokenType::Less => BinaryOp::Less,
            TokenType::LessEqual => BinaryOp::LessEqual,
            _ => return Ok(expr),
        };

        // the operator token
        self.next_token();

        let rhs = self.term()?;
        expr = Expr::binary(expr, operator, rhs);
    }
}
/// Parses a left-associative chain of `+` / `-` over factors.
fn term(&mut self) -> ParserResult<Expr> {
    let mut expr = self.factor()?;

    loop {
        // anything that is not an additive operator ends the chain
        let operator = match self.peek_token().token_type {
            TokenType::Minus => BinaryOp::Subtract,
            TokenType::Plus => BinaryOp::Add,
            _ => return Ok(expr),
        };

        // the operator token
        self.next_token();

        let rhs = self.factor()?;
        expr = Expr::binary(expr, operator, rhs);
    }
}
/// Parses a left-associative chain of `*` / `/` over unary expressions.
fn factor(&mut self) -> ParserResult<Expr> {
    let mut expr = self.unary()?;

    loop {
        // anything that is not a multiplicative operator ends the chain
        let operator = match self.peek_token().token_type {
            TokenType::Slash => BinaryOp::Divide,
            TokenType::Star => BinaryOp::Multiply,
            _ => return Ok(expr),
        };

        // the operator token
        self.next_token();

        let rhs = self.unary()?;
        expr = Expr::binary(expr, operator, rhs);
    }
}
/// Parses any number of prefix `!` / `-` operators followed by a call expression.
fn unary(&mut self) -> ParserResult<Expr> {
    let op = match self.peek_token().token_type {
        TokenType::Bang => UnaryOp::Not,
        TokenType::Minus => UnaryOp::Negate,
        _ => return self.call(),
    };

    // the operator token
    let _ = self.next_token();

    let operand = self.unary()?;
    Ok(Expr::unary(op, operand))
}
/// Parses a primary expression followed by any number of argument lists,
/// e.g. `f(a)(b)`.
///
/// More than 255 arguments in one call is recorded as an error without
/// aborting the parse. Property access with `.` is not implemented yet and
/// hits `todo!()`.
fn call(&mut self) -> ParserResult<Expr> {
    let mut expr = self.primary()?;

    loop {
        match self.peek_token().token_type {
            TokenType::LeftParen => {
                let args_code_pos = self.peek_token().code_pos;

                let args = self.collect_args()?;

                if args.len() > 255 {
                    self.parse_errors.push(ParserError::TooManyArguments {
                        code_pos: args_code_pos,
                    });
                }

                expr = Expr::call(expr, args);
            }
            TokenType::Dot => todo!(),
            _ => return Ok(expr),
        }
    }
}
/// Parses a parenthesised, comma-separated list of argument expressions,
/// including both parentheses. The list may be empty.
fn collect_args(&mut self) -> ParserResult<Vec<Expr>> {
    assert_eq!(self.next_token().token_type, TokenType::LeftParen);

    let mut args = Vec::new();

    if self.peek_token().token_type == TokenType::RightParen {
        assert_eq!(self.next_token().token_type, TokenType::RightParen);
        return Ok(args);
    }

    loop {
        args.push(self.expression()?);

        if self.peek_token().token_type != TokenType::Comma {
            break;
        }
        assert_eq!(self.next_token().token_type, TokenType::Comma);
    }

    self.consume_token(TokenType::RightParen, |tok| ParserError::MissingRightParen {
        code_pos: tok.code_pos,
    })?;

    Ok(args)
}
/// Parses a primary expression: a literal, an identifier, a parenthesised
/// expression or an anonymous function (`fun (...) { ... }`, named `<lambda>`).
///
/// At the end of the input `ParserError::TokenStreamEnded` is returned and
/// the `EOF` token is left in place. Any other unexpected token is consumed
/// and reported as `ParserError::ExpectedPrimary`.
fn primary(&mut self) -> ParserResult<Expr> {
    if self.peek_token().token_type == TokenType::EOF {
        return Err(ParserError::TokenStreamEnded);
    }

    let token = self.next_token();

    let expr = match token.token_type {
        TokenType::Fun => self.fun_params_and_body("<lambda>")?,
        TokenType::Number(num) => Expr::number(num),
        TokenType::String(s) => Expr::string(s),
        TokenType::False => Expr::bool(false),
        TokenType::True => Expr::bool(true),
        TokenType::Nil => Expr::nil(),
        TokenType::LeftParen => {
            let inner = self.expression()?;
            self.consume_token(TokenType::RightParen, |tok| ParserError::MissingRightParen {
                code_pos: tok.code_pos,
            })?;
            Expr::grouping(inner)
        }
        TokenType::Identifier(name) => Expr::Variable { name },
        _ => return Err(ParserError::ExpectedPrimary { token }),
    };

    Ok(expr)
}
fn semicolon(&mut self) -> ParserResult<()> {
self.consume_token(TokenType::Semicolon, |token| ParserError::MissingSemicolon {
code_pos: token.code_pos,
})
}
/// Consumes an identifier token and returns its name.
///
/// If the next token is not an identifier, nothing is consumed and
/// `ParserError::MissingIdentifier` is returned with a copy of `msg` and the position of
/// the token that was found.
fn identifier(&mut self, msg: &str) -> ParserResult<String> {
    if !matches!(self.peek_token().token_type, TokenType::Identifier(_)) {
        return Err(ParserError::MissingIdentifier {
            msg: msg.to_owned(),
            code_pos: self.peek_token().code_pos,
        });
    }
    match self.next_token().token_type {
        TokenType::Identifier(name) => Ok(name),
        // The peek above already established that this token is an identifier.
        _ => unreachable!(),
    }
}
/// Removes the next token from the stream and returns it.
///
/// # Panics
///
/// Panics if the stream is already empty. The parsing methods peek before they consume and
/// leave `TokenType::EOF` in place, so running dry here points at a parser bug rather than
/// at malformed input.
// NOTE(review): assumes the token stream always ends with an EOF token — confirm in the lexer.
fn next_token(&mut self) -> Token {
    self.token_iter
        .next()
        .expect("token stream exhausted: the parser consumed past the final token")
}
/// Returns a reference to the next token without consuming it.
///
/// # Panics
///
/// Panics if the stream is already empty, i.e. if an earlier step consumed the last token.
// NOTE(review): assumes the token stream always ends with an EOF token — confirm in the lexer.
fn peek_token(&mut self) -> &Token {
    self.token_iter
        .peek()
        .expect("token stream exhausted: nothing left to peek at")
}
/// Consumes the next token if it has the expected `token_type`.
///
/// On a mismatch, the token that was found is passed to `err_fn` to build the returned
/// error. That token is consumed too, unless it is `TokenType::EOF`: the end marker is only
/// cloned so it stays in the stream.
fn consume_token<F>(&mut self, token_type: TokenType, err_fn: F) -> ParserResult<()>
where
    F: Fn(Token) -> ParserError,
{
    if self.peek_token().token_type == token_type {
        let _ = self.next_token();
        return Ok(());
    }
    let offending = if matches!(self.peek_token().token_type, TokenType::EOF) {
        self.peek_token().clone()
    } else {
        self.next_token()
    };
    Err(err_fn(offending))
}
}

139
frontend/src/parser/stmt.rs Normal file
View file

@ -0,0 +1,139 @@
use std::fmt::Display;
use super::misc::indent;
use super::Expr;
/// A statement node of the syntax tree.
///
/// Nested expressions and statements are boxed, which keeps this recursive type sized.
#[derive(Debug, Clone)]
pub enum Stmt {
/// `print` statement together with the expression to print.
Print {
expr: Box<Expr>,
},
/// `if` statement with a mandatory `then` branch and an optional `else` branch.
IfStmt {
condition: Box<Expr>,
then_branch: Box<Stmt>,
else_branch: Option<Box<Stmt>>,
},
/// `while` loop consisting of a condition and a single body statement.
While {
condition: Box<Expr>,
body: Box<Stmt>,
},
/// `var` declaration; the initializer is always present in this representation.
VarDecl {
name: String,
initializer: Box<Expr>,
},
/// Braced block holding its statements in source order.
Block {
statements: Vec<Stmt>,
},
/// Class declaration with its name and methods.
// NOTE(review): methods are stored as `Expr`, presumably function expressions — confirm
// against the parser.
Class {
name: String,
methods: Vec<Expr>,
},
/// An expression used as a statement.
ExprStmt {
expr: Box<Expr>,
},
/// `break` statement; carries no data.
Break,
/// `return` statement; the returned expression is always present in this representation.
Return {
expr: Box<Expr>,
},
}
/// Convenience constructors that box their arguments.
///
/// Every expression or statement argument accepts either the plain value or an existing
/// `Box`, so callers never have to box anything themselves.
impl Stmt {
    /// Creates a `print` statement.
    ///
    /// Accepts an `Expr` or a `Box<Expr>`, like the other constructors of this type.
    pub fn print_stmt(expr: impl Into<Box<Expr>>) -> Self {
        Stmt::Print { expr: expr.into() }
    }

    /// Creates an `if` statement; pass `None` as `else_branch` when there is no `else`.
    pub fn if_stmt(
        condition: impl Into<Box<Expr>>,
        then_branch: impl Into<Box<Stmt>>,
        else_branch: Option<impl Into<Box<Stmt>>>,
    ) -> Self {
        Stmt::IfStmt {
            condition: condition.into(),
            then_branch: then_branch.into(),
            else_branch: else_branch.map(Into::into),
        }
    }

    /// Creates a `while` loop from its condition and body.
    pub fn while_stmt(condition: impl Into<Box<Expr>>, body: impl Into<Box<Stmt>>) -> Self {
        Stmt::While {
            condition: condition.into(),
            body: body.into(),
        }
    }

    /// Creates a variable declaration with the given name and initializer.
    pub fn var_decl(name: impl Into<String>, initializer: impl Into<Box<Expr>>) -> Self {
        Stmt::VarDecl {
            name: name.into(),
            initializer: initializer.into(),
        }
    }

    /// Creates an expression statement.
    pub fn expr_stmt(expr: impl Into<Box<Expr>>) -> Self {
        Stmt::ExprStmt { expr: expr.into() }
    }

    /// Creates a `return` statement for the given expression.
    pub fn return_stmt(expr: impl Into<Box<Expr>>) -> Self {
        Stmt::Return { expr: expr.into() }
    }
}
impl Display for Stmt {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Stmt::Print { expr } => write!(f, "print {expr};"),
Stmt::IfStmt {
condition,
then_branch,
else_branch,
} => {
writeln!(f, "if {condition}")?;
match then_branch.as_ref() {
Stmt::Block { .. } => write!(f, "{then_branch}")?,
_ => write!(f, "{}", indent(then_branch.to_string()))?,
}
if let Some(else_branch) = else_branch {
writeln!(f, "\nelse")?;
match else_branch.as_ref() {
Stmt::Block { .. } => write!(f, "{else_branch}")?,
_ => write!(f, "{}", indent(else_branch.to_string()))?,
}
}
Ok(())
}
Stmt::While { condition, body } => {
writeln!(f, "{condition}")?;
match body.as_ref() {
Stmt::Block { .. } => write!(f, "{body}")?,
_ => write!(f, "{}", indent(body.to_string()))?,
}
Ok(())
}
Stmt::VarDecl { name, initializer } => write!(f, "var {name} = {initializer};"),
Stmt::Block { statements } => {
writeln!(f, "{{")?;
for statement in statements {
writeln!(f, "{}", indent(statement.to_string()))?;
}
write!(f, "}}")
}
Stmt::Class { name, methods } => {
writeln!(f, "class {name} {{")?;
for method in methods {
writeln!(f, "{method}")?;
}
write!(f, "}}")
}
Stmt::ExprStmt { expr } => write!(f, "{expr};"),
Stmt::Break => write!(f, "break;"),
Stmt::Return { expr } => {
write!(f, "return {expr};")
}
}
}
}