rlox/frontend/src/parser/_parser.rs
Moritz Gmeiner 3066ba9032 split off declarations from statements
added error for duplicate parameter names
2024-09-01 20:46:59 +02:00

885 lines
26 KiB
Rust

use crate::lexer::{Token, TokenType};
use crate::parser::expr::BinaryOp;
use super::expr::{Expr, UnaryOp};
use super::{LogicalOp, ParserError, Stmt};
/*====================================================================================================================*/
/// Result alias used by the parsing routines in this file: `Ok(T)` or a single `ParserError`.
type ParserResult<T> = Result<T, ParserError>;
/// Parses a token sequence into statements.
///
/// Builds a `Parser` over `tokens` and runs it. The result is whatever
/// `Parser::parse` produces: the parsed statements, or the list of
/// collected parser errors.
pub fn parse_tokens(tokens: Vec<Token>) -> Result<Vec<Stmt>, Vec<ParserError>> {
    let parser = Parser::new(tokens);
    parser.parse()
}
/*====================================================================================================================*/
/// Takes care of token iteration: owns the token list and a cursor into it.
struct TokenIter {
    /// The full token sequence being iterated over.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to hand out.
    pos: usize,
}
impl TokenIter {
    /// Creates an iterator positioned at the first token of `tokens`.
    pub fn new(tokens: Vec<Token>) -> Self {
        TokenIter { tokens, pos: 0 }
    }

    /// Advances the cursor past any run of comment tokens at the current position.
    fn skip_comments(&mut self) {
        while !self.is_empty() && matches![self.tokens[self.pos].token_type, TokenType::Comment(_)]
        {
            self.pos += 1; // skip comment token
        }
    }

    /// Returns a reference to the next non-comment token without consuming it,
    /// or `None` if no such token remains.
    ///
    /// Comments are skipped *before* the emptiness check, so a stream whose
    /// remaining tokens are all comments yields `None` instead of indexing
    /// past the end of the token list.
    fn peek(&mut self) -> Option<&Token> {
        self.skip_comments();
        let token = self.tokens.get(self.pos)?;
        assert!(!matches!(token.token_type, TokenType::Comment(_)));
        Some(token)
    }

    /// Returns `true` once the cursor has reached (or passed) the end of the token list.
    fn is_empty(&self) -> bool {
        self.pos >= self.tokens.len()
    }
}
impl Iterator for TokenIter {
    type Item = Token;

    /// Yields the next non-comment token, or `None` when the stream is exhausted.
    ///
    /// The token is moved out of the list via `Token::take`, and the cursor is
    /// only advanced when a token is actually returned, so calling `next` on an
    /// exhausted iterator is harmless rather than an out-of-bounds panic.
    fn next(&mut self) -> Option<Self::Item> {
        self.skip_comments();
        let slot = self.tokens.get_mut(self.pos)?;
        assert!(!matches!(slot.token_type, TokenType::Comment(_)));
        let token = slot.take();
        self.pos += 1;
        Some(token)
    }
}
/*====================================================================================================================*/
/// Parser state: the token cursor, the errors collected so far, and flags
/// describing the syntactic context currently being parsed.
struct Parser {
    /// Source of tokens (comment tokens are skipped by the iterator).
    token_iter: TokenIter,
    /// Errors recorded while parsing; reported together by `parse`.
    parse_errors: Vec<ParserError>,
    /// Set while parsing a `while`/`for` body; checked when a `break` is parsed.
    is_in_loop: bool,
    /// Set while parsing a class body (not read anywhere in this file's visible code).
    is_in_class: bool,
    /// Set while parsing a function body; checked when a `return` is parsed.
    is_in_function: bool,
    /// Set while parsing a method named `init`; changes how `return` is parsed.
    is_in_init: bool,
}
impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
Parser {
token_iter: TokenIter::new(tokens),
parse_errors: Vec::new(),
is_in_loop: false,
is_in_class: false,
is_in_function: false,
is_in_init: false,
}
}
/// Runs the parser to completion, consuming it.
///
/// Declarations are parsed one after another until `EOF` is the next token
/// (or the token iterator reports empty). When a declaration fails, its
/// error is recorded and the parser re-synchronises before continuing, so
/// a single run can report several errors.
///
/// Returns the parsed statements if no error was recorded, otherwise every
/// recorded error.
///
/// # Panics
///
/// Panics if the token taken after the loop is not `EOF`.
pub fn parse(mut self) -> Result<Vec<Stmt>, Vec<ParserError>> {
    let mut statements = Vec::new();

    loop {
        if self.token_iter.is_empty() || self.peek_token().token_type == TokenType::EOF {
            break;
        }

        match self.declaration() {
            Ok(stmt) => statements.push(stmt),
            Err(err) => {
                self.parse_errors.push(err);
                self.synchronise();
            }
        }
    }

    assert_eq!(
        self.next_token().token_type,
        TokenType::EOF,
        "last token wasn't EOF"
    );

    if self.parse_errors.is_empty() {
        Ok(statements)
    } else {
        Err(self.parse_errors)
    }
}
/// Error recovery: discards tokens until a likely statement boundary.
///
/// Stops *before* a token that starts a declaration/statement (`class`,
/// `fun`, `var`, `for`, `if`, `while`, `print`, `return`) or `EOF`, and
/// stops *after* a `;`. Every context flag is cleared along the way, since
/// the error may have left them set. Returns immediately when the token
/// iterator is empty.
fn synchronise(&mut self) {
    while !self.token_iter.is_empty() {
        // when synchronising: assume all false
        self.is_in_init = false;
        self.is_in_function = false;
        self.is_in_class = false;
        self.is_in_loop = false;

        let at_semicolon = match self.peek_token().token_type {
            // synchronisation point: leave the token for the next declaration
            TokenType::Class
            | TokenType::Fun
            | TokenType::Var
            | TokenType::For
            | TokenType::If
            | TokenType::While
            | TokenType::Print
            | TokenType::Return
            | TokenType::EOF => return,
            TokenType::Semicolon => true,
            _ => false,
        };

        // discard the current token; a semicolon also ends the recovery
        let _ = self.next_token();
        if at_semicolon {
            return;
        }
    }
}
fn declaration(&mut self) -> ParserResult<Stmt> {
match self.peek_token().token_type {
TokenType::Var => self.var_declaration(),
TokenType::Class => self.class_declaration(),
TokenType::Fun => self.fun_declaration(),
_ => self.statement(),
}
}
/// Parses one statement, dispatching on the next token.
///
/// `print`, `if`, `while`, `for`, `{` and `return` go to their dedicated
/// parsers; `break` is handled here; everything else is an expression
/// statement.
///
/// # Errors
///
/// For `break`: a missing `;` is reported first, then
/// `ParserError::InvalidBreak` if the parser is not inside a loop.
fn statement(&mut self) -> ParserResult<Stmt> {
    match self.peek_token().token_type {
        TokenType::Break => {}
        TokenType::Print => return self.print_statement(),
        TokenType::If => return self.if_statement(),
        TokenType::While => return self.while_statement(),
        TokenType::For => return self.for_statement(),
        TokenType::LeftBrace => return self.block(),
        TokenType::Return => return self.return_statement(),
        _ => return self.expression_statement(),
    }

    // only `break` reaches this point
    let code_pos = self.peek_token().code_pos;
    assert_eq!(self.next_token().token_type, TokenType::Break);
    self.semicolon()?;

    if self.is_in_loop {
        Ok(Stmt::Break)
    } else {
        Err(ParserError::InvalidBreak { code_pos })
    }
}
/// Parses `return;` or `return <expr>;`.
///
/// A bare `return;` returns `nil`, or the variable `this` when the parser
/// is inside an `init` method. A `return` with a value inside `init` is
/// rejected.
///
/// # Errors
///
/// * `ParserError::ReturnInInit` for `return <expr>` inside `init`
/// * a missing-semicolon error from `semicolon`
/// * `ParserError::ReturnOutsideFunction` when not inside a function
///   (checked last, after the statement has been consumed)
fn return_statement(&mut self) -> ParserResult<Stmt> {
    let code_pos = self.peek_token().code_pos;
    assert_eq!(self.next_token().token_type, TokenType::Return);

    let has_value = self.peek_token().token_type != TokenType::Semicolon;
    let in_init = self.is_in_init;

    let expr = if !has_value {
        if in_init {
            Expr::variable("this")
        } else {
            Expr::nil()
        }
    } else if in_init {
        return Err(ParserError::ReturnInInit { code_pos });
    } else {
        self.expression()?
    };

    self.semicolon()?;

    if self.is_in_function {
        Ok(Stmt::return_stmt(expr))
    } else {
        Err(ParserError::ReturnOutsideFunction { code_pos })
    }
}
/// Parses `if (<cond>) <stmt>` with an optional `else <stmt>`.
///
/// # Errors
///
/// `MissingParenAfterIf` / `MissingRightParen` for the parentheses, plus
/// any error from the condition or the branches.
fn if_statement(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::If);

    self.consume_token(TokenType::LeftParen, |t| ParserError::MissingParenAfterIf {
        code_pos: t.code_pos,
    })?;
    let condition = self.expression()?;
    self.consume_token(TokenType::RightParen, |t| ParserError::MissingRightParen {
        code_pos: t.code_pos,
    })?;

    let then_branch = self.statement()?;

    let else_branch = match self.peek_token().token_type {
        TokenType::Else => {
            // consume else token
            let _ = self.next_token();
            Some(self.statement()?)
        }
        _ => None,
    };

    Ok(Stmt::if_stmt(condition, then_branch, else_branch))
}
/// Parses `while (<cond>) <stmt>`.
///
/// The body is parsed with `is_in_loop` set; the previous value is put
/// back once the body parsed successfully.
fn while_statement(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::While);

    self.consume_token(TokenType::LeftParen, |t| ParserError::MissingParenAfterWhile {
        code_pos: t.code_pos,
    })?;
    let condition = self.expression()?;
    self.consume_token(TokenType::RightParen, |t| ParserError::MissingRightParen {
        code_pos: t.code_pos,
    })?;

    let outer_in_loop = std::mem::replace(&mut self.is_in_loop, true);
    let body = self.statement()?;
    self.is_in_loop = outer_in_loop;

    Ok(Stmt::while_stmt(condition, body))
}
/// Parses `for (<init>; <cond>; <incr>) <stmt>` and desugars it.
///
/// The result is a `while` loop: a missing condition becomes `true`, an
/// increment is appended to the body inside a block, and an initializer
/// wraps the loop in an outer block that runs it first.
fn for_statement(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::For);
    self.consume_token(TokenType::LeftParen, |t| ParserError::MissingParenAfterFor {
        code_pos: t.code_pos,
    })?;

    // initializer clause; every alternative also consumes its terminating `;`
    let initializer = if self.peek_token().token_type == TokenType::Semicolon {
        assert_eq!(self.next_token().token_type, TokenType::Semicolon);
        None
    } else if self.peek_token().token_type == TokenType::Var {
        Some(self.var_declaration()?)
    } else {
        Some(self.expression_statement()?)
    };

    // condition clause; empty means "loop forever"
    let condition = if self.peek_token().token_type == TokenType::Semicolon {
        Expr::bool(true)
    } else {
        self.expression()?
    };
    self.semicolon()?;

    // increment clause
    let increment = if self.peek_token().token_type == TokenType::RightParen {
        None
    } else {
        Some(self.expression()?)
    };
    self.consume_token(TokenType::RightParen, |t| ParserError::MissingRightParen {
        code_pos: t.code_pos,
    })?;

    let outer_in_loop = std::mem::replace(&mut self.is_in_loop, true);
    let body = self.statement()?;
    self.is_in_loop = outer_in_loop;

    let loop_body = match increment {
        Some(increment) => Stmt::Block {
            statements: vec![body, Stmt::expr_stmt(increment)],
        },
        None => body,
    };
    let while_loop = Stmt::while_stmt(condition, loop_body);

    Ok(match initializer {
        Some(initializer) => Stmt::Block {
            statements: vec![initializer, while_loop],
        },
        None => while_loop,
    })
}
/// Parses `print <expr>;`. The caller must have seen the `print` token.
fn print_statement(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::Print);
    let value = self.expression()?;
    self.semicolon().map(|()| Stmt::print_stmt(value))
}
/// Parses `var <name>;` or `var <name> = <expr>;`.
///
/// A declaration without an initializer is given `nil` as its value.
///
/// # Errors
///
/// * `ParserError::TokenStreamEnded` if the input ends right after `var`
/// * `ParserError::ExpectedVarName` if the next token is not an identifier
/// * any error from the initializer expression or the missing `;`
fn var_declaration(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::Var);

    // Never consume the EOF token: `parse` asserts that it is still there
    // after the main loop, and would panic if it had been eaten here.
    if self.peek_token().token_type == TokenType::EOF {
        return Err(ParserError::TokenStreamEnded);
    }

    let token = self.next_token();
    let name = match token.token_type {
        TokenType::Identifier(s) => s,
        _ => return Err(ParserError::ExpectedVarName { token }),
    };

    let initializer = if self.peek_token().token_type == TokenType::Equal {
        assert_eq!(self.next_token().token_type, TokenType::Equal);
        self.expression()?
    } else {
        Expr::nil()
    };

    self.semicolon()?;

    Ok(Stmt::var_decl(name, initializer))
}
/// Parses `class <name> [< <superclass>] { <methods> }`.
///
/// The class is returned as a variable declaration binding `name` to a
/// class expression. While the body is parsed, `is_in_class` is set and
/// `is_in_loop` / `is_in_function` are cleared; the previous values are
/// put back after the closing brace.
///
/// Each method is parsed with `is_in_init` set to exactly "this method is
/// named `init`". The flag is deliberately not inherited from an enclosing
/// `init` body, so ordinary methods of a class declared inside an
/// initializer may still return values.
///
/// # Errors
///
/// Missing class/superclass/method names, `MissingClassBody` when `{` is
/// absent, `MissingRightBrace` when the input ends inside the body, and
/// any error from a method's parameters or body.
fn class_declaration(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::Class);

    let name = self.identifier("Missing class name")?;

    let superclass = if self.peek_token().token_type == TokenType::Less {
        assert_eq!(self.next_token().token_type, TokenType::Less);
        let superclass_name = self.identifier("Expected superclass")?;
        Some(Expr::Variable {
            name: superclass_name,
        })
    } else {
        None
    };

    self.consume_token(TokenType::LeftBrace, |token| {
        ParserError::MissingClassBody {
            code_pos: token.code_pos,
        }
    })?;

    let is_in_loop = std::mem::replace(&mut self.is_in_loop, false);
    let is_in_class = std::mem::replace(&mut self.is_in_class, true);
    let is_in_function = std::mem::replace(&mut self.is_in_function, false);

    let mut methods = Vec::new();

    while self.peek_token().token_type != TokenType::RightBrace {
        let method_name = self.identifier("Expected method name").map_err(|err| {
            // running out of input inside the body means the `}` is missing
            if self.peek_token().token_type == TokenType::EOF {
                ParserError::MissingRightBrace {
                    code_pos: self.peek_token().code_pos,
                }
            } else {
                err
            }
        })?;

        // Set the flag from the method name alone, overriding any value
        // left over from an enclosing `init`.
        let is_in_init = std::mem::replace(&mut self.is_in_init, &*method_name == "init");
        let method = self.fun_params_and_body(method_name)?;
        self.is_in_init = is_in_init;

        methods.push(method);
    }

    assert_eq!(self.next_token().token_type, TokenType::RightBrace);

    self.is_in_loop = is_in_loop;
    self.is_in_class = is_in_class;
    self.is_in_function = is_in_function;

    let class = Expr::class(name.clone(), methods, superclass);

    Ok(Stmt::var_decl(name, class))
}
/// Parses `fun <name>(<params>) { <body> }`.
///
/// The function is returned as a variable declaration binding `name` to a
/// function expression.
///
/// A named function is never an initializer, so `is_in_init` is cleared
/// while its parameters and body are parsed. Without this, a function
/// declared inside an `init` method would have `return <expr>;` rejected
/// and a bare `return;` rewritten to return `this`. The previous value is
/// put back afterwards, also when parsing failed.
fn fun_declaration(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::Fun);

    let name = self.identifier("Missing function name")?;

    let is_in_init = std::mem::replace(&mut self.is_in_init, false);
    let fun = self.fun_params_and_body(name.clone());
    self.is_in_init = is_in_init;

    Ok(Stmt::var_decl(name, fun?))
}
/// Parses a parameter list and a block body into a function expression
/// named `name`.
///
/// The `fun` keyword (and the name, if any) must already have been
/// consumed by the caller. The body is parsed with `is_in_function` set
/// and `is_in_loop` cleared; both are put back after a successful parse.
///
/// More than 255 parameters is recorded in `parse_errors` but does not
/// abort parsing.
///
/// # Errors
///
/// `MissingFunctionArgs` if the next token is not `(`,
/// `MissingFunctionBody` if no `{` follows the parameters, plus any error
/// from the parameter list or the body.
fn fun_params_and_body(&mut self, name: impl Into<String>) -> ParserResult<Expr> {
    let params_code_pos = self.peek_token().code_pos;
    if self.peek_token().token_type != TokenType::LeftParen {
        return Err(ParserError::MissingFunctionArgs {
            code_pos: params_code_pos,
        });
    }

    let param_names = self.collect_params()?;
    if param_names.len() > 255 {
        self.parse_errors.push(ParserError::TooManyParams {
            code_pos: params_code_pos,
        });
    }

    if self.peek_token().token_type != TokenType::LeftBrace {
        let code_pos = self.peek_token().code_pos;
        return Err(ParserError::MissingFunctionBody { code_pos });
    }

    let outer_in_function = std::mem::replace(&mut self.is_in_function, true);
    let outer_in_loop = std::mem::replace(&mut self.is_in_loop, false);
    let body = self.block()?;
    self.is_in_function = outer_in_function;
    self.is_in_loop = outer_in_loop;

    let name = name.into();
    Ok(Expr::function(name, param_names, body))
}
/// Parses a parenthesised, comma-separated list of parameter names.
///
/// The caller must have seen the `(`. Returns the names in source order.
///
/// # Errors
///
/// A missing-identifier error for a bad parameter name,
/// `DuplicateParameterName` (positioned at the repeated name) when a name
/// occurs twice, and `MissingRightParen` when the list is not closed.
fn collect_params(&mut self) -> ParserResult<Vec<Box<str>>> {
    assert_eq!(self.next_token().token_type, TokenType::LeftParen);

    let mut param_names = Vec::new();

    if self.peek_token().token_type == TokenType::RightParen {
        assert_eq!(self.next_token().token_type, TokenType::RightParen);
        return Ok(param_names);
    }

    loop {
        let code_pos = self.peek_token().code_pos;
        let name = self.identifier("Expected parameter name")?;

        if param_names.contains(&name) {
            return Err(ParserError::DuplicateParameterName { code_pos });
        }
        param_names.push(name);

        if self.peek_token().token_type != TokenType::Comma {
            break;
        }
        assert_eq!(self.next_token().token_type, TokenType::Comma);
    }

    self.consume_token(TokenType::RightParen, |t| ParserError::MissingRightParen {
        code_pos: t.code_pos,
    })?;

    Ok(param_names)
}
/// Parses `{ <declarations> }` into a `Stmt::Block`.
///
/// The caller must have seen the `{`.
///
/// # Errors
///
/// If a declaration fails and the next token is `EOF`, the error is
/// replaced by `MissingRightBrace`; otherwise the declaration's own error
/// is returned.
fn block(&mut self) -> ParserResult<Stmt> {
    assert_eq!(self.next_token().token_type, TokenType::LeftBrace);

    let mut statements = Vec::new();

    loop {
        if self.peek_token().token_type == TokenType::RightBrace {
            break;
        }

        match self.declaration() {
            Ok(statement) => statements.push(statement),
            Err(err) => {
                let at_eof = self.peek_token().token_type == TokenType::EOF;
                return Err(if at_eof {
                    ParserError::MissingRightBrace {
                        code_pos: self.peek_token().code_pos,
                    }
                } else {
                    err
                });
            }
        }
    }

    assert_eq!(self.next_token().token_type, TokenType::RightBrace);

    Ok(Stmt::Block { statements })
}
/// Parses `<expr>;` into an expression statement.
fn expression_statement(&mut self) -> ParserResult<Stmt> {
    let value = self.expression()?;
    self.semicolon().map(|()| Stmt::expr_stmt(value))
}
/// Parses a full expression. Entry point of the expression grammar; it
/// simply delegates to `assignment`.
fn expression(&mut self) -> ParserResult<Expr> {
self.assignment()
}
/// Parses an assignment, or falls through to `logical_or`.
///
/// The left-hand side is parsed as an ordinary expression first; if `=`
/// follows, the right-hand side is parsed recursively (so assignment is
/// right-associative) and the left side must turn out to be a variable or
/// a property access (which becomes a `Set`).
///
/// # Errors
///
/// `InvalidAssignment` (positioned at the start of the left-hand side) if
/// the target is neither a variable nor a property access.
fn assignment(&mut self) -> ParserResult<Expr> {
    let code_pos = self.peek_token().code_pos;
    let lhs = self.logical_or()?;

    if self.peek_token().token_type == TokenType::Equal {
        assert_eq!(self.next_token().token_type, TokenType::Equal);
        let value = self.assignment()?;

        return match lhs {
            variable @ Expr::Variable { .. } => Ok(Expr::assignment(variable, value)),
            Expr::Get { target, name } => Ok(Expr::Set {
                target,
                name,
                value: Box::new(value),
            }),
            expr => Err(ParserError::InvalidAssignment { expr, code_pos }),
        };
    }

    Ok(lhs)
}
/// Parses `<and> [or <or>]`. The right operand is parsed recursively, so a
/// chain of `or` nests to the right.
fn logical_or(&mut self) -> ParserResult<Expr> {
    let lhs = self.logical_and()?;

    if self.peek_token().token_type != TokenType::Or {
        return Ok(lhs);
    }

    // consume or
    let _ = self.next_token();
    let rhs = self.logical_or()?;

    Ok(Expr::logical(lhs, LogicalOp::Or, rhs))
}
/// Parses `<equality> [and <and>]`. The right operand is parsed
/// recursively, so a chain of `and` nests to the right.
fn logical_and(&mut self) -> ParserResult<Expr> {
    let lhs = self.equality()?;

    if self.peek_token().token_type != TokenType::And {
        return Ok(lhs);
    }

    // consume and
    let _ = self.next_token();
    let rhs = self.logical_and()?;

    Ok(Expr::logical(lhs, LogicalOp::And, rhs))
}
/// Parses a left-associative chain of `==` / `!=` over comparisons.
fn equality(&mut self) -> ParserResult<Expr> {
    let mut lhs = self.comparison()?;

    // keep folding while the next token is an equality operator
    while let Some(operator) = match self.peek_token().token_type {
        TokenType::EqualEqual => Some(BinaryOp::Equal),
        TokenType::BangEqual => Some(BinaryOp::NotEqual),
        _ => None,
    } {
        // consume operator token
        let _ = self.next_token();
        let rhs = self.comparison()?;
        lhs = Expr::binary(lhs, operator, rhs);
    }

    Ok(lhs)
}
/// Parses a left-associative chain of `<`, `<=`, `>`, `>=` over terms.
fn comparison(&mut self) -> ParserResult<Expr> {
    let mut lhs = self.term()?;

    while let Some(operator) = match self.peek_token().token_type {
        TokenType::Less => Some(BinaryOp::Less),
        TokenType::LessEqual => Some(BinaryOp::LessEqual),
        TokenType::Greater => Some(BinaryOp::Greater),
        TokenType::GreaterEqual => Some(BinaryOp::GreaterEqual),
        _ => None,
    } {
        // consume operator token
        let _ = self.next_token();
        let rhs = self.term()?;
        lhs = Expr::binary(lhs, operator, rhs);
    }

    Ok(lhs)
}
/// Parses a left-associative chain of `+` / `-` over factors.
fn term(&mut self) -> ParserResult<Expr> {
    let mut lhs = self.factor()?;

    while let Some(operator) = match self.peek_token().token_type {
        TokenType::Plus => Some(BinaryOp::Add),
        TokenType::Minus => Some(BinaryOp::Subtract),
        _ => None,
    } {
        // consume operator token
        let _ = self.next_token();
        let rhs = self.factor()?;
        lhs = Expr::binary(lhs, operator, rhs);
    }

    Ok(lhs)
}
/// Parses a left-associative chain of `*` / `/` over unary expressions.
fn factor(&mut self) -> ParserResult<Expr> {
    let mut lhs = self.unary()?;

    while let Some(operator) = match self.peek_token().token_type {
        TokenType::Star => Some(BinaryOp::Multiply),
        TokenType::Slash => Some(BinaryOp::Divide),
        _ => None,
    } {
        // consume operator token
        let _ = self.next_token();
        let rhs = self.unary()?;
        lhs = Expr::binary(lhs, operator, rhs);
    }

    Ok(lhs)
}
/// Parses a prefix `!` or `-` applied to a unary expression, or falls
/// through to `call_or_get` when no prefix operator is present.
fn unary(&mut self) -> ParserResult<Expr> {
    let operator = match self.peek_token().token_type {
        TokenType::Bang => UnaryOp::Not,
        TokenType::Minus => UnaryOp::Negate,
        _ => return self.call_or_get(),
    };

    // consume operator token
    let _ = self.next_token();
    let operand = self.unary()?;

    Ok(Expr::unary(operator, operand))
}
/// Parses a primary expression followed by any number of call suffixes
/// `(<args>)` and property accesses `.<name>`, applied left to right.
///
/// More than 255 arguments in one call is recorded in `parse_errors` but
/// does not abort parsing.
fn call_or_get(&mut self) -> ParserResult<Expr> {
    let mut target = self.primary()?;

    loop {
        if self.peek_token().token_type == TokenType::LeftParen {
            let args_code_pos = self.peek_token().code_pos;
            let args = self.collect_args()?;

            if args.len() > 255 {
                self.parse_errors.push(ParserError::TooManyArguments {
                    code_pos: args_code_pos,
                });
            }

            target = Expr::call(target, args);
        } else if self.peek_token().token_type == TokenType::Dot {
            assert_eq!(self.next_token().token_type, TokenType::Dot);
            let property = self.identifier("Expected property name after dot")?;
            target = Expr::get(target, property);
        } else {
            return Ok(target);
        }
    }
}
/// Parses a parenthesised, comma-separated argument list.
///
/// The caller must have seen the `(`. Returns the argument expressions in
/// source order.
///
/// # Errors
///
/// Any error from an argument expression, or `MissingRightParen` when the
/// list is not closed.
fn collect_args(&mut self) -> ParserResult<Vec<Expr>> {
    assert_eq!(self.next_token().token_type, TokenType::LeftParen);

    let mut args = Vec::new();

    if self.peek_token().token_type == TokenType::RightParen {
        assert_eq!(self.next_token().token_type, TokenType::RightParen);
        return Ok(args);
    }

    loop {
        args.push(self.expression()?);

        if self.peek_token().token_type != TokenType::Comma {
            break;
        }
        assert_eq!(self.next_token().token_type, TokenType::Comma);
    }

    self.consume_token(TokenType::RightParen, |t| ParserError::MissingRightParen {
        code_pos: t.code_pos,
    })?;

    Ok(args)
}
/// Parses a primary expression: a literal, `this`, `super.<method>`, a
/// parenthesised expression, an identifier, or an anonymous function
/// (`fun (<params>) { <body> }`, named `<lambda>`).
///
/// An anonymous function is never an initializer, so `is_in_init` is
/// cleared while it is parsed and put back afterwards. Without this, a
/// lambda written inside an `init` method would have `return <expr>;`
/// rejected.
///
/// # Errors
///
/// * `TokenStreamEnded` if the next token is `EOF` (which is not consumed)
/// * `MissingMethodAfterSuper` / a missing-identifier error for `super`
/// * `MissingRightParen` for an unclosed grouping
/// * `ExpectedPrimary` (carrying the consumed token) for anything else
fn primary(&mut self) -> ParserResult<Expr> {
    if self.peek_token().token_type == TokenType::EOF {
        return Err(ParserError::TokenStreamEnded);
    }

    let token = self.next_token();

    match token.token_type {
        TokenType::Fun => {
            let is_in_init = std::mem::replace(&mut self.is_in_init, false);
            let lambda = self.fun_params_and_body("<lambda>");
            self.is_in_init = is_in_init;
            lambda
        }
        TokenType::Number(x) => Ok(Expr::number(x)),
        TokenType::String(s) => Ok(Expr::string(s)),
        TokenType::False => Ok(Expr::bool(false)),
        TokenType::True => Ok(Expr::bool(true)),
        TokenType::Nil => Ok(Expr::nil()),
        TokenType::This => Ok(Expr::This),
        TokenType::Super => {
            self.consume_token(TokenType::Dot, |token| {
                ParserError::MissingMethodAfterSuper {
                    code_pos: token.code_pos,
                }
            })?;

            let method = self.identifier("Expected method name after super")?;

            let super_var = Expr::variable("super");
            let this_var = Expr::This;

            Ok(Expr::super_(super_var, this_var, method))
        }
        TokenType::LeftParen => {
            let expr = self.expression()?;

            self.consume_token(TokenType::RightParen, |token| {
                ParserError::MissingRightParen {
                    code_pos: token.code_pos,
                }
            })?;

            Ok(Expr::grouping(expr))
        }
        TokenType::Identifier(name) => Ok(Expr::Variable { name }),
        _ => Err(ParserError::ExpectedPrimary { token }),
    }
}
fn semicolon(&mut self) -> ParserResult<()> {
self.consume_token(TokenType::Semicolon, |token| {
ParserError::MissingSemicolon {
code_pos: token.code_pos,
}
})
}
/// Consumes an identifier token and returns its name.
///
/// If the next token is not an identifier it is left in place and
/// `MissingIdentifier` is returned, carrying `msg` and the position of
/// that token.
fn identifier(&mut self, msg: &str) -> ParserResult<Box<str>> {
    if !matches!(self.peek_token().token_type, TokenType::Identifier(_)) {
        return Err(ParserError::MissingIdentifier {
            msg: msg.to_owned(),
            code_pos: self.peek_token().code_pos,
        });
    }

    // the peek above guarantees an identifier
    match self.next_token().token_type {
        TokenType::Identifier(name) => Ok(name),
        _ => unreachable!(),
    }
}
/// Consumes and returns the next non-comment token.
///
/// # Panics
///
/// Panics if the token iterator has nothing left to yield. The parsing
/// routines are written to stop at the `EOF` token, so running out of
/// tokens indicates a bug in the parser rather than bad input.
fn next_token(&mut self) -> Token {
    self.token_iter
        .next()
        .expect("token stream exhausted: tried to consume a token past the end")
}
/// Returns a reference to the next non-comment token without consuming it.
///
/// # Panics
///
/// Panics if no token is left. The parsing routines are written to stop at
/// the `EOF` token, so an empty stream here indicates a bug in the parser
/// rather than bad input.
fn peek_token(&mut self) -> &Token {
    self.token_iter
        .peek()
        .expect("token stream exhausted: tried to peek past the end")
}
/// Consumes the next token if it has type `token_type`.
///
/// On a mismatch, `err_fn` builds the error from the offending token. That
/// token is consumed as well, unless it is `EOF`, which is only peeked so
/// that the end-of-stream marker is never eaten.
fn consume_token<F>(&mut self, token_type: TokenType, err_fn: F) -> ParserResult<()>
where
    F: Fn(&Token) -> ParserError,
{
    let upcoming = &self.peek_token().token_type;
    let is_expected = *upcoming == token_type;
    let at_eof = *upcoming == TokenType::EOF;

    if is_expected {
        let _ = self.next_token();
        return Ok(());
    }

    if at_eof {
        // hand err_fn the peeked token so we don't have to eat the EOF token
        return Err(err_fn(self.peek_token()));
    }

    Err(err_fn(&self.next_token()))
}
}