Chapter 17: Compiling Expressions done

Moritz Gmeiner committed 2023-01-31 22:54:12 +01:00
commit 1cca1494a4
20 changed files with 702 additions and 129 deletions


@@ -85,43 +85,43 @@ impl Lexer {
         let c = self.advance();
-        let token_type = match c {
-            '(' => Some(LeftParen),
-            ')' => Some(RightParen),
-            '{' => Some(LeftBrace),
-            '}' => Some(RightBrace),
-            ',' => Some(Comma),
-            '.' => Some(Dot),
-            '+' => Some(Plus),
-            '-' => Some(Minus),
-            ';' => Some(Semicolon),
-            '*' => Some(Star),
+        match c {
+            '(' => self.push_token(LeftParen),
+            ')' => self.push_token(RightParen),
+            '{' => self.push_token(LeftBrace),
+            '}' => self.push_token(RightBrace),
+            ',' => self.push_token(Comma),
+            '.' => self.push_token(Dot),
+            '+' => self.push_token(Plus),
+            '-' => self.push_token(Minus),
+            ';' => self.push_token(Semicolon),
+            '*' => self.push_token(Star),
             '!' => {
                 if self.consume('=') {
-                    Some(BangEqual)
+                    self.push_token(BangEqual)
                 } else {
-                    Some(Bang)
+                    self.push_token(Bang)
                 }
             }
             '=' => {
                 if self.consume('=') {
-                    Some(EqualEqual)
+                    self.push_token(EqualEqual)
                 } else {
-                    Some(Equal)
+                    self.push_token(Equal)
                 }
             }
             '<' => {
                 if self.consume('=') {
-                    Some(LessEqual)
+                    self.push_token(LessEqual)
                 } else {
-                    Some(Less)
+                    self.push_token(Less)
                 }
             }
             '>' => {
                 if self.consume('=') {
-                    Some(GreaterEqual)
+                    self.push_token(GreaterEqual)
                 } else {
-                    Some(Greater)
+                    self.push_token(Greater)
                 }
             }
             '/' => {
@@ -129,8 +129,6 @@ impl Lexer {
                     // line comment
                     // advance until either source is empty or newline if found
                     while !self.source_is_empty() && self.advance() != '\n' {}
-                    None
                 } else if self.consume('*') {
                     // block comment
@@ -166,28 +164,21 @@ impl Lexer {
                         self.advance();
                     }
-                    None
                 } else {
-                    Some(Slash)
+                    self.push_token(Slash)
                 }
             }
             '"' => self.try_parse_string(),
             '0'..='9' => self.try_parse_number(),
-            ' ' | '\r' | '\n' | '\t' => None, // handled automatically in advance()
+            ' ' | '\r' | '\n' | '\t' => {} // handled automatically in advance()
             '_' | 'a'..='z' | 'A'..='Z' => self.try_parse_identifier(),
             _ => {
                 self.errors.push(LexerError::UnexpectedCharacter {
                     c,
                     code_pos: self.code_pos,
                 });
-                None
             }
-        };
-        if let Some(token_type) = token_type {
-            self.push_token(token_type);
-        }
+        }
     }
 
     fn source_is_empty(&self) -> bool {
@@ -235,23 +226,24 @@ impl Lexer {
         self.tokens.push(Token::new(token_type, self.code_pos));
     }
 
-    fn try_parse_string(&mut self) -> Option<TokenType> {
+    fn try_parse_string(&mut self) {
         // advance until second "
         while self.advance() != '"' {
             if self.source_is_empty() {
                 self.errors.push(LexerError::UnterminatedStringLiteral {
                     code_pos: self.code_pos,
                 });
-                return None;
+                return;
             }
         }
 
         let string_literal = self.source[self.start + 1..self.current - 1].iter().collect();
-        Some(TokenType::String(string_literal))
+        // Some(TokenType::String(Box::new(string_literal)))
+        self.tokens.push(Token::new_string(string_literal, self.code_pos));
     }
 
-    fn try_parse_number(&mut self) -> Option<TokenType> {
+    fn try_parse_number(&mut self) {
         let is_some_digit = |c: Option<char>| c.map_or(false, |c| c.is_ascii_digit());
 
         // eat all digits
@@ -289,14 +281,15 @@ impl Lexer {
                     msg: err.to_string(),
                     code_pos: self.code_pos,
                 });
-                return None;
+                return;
             }
         };
 
-        Some(TokenType::Number(num))
+        // Some(TokenType::Number(num))
+        self.tokens.push(Token::new_number(num, self.code_pos));
     }
 
-    fn try_parse_identifier(&mut self) -> Option<TokenType> {
+    fn try_parse_identifier(&mut self) {
         let is_alpha_num_underscore =
             |c: Option<char>| c.map_or(false, |c| matches!(c, '0'..='9' | 'A'..='Z' | '_' | 'a'..='z'));
@@ -306,8 +299,18 @@ impl Lexer {
         let lexeme: String = self.source[self.start..self.current].iter().collect();
 
-        let token_type = KEYWORDS.get(&lexeme).cloned().unwrap_or(TokenType::Identifier(lexeme));
-        /* let token_type = KEYWORDS
-            .get(&lexeme)
-            .cloned()
-            .unwrap_or(TokenType::Identifier(Box::new(lexeme))); */
-        Some(token_type)
+        if let Some(&token_type) = KEYWORDS.get(&lexeme) {
+            // Token::new(token_type, self.code_pos)
+            self.push_token(token_type);
+        } else {
+            self.tokens.push(Token::new_identifier(lexeme, self.code_pos));
+        }
+        // Some(token_type)
     }
 }
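
Taken together, the lexer hunks invert the flow of scan_token: instead of every arm producing an Option<TokenType> for a trailing if-let to turn into a token, each arm and each try_parse_* helper now pushes the finished token itself. A reduced, self-contained sketch of that shape — Lexer, Token, and TokenType here are simplified stand-ins, not the crate's real types:

// Reduced sketch of the new scan_token contract (stand-in types):
// every arm pushes its token; nothing is returned upward.
#[derive(Debug, Clone, Copy, PartialEq)]
enum TokenType { Plus, Bang, BangEqual }

#[derive(Debug)]
struct Token { token_type: TokenType }

struct Lexer { src: Vec<char>, pos: usize, tokens: Vec<Token> }

impl Lexer {
    fn advance(&mut self) -> char {
        let c = self.src[self.pos];
        self.pos += 1;
        c
    }

    // Advance past the next char only if it matches `expected`.
    fn consume(&mut self, expected: char) -> bool {
        if self.src.get(self.pos) == Some(&expected) {
            self.pos += 1;
            true
        } else {
            false
        }
    }

    fn push_token(&mut self, token_type: TokenType) {
        self.tokens.push(Token { token_type });
    }

    fn scan_token(&mut self) {
        match self.advance() {
            '+' => self.push_token(TokenType::Plus),
            '!' => {
                if self.consume('=') {
                    self.push_token(TokenType::BangEqual)
                } else {
                    self.push_token(TokenType::Bang)
                }
            }
            _ => {} // whitespace, errors, etc. elided
        }
    }
}

fn main() {
    let mut lexer = Lexer { src: "!=+".chars().collect(), pos: 0, tokens: Vec::new() };
    while lexer.pos < lexer.src.len() {
        lexer.scan_token();
    }
    // prints [Token { token_type: BangEqual }, Token { token_type: Plus }]
    println!("{:?}", lexer.tokens);
}

Pushing at the point of recognition is what allows TokenType to lose its payloads in the next file: the helper that still holds the lexeme is now the same place that constructs the Token, so the payload can move straight into the token instead of riding through a return value.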


@@ -1,7 +1,10 @@
+use std::fmt::{Debug, Display};
+use std::mem::ManuallyDrop;
+
 use super::CodePos;
 
 #[allow(dead_code, clippy::upper_case_acronyms)]
-#[derive(Debug, Clone, PartialEq)]
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, PartialEq)]
 #[rustfmt::skip]
 pub enum TokenType {
     // Single-character tokens
@@ -14,44 +17,139 @@ pub enum TokenType {
     Greater, GreaterEqual,
     Less, LessEqual,
 
-    // Literals
-    Identifier(String),
-    String(String),
-    Number(f64),
+    // Identifier and literals
+    Identifier, String, Number,
 
     // Keywords
     And, Break, Class, Else, False, Fun, For, If, Nil, Or,
     Print, Return, Super, This, True, Var, While,
 
     #[allow(dead_code, clippy::upper_case_acronyms)]
     EOF
 }
 
+union TokenData {
+    none: (),
+    #[allow(clippy::box_collection)]
+    s: ManuallyDrop<Box<String>>,
+    num: f64,
+}
+
+impl TokenData {
+    fn none() -> Self {
+        TokenData { none: () }
+    }
+
+    fn string(s: String) -> Self {
+        let s = ManuallyDrop::new(Box::new(s));
+        TokenData { s }
+    }
+
+    fn num(num: f64) -> Self {
+        TokenData { num }
+    }
+}
+
 pub struct Token {
     pub token_type: TokenType,
     // pub lexeme: String,
+    data: TokenData,
     pub code_pos: CodePos,
 }
 
 impl Token {
-    pub fn new(token_type: TokenType, pos: CodePos) -> Self {
+    pub fn new(token_type: TokenType, code_pos: CodePos) -> Self {
         Token {
             token_type,
-            // lexeme,
-            code_pos: pos,
+            data: TokenData::none(),
+            code_pos,
         }
     }
+
+    pub fn new_string(s: String, code_pos: CodePos) -> Self {
+        Token {
+            token_type: TokenType::String,
+            data: TokenData::string(s),
+            code_pos,
+        }
+    }
+
+    pub fn new_identifier(name: String, code_pos: CodePos) -> Self {
+        Token {
+            token_type: TokenType::Identifier,
+            data: TokenData::string(name),
+            code_pos,
+        }
+    }
+
+    pub fn new_number(num: f64, code_pos: CodePos) -> Self {
+        Token {
+            token_type: TokenType::Number,
+            data: TokenData::num(num),
+            code_pos,
+        }
+    }
+
+    pub fn string_data(self) -> String {
+        assert!(self.token_type == TokenType::String || self.token_type == TokenType::Identifier);
+        // std::mem::take(&mut self.data.s)
+        unsafe {
+            let mut me = self;
+            let s = std::mem::take(&mut me.data.s);
+            *ManuallyDrop::into_inner(s)
+        }
+    }
+
+    pub fn num_data(self) -> f64 {
+        assert_eq!(self.token_type, TokenType::Number);
+        unsafe { self.data.num }
+    }
 }
+impl Debug for Token {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // write!(f, "<{:?}>", self.token_type)
+        match self.token_type {
+            TokenType::Number => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.num) },
+            TokenType::String => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
+            TokenType::Identifier => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
+            _ => write!(f, "<{:?}>", self.token_type),
+        }
+    }
+}
+
-impl std::fmt::Debug for Token {
+impl Display for Token {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "<{:?}>", self.token_type)
-        // write!(f, "<{:?}> (\"{}\")", self.token_type, self.lexeme)
+        // write!(f, "<{:?}>", self.token_type)
+        match self.token_type {
+            TokenType::Number => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.num) },
+            TokenType::String => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
+            TokenType::Identifier => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
+            _ => write!(f, "<{:?}>", self.token_type),
+        }
     }
 }
 
-impl std::fmt::Display for Token {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "<{:?}>", self.token_type)
-    }
-}
+/* impl Clone for Token {
+    fn clone(&self) -> Self {
+        let code_pos = self.code_pos;
+        match self.token_type {
+            TokenType::Number => Token::new_number(self.num_data(), code_pos),
+            TokenType::String => unsafe { Token::new_string(self.data.s.as_ref().clone(), code_pos) },
+            TokenType::Identifier => unsafe { Token::new_identifier(self.data.s.as_ref().clone(), code_pos) },
+            token_type => Token::new(token_type, code_pos),
+        }
+    }
+} */
+
+impl Drop for Token {
+    fn drop(&mut self) {
+        if self.token_type == TokenType::String || self.token_type == TokenType::Identifier {
+            // SAFETY: String and Identifier tokens always hold the boxed
+            // string variant, which ManuallyDrop keeps the union from
+            // freeing on its own.
+            unsafe { ManuallyDrop::drop(&mut self.data.s) };
+        }
+    }
+}
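
The point of the new #[repr(u8)] tag plus untagged union is token size: the old enum inlined String payloads, so every token paid for the largest variant, while a one-byte tag with the payload boxed and stored out of band shrinks every token to roughly two machine words. A rough, hypothetical comparison (illustrative types, not the crate's own):

use std::mem::{size_of, ManuallyDrop};

// Payload inlined in the enum: every variant is as big as the biggest.
#[allow(dead_code)]
enum FatTokenType {
    Plus,
    Identifier(String), // 24 bytes inline on a typical 64-bit target
    Number(f64),
}

// Bare one-byte tag...
#[allow(dead_code)]
#[repr(u8)]
#[derive(Clone, Copy)]
enum SlimTokenType { Plus, Identifier, Number }

// ...plus the payload stored out of band in an untagged union.
#[allow(dead_code)]
union SlimData {
    none: (),
    s: ManuallyDrop<Box<String>>, // boxed: one pointer instead of 24 bytes
    num: f64,
}

#[allow(dead_code)]
struct SlimToken {
    token_type: SlimTokenType,
    data: SlimData,
}

fn main() {
    // Typical 64-bit results: 32 bytes for the fat enum,
    // 16 for tag + union (8-byte union, 1-byte tag, padding).
    println!("fat enum:   {} bytes", size_of::<FatTokenType>());
    println!("slim token: {} bytes", size_of::<SlimToken>());
}

The price is the unsafe plumbing above: the union cannot be dropped, cloned, or printed without consulting the tag first, which is why Drop has to check token_type and why the Clone impl is left commented out.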


@@ -101,7 +101,7 @@ impl Parser {
             }
         }
 
-        me.consume_token(TokenType::EOF).unwrap();
+        // me.consume_token(TokenType::EOF).unwrap();
+        assert_eq!(me.next_token().token_type, TokenType::EOF);
 
         if !me.parse_errors.is_empty() {
             Err(me.parse_errors)
@@ -324,12 +324,10 @@ impl Parser {
         // self.consume_token(TokenType::Var)?;
         assert_eq!(self.next_token().token_type, TokenType::Var);
 
-        let name = match self.next_token() {
-            Token {
-                token_type: TokenType::Identifier(name),
-                ..
-            } => name,
-            token => return Err(ParserError::ExpectedVarName { token }),
+        let token = self.next_token();
+        let name = match token.token_type {
+            TokenType::Identifier => token.string_data(),
+            _ => return Err(ParserError::ExpectedVarName { token }),
         };
 
         let initializer = if self.peek_token().token_type == TokenType::Equal {
@@ -738,8 +736,8 @@ impl Parser {
         match token.token_type {
             TokenType::Fun => Ok(self.fun_params_and_body("<lambda>")?),
-            TokenType::Number(num) => Ok(Expr::number(num)),
-            TokenType::String(s) => Ok(Expr::string(s)),
+            TokenType::Number => Ok(Expr::number(token.num_data())),
+            TokenType::String => Ok(Expr::string(token.string_data())),
             TokenType::False => Ok(Expr::bool(false)),
             TokenType::True => Ok(Expr::bool(true)),
             TokenType::Nil => Ok(Expr::nil()),
@@ -768,7 +766,9 @@ impl Parser {
                 Ok(Expr::grouping(expr))
             }
-            TokenType::Identifier(name) => Ok(Expr::Variable { name }),
+            TokenType::Identifier => Ok(Expr::Variable {
+                name: token.string_data(),
+            }),
             _ => Err(ParserError::ExpectedPrimary { token }),
         }
     }
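
The parser hunks all make the same move that the payload-less TokenType forces: match on the Copy tag, which only copies a byte out of the token, then consume the token through string_data() or num_data() in the arm that actually needs the payload. A reduced sketch of why this borrow-checks (stand-in types, with an Option in place of the real union):

// Stand-in types: Option<String> models the union-backed payload.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq)]
enum TokenType { Identifier, Semicolon }

struct Token {
    token_type: TokenType,
    text: Option<String>,
}

impl Token {
    // By value, like the real string_data(self): the payload can be
    // taken exactly once, and the token is consumed in the process.
    fn string_data(self) -> String {
        assert_eq!(self.token_type, TokenType::Identifier);
        self.text.expect("identifier tokens carry text")
    }
}

fn var_name(token: Token) -> Result<String, String> {
    // Reading the Copy tag copies one byte out of `token`, so the
    // Identifier arm is still free to consume the whole token.
    match token.token_type {
        TokenType::Identifier => Ok(token.string_data()),
        _ => Err(format!("expected variable name, got {:?}", token.token_type)),
    }
}

fn main() {
    let tok = Token {
        token_type: TokenType::Identifier,
        text: Some("answer".to_string()),
    };
    println!("{:?}", var_name(tok)); // Ok("answer")
}

Because string_data takes the token by value, the payload can be extracted exactly once; the assert inside it replaces the guarantee the old payload-carrying pattern match gave for free.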
@@ -781,13 +781,7 @@ impl Parser {
     fn identifier(&mut self, msg: &str) -> ParserResult<String> {
         match self.peek_token().token_type {
-            TokenType::Identifier(_) => {
-                if let TokenType::Identifier(name) = self.next_token().token_type {
-                    Ok(name)
-                } else {
-                    unreachable!()
-                }
-            }
+            TokenType::Identifier => Ok(self.next_token().string_data()),
             _ => Err(ParserError::MissingIdentifier {
                 msg: msg.to_owned(),
                 code_pos: self.peek_token().code_pos,
@@ -834,7 +828,8 @@ impl Parser {
                 let _ = self.next_token();
                 Ok(())
             }
-            TokenType::EOF => Err(err_fn(self.peek_token().clone())),
+            // call err_fn with dummy token so we don't have to eat the EOF token
+            TokenType::EOF => Err(err_fn(Token::new(TokenType::EOF, self.peek_token().code_pos))),
             _ => Err(err_fn(self.next_token())),
         }
     }
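
This last hunk works around the loss of Clone on Token: rather than cloning the peeked EOF token for the error callback, the parser copies out the cheap code_pos and builds a fresh, payload-free dummy, leaving the real token in the stream. The trick in isolation, with hypothetical reduced types:

#[derive(Debug, Clone, Copy, PartialEq)]
enum TokenType { EOF }

#[derive(Debug, Clone, Copy)]
struct CodePos { line: u32 }

#[derive(Debug)]
struct Token { // deliberately not Clone, like the real union-backed Token
    token_type: TokenType,
    code_pos: CodePos,
}

impl Token {
    fn new(token_type: TokenType, code_pos: CodePos) -> Self {
        Token { token_type, code_pos }
    }
}

// Build an error value without consuming or cloning the peeked token.
fn eof_error(peeked: &Token) -> Token {
    // CodePos is Copy, and EOF never carries a payload, so the dummy
    // is indistinguishable from the original for error reporting.
    Token::new(TokenType::EOF, peeked.code_pos)
}

fn main() {
    let peeked = Token::new(TokenType::EOF, CodePos { line: 7 });
    println!("{:?}", eof_error(&peeked));
}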


@@ -1,10 +1,10 @@
-mod _parser;
 mod error;
 mod expr;
 mod misc;
+mod parse;
 mod stmt;
 
-pub use _parser::parse_tokens;
 pub use error::ParserError;
 pub use expr::{BinaryOp, Expr, Literal, LogicalOp, UnaryOp};
+pub use parse::parse_tokens;
 pub use stmt::Stmt;