Chapter 17: Compiling Expressions done

Moritz Gmeiner committed 2023-01-31 22:54:12 +01:00
commit 1cca1494a4
20 changed files with 702 additions and 129 deletions


@@ -85,43 +85,43 @@ impl Lexer {
         let c = self.advance();
-        let token_type = match c {
-            '(' => Some(LeftParen),
-            ')' => Some(RightParen),
-            '{' => Some(LeftBrace),
-            '}' => Some(RightBrace),
-            ',' => Some(Comma),
-            '.' => Some(Dot),
-            '+' => Some(Plus),
-            '-' => Some(Minus),
-            ';' => Some(Semicolon),
-            '*' => Some(Star),
+        match c {
+            '(' => self.push_token(LeftParen),
+            ')' => self.push_token(RightParen),
+            '{' => self.push_token(LeftBrace),
+            '}' => self.push_token(RightBrace),
+            ',' => self.push_token(Comma),
+            '.' => self.push_token(Dot),
+            '+' => self.push_token(Plus),
+            '-' => self.push_token(Minus),
+            ';' => self.push_token(Semicolon),
+            '*' => self.push_token(Star),
             '!' => {
                 if self.consume('=') {
-                    Some(BangEqual)
+                    self.push_token(BangEqual)
                 } else {
-                    Some(Bang)
+                    self.push_token(Bang)
                 }
             }
             '=' => {
                 if self.consume('=') {
-                    Some(EqualEqual)
+                    self.push_token(EqualEqual)
                 } else {
-                    Some(Equal)
+                    self.push_token(Equal)
                 }
             }
             '<' => {
                 if self.consume('=') {
-                    Some(LessEqual)
+                    self.push_token(LessEqual)
                 } else {
-                    Some(Less)
+                    self.push_token(Less)
                 }
             }
             '>' => {
                 if self.consume('=') {
-                    Some(GreaterEqual)
+                    self.push_token(GreaterEqual)
                 } else {
-                    Some(Greater)
+                    self.push_token(Greater)
                 }
             }
             '/' => {
@@ -129,8 +129,6 @@ impl Lexer {
                     // line comment
                     // advance until either source is empty or newline if found
                     while !self.source_is_empty() && self.advance() != '\n' {}
-                    None
                 } else if self.consume('*') {
                     // block comment
@@ -166,28 +164,21 @@ impl Lexer {
                         self.advance();
                     }
-                    None
                 } else {
-                    Some(Slash)
+                    self.push_token(Slash)
                 }
             }
             '"' => self.try_parse_string(),
             '0'..='9' => self.try_parse_number(),
-            ' ' | '\r' | '\n' | '\t' => None, // handled automatically in advance()
+            ' ' | '\r' | '\n' | '\t' => {} // handled automatically in advance()
             '_' | 'a'..='z' | 'A'..='Z' => self.try_parse_identifier(),
             _ => {
                 self.errors.push(LexerError::UnexpectedCharacter {
                     c,
                     code_pos: self.code_pos,
                 });
-                None
             }
-        };
-        if let Some(token_type) = token_type {
-            self.push_token(token_type);
-        }
+        }
     }
 
     fn source_is_empty(&self) -> bool {
@@ -235,23 +226,24 @@ impl Lexer {
         self.tokens.push(Token::new(token_type, self.code_pos));
     }
 
-    fn try_parse_string(&mut self) -> Option<TokenType> {
+    fn try_parse_string(&mut self) {
         // advance until second "
         while self.advance() != '"' {
             if self.source_is_empty() {
                 self.errors.push(LexerError::UnterminatedStringLiteral {
                     code_pos: self.code_pos,
                 });
-                return None;
+                return;
             }
         }
 
         let string_literal = self.source[self.start + 1..self.current - 1].iter().collect();
-        Some(TokenType::String(string_literal))
+        // Some(TokenType::String(Box::new(string_literal)))
+        self.tokens.push(Token::new_string(string_literal, self.code_pos));
     }
 
-    fn try_parse_number(&mut self) -> Option<TokenType> {
+    fn try_parse_number(&mut self) {
         let is_some_digit = |c: Option<char>| c.map_or(false, |c| c.is_ascii_digit());
 
         // eat all digits
@@ -289,14 +281,15 @@ impl Lexer {
                     msg: err.to_string(),
                     code_pos: self.code_pos,
                 });
-                return None;
+                return;
             }
         };
 
-        Some(TokenType::Number(num))
+        // Some(TokenType::Number(num))
+        self.tokens.push(Token::new_number(num, self.code_pos));
     }
 
-    fn try_parse_identifier(&mut self) -> Option<TokenType> {
+    fn try_parse_identifier(&mut self) {
         let is_alpha_num_underscore =
             |c: Option<char>| c.map_or(false, |c| matches!(c, '0'..='9' | 'A'..='Z' | '_' | 'a'..='z'));
@@ -306,8 +299,18 @@ impl Lexer {
         let lexeme: String = self.source[self.start..self.current].iter().collect();
 
-        let token_type = KEYWORDS.get(&lexeme).cloned().unwrap_or(TokenType::Identifier(lexeme));
-        /* let token_type = KEYWORDS
-            .get(&lexeme)
-            .cloned()
-            .unwrap_or(TokenType::Identifier(Box::new(lexeme))); */
-        Some(token_type)
+        if let Some(&token_type) = KEYWORDS.get(&lexeme) {
+            // Token::new(token_type, self.code_pos)
+            self.push_token(token_type);
+        } else {
+            self.tokens.push(Token::new_identifier(lexeme, self.code_pos));
+        }
+        // Some(token_type)
     }
 }
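
Taken together, the lexer hunks invert the flow of scan_token: instead of every arm producing an Option<TokenType> for a trailing if-let to turn into a token, each arm and each try_parse_* helper now pushes the finished token itself. A reduced, self-contained sketch of that shape — Lexer, Token, and TokenType here are simplified stand-ins, not the crate's real types:

// Reduced sketch of the new scan_token contract (stand-in types):
// every arm pushes its token; nothing is returned upward.
#[derive(Debug, Clone, Copy, PartialEq)]
enum TokenType { Plus, Bang, BangEqual }

#[derive(Debug)]
struct Token { token_type: TokenType }

struct Lexer { src: Vec<char>, pos: usize, tokens: Vec<Token> }

impl Lexer {
    fn advance(&mut self) -> char {
        let c = self.src[self.pos];
        self.pos += 1;
        c
    }

    // Advance past the next char only if it matches `expected`.
    fn consume(&mut self, expected: char) -> bool {
        if self.src.get(self.pos) == Some(&expected) {
            self.pos += 1;
            true
        } else {
            false
        }
    }

    fn push_token(&mut self, token_type: TokenType) {
        self.tokens.push(Token { token_type });
    }

    fn scan_token(&mut self) {
        match self.advance() {
            '+' => self.push_token(TokenType::Plus),
            '!' => {
                if self.consume('=') {
                    self.push_token(TokenType::BangEqual)
                } else {
                    self.push_token(TokenType::Bang)
                }
            }
            _ => {} // whitespace, errors, etc. elided
        }
    }
}

fn main() {
    let mut lexer = Lexer { src: "!=+".chars().collect(), pos: 0, tokens: Vec::new() };
    while lexer.pos < lexer.src.len() {
        lexer.scan_token();
    }
    // prints [Token { token_type: BangEqual }, Token { token_type: Plus }]
    println!("{:?}", lexer.tokens);
}

Pushing at the point of recognition is what allows TokenType to lose its payloads in the next file: the helper that still holds the lexeme is now the same place that constructs the Token, so the payload can move straight into the token instead of riding through a return value.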


@@ -1,7 +1,10 @@
+use std::fmt::{Debug, Display};
+use std::mem::ManuallyDrop;
+
 use super::CodePos;
 
 #[allow(dead_code, clippy::upper_case_acronyms)]
-#[derive(Debug, Clone, PartialEq)]
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, PartialEq)]
 #[rustfmt::skip]
 pub enum TokenType {
     // Single-character tokens
@@ -14,44 +17,139 @@ pub enum TokenType {
     Greater, GreaterEqual,
     Less, LessEqual,
 
-    // Literals
-    Identifier(String),
-    String(String),
-    Number(f64),
+    // Identifier and literals
+    Identifier, String, Number,
 
     // Keywords
     And, Break, Class, Else, False, Fun, For, If, Nil, Or,
     Print, Return, Super, This, True, Var, While,
 
     #[allow(dead_code, clippy::upper_case_acronyms)]
     EOF
 }
 
+union TokenData {
+    none: (),
+    #[allow(clippy::box_collection)]
+    s: ManuallyDrop<Box<String>>,
+    num: f64,
+}
+
+impl TokenData {
+    fn none() -> Self {
+        TokenData { none: () }
+    }
+
+    fn string(s: String) -> Self {
+        let s = ManuallyDrop::new(Box::new(s));
+        TokenData { s }
+    }
+
+    fn num(num: f64) -> Self {
+        TokenData { num }
+    }
+}
+
 pub struct Token {
     pub token_type: TokenType,
     // pub lexeme: String,
+    data: TokenData,
     pub code_pos: CodePos,
 }
 
 impl Token {
-    pub fn new(token_type: TokenType, pos: CodePos) -> Self {
+    pub fn new(token_type: TokenType, code_pos: CodePos) -> Self {
         Token {
             token_type,
-            // lexeme,
-            code_pos: pos,
+            data: TokenData::none(),
+            code_pos,
         }
     }
+
+    pub fn new_string(s: String, code_pos: CodePos) -> Self {
+        Token {
+            token_type: TokenType::String,
+            data: TokenData::string(s),
+            code_pos,
+        }
+    }
+
+    pub fn new_identifier(name: String, code_pos: CodePos) -> Self {
+        Token {
+            token_type: TokenType::Identifier,
+            data: TokenData::string(name),
+            code_pos,
+        }
+    }
+
+    pub fn new_number(num: f64, code_pos: CodePos) -> Self {
+        Token {
+            token_type: TokenType::Number,
+            data: TokenData::num(num),
+            code_pos,
+        }
+    }
+
+    pub fn string_data(self) -> String {
+        assert!(self.token_type == TokenType::String || self.token_type == TokenType::Identifier);
+        // std::mem::take(&mut self.data.s)
+        unsafe {
+            let mut me = self;
+            let s = std::mem::take(&mut me.data.s);
+            *ManuallyDrop::into_inner(s)
+        }
+    }
+
+    pub fn num_data(self) -> f64 {
+        assert_eq!(self.token_type, TokenType::Number);
+        unsafe { self.data.num }
+    }
 }
+impl Debug for Token {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // write!(f, "<{:?}>", self.token_type)
+        match self.token_type {
+            TokenType::Number => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.num) },
+            TokenType::String => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
+            TokenType::Identifier => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
+            _ => write!(f, "<{:?}>", self.token_type),
+        }
+    }
+}
+
-impl std::fmt::Debug for Token {
+impl Display for Token {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "<{:?}>", self.token_type)
-        // write!(f, "<{:?}> (\"{}\")", self.token_type, self.lexeme)
+        // write!(f, "<{:?}>", self.token_type)
+        match self.token_type {
+            TokenType::Number => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.num) },
+            TokenType::String => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
+            TokenType::Identifier => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
+            _ => write!(f, "<{:?}>", self.token_type),
+        }
     }
 }
 
-impl std::fmt::Display for Token {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "<{:?}>", self.token_type)
-    }
-}
+/* impl Clone for Token {
+    fn clone(&self) -> Self {
+        let code_pos = self.code_pos;
+        match self.token_type {
+            TokenType::Number => Token::new_number(self.num_data(), code_pos),
+            TokenType::String => unsafe { Token::new_string(self.data.s.as_ref().clone(), code_pos) },
+            TokenType::Identifier => unsafe { Token::new_identifier(self.data.s.as_ref().clone(), code_pos) },
+            token_type => Token::new(token_type, code_pos),
+        }
+    }
+} */
+
+impl Drop for Token {
+    fn drop(&mut self) {
+        if self.token_type == TokenType::String || self.token_type == TokenType::Identifier {
+            // SAFETY: String and Identifier tokens always hold the boxed
+            // string variant, which ManuallyDrop keeps the union from
+            // freeing on its own.
+            unsafe { ManuallyDrop::drop(&mut self.data.s) };
+        }
+    }
+}
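
The point of the new #[repr(u8)] tag plus untagged union is token size: the old enum inlined String payloads, so every token paid for the largest variant, while a one-byte tag with the payload boxed and stored out of band shrinks every token to roughly two machine words. A rough, hypothetical comparison (illustrative types, not the crate's own):

use std::mem::{size_of, ManuallyDrop};

// Payload inlined in the enum: every variant is as big as the biggest.
#[allow(dead_code)]
enum FatTokenType {
    Plus,
    Identifier(String), // 24 bytes inline on a typical 64-bit target
    Number(f64),
}

// Bare one-byte tag...
#[allow(dead_code)]
#[repr(u8)]
#[derive(Clone, Copy)]
enum SlimTokenType { Plus, Identifier, Number }

// ...plus the payload stored out of band in an untagged union.
#[allow(dead_code)]
union SlimData {
    none: (),
    s: ManuallyDrop<Box<String>>, // boxed: one pointer instead of 24 bytes
    num: f64,
}

#[allow(dead_code)]
struct SlimToken {
    token_type: SlimTokenType,
    data: SlimData,
}

fn main() {
    // Typical 64-bit results: 32 bytes for the fat enum,
    // 16 for tag + union (8-byte union, 1-byte tag, padding).
    println!("fat enum:   {} bytes", size_of::<FatTokenType>());
    println!("slim token: {} bytes", size_of::<SlimToken>());
}

The price is the unsafe plumbing above: the union cannot be dropped, cloned, or printed without consulting the tag first, which is why Drop has to check token_type and why the Clone impl is left commented out.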


@@ -101,7 +101,7 @@ impl Parser {
             }
         }
 
-        me.consume_token(TokenType::EOF).unwrap();
+        // me.consume_token(TokenType::EOF).unwrap();
+        assert_eq!(me.next_token().token_type, TokenType::EOF);
 
         if !me.parse_errors.is_empty() {
             Err(me.parse_errors)
@@ -324,12 +324,10 @@ impl Parser {
         // self.consume_token(TokenType::Var)?;
         assert_eq!(self.next_token().token_type, TokenType::Var);
 
-        let name = match self.next_token() {
-            Token {
-                token_type: TokenType::Identifier(name),
-                ..
-            } => name,
-            token => return Err(ParserError::ExpectedVarName { token }),
+        let token = self.next_token();
+        let name = match token.token_type {
+            TokenType::Identifier => token.string_data(),
+            _ => return Err(ParserError::ExpectedVarName { token }),
         };
 
         let initializer = if self.peek_token().token_type == TokenType::Equal {
@@ -738,8 +736,8 @@ impl Parser {
         match token.token_type {
             TokenType::Fun => Ok(self.fun_params_and_body("<lambda>")?),
-            TokenType::Number(num) => Ok(Expr::number(num)),
-            TokenType::String(s) => Ok(Expr::string(s)),
+            TokenType::Number => Ok(Expr::number(token.num_data())),
+            TokenType::String => Ok(Expr::string(token.string_data())),
             TokenType::False => Ok(Expr::bool(false)),
             TokenType::True => Ok(Expr::bool(true)),
             TokenType::Nil => Ok(Expr::nil()),
@@ -768,7 +766,9 @@ impl Parser {
                 Ok(Expr::grouping(expr))
             }
-            TokenType::Identifier(name) => Ok(Expr::Variable { name }),
+            TokenType::Identifier => Ok(Expr::Variable {
+                name: token.string_data(),
+            }),
             _ => Err(ParserError::ExpectedPrimary { token }),
         }
     }
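
The parser hunks all make the same move that the payload-less TokenType forces: match on the Copy tag, which only copies a byte out of the token, then consume the token through string_data() or num_data() in the arm that actually needs the payload. A reduced sketch of why this borrow-checks (stand-in types, with an Option in place of the real union):

// Stand-in types: Option<String> models the union-backed payload.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq)]
enum TokenType { Identifier, Semicolon }

struct Token {
    token_type: TokenType,
    text: Option<String>,
}

impl Token {
    // By value, like the real string_data(self): the payload can be
    // taken exactly once, and the token is consumed in the process.
    fn string_data(self) -> String {
        assert_eq!(self.token_type, TokenType::Identifier);
        self.text.expect("identifier tokens carry text")
    }
}

fn var_name(token: Token) -> Result<String, String> {
    // Reading the Copy tag copies one byte out of `token`, so the
    // Identifier arm is still free to consume the whole token.
    match token.token_type {
        TokenType::Identifier => Ok(token.string_data()),
        _ => Err(format!("expected variable name, got {:?}", token.token_type)),
    }
}

fn main() {
    let tok = Token {
        token_type: TokenType::Identifier,
        text: Some("answer".to_string()),
    };
    println!("{:?}", var_name(tok)); // Ok("answer")
}

Because string_data takes the token by value, the payload can be extracted exactly once; the assert inside it replaces the guarantee the old payload-carrying pattern match gave for free.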
@@ -781,13 +781,7 @@ impl Parser {
     fn identifier(&mut self, msg: &str) -> ParserResult<String> {
         match self.peek_token().token_type {
-            TokenType::Identifier(_) => {
-                if let TokenType::Identifier(name) = self.next_token().token_type {
-                    Ok(name)
-                } else {
-                    unreachable!()
-                }
-            }
+            TokenType::Identifier => Ok(self.next_token().string_data()),
             _ => Err(ParserError::MissingIdentifier {
                 msg: msg.to_owned(),
                 code_pos: self.peek_token().code_pos,
@@ -834,7 +828,8 @@ impl Parser {
                 let _ = self.next_token();
                 Ok(())
             }
-            TokenType::EOF => Err(err_fn(self.peek_token().clone())),
+            // call err_fn with dummy token so we don't have to eat the EOF token
+            TokenType::EOF => Err(err_fn(Token::new(TokenType::EOF, self.peek_token().code_pos))),
             _ => Err(err_fn(self.next_token())),
         }
     }
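
This last hunk works around the loss of Clone on Token: rather than cloning the peeked EOF token for the error callback, the parser copies out the cheap code_pos and builds a fresh, payload-free dummy, leaving the real token in the stream. The trick in isolation, with hypothetical reduced types:

#[derive(Debug, Clone, Copy, PartialEq)]
enum TokenType { EOF }

#[derive(Debug, Clone, Copy)]
struct CodePos { line: u32 }

#[derive(Debug)]
struct Token { // deliberately not Clone, like the real union-backed Token
    token_type: TokenType,
    code_pos: CodePos,
}

impl Token {
    fn new(token_type: TokenType, code_pos: CodePos) -> Self {
        Token { token_type, code_pos }
    }
}

// Build an error value without consuming or cloning the peeked token.
fn eof_error(peeked: &Token) -> Token {
    // CodePos is Copy, and EOF never carries a payload, so the dummy
    // is indistinguishable from the original for error reporting.
    Token::new(TokenType::EOF, peeked.code_pos)
}

fn main() {
    let peeked = Token::new(TokenType::EOF, CodePos { line: 7 });
    println!("{:?}", eof_error(&peeked));
}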


@@ -1,10 +1,10 @@
-mod _parser;
 mod error;
 mod expr;
 mod misc;
+mod parse;
 mod stmt;
 
-pub use _parser::parse_tokens;
 pub use error::ParserError;
 pub use expr::{BinaryOp, Expr, Literal, LogicalOp, UnaryOp};
+pub use parse::parse_tokens;
 pub use stmt::Stmt;