use phf::phf_map;

use super::{CodePos, LexerError, Token, TokenType};

/*====================================================================================================================*/

static KEYWORDS: phf::Map<&'static str, TokenType> = phf_map! {
    "and" => TokenType::And,
    "break" => TokenType::Break,
    "class" => TokenType::Class,
    "else" => TokenType::Else,
    "false" => TokenType::False,
    "for" => TokenType::For,
    "fun" => TokenType::Fun,
    "if" => TokenType::If,
    "nil" => TokenType::Nil,
    "or" => TokenType::Or,
    "print" => TokenType::Print,
    "return" => TokenType::Return,
    "super" => TokenType::Super,
    "this" => TokenType::This,
    "true" => TokenType::True,
    "var" => TokenType::Var,
    "while" => TokenType::While,
};

/*====================================================================================================================*/

pub fn scan_tokens(source_code: &str) -> Result<Vec<Token>, Vec<LexerError>> {
    let lexer = Lexer::new(source_code);
    lexer.scan_tokens()
}

/*====================================================================================================================*/

#[derive(Debug)]
struct Lexer {
    source: Vec<char>,
    tokens: Vec<Token>,
    start: usize,
    current: usize,
    code_pos: CodePos,
    errors: Vec<LexerError>,
}

impl Lexer {
    fn new(source_code: &str) -> Self {
        let source = source_code.chars().collect();
        Lexer {
            source,
            tokens: Vec::new(),
            start: 0,
            current: 0,
            code_pos: CodePos::default(),
            errors: Vec::new(),
        }
    }

    fn scan_tokens(mut self) -> Result<Vec<Token>, Vec<LexerError>> {
        while !self.source_is_empty() {
            self.scan_token();
        }
        self.tokens.push(Token::new(TokenType::EOF, self.code_pos));
        if self.errors.is_empty() {
            Ok(self.tokens)
        } else {
            Err(self.errors)
        }
    }

    fn scan_token(&mut self) {
        use TokenType::*;
        self.start = self.current;
        let c = self.advance();
        match c {
            '(' => self.push_token(LeftParen),
            ')' => self.push_token(RightParen),
            '{' => self.push_token(LeftBrace),
            '}' => self.push_token(RightBrace),
            ',' => self.push_token(Comma),
            '.' => self.push_token(Dot),
            '+' => self.push_token(Plus),
            '-' => self.push_token(Minus),
            ';' => self.push_token(Semicolon),
            '*' => self.push_token(Star),
            '!' => {
                if self.consume('=') {
                    self.push_token(BangEqual)
                } else {
                    self.push_token(Bang)
                }
            }
            '=' => {
                if self.consume('=') {
                    self.push_token(EqualEqual)
                } else {
                    self.push_token(Equal)
                }
            }
            '<' => {
                if self.consume('=') {
                    self.push_token(LessEqual)
                } else {
                    self.push_token(Less)
                }
            }
            '>' => {
                if self.consume('=') {
                    self.push_token(GreaterEqual)
                } else {
                    self.push_token(Greater)
                }
            }
            '/' => {
                if self.consume('/') {
                    // line comment: advance until the source is empty or a newline is found
                    while !self.source_is_empty() && self.advance() != '\n' {}
                } else if self.consume('*') {
                    // block comment (may be nested)
                    let mut depth = 1;
                    loop {
                        if depth == 0 {
                            break;
                        }
                        if self.source_is_empty() {
                            self.errors.push(LexerError::UnterminatedBlockComment {
                                code_pos: self.code_pos,
                            });
                            break;
                        }
                        if self.peek() == Some('/') && self.peek_two() == Some('*') {
                            // nested block comment: consume '/' and '*'
                            self.advance();
                            self.advance();
                            depth += 1;
                            continue;
                        }
                        if self.peek() == Some('*') && self.peek_two() == Some('/') {
                            // consume '*' and '/'
                            self.advance();
                            self.advance();
                            depth -= 1;
                            continue;
                        }
                        self.advance();
                    }
                } else {
                    self.push_token(Slash)
                }
            }
            '"' => self.try_parse_string(),
            '0'..='9' => self.try_parse_number(),
            ' ' | '\r' | '\n' | '\t' => {} // handled automatically in advance()
            // identifiers may start with an ASCII letter or an underscore
            c if c == '_' || c.is_ascii_alphabetic() => self.try_parse_identifier(),
            _ => {
                self.errors.push(LexerError::UnexpectedCharacter {
                    c,
                    code_pos: self.code_pos,
                });
            }
        };
    }

    fn source_is_empty(&self) -> bool {
        self.current >= self.source.len()
    }

    fn advance(&mut self) -> char {
        assert!(!self.source_is_empty());
        let c = self.source[self.current];
        self.current += 1;
        self.code_pos.col += 1;
        if c == '\t' {
            // a tab counts as four columns in total
            self.code_pos.col += 3;
        } else if c == '\n' {
            self.code_pos.col = 0;
            self.code_pos.line += 1;
        }
        c
    }

    fn peek(&self) -> Option<char> {
        self.source.get(self.current).copied()
    }

    fn peek_two(&self) -> Option<char> {
        self.source.get(self.current + 1).copied()
    }

    fn consume(&mut self, c: char) -> bool {
        if self.peek() == Some(c) {
            self.advance();
            true
        } else {
            false
        }
    }

    fn push_token(&mut self, token_type: TokenType) {
        self.tokens.push(Token::new(token_type, self.code_pos));
    }

    fn try_parse_string(&mut self) {
        // advance until the closing '"'; check for end of input first so an
        // unterminated literal is reported instead of tripping the assert in advance()
        loop {
            if self.source_is_empty() {
                self.errors.push(LexerError::UnterminatedStringLiteral {
                    code_pos: self.code_pos,
                });
                return;
            }
            if self.advance() == '"' {
                break;
            }
        }
        let string_literal = self.source[self.start + 1..self.current - 1].iter().collect();
        self.tokens.push(Token::new_string(string_literal, self.code_pos));
    }

    fn try_parse_number(&mut self) {
        let is_some_digit = |c: Option<char>| c.map_or(false, |c| c.is_ascii_digit());
        // eat all digits
        while is_some_digit(self.peek()) {
            self.advance();
        }
        // consume the separator dot and continue eating digits
        if self.peek() == Some('.') && is_some_digit(self.peek_two()) {
            // consume the '.'
            self.advance();
            while is_some_digit(self.peek()) {
                self.advance();
            }
        }
        // consume the exponential 'e' and continue eating digits
        if self.peek() == Some('e') && is_some_digit(self.peek_two()) {
            // consume the 'e'
            self.advance();
            while is_some_digit(self.peek()) {
                self.advance();
            }
        }
        let lexeme: String = self.source[self.start..self.current].iter().collect();
        let num: f64 = match lexeme.parse() {
            Ok(num) => num,
            Err(err) => {
                self.errors.push(LexerError::InvalidNumberLiteral {
                    lexeme,
                    msg: err.to_string(),
                    code_pos: self.code_pos,
                });
                return;
            }
        };
        self.tokens.push(Token::new_number(num, self.code_pos));
    }

    fn try_parse_identifier(&mut self) {
        let is_alpha_num_underscore =
            |c: Option<char>| c.map_or(false, |c| matches!(c, '0'..='9' | 'A'..='Z' | '_' | 'a'..='z'));
        while is_alpha_num_underscore(self.peek()) {
            self.advance();
        }
        let lexeme: String = self.source[self.start..self.current].iter().collect();
        // phf's get() is keyed on &str here, so pass the lexeme as a &str
        if let Some(&token_type) = KEYWORDS.get(lexeme.as_str()) {
            self.push_token(token_type);
        } else {
            self.tokens.push(Token::new_identifier(lexeme, self.code_pos));
        }
    }
}
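
/*====================================================================================================================*/

// A minimal usage sketch, not an exhaustive test suite: it assumes the surrounding module layout
// (`scan_tokens`, `Token`, and `LexerError` as declared above) and only checks token counts and
// error reporting, since the exact shape of `Token` is defined in the parent module.
#[cfg(test)]
mod tests {
    use super::scan_tokens;

    #[test]
    fn scans_a_simple_statement() {
        // "var x = 42;" should yield Var, Identifier, Equal, Number, Semicolon, plus the EOF token.
        let tokens = scan_tokens("var x = 42;").unwrap();
        assert_eq!(tokens.len(), 6);
    }

    #[test]
    fn reports_unterminated_strings() {
        // An unterminated string literal should surface as a lexer error, not a panic.
        assert!(scan_tokens("\"abc").is_err());
    }
}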