From ca6e092b38e49c48023a3f2b749b0a5216d5314a Mon Sep 17 00:00:00 2001
From: Moritz Gmeiner
Date: Mon, 2 Sep 2024 03:10:00 +0200
Subject: [PATCH] added escape sequences

---
 frontend/src/lexer/_lexer.rs | 52 +++++++++++++++++++++++++++++++-----
 frontend/src/lexer/error.rs  |  2 ++
 2 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/frontend/src/lexer/_lexer.rs b/frontend/src/lexer/_lexer.rs
index b1f7088..f296e47 100644
--- a/frontend/src/lexer/_lexer.rs
+++ b/frontend/src/lexer/_lexer.rs
@@ -238,23 +238,63 @@ impl Lexer {
     }
 
     fn try_parse_string(&mut self) {
+        // first '"' already consumed
+
         // advance until second "
-        while self.advance() != '"' {
+        /* while self.advance() != '"' {
             if self.source_is_empty() {
                 self.errors.push(LexerError::UnterminatedStringLiteral {
                     code_pos: self.code_pos,
                 });
                 return;
             }
+        } */
+
+        let mut s = String::new();
+
+        let starting_pos = self.code_pos;
+
+        loop {
+            if self.source_is_empty() {
+                self.errors.push(LexerError::UnterminatedStringLiteral {
+                    code_pos: starting_pos,
+                });
+                return;
+            }
+
+            match self.advance() {
+                '"' => break,
+                '\\' => {
+                    // escape sequence -> handle later
+                    if self.source_is_empty() {
+                        self.errors.push(LexerError::UnterminatedStringLiteral {
+                            code_pos: starting_pos,
+                        });
+                        return;
+                    }
+
+                    match self.advance() {
+                        'n' => s.push('\n'),
+                        'r' => s.push('\r'),
+                        '\\' => s.push('\\'),
+                        c => self.errors.push(LexerError::InvalidEscapeSequence {
+                            code_pos: self.code_pos,
+                            c,
+                        }),
+                    }
+                }
+                c => {
+                    s.push(c);
+                }
+            }
         }
 
-        let string_literal: Box = self.source[self.start + 1..self.current - 1]
-            .iter()
-            .collect();
+        // let string_literal: Box = self.source[self.start + 1..self.current - 1]
+        //     .iter()
+        //     .collect();
 
         // Some(TokenType::String(Box::new(string_literal)))
-        self.tokens
-            .push(Token::new_string(string_literal, self.code_pos));
+        self.tokens.push(Token::new_string(s, self.code_pos));
     }
 
     fn try_parse_number(&mut self) {
diff --git a/frontend/src/lexer/error.rs b/frontend/src/lexer/error.rs
index 1f382fc..e6ab17f 100644
--- a/frontend/src/lexer/error.rs
+++ b/frontend/src/lexer/error.rs
@@ -8,6 +8,8 @@ pub enum LexerError {
     UnexpectedCharacter { c: char, code_pos: CodePos },
     #[error("unterminated string literal starting at {code_pos}.")]
     UnterminatedStringLiteral { code_pos: CodePos },
+    #[error("invalid escape sequence \\{c} at {code_pos}")]
+    InvalidEscapeSequence { code_pos: CodePos, c: char },
     #[error("unterminated block comment starting at {code_pos}.")]
     UnterminatedBlockComment { code_pos: CodePos },
     #[error("invalid number literal {lexeme} at {code_pos}: {msg}")]