updated a bunch of stuff

Moritz Gmeiner 2024-09-01 19:16:30 +02:00
commit 67bb5fe8fd
24 changed files with 683 additions and 702 deletions

View file

@@ -129,6 +129,11 @@ impl Lexer {
// line comment
// advance until either source is empty or newline if found
while !self.source_is_empty() && self.advance() != '\n' {}
+let comment: Box<str> =
+self.source[self.start + 2..self.current].iter().collect();
+self.push_token(TokenType::Comment(comment));
} else if self.consume('*') {
// block comment
@@ -164,6 +169,12 @@ impl Lexer {
self.advance();
}
+let comment: Box<str> = self.source[self.start + 2..self.current - 2]
+.iter()
+.collect();
+self.push_token(TokenType::Comment(comment));
} else {
self.push_token(Slash)
}
@@ -237,10 +248,13 @@ impl Lexer {
}
}
-let string_literal = self.source[self.start + 1..self.current - 1].iter().collect();
+let string_literal: Box<str> = self.source[self.start + 1..self.current - 1]
+.iter()
+.collect();
// Some(TokenType::String(Box::new(string_literal)))
-self.tokens.push(Token::new_string(string_literal, self.code_pos));
+self.tokens
+.push(Token::new_string(string_literal, self.code_pos));
}
fn try_parse_number(&mut self) {
@@ -290,8 +304,12 @@ impl Lexer {
}
fn try_parse_identifier(&mut self) {
-let is_alpha_num_underscore =
-|c: Option<char>| c.map_or(false, |c| matches!(c, '0'..='9' | 'A'..='Z' | '_' | 'a'..='z'));
+let is_alpha_num_underscore = |c: Option<char>| {
+c.map_or(
+false,
+|c| matches!(c, '0'..='9' | 'A'..='Z' | '_' | 'a'..='z'),
+)
+};
while is_alpha_num_underscore(self.peek()) {
self.advance();
@@ -304,11 +322,12 @@ impl Lexer {
.cloned()
.unwrap_or(TokenType::Identifier(Box::new(lexeme))); */
-if let Some(&token_type) = KEYWORDS.get(&lexeme) {
+if let Some(token_type) = KEYWORDS.get(&lexeme) {
// Token::new(token_type, self.code_pos)
-self.push_token(token_type);
+self.push_token(token_type.clone());
} else {
-self.tokens.push(Token::new_identifier(lexeme, self.code_pos));
+self.tokens
+.push(Token::new_identifier(lexeme, self.code_pos));
}
// Some(token_type)

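A small standalone sketch (not from the commit) of the slice arithmetic used for the comment tokens above. It assumes the lexer's source is a Vec<char>, and it goes through String before boxing, whereas the hunks above collect straight into Box<str>.

    fn main() {
        // Stand-in for the lexer's source buffer (assumed to be a Vec<char>).
        let source: Vec<char> = "/* hi */".chars().collect();
        let (start, current) = (0, source.len()); // start of "/*", one past "*/"

        // Block comment: drop the two-character delimiters on both ends,
        // mirroring source[start + 2..current - 2] in the diff.
        let comment: Box<str> = source[start + 2..current - 2]
            .iter()
            .collect::<String>()
            .into();
        assert_eq!(&*comment, " hi ");
    }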
View file

@@ -4,13 +4,13 @@ use super::CodePos;
#[derive(Error, Debug)]
pub enum LexerError {
#[error("Unexpected character '{c}' at {code_pos}.")]
#[error("LexerError: Unexpected character '{c}' at {code_pos}.")]
UnexpectedCharacter { c: char, code_pos: CodePos },
#[error("Unterminated string literal starting at {code_pos}.")]
#[error("LexerError: Unterminated string literal starting at {code_pos}.")]
UnterminatedStringLiteral { code_pos: CodePos },
#[error("Unterminated block comment starting at {code_pos}.")]
#[error("LexerError: Unterminated block comment starting at {code_pos}.")]
UnterminatedBlockComment { code_pos: CodePos },
#[error("Invalid number literal {lexeme} at {code_pos}: {msg}")]
#[error("LexerError: Invalid number literal {lexeme} at {code_pos}: {msg}")]
InvalidNumberLiteral {
lexeme: String,
msg: String,

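For reference, a minimal sketch (not part of the commit) of how thiserror expands these attributes into a Display impl. The CodePos below is a hypothetical stand-in, since its real definition is not in this diff; it only needs to implement Display.

    use std::fmt;
    use thiserror::Error;

    // Hypothetical stand-in for the crate's CodePos.
    #[derive(Debug, Clone, Copy)]
    struct CodePos { line: usize, col: usize }

    impl fmt::Display for CodePos {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "{}:{}", self.line, self.col)
        }
    }

    #[derive(Error, Debug)]
    enum LexerError {
        #[error("LexerError: Unexpected character '{c}' at {code_pos}.")]
        UnexpectedCharacter { c: char, code_pos: CodePos },
    }

    fn main() {
        let err = LexerError::UnexpectedCharacter { c: '#', code_pos: CodePos { line: 3, col: 7 } };
        // The generated Display output now carries the "LexerError: " prefix added in this commit:
        println!("{err}"); // LexerError: Unexpected character '#' at 3:7.
    }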
View file

@@ -1,10 +1,9 @@
use std::fmt::{Debug, Display};
-use std::mem::ManuallyDrop;
use super::CodePos;
#[repr(u8)]
-#[derive(Debug, Clone, Copy, PartialEq)]
+#[derive(Debug, Clone, PartialEq)]
#[rustfmt::skip]
pub enum TokenType {
// Single-character tokens
@@ -18,42 +17,20 @@ pub enum TokenType {
Less, LessEqual,
// Identifier and literals
-Identifier, String, Number,
+Identifier(Box<str>), String(Box<str>), Number(f64),
// Keywords
And, Break, Class, Else, False, Fun, For, If, Nil, Or,
Print, Return, Super, This, True, Var, While,
+#[allow(dead_code, clippy::upper_case_acronyms)]
+Comment(Box<str>),
#[allow(clippy::upper_case_acronyms)]
EOF
}
-union TokenData {
-none: (),
-#[allow(clippy::box_collection)]
-s: ManuallyDrop<Box<String>>,
-num: f64,
-}
-impl TokenData {
-fn none() -> Self {
-TokenData { none: () }
-}
-fn string(s: String) -> Self {
-let s = ManuallyDrop::new(Box::new(s));
-TokenData { s }
-}
-fn num(num: f64) -> Self {
-TokenData { num }
-}
-}
pub struct Token {
pub token_type: TokenType,
// pub lexeme: String,
-data: TokenData,
pub code_pos: CodePos,
}
@@ -61,95 +38,64 @@ impl Token {
pub fn new(token_type: TokenType, code_pos: CodePos) -> Self {
Token {
token_type,
// lexeme,
-data: TokenData::none(),
code_pos,
}
}
-pub fn new_string(s: String, code_pos: CodePos) -> Self {
+pub fn new_string(s: impl Into<Box<str>>, code_pos: CodePos) -> Self {
Token {
-token_type: TokenType::String,
-data: TokenData::string(s),
+token_type: TokenType::String(s.into()),
code_pos,
}
}
-pub fn new_identifier(name: String, code_pos: CodePos) -> Self {
+pub fn new_identifier(name: impl Into<Box<str>>, code_pos: CodePos) -> Self {
Token {
-token_type: TokenType::Identifier,
-data: TokenData::string(name),
+token_type: TokenType::Identifier(name.into()),
code_pos,
}
}
pub fn new_number(num: f64, code_pos: CodePos) -> Self {
Token {
-token_type: TokenType::Number,
-data: TokenData::num(num),
+token_type: TokenType::Number(num),
code_pos,
}
}
-pub fn string_data(self) -> String {
-assert!(self.token_type == TokenType::String || self.token_type == TokenType::Identifier);
+pub fn take(&mut self) -> Self {
+let code_pos = self.code_pos;
-// std::mem::take(&mut self.data.s)
-unsafe {
-let mut me = self;
+use TokenType::*;
-let s = std::mem::take(&mut me.data.s);
+let token_type = match &mut self.token_type {
+String(s) => String(std::mem::take(s)),
+Identifier(name) => Identifier(std::mem::take(name)),
+other => other.clone(),
+};
-*ManuallyDrop::into_inner(s)
+Token {
+token_type,
+code_pos,
+}
}
-pub fn num_data(self) -> f64 {
-assert_eq!(self.token_type, TokenType::Number);
-unsafe { self.data.num }
-}
}
impl Debug for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-// write!(f, "<{:?}>", self.token_type)
-match self.token_type {
-TokenType::Number => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.num) },
-TokenType::String => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
-TokenType::Identifier => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
-_ => write!(f, "<{:?}>", self.token_type),
-}
+write!(f, "<{:?} {:?}> ", self.token_type, self.code_pos)
}
}
impl Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-// write!(f, "<{:?}>", self.token_type)
-match self.token_type {
-TokenType::Number => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.num) },
-TokenType::String => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
-TokenType::Identifier => unsafe { write!(f, "<{:?}({})>", self.token_type, self.data.s.as_ref()) },
-_ => write!(f, "<{:?}>", self.token_type),
+use TokenType::*;
+match &self.token_type {
+String(s) => write!(f, "<String {}>", s),
+Identifier(name) => write!(f, "<Identifier {}>", name),
+Number(x) => write!(f, "<Number {}>", x),
+tt => write!(f, "<{:?}>", tt),
}
}
}
-/* impl Clone for Token {
-fn clone(&self) -> Self {
-let code_pos = self.code_pos;
-match self.token_type {
-TokenType::Number => Token::new_number(self.num_data(), code_pos),
-TokenType::String => unsafe { Token::new_string(self.data.s.as_ref().clone(), code_pos) },
-TokenType::Identifier => unsafe { Token::new_identifier(self.data.s.as_ref().clone(), code_pos) },
-token_type => Token::new(token_type, code_pos),
-}
-}
-} */
-impl Drop for Token {
-fn drop(&mut self) {
-if self.token_type == TokenType::String {}
-}
-}
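A rough, self-contained sketch (not from the repository) of the move-out pattern the new Token::take relies on: std::mem::take swaps an empty Box<str> into the String/Identifier payloads so the caller gets ownership without cloning the text, while payload-free variants are simply cloned. Only a few variants are mirrored here.

    // Mirror of the new data-carrying TokenType from the diff (most variants omitted).
    #[derive(Debug, Clone, PartialEq)]
    enum TokenType {
        Identifier(Box<str>),
        String(Box<str>),
        Number(f64),
        Slash,
    }

    // Same trick as Token::take in this commit: heavy payloads are taken,
    // cheap variants are cloned, and the source is left holding an empty Box<str>.
    fn take_token_type(tt: &mut TokenType) -> TokenType {
        use TokenType::*;
        match tt {
            String(s) => String(std::mem::take(s)),
            Identifier(name) => Identifier(std::mem::take(name)),
            other => other.clone(),
        }
    }

    fn main() {
        let mut tt = TokenType::String("hello".into());
        let owned = take_token_type(&mut tt);
        assert_eq!(owned, TokenType::String("hello".into()));
        // The original is still valid, just emptied out.
        assert_eq!(tt, TokenType::String("".into()));
    }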