mirror of
https://github.com/MorizzG/MLox.git
synced 2025-12-06 04:22:41 +00:00
more work on the parser
This commit is contained in:
parent
686fb195af
commit
3590a78154
6 changed files with 287 additions and 12 deletions
24
lib/error.ml
24
lib/error.ml
|
|
@ -4,15 +4,24 @@ type lexer_error = { pos : code_pos; msg : string }
|
||||||
module LexerError = struct
|
module LexerError = struct
|
||||||
type t = lexer_error
|
type t = lexer_error
|
||||||
|
|
||||||
let make (pos : code_pos) (msg : string) : lexer_error =
|
let make (pos : code_pos) (msg : string) : lexer_error = { pos; msg }
|
||||||
(* let pos = { line; col } in *)
|
|
||||||
{ pos; msg }
|
|
||||||
|
|
||||||
let print (e : lexer_error) =
|
let print (e : lexer_error) =
|
||||||
Printf.printf "LexerError at line %d, column %d: %s\n" e.pos.line e.pos.col e.msg
|
Printf.printf "LexerError at line %d, column %d: %s\n" e.pos.line e.pos.col e.msg
|
||||||
end
|
end
|
||||||
|
|
||||||
type lox_error = LexerError of lexer_error list
|
type parser_error = { pos : code_pos; msg : string }
|
||||||
|
|
||||||
|
module ParserError = struct
|
||||||
|
type t = parser_error
|
||||||
|
|
||||||
|
let make (pos : code_pos) (msg : string) : parser_error = { pos; msg }
|
||||||
|
|
||||||
|
let print (e : parser_error) =
|
||||||
|
Printf.printf "ParserError at line %d, column %d: %s\n" e.pos.line e.pos.col e.msg
|
||||||
|
end
|
||||||
|
|
||||||
|
type lox_error = LexerError of lexer_error list | ParserError of parser_error list
|
||||||
|
|
||||||
let print_error (e : lox_error) =
|
let print_error (e : lox_error) =
|
||||||
match e with
|
match e with
|
||||||
|
|
@ -22,5 +31,12 @@ let print_error (e : lox_error) =
|
||||||
Printf.printf "found %d %s:\n" num_errors
|
Printf.printf "found %d %s:\n" num_errors
|
||||||
(if num_errors = 1 then "LexerError" else "LexerErrors");
|
(if num_errors = 1 then "LexerError" else "LexerErrors");
|
||||||
List.iter LexerError.print es
|
List.iter LexerError.print es
|
||||||
|
| ParserError es ->
|
||||||
|
let num_errors = List.length es in
|
||||||
|
assert (num_errors != 0);
|
||||||
|
Printf.printf "found %d %s:\n" num_errors
|
||||||
|
(if num_errors = 1 then "ParserError" else "ParserErrors");
|
||||||
|
List.iter ParserError.print es
|
||||||
|
|
||||||
let of_lexer_error e = Result.map_error (fun e -> LexerError e) e
|
let of_lexer_error e = Result.map_error (fun e -> LexerError e) e
|
||||||
|
let of_parser_error e = Result.map_error (fun e -> ParserError e) e
|
||||||
|
|
|
||||||
36
lib/expr.ml
Normal file
36
lib/expr.ml
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
type literal = String of string | Number of float | Bool of bool | Nil
|
||||||
|
(* [@@deriving show { with_path = false }] *)
|
||||||
|
|
||||||
|
let show_literal literal =
|
||||||
|
match literal with
|
||||||
|
| String s -> s
|
||||||
|
| Number x -> string_of_float x
|
||||||
|
| Bool b -> string_of_bool b
|
||||||
|
| Nil -> "nil"
|
||||||
|
|
||||||
|
type binary_op = Plus | Minus | Mul | Div | Equal | Less | Greater | LessEqual | GreaterEqual
|
||||||
|
[@@deriving show { with_path = false }]
|
||||||
|
|
||||||
|
type unary_op = Neg | Not [@@deriving show { with_path = false }]
|
||||||
|
|
||||||
|
type expr =
|
||||||
|
| Literal of literal
|
||||||
|
| BinaryExpr of { op : binary_op; left : expr; right : expr }
|
||||||
|
| UnaryExpr of { op : unary_op; expr : expr }
|
||||||
|
(* [@@deriving show { with_path = false }] *)
|
||||||
|
|
||||||
|
let rec show_expr ?(indent = 0) expr =
|
||||||
|
let show_indented = show_expr ~indent:(indent + 2) in
|
||||||
|
let ident_s = String.make indent ' ' in
|
||||||
|
match expr with
|
||||||
|
| Literal literal -> ident_s ^ show_literal literal
|
||||||
|
| BinaryExpr { op; left; right } ->
|
||||||
|
ident_s ^ show_binary_op op ^ "\n" ^ show_indented left ^ "\n" ^ show_indented right
|
||||||
|
| UnaryExpr { op; expr } -> ident_s ^ show_unary_op op ^ "\n" ^ show_indented expr
|
||||||
|
|
||||||
|
let make_string (s : string) = Literal (String s)
|
||||||
|
let make_number (x : float) = Literal (Number x)
|
||||||
|
let make_bool (b : bool) = Literal (Bool b)
|
||||||
|
let make_nil () = Literal Nil
|
||||||
|
let make_binary (op : binary_op) (left : expr) (right : expr) = BinaryExpr { op; left; right }
|
||||||
|
let make_unary (op : unary_op) (expr : expr) = UnaryExpr { op; expr }
|
||||||
11
lib/lexer.ml
11
lib/lexer.ml
|
|
@ -56,7 +56,7 @@ type state = {
|
||||||
}
|
}
|
||||||
|
|
||||||
module State = struct
|
module State = struct
|
||||||
type t = state
|
(* type t = state *)
|
||||||
|
|
||||||
let is_digit c = match c with '0' .. '9' -> true | _ -> false
|
let is_digit c = match c with '0' .. '9' -> true | _ -> false
|
||||||
let is_alpha c = match c with 'a' .. 'z' | 'A' .. 'Z' -> true | _ -> false
|
let is_alpha c = match c with 'a' .. 'z' | 'A' .. 'Z' -> true | _ -> false
|
||||||
|
|
@ -95,10 +95,6 @@ module State = struct
|
||||||
| Some c when f c -> advance_while f (snd (advance state))
|
| Some c when f c -> advance_while f (snd (advance state))
|
||||||
| _ -> state (* EOF or no match *)
|
| _ -> state (* EOF or no match *)
|
||||||
|
|
||||||
let last_char (state : state) : char =
|
|
||||||
assert (state.cur_pos > 0);
|
|
||||||
state.source.[state.cur_pos - 1]
|
|
||||||
|
|
||||||
let append_token (pos : code_pos) (token_type : token_type) (state : state) : state =
|
let append_token (pos : code_pos) (token_type : token_type) (state : state) : state =
|
||||||
(* let pos = { line = state.line; col = state.col } in *)
|
(* let pos = { line = state.line; col = state.col } in *)
|
||||||
{ state with tokens_rev = { token_type; pos } :: state.tokens_rev }
|
{ state with tokens_rev = { token_type; pos } :: state.tokens_rev }
|
||||||
|
|
@ -208,5 +204,6 @@ let tokenize (source : string) : lexer_result =
|
||||||
{ source; start_pos = 0; cur_pos = 0; tokens_rev = []; errors_rev = []; line = 1; col = 0 }
|
{ source; start_pos = 0; cur_pos = 0; tokens_rev = []; errors_rev = []; line = 1; col = 0 }
|
||||||
in
|
in
|
||||||
(* reverse the reversed tokens/errors *)
|
(* reverse the reversed tokens/errors *)
|
||||||
if List.length state.errors_rev = 0 then Ok (List.rev state.tokens_rev)
|
(* if List.length state.errors_rev = 0 then Ok (List.rev state.tokens_rev)
|
||||||
else Error (List.rev state.errors_rev)
|
else Error (List.rev state.errors_rev) *)
|
||||||
|
match state.errors_rev with [] -> Ok (List.rev state.tokens_rev) | es -> Error (List.rev es)
|
||||||
|
|
|
||||||
85
lib/lexer.mli
Normal file
85
lib/lexer.mli
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
type token_type =
|
||||||
|
| LeftParen
|
||||||
|
| RightParen
|
||||||
|
| LeftBrace
|
||||||
|
| RightBrace
|
||||||
|
| Plus
|
||||||
|
| Minus
|
||||||
|
| Star
|
||||||
|
| Slash
|
||||||
|
| Bang
|
||||||
|
| Dot
|
||||||
|
| Comma
|
||||||
|
| Semicolon
|
||||||
|
| Equal
|
||||||
|
| EqualEqual
|
||||||
|
| BangEqual
|
||||||
|
| Greater
|
||||||
|
| Less
|
||||||
|
| GreaterEqual
|
||||||
|
| LessEqual
|
||||||
|
| Identifier of string
|
||||||
|
| String of string
|
||||||
|
| Number of float
|
||||||
|
| And
|
||||||
|
| Class
|
||||||
|
| Else
|
||||||
|
| False
|
||||||
|
| Fun
|
||||||
|
| For
|
||||||
|
| If
|
||||||
|
| Nil
|
||||||
|
| Or
|
||||||
|
| Print
|
||||||
|
| Return
|
||||||
|
| Super
|
||||||
|
| This
|
||||||
|
| True
|
||||||
|
| Var
|
||||||
|
| While
|
||||||
|
| Comment of string
|
||||||
|
| Eof
|
||||||
|
|
||||||
|
val pp_token_type : Format.formatter -> token_type -> unit
|
||||||
|
val show_token_type : token_type -> string
|
||||||
|
val keywords : (string, token_type) Hashtbl.t
|
||||||
|
|
||||||
|
type token = { token_type : token_type; pos : Error.code_pos }
|
||||||
|
|
||||||
|
val show_token : token -> string
|
||||||
|
|
||||||
|
type lexer_result = (token list, Error.lexer_error list) result
|
||||||
|
|
||||||
|
(* type state = {
|
||||||
|
source : string;
|
||||||
|
start_pos : int;
|
||||||
|
cur_pos : int;
|
||||||
|
tokens_rev : token list;
|
||||||
|
errors_rev : Error.lexer_error list;
|
||||||
|
line : int;
|
||||||
|
col : int;
|
||||||
|
}
|
||||||
|
module State : sig
|
||||||
|
type t = state
|
||||||
|
|
||||||
|
val is_digit : char -> bool
|
||||||
|
val is_alpha : char -> bool
|
||||||
|
val is_alphanum : char -> bool
|
||||||
|
val is_identifier : char -> bool
|
||||||
|
val is_at_end : state -> bool
|
||||||
|
val get_lexeme : state -> int -> int -> string
|
||||||
|
val advance : state -> char * state
|
||||||
|
val peek : state -> char option
|
||||||
|
val advance_if : char -> state -> bool * state
|
||||||
|
val advance_until : char -> state -> bool * state
|
||||||
|
val advance_while : (char -> bool) -> state -> state
|
||||||
|
val last_char : state -> char
|
||||||
|
val append_token : Error.code_pos -> token_type -> state -> state
|
||||||
|
val append_error : Error.code_pos -> string -> state -> state
|
||||||
|
val parse_number : state -> state
|
||||||
|
val parse_keyword_or_identifier : state -> state
|
||||||
|
val parse_block_commend : state -> state
|
||||||
|
val tokenize_rec : state -> state
|
||||||
|
end *)
|
||||||
|
|
||||||
|
val tokenize : string -> lexer_result
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
let ( let* ) = Result.bind
|
let ( let* ) = Result.bind
|
||||||
|
|
||||||
module Lexer = Lexer
|
|
||||||
module Error = Error
|
module Error = Error
|
||||||
|
module Expr = Expr
|
||||||
|
module Lexer = Lexer
|
||||||
|
module Parser = Parser
|
||||||
|
|
||||||
type token = Lexer.token
|
type token = Lexer.token
|
||||||
type lox_error = Error.lox_error
|
type lox_error = Error.lox_error
|
||||||
|
|
@ -13,6 +15,8 @@ let run (source : string) : (unit, lox_error) result =
|
||||||
Printf.printf "Got %d tokens\n" (List.length tokens);
|
Printf.printf "Got %d tokens\n" (List.length tokens);
|
||||||
List.iter f tokens;
|
List.iter f tokens;
|
||||||
print_endline "";
|
print_endline "";
|
||||||
|
let* expr = Error.of_parser_error (Parser.parse tokens) in
|
||||||
|
Printf.printf "%s\n" (Expr.show_expr expr);
|
||||||
Ok ()
|
Ok ()
|
||||||
|
|
||||||
let runRepl () : unit =
|
let runRepl () : unit =
|
||||||
|
|
|
||||||
137
lib/parser.ml
Normal file
137
lib/parser.ml
Normal file
|
|
@ -0,0 +1,137 @@
|
||||||
|
open Error
|
||||||
|
open Expr
|
||||||
|
open Lexer
|
||||||
|
|
||||||
|
type parse_result = (expr, parser_error list) result
|
||||||
|
type state = { tokens : token list; errors_rev : parser_error list }
|
||||||
|
|
||||||
|
module State = struct
|
||||||
|
let is_at_end state = (List.hd state.tokens).token_type == Eof
|
||||||
|
|
||||||
|
let append_error msg pos state =
|
||||||
|
let e = { pos; msg } in
|
||||||
|
{ state with errors_rev = e :: state.errors_rev }
|
||||||
|
|
||||||
|
let advance state = { state with tokens = List.tl state.tokens }
|
||||||
|
let peek state = List.hd state.tokens
|
||||||
|
|
||||||
|
let next state =
|
||||||
|
assert (not ((List.hd state.tokens).token_type == Eof));
|
||||||
|
(List.hd state.tokens, advance state)
|
||||||
|
|
||||||
|
let advance_if state tt =
|
||||||
|
if (peek state).token_type == tt then (true, advance state) else (false, state)
|
||||||
|
|
||||||
|
let matches state tts =
|
||||||
|
let f = ( == ) (peek state).token_type in
|
||||||
|
List.fold_left (fun acc tt -> acc || f tt) false tts
|
||||||
|
|
||||||
|
let collect_chain (state : state) (tts : token_type list) higher_prec :
|
||||||
|
(expr * token) array * state =
|
||||||
|
(* ([], state) *)
|
||||||
|
let state_ref = ref state in
|
||||||
|
let out_list_rev = ref [] in
|
||||||
|
while (not (is_at_end !state_ref)) && matches !state_ref tts do
|
||||||
|
let token, state = next !state_ref in
|
||||||
|
let expr, state = higher_prec state in
|
||||||
|
state_ref := state;
|
||||||
|
out_list_rev := (expr, token) :: !out_list_rev
|
||||||
|
done;
|
||||||
|
(Array.of_list (List.rev !out_list_rev), !state_ref)
|
||||||
|
|
||||||
|
let mul_or_div (state : state) : expr * state =
|
||||||
|
let token, state = next state in
|
||||||
|
(make_string @@ show_token_type token.token_type, state)
|
||||||
|
|
||||||
|
let sum_or_diff (state : state) : expr * state =
|
||||||
|
let expr, state = mul_or_div state in
|
||||||
|
(* if (not (is_at_end state)) && matches state [ Plus; Minus ] then
|
||||||
|
let token, state = next state in
|
||||||
|
let expr2, state = sum_or_diff state in
|
||||||
|
let (op : binary_op) =
|
||||||
|
match token.token_type with
|
||||||
|
| Plus -> Plus
|
||||||
|
| Minus -> Minus
|
||||||
|
| _ -> assert false (* should only be here if tt is + - *)
|
||||||
|
in
|
||||||
|
let expr = make_binary op expr expr2 in
|
||||||
|
(expr, state)
|
||||||
|
else (expr, state) *)
|
||||||
|
(* turn expr and state to refs for the loop *)
|
||||||
|
(* Printf.printf "expr: %s\n\n" (show_expr expr);
|
||||||
|
let expr_ref, state_ref = (ref expr, ref state) in
|
||||||
|
while (not (is_at_end !state_ref)) && matches !state_ref [ Plus; Minus ] do
|
||||||
|
let token, state = next !state_ref in
|
||||||
|
let expr2, state = mul_or_div state in
|
||||||
|
let (op : binary_op) =
|
||||||
|
match token.token_type with
|
||||||
|
| Plus -> Plus
|
||||||
|
| Minus -> Minus
|
||||||
|
| _ -> assert false (* should only be here if tt is + - *)
|
||||||
|
in
|
||||||
|
let expr = make_binary op !expr_ref expr2 in
|
||||||
|
(* (expr_ref, state_ref) := (expr, state) *)
|
||||||
|
Printf.printf "expr: %s\n\n" (show_expr expr);
|
||||||
|
expr_ref := expr;
|
||||||
|
state_ref := state
|
||||||
|
done;
|
||||||
|
(!expr_ref, !state_ref) *)
|
||||||
|
let exprs_tokens, state = collect_chain state [ Plus; Minus ] mul_or_div in
|
||||||
|
let f acc (expr, token) =
|
||||||
|
let op : binary_op =
|
||||||
|
match token.token_type with Plus -> Plus | Minus -> Minus | _ -> assert false
|
||||||
|
in
|
||||||
|
make_binary op acc expr
|
||||||
|
in
|
||||||
|
let expr = Array.fold_left f expr exprs_tokens in
|
||||||
|
(expr, state)
|
||||||
|
|
||||||
|
let rec inequality (state : state) : expr * state =
|
||||||
|
let expr, state = sum_or_diff state in
|
||||||
|
if (not (is_at_end state)) && matches state [ Greater; GreaterEqual; Less; LessEqual ] then
|
||||||
|
let token, state = next state in
|
||||||
|
let expr2, state = inequality state in
|
||||||
|
(* TODO: maybe rework to only have Less and Greater as ops; performance? *)
|
||||||
|
let (op : binary_op) =
|
||||||
|
match token.token_type with
|
||||||
|
| Greater -> Greater
|
||||||
|
| Less -> Less
|
||||||
|
| GreaterEqual -> GreaterEqual
|
||||||
|
| LessEqual -> LessEqual
|
||||||
|
| _ -> assert false (* should only be here if tt is > < >= <= *)
|
||||||
|
in
|
||||||
|
let expr = make_binary op expr expr2 in
|
||||||
|
(expr, state)
|
||||||
|
else (expr, state)
|
||||||
|
|
||||||
|
let rec equality (state : state) : expr * state =
|
||||||
|
let expr, state = inequality state in
|
||||||
|
if matches state [ EqualEqual; BangEqual ] then
|
||||||
|
let token, state = next state in
|
||||||
|
let expr2, state = equality state in
|
||||||
|
let expr =
|
||||||
|
match token.token_type with
|
||||||
|
| EqualEqual -> make_binary Equal expr expr2
|
||||||
|
| BangEqual ->
|
||||||
|
let expr = make_binary Equal expr expr2 in
|
||||||
|
make_unary Not expr
|
||||||
|
| _ -> assert false (* should only be here if tt is == != *)
|
||||||
|
in
|
||||||
|
(expr, state)
|
||||||
|
else (expr, state)
|
||||||
|
|
||||||
|
let expression (state : state) : expr * state = equality state
|
||||||
|
end
|
||||||
|
|
||||||
|
let parse (tokens : token list) : parse_result =
|
||||||
|
let state = { tokens; errors_rev = [] } in
|
||||||
|
let expr, state = State.expression state in
|
||||||
|
let state =
|
||||||
|
if not (State.is_at_end state) then
|
||||||
|
let tt = (State.peek state).token_type in
|
||||||
|
let msg = Printf.sprintf "Unexpected %s at end" (show_token_type tt) in
|
||||||
|
State.append_error msg (State.peek state).pos state
|
||||||
|
else state
|
||||||
|
in
|
||||||
|
(* if List.length state.errors_rev != 0 then Ok expr else Error (List.rev state.errors_rev) *)
|
||||||
|
match state.errors_rev with [] -> Ok expr | es -> Error (List.rev es)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue