reworked parser error handling

now based around result; since each parsing step can emit at most one
error until synchronisation is triggered we can store the errors in the
result.

also state is now stored in a ref variable. the copy-and-update is more
pure, but really messes up the let* expression for the expr/error
results.
This commit is contained in:
Moritz Gmeiner 2024-08-14 14:15:01 +02:00
commit 485ecebdf3

View file

@ -1,167 +1,183 @@
let ( let* ) = Result.bind
open Error open Error
open Expr open Expr
open Lexer open Lexer
type parse_result = (expr, parser_error list) result type parse_result = (expr, parser_error list) result
type expr_result = (expr, parser_error) result
type state = { tokens : token list; errors_rev : parser_error list } type state = { tokens : token list; errors_rev : parser_error list }
module State = struct let is_at_end state = (List.hd !state.tokens).token_type == Eof
let is_at_end state = (List.hd state.tokens).token_type == Eof
let append_error msg pos state = let append_error msg pos state =
let e = { pos; msg } in let e = { pos; msg } in
{ state with errors_rev = e :: state.errors_rev } { state with errors_rev = e :: state.errors_rev }
let advance state = { state with tokens = List.tl state.tokens } let advance state = state := { !state with tokens = List.tl !state.tokens }
let peek state = List.hd state.tokens let peek state = List.hd !state.tokens
let next state = let next state =
assert (not ((List.hd state.tokens).token_type == Eof)); assert (not ((List.hd !state.tokens).token_type == Eof));
(List.hd state.tokens, advance state) let token = List.hd !state.tokens in
advance state;
token
let advance_if state tt = let advance_if state tt =
if (peek state).token_type == tt then (true, advance state) else (false, state) if (peek state).token_type == tt then (
advance state;
true)
else false
let consume state tt = let matches state tts =
if (peek state).token_type == tt then advance state let f = ( == ) (peek state).token_type in
Array.fold_left (fun acc tt -> acc || f tt) false tts
let cur_pos state = (peek state).pos
let collect_chain (state : state ref) (tts : token_type array)
(higher_prec : state ref -> expr_result) : ((expr * token) array, parser_error) result =
(* ([], state) *)
(* let out_list_rev = ref [] in
while (not (is_at_end state)) && matches state tts do
let token = next state in
let expr = higher_prec state in
out_list_rev := (expr, token) :: !out_list_rev
done;
Ok (Array.of_list (List.rev !out_list_rev)) *)
let rec collect_chain_rec (acc : (expr * token) list) =
if (not (is_at_end state)) && matches state tts then
let token = next state in
let* expr = higher_prec state in
let acc = (expr, token) :: acc in
collect_chain_rec acc
else Ok acc
in
(* match collect_chain_rec [] with Ok l -> Ok (Array.of_list (List.rev l)) | Error e -> Error e *)
collect_chain_rec [] |> Result.map (fun l -> Array.of_list (List.rev l))
let primary (state : state ref) : expr_result =
match (peek state).token_type with
| Number x -> Ok (make_number x)
| String s -> Ok (make_string s)
| True -> Ok (make_bool true)
| False -> Ok (make_bool false)
| Nil -> Ok (make_nil ())
| tt ->
let msg = Printf.sprintf "Unexpected %s, expected valid expression" (show_token_type tt) in
let pos = (peek state).pos in
Error { msg; pos }
let rec grouping (state : state ref) : expr_result =
if matches state [| LeftParen |] then (
advance state;
let* expr = expression state in
if advance_if state RightParen then Ok expr
else else
let state = let pos = cur_pos state in
append_error let tt = (peek state).token_type in
(Printf.sprintf "Unexpected %s, expected %s" let msg = Printf.sprintf "Expected RightParen, got %s instead" (show_token_type tt) in
(show_token_type (peek state).token_type) Error { pos; msg })
(show_token_type tt)) else primary state
(peek state).pos state
in
state
let matches state tts = and neg_not (state : state ref) : expr_result =
let f = ( == ) (peek state).token_type in if matches state [| Bang; Minus |] then
Array.fold_left (fun acc tt -> acc || f tt) false tts let token = next state in
let* expr = neg_not state in
let collect_chain (state : state) (tts : token_type array) higher_prec : let op =
(expr * token) array * state =
(* ([], state) *)
let state_ref = ref state in
let out_list_rev = ref [] in
while (not (is_at_end !state_ref)) && matches !state_ref tts do
let token, state = next !state_ref in
let expr, state = higher_prec state in
state_ref := state;
out_list_rev := (expr, token) :: !out_list_rev
done;
(Array.of_list (List.rev !out_list_rev), !state_ref)
let primary (state : state) : expr * state =
match (peek state).token_type with
| Number x -> (make_number x, advance state)
| String s -> (make_string s, advance state)
| True -> (make_bool true, advance state)
| False -> (make_bool false, advance state)
| Nil -> (make_nil (), advance state)
| tt ->
( make_nil (),
append_error
(Printf.sprintf "Unexpected %s, expected valid expression" (show_token_type tt))
(peek state).pos state )
let rec grouping (state : state) : expr * state =
if matches state [| LeftParen |] then
let state = advance state in
let expr, state = expression state in
let state = consume state RightParen in
(expr, state)
else primary state
and neg_not (state : state) : expr * state =
if matches state [| Bang; Minus |] then
let token, state = next state in
let expr, state = neg_not state in
let op =
match token.token_type with
| Bang -> Not
| Minus -> Neg
| _ -> assert false (* should only be here if tt is - ! *)
in
let expr = make_unary op expr in
(expr, state)
else grouping state
and mul_or_div (state : state) : expr * state =
let expr, state = neg_not state in
let exprs_tokens, state = collect_chain state [| Star; Slash |] neg_not in
let f acc (expr, token) =
let op : binary_op =
match token.token_type with
| Star -> Mul
| Slash -> Div
| _ -> assert false (* should only be here if tt is * / *)
in
make_binary op acc expr
in
let expr = Array.fold_left f expr exprs_tokens in
(expr, state)
and sum_or_diff (state : state) : expr * state =
let expr, state = mul_or_div state in
let exprs_tokens, state = collect_chain state [| Plus; Minus |] mul_or_div in
let f acc (expr, token) =
let op : binary_op =
match token.token_type with
| Plus -> Plus
| Minus -> Minus
| _ -> assert false (* should only be here if tt is + - *)
in
make_binary op acc expr
in
let expr = Array.fold_left f expr exprs_tokens in
(expr, state)
and inequality (state : state) : expr * state =
(* TODO: maybe rework to only have Less and Greater as ops; performance? *)
let expr, state = sum_or_diff state in
let exprs_tokens, state =
collect_chain state [| Greater; GreaterEqual; Less; LessEqual |] sum_or_diff
in
let f acc (expr, token) =
let (op : binary_op) =
match token.token_type with
| Greater -> Greater
| Less -> Less
| GreaterEqual -> GreaterEqual
| LessEqual -> LessEqual
| _ -> assert false (* should only be here if tt is > < >= <= *)
in
make_binary op acc expr
in
let expr = Array.fold_left f expr exprs_tokens in
(expr, state)
and equality (state : state) : expr * state =
let expr, state = inequality state in
let exprs_tokens, state = collect_chain state [| EqualEqual; BangEqual |] inequality in
let f acc (expr, token) =
match token.token_type with match token.token_type with
| EqualEqual -> make_binary Equal acc expr | Bang -> Not
| BangEqual -> | Minus -> Neg
let expr = make_binary Equal acc expr in | _ -> assert false (* should only be here if tt is - ! *)
make_unary Not expr
| _ -> assert false (* should only be here if tt is == != *)
in in
let expr = Array.fold_left f expr exprs_tokens in let expr = make_unary op expr in
(expr, state) Ok expr
else grouping state
and expression (state : state) : expr * state = equality state and mul_or_div (state : state ref) : expr_result =
end let* expr = neg_not state in
let* exprs_tokens = collect_chain state [| Star; Slash |] neg_not in
let f acc (expr, token) =
let op : binary_op =
match token.token_type with
| Star -> Mul
| Slash -> Div
| _ -> assert false (* should only be here if tt is * / *)
in
make_binary op acc expr
in
let expr = Array.fold_left f expr exprs_tokens in
Ok expr
and sum_or_diff (state : state ref) : expr_result =
let* expr = mul_or_div state in
let* exprs_tokens = collect_chain state [| Plus; Minus |] mul_or_div in
let f acc (expr, token) =
let op : binary_op =
match token.token_type with
| Plus -> Plus
| Minus -> Minus
| _ -> assert false (* should only be here if tt is + - *)
in
make_binary op acc expr
in
let expr = Array.fold_left f expr exprs_tokens in
Ok expr
and inequality (state : state ref) : expr_result =
(* TODO: maybe rework to only have Less and Greater as ops; performance? *)
let* expr = sum_or_diff state in
let* exprs_tokens =
collect_chain state [| Greater; GreaterEqual; Less; LessEqual |] sum_or_diff
in
let f acc (expr, token) =
let (op : binary_op) =
match token.token_type with
| Greater -> Greater
| Less -> Less
| GreaterEqual -> GreaterEqual
| LessEqual -> LessEqual
| _ -> assert false (* should only be here if tt is > < >= <= *)
in
make_binary op acc expr
in
let expr = Array.fold_left f expr exprs_tokens in
Ok expr
and equality (state : state ref) : expr_result =
let* expr = inequality state in
let* exprs_tokens = collect_chain state [| EqualEqual; BangEqual |] inequality in
let f acc (expr, token) =
match token.token_type with
| EqualEqual -> make_binary Equal acc expr
| BangEqual ->
let expr = make_binary Equal acc expr in
make_unary Not expr
| _ -> assert false (* should only be here if tt is == != *)
in
let expr = Array.fold_left f expr exprs_tokens in
Ok expr
and expression (state : state ref) : expr_result = equality state
let rec synchronise (state : state ref) =
match (peek state).token_type with
| Semicolon -> advance state
| Class | Fun | Var | For | If | While | Print | Return | Eof -> ()
| _ ->
advance state;
synchronise state
let parse (tokens : token list) : parse_result = let parse (tokens : token list) : parse_result =
let state = { tokens; errors_rev = [] } in let state = ref { tokens; errors_rev = [] } in
let expr, state = State.expression state in expression state |> Result.map_error (fun e -> [ e ])
let state = (* let expr = State.expression state in
if not (State.is_at_end state) then let state =
let tt = (State.peek state).token_type in if not (State.is_at_end state) then
let msg = Printf.sprintf "Unexpected %s at end" (show_token_type tt) in let tt = (State.peek state).token_type in
State.append_error msg (State.peek state).pos state let msg = Printf.sprintf "Unexpected %s at end" (show_token_type tt) in
else state State.append_error msg (State.peek state).pos state
in else state
(* if List.length state.errors_rev != 0 then Ok expr else Error (List.rev state.errors_rev) *) in
match state.errors_rev with [] -> Ok expr | es -> Error (List.rev es) (* if List.length state.errors_rev != 0 then Ok expr else Error (List.rev state.errors_rev) *)
match state.errors_rev with [] -> Ok expr | es -> Error (List.rev es) *)