From 485ecebdf37a0c8498e182c76c0c2679eadd2beb Mon Sep 17 00:00:00 2001 From: Moritz Gmeiner Date: Wed, 14 Aug 2024 14:15:01 +0200 Subject: [PATCH] reworked parser error handling now based around result; since each parsing step can emit at most one error until synchronisation is triggered we can store the errors in the result. also state is now stored in a ref variable. the copy-and-update is more pure, but really messes up the let* expression for the expr/error results. --- lib/parser.ml | 312 ++++++++++++++++++++++++++------------------------ 1 file changed, 164 insertions(+), 148 deletions(-) diff --git a/lib/parser.ml b/lib/parser.ml index ab384f3..bd40a70 100644 --- a/lib/parser.ml +++ b/lib/parser.ml @@ -1,167 +1,183 @@ +let ( let* ) = Result.bind + open Error open Expr open Lexer type parse_result = (expr, parser_error list) result +type expr_result = (expr, parser_error) result type state = { tokens : token list; errors_rev : parser_error list } -module State = struct - let is_at_end state = (List.hd state.tokens).token_type == Eof +let is_at_end state = (List.hd !state.tokens).token_type == Eof - let append_error msg pos state = - let e = { pos; msg } in - { state with errors_rev = e :: state.errors_rev } +let append_error msg pos state = + let e = { pos; msg } in + { state with errors_rev = e :: state.errors_rev } - let advance state = { state with tokens = List.tl state.tokens } - let peek state = List.hd state.tokens +let advance state = state := { !state with tokens = List.tl !state.tokens } +let peek state = List.hd !state.tokens - let next state = - assert (not ((List.hd state.tokens).token_type == Eof)); - (List.hd state.tokens, advance state) +let next state = + assert (not ((List.hd !state.tokens).token_type == Eof)); + let token = List.hd !state.tokens in + advance state; + token - let advance_if state tt = - if (peek state).token_type == tt then (true, advance state) else (false, state) +let advance_if state tt = + if (peek state).token_type == tt then ( + advance state; + true) + else false - let consume state tt = - if (peek state).token_type == tt then advance state +let matches state tts = + let f = ( == ) (peek state).token_type in + Array.fold_left (fun acc tt -> acc || f tt) false tts + +let cur_pos state = (peek state).pos + +let collect_chain (state : state ref) (tts : token_type array) + (higher_prec : state ref -> expr_result) : ((expr * token) array, parser_error) result = + (* ([], state) *) + (* let out_list_rev = ref [] in + while (not (is_at_end state)) && matches state tts do + let token = next state in + let expr = higher_prec state in + out_list_rev := (expr, token) :: !out_list_rev + done; + Ok (Array.of_list (List.rev !out_list_rev)) *) + let rec collect_chain_rec (acc : (expr * token) list) = + if (not (is_at_end state)) && matches state tts then + let token = next state in + let* expr = higher_prec state in + let acc = (expr, token) :: acc in + collect_chain_rec acc + else Ok acc + in + (* match collect_chain_rec [] with Ok l -> Ok (Array.of_list (List.rev l)) | Error e -> Error e *) + collect_chain_rec [] |> Result.map (fun l -> Array.of_list (List.rev l)) + +let primary (state : state ref) : expr_result = + match (peek state).token_type with + | Number x -> Ok (make_number x) + | String s -> Ok (make_string s) + | True -> Ok (make_bool true) + | False -> Ok (make_bool false) + | Nil -> Ok (make_nil ()) + | tt -> + let msg = Printf.sprintf "Unexpected %s, expected valid expression" (show_token_type tt) in + let pos = (peek state).pos in + Error { msg; pos } + +let rec grouping (state : state ref) : expr_result = + if matches state [| LeftParen |] then ( + advance state; + let* expr = expression state in + if advance_if state RightParen then Ok expr else - let state = - append_error - (Printf.sprintf "Unexpected %s, expected %s" - (show_token_type (peek state).token_type) - (show_token_type tt)) - (peek state).pos state - in - state + let pos = cur_pos state in + let tt = (peek state).token_type in + let msg = Printf.sprintf "Expected RightParen, got %s instead" (show_token_type tt) in + Error { pos; msg }) + else primary state - let matches state tts = - let f = ( == ) (peek state).token_type in - Array.fold_left (fun acc tt -> acc || f tt) false tts - - let collect_chain (state : state) (tts : token_type array) higher_prec : - (expr * token) array * state = - (* ([], state) *) - let state_ref = ref state in - let out_list_rev = ref [] in - while (not (is_at_end !state_ref)) && matches !state_ref tts do - let token, state = next !state_ref in - let expr, state = higher_prec state in - state_ref := state; - out_list_rev := (expr, token) :: !out_list_rev - done; - (Array.of_list (List.rev !out_list_rev), !state_ref) - - let primary (state : state) : expr * state = - match (peek state).token_type with - | Number x -> (make_number x, advance state) - | String s -> (make_string s, advance state) - | True -> (make_bool true, advance state) - | False -> (make_bool false, advance state) - | Nil -> (make_nil (), advance state) - | tt -> - ( make_nil (), - append_error - (Printf.sprintf "Unexpected %s, expected valid expression" (show_token_type tt)) - (peek state).pos state ) - - let rec grouping (state : state) : expr * state = - if matches state [| LeftParen |] then - let state = advance state in - let expr, state = expression state in - let state = consume state RightParen in - (expr, state) - else primary state - - and neg_not (state : state) : expr * state = - if matches state [| Bang; Minus |] then - let token, state = next state in - let expr, state = neg_not state in - let op = - match token.token_type with - | Bang -> Not - | Minus -> Neg - | _ -> assert false (* should only be here if tt is - ! *) - in - let expr = make_unary op expr in - (expr, state) - else grouping state - - and mul_or_div (state : state) : expr * state = - let expr, state = neg_not state in - let exprs_tokens, state = collect_chain state [| Star; Slash |] neg_not in - let f acc (expr, token) = - let op : binary_op = - match token.token_type with - | Star -> Mul - | Slash -> Div - | _ -> assert false (* should only be here if tt is * / *) - in - make_binary op acc expr - in - let expr = Array.fold_left f expr exprs_tokens in - (expr, state) - - and sum_or_diff (state : state) : expr * state = - let expr, state = mul_or_div state in - let exprs_tokens, state = collect_chain state [| Plus; Minus |] mul_or_div in - let f acc (expr, token) = - let op : binary_op = - match token.token_type with - | Plus -> Plus - | Minus -> Minus - | _ -> assert false (* should only be here if tt is + - *) - in - make_binary op acc expr - in - let expr = Array.fold_left f expr exprs_tokens in - (expr, state) - - and inequality (state : state) : expr * state = - (* TODO: maybe rework to only have Less and Greater as ops; performance? *) - let expr, state = sum_or_diff state in - let exprs_tokens, state = - collect_chain state [| Greater; GreaterEqual; Less; LessEqual |] sum_or_diff - in - let f acc (expr, token) = - let (op : binary_op) = - match token.token_type with - | Greater -> Greater - | Less -> Less - | GreaterEqual -> GreaterEqual - | LessEqual -> LessEqual - | _ -> assert false (* should only be here if tt is > < >= <= *) - in - make_binary op acc expr - in - let expr = Array.fold_left f expr exprs_tokens in - (expr, state) - - and equality (state : state) : expr * state = - let expr, state = inequality state in - let exprs_tokens, state = collect_chain state [| EqualEqual; BangEqual |] inequality in - let f acc (expr, token) = +and neg_not (state : state ref) : expr_result = + if matches state [| Bang; Minus |] then + let token = next state in + let* expr = neg_not state in + let op = match token.token_type with - | EqualEqual -> make_binary Equal acc expr - | BangEqual -> - let expr = make_binary Equal acc expr in - make_unary Not expr - | _ -> assert false (* should only be here if tt is == != *) + | Bang -> Not + | Minus -> Neg + | _ -> assert false (* should only be here if tt is - ! *) in - let expr = Array.fold_left f expr exprs_tokens in - (expr, state) + let expr = make_unary op expr in + Ok expr + else grouping state - and expression (state : state) : expr * state = equality state -end +and mul_or_div (state : state ref) : expr_result = + let* expr = neg_not state in + let* exprs_tokens = collect_chain state [| Star; Slash |] neg_not in + let f acc (expr, token) = + let op : binary_op = + match token.token_type with + | Star -> Mul + | Slash -> Div + | _ -> assert false (* should only be here if tt is * / *) + in + make_binary op acc expr + in + let expr = Array.fold_left f expr exprs_tokens in + Ok expr + +and sum_or_diff (state : state ref) : expr_result = + let* expr = mul_or_div state in + let* exprs_tokens = collect_chain state [| Plus; Minus |] mul_or_div in + let f acc (expr, token) = + let op : binary_op = + match token.token_type with + | Plus -> Plus + | Minus -> Minus + | _ -> assert false (* should only be here if tt is + - *) + in + make_binary op acc expr + in + let expr = Array.fold_left f expr exprs_tokens in + Ok expr + +and inequality (state : state ref) : expr_result = + (* TODO: maybe rework to only have Less and Greater as ops; performance? *) + let* expr = sum_or_diff state in + let* exprs_tokens = + collect_chain state [| Greater; GreaterEqual; Less; LessEqual |] sum_or_diff + in + let f acc (expr, token) = + let (op : binary_op) = + match token.token_type with + | Greater -> Greater + | Less -> Less + | GreaterEqual -> GreaterEqual + | LessEqual -> LessEqual + | _ -> assert false (* should only be here if tt is > < >= <= *) + in + make_binary op acc expr + in + let expr = Array.fold_left f expr exprs_tokens in + Ok expr + +and equality (state : state ref) : expr_result = + let* expr = inequality state in + let* exprs_tokens = collect_chain state [| EqualEqual; BangEqual |] inequality in + let f acc (expr, token) = + match token.token_type with + | EqualEqual -> make_binary Equal acc expr + | BangEqual -> + let expr = make_binary Equal acc expr in + make_unary Not expr + | _ -> assert false (* should only be here if tt is == != *) + in + let expr = Array.fold_left f expr exprs_tokens in + Ok expr + +and expression (state : state ref) : expr_result = equality state + +let rec synchronise (state : state ref) = + match (peek state).token_type with + | Semicolon -> advance state + | Class | Fun | Var | For | If | While | Print | Return | Eof -> () + | _ -> + advance state; + synchronise state let parse (tokens : token list) : parse_result = - let state = { tokens; errors_rev = [] } in - let expr, state = State.expression state in - let state = - if not (State.is_at_end state) then - let tt = (State.peek state).token_type in - let msg = Printf.sprintf "Unexpected %s at end" (show_token_type tt) in - State.append_error msg (State.peek state).pos state - else state - in - (* if List.length state.errors_rev != 0 then Ok expr else Error (List.rev state.errors_rev) *) - match state.errors_rev with [] -> Ok expr | es -> Error (List.rev es) + let state = ref { tokens; errors_rev = [] } in + expression state |> Result.map_error (fun e -> [ e ]) +(* let expr = State.expression state in + let state = + if not (State.is_at_end state) then + let tt = (State.peek state).token_type in + let msg = Printf.sprintf "Unexpected %s at end" (show_token_type tt) in + State.append_error msg (State.peek state).pos state + else state + in + (* if List.length state.errors_rev != 0 then Ok expr else Error (List.rev state.errors_rev) *) + match state.errors_rev with [] -> Ok expr | es -> Error (List.rev es) *)