mlox/lib/parser.ml
2024-08-27 02:46:17 +02:00

401 lines
12 KiB
OCaml

let ( let* ) = Result.bind
open Error
open Expr
open Lexer
open Stmt
type parse_result = (stmt_node list, parser_error list) result
type stmt_result = (stmt_node, parser_error) result
type expr_result = (expr_node, parser_error) result
type state = { tokens : token list; is_in_loop : bool }
let with_is_in_loop (f : state ref -> stmt_result) (state : state ref) : stmt_result =
let was_in_loop = !state.is_in_loop in
state := { !state with is_in_loop = true };
let result = f state in
state := { !state with is_in_loop = was_in_loop };
result
let make_state tokens = ref { tokens; is_in_loop = false }
let is_at_end state = (List.hd !state.tokens).token_type = Eof
let advance state = state := { !state with tokens = List.tl !state.tokens }
let next state =
assert (not ((List.hd !state.tokens).token_type = Eof));
let token = List.hd !state.tokens in
advance state;
token
let peek state = List.hd !state.tokens
let peek_tt (state : state ref) : token_type = (peek state).token_type
let cur_pos state = (peek state).pos
let advance_if state tt =
if peek_tt state = tt then (
advance state;
true)
else false
let consume state tt =
if advance_if state tt then Ok ()
else
let pos = cur_pos state in
let tt' = peek_tt state in
let msg = Printf.sprintf "Expected %s, but got %s" (show_token_type tt) (show_token_type tt') in
ParserError.make pos msg |> Result.error
let consume_identifier state =
match peek_tt state with
| Identifier name ->
advance state;
Ok name
| tt ->
let pos = cur_pos state in
let msg = Printf.sprintf "Expected identifier, but got %s" (show_token_type tt) in
ParserError.make pos msg |> Result.error
let matches state tts =
let f = ( = ) (peek_tt state) in
Array.fold_left (fun acc tt -> acc || f tt) false tts
let collect_chain (state : state ref) (tts : token_type array)
(higher_prec : state ref -> expr_result) : ((expr_node * token) list, parser_error) result =
let rec collect_chain_rec (acc : (expr_node * token) list) =
if (not (is_at_end state)) && matches state tts then
let token = next state in
let* expr = higher_prec state in
let acc = (expr, token) :: acc in
collect_chain_rec acc
else Ok acc
in
collect_chain_rec [] |> Result.map (fun l -> List.rev l)
let primary (state : state ref) : expr_result =
let pos = cur_pos state in
match peek_tt state with
| Number x ->
advance state;
make_number pos x |> Result.ok
| String s ->
advance state;
make_string pos s |> Result.ok
| True ->
advance state;
make_bool pos true |> Result.ok
| False ->
advance state;
make_bool pos false |> Result.ok
| Nil ->
advance state;
make_nil pos |> Result.ok
| Identifier name ->
advance state;
make_variable pos name |> Result.ok
| tt ->
advance state;
let msg = Printf.sprintf "Expected valid expression, got %s instead" (show_token_type tt) in
Error { msg; pos }
let rec grouping (state : state ref) : expr_result =
if matches state [| LeftParen |] then (
advance state;
let* expr = expression state in
if advance_if state RightParen then Ok expr (* expect a ) here *)
else
let pos = cur_pos state in
let tt = peek_tt state in
let msg = Printf.sprintf "Expected RightParen, got %s instead" (show_token_type tt) in
Error { pos; msg })
else primary state
and neg_not (state : state ref) : expr_result =
if matches state [| Bang; Minus |] then
let token = next state in
let pos = token.pos in
let* expr = neg_not state in
let op =
match token.token_type with
| Bang -> Not
| Minus -> Neg
| _ -> assert false (* should only be here if tt is - ! *)
in
let expr = make_unary pos op expr in
Ok expr
else grouping state
and mul_or_div (state : state ref) : expr_result =
let* expr = neg_not state in
let* exprs_tokens = collect_chain state [| Star; Slash |] neg_not in
let f acc (expr, (token : token)) =
let pos = token.pos in
let op : binary_op =
match token.token_type with
| Star -> Mul
| Slash -> Div
| _ -> assert false (* should only be here if tt is * / *)
in
make_binary pos op acc expr
in
let expr = List.fold_left f expr exprs_tokens in
Ok expr
and sum_or_diff (state : state ref) : expr_result =
let* expr = mul_or_div state in
let* exprs_tokens = collect_chain state [| Plus; Minus |] mul_or_div in
let f acc (expr, (token : token)) =
let pos = token.pos in
let op : binary_op =
match token.token_type with
| Plus -> Plus
| Minus -> Minus
| _ -> assert false (* should only be here if tt is + - *)
in
make_binary pos op acc expr
in
let expr = List.fold_left f expr exprs_tokens in
Ok expr
and inequality (state : state ref) : expr_result =
(* TODO: maybe rework to only have Less and Greater as ops; performance? *)
let* expr = sum_or_diff state in
let* exprs_tokens =
collect_chain state [| Greater; GreaterEqual; Less; LessEqual |] sum_or_diff
in
let f acc (expr, (token : token)) =
let pos = token.pos in
let (op : binary_op) =
match token.token_type with
| Greater -> Greater
| Less -> Less
| GreaterEqual -> GreaterEqual
| LessEqual -> LessEqual
| _ -> assert false (* should only be here if tt is > < >= <= *)
in
make_binary pos op acc expr
in
let expr = List.fold_left f expr exprs_tokens in
Ok expr
and equality (state : state ref) : expr_result =
let* expr = inequality state in
let* exprs_tokens = collect_chain state [| EqualEqual; BangEqual |] inequality in
let f acc (expr, (token : token)) =
let pos = token.pos in
match token.token_type with
| EqualEqual -> make_binary pos Equal acc expr
| BangEqual ->
let expr = make_binary pos Equal acc expr in
make_unary pos Not expr
| _ -> assert false (* should only be here if tt is == != *)
in
let expr = List.fold_left f expr exprs_tokens in
Ok expr
and logical_and (state : state ref) : expr_result =
let* expr = equality state in
let* exprs_tokens = collect_chain state [| And |] equality in
let f acc (expr, (token : token)) =
let pos = token.pos in
assert (token.token_type = And);
make_logical pos And acc expr
in
let expr = List.fold_left f expr exprs_tokens in
Ok expr
and logical_or (state : state ref) : expr_result =
let* expr = logical_and state in
let* exprs_tokens = collect_chain state [| Or |] logical_and in
let f acc (expr, (token : token)) =
let pos = token.pos in
assert (token.token_type = Or);
make_logical pos Or acc expr
in
let expr = List.fold_left f expr exprs_tokens in
Ok expr
and assignment (state : state ref) : expr_result =
let* expr = logical_or state in
if Equal = peek_tt state then
let pos = (next state).pos in
let* rhs = assignment state in
match expr.expr with
| Variable name -> make_assignment pos name rhs |> Result.ok
| _ ->
let msg = "Invalid assignment target" in
ParserError.make pos msg |> Result.error
else Ok expr
and expression (state : state ref) : expr_result = assignment state
let rec block (state : state ref) : stmt_result =
let pos = cur_pos state in
let* _ = consume state LeftBrace in
let rec collect_stmts state =
if is_at_end state then
let msg = "Unterminated block" in
ParserError.make pos msg |> Result.error
else
match peek_tt state with
| RightBrace -> Ok []
| _ ->
let* stmt = declaration state in
let* tail = collect_stmts state in
Ok (stmt :: tail)
in
let* stmts = collect_stmts state in
let* _ = consume state RightBrace in
make_block pos stmts |> Result.ok
and if_then_else (state : state ref) : stmt_result =
let pos = cur_pos state in
let* _ = consume state If in
let* _ = consume state LeftParen in
let* cond = expression state in
let* _ = consume state RightParen in
let* then_ = statement state in
let* (else_ : stmt_node option) =
if advance_if state Else then statement state |> Result.map Option.some else Ok None
in
make_if pos cond then_ else_ |> Result.ok
and while_loop (state : state ref) : stmt_result =
let pos = cur_pos state in
let* _ = consume state While in
let* _ = consume state LeftParen in
let* cond = expression state in
let* _ = consume state RightParen in
(* let was_in_loop = !state.is_in_loop in
state := { !state with is_in_loop = true };
let body = statement state in
state := { !state with is_in_loop = was_in_loop };
let* body = body in *)
let* body = with_is_in_loop statement state in
make_while pos cond body |> Result.ok
and for_loop (state : state ref) : stmt_result =
let for_pos = cur_pos state in
let* _ = consume state For in
let* _ = consume state LeftParen in
let* init =
match peek_tt state with
| Semicolon ->
advance state;
Ok None
| Var -> var_declaration state |> Result.map Option.some
| _ -> expr_stmt state |> Result.map Option.some
in
let* cond =
match peek_tt state with
| Semicolon ->
let pos = cur_pos state in
Ok (make_bool pos true)
| _ -> expression state
in
(* expression has no final semicolon, so we need to consume it *)
let* _ = consume state Semicolon in
let* update =
match peek_tt state with
| RightParen -> Ok None
| _ -> expression state |> Result.map Option.some
in
let* _ = consume state RightParen in
(* let was_in_loop = !state.is_in_loop in
state := { !state with is_in_loop = true };
let body = statement state in
state := { !state with is_in_loop = was_in_loop };
let* body = body in *)
let* body = with_is_in_loop statement state in
let body =
match update with
| Some update ->
let update_stmt = make_expr_stmt update.pos update in
make_block body.pos [ body; update_stmt ]
| None -> body
in
let loop = make_while for_pos cond body in
let outer_block =
match init with Some init -> make_block for_pos [ init; loop ] | None -> loop
in
Ok outer_block
and expr_stmt (state : state ref) : stmt_result =
let pos = cur_pos state in
let* expr = expression state in
let* _ = consume state Semicolon in
let stmt = make_expr_stmt pos expr in
Ok stmt
and statement (state : state ref) : stmt_result =
let pos = cur_pos state in
match peek_tt state with
| Break ->
if !state.is_in_loop then (
advance state;
let* _ = consume state Semicolon in
make_break pos |> Result.ok)
else
let msg = "Can use break only in loops" in
ParserError.make pos msg |> Result.error
| Print ->
advance state;
let* expr = expression state in
let* _ = consume state Semicolon in
let stmt = make_print pos expr in
Ok stmt
| LeftBrace -> block state
| If -> if_then_else state
| While -> while_loop state
| For -> for_loop state
| _ -> expr_stmt state
and var_declaration (state : state ref) : stmt_result =
let pos = cur_pos state in
(* consume var token *)
assert ((next state).token_type = Var);
let* name = consume_identifier state in
let* init =
if Equal = peek_tt state then
(* found =, parsing initialiser *)
let* _ = consume state Equal in
let* init = expression state in
Ok (Some init)
else (* no initialiser, default to nil *)
Ok None
in
let* _ = consume state Semicolon in
make_var_decl pos name init |> Result.ok
and declaration (state : state ref) : stmt_result =
match peek_tt state with Var -> var_declaration state | _ -> statement state
let rec synchronise (state : state ref) =
match peek_tt state with
| Semicolon -> advance state
| Class | Fun | Var | For | If | While | Print | Return | Eof -> ()
| _ ->
advance state;
synchronise state
let rec parse_impl (state : state ref) : parse_result =
if peek_tt state = Eof then Ok []
else
let result = declaration state in
match result with
| Ok stmt ->
let* stmts = parse_impl state in
Ok (stmt :: stmts)
| Error e -> (
synchronise state;
if peek_tt state = Eof then Error [ e ]
else
let tail_result = parse_impl state in
match tail_result with Ok _ -> Error [ e ] | Error es -> Error (e :: es))
let parse (tokens : token list) : parse_result =
(* filter out all the comment tokens *)
let tokens =
List.filter (fun tok -> match tok.token_type with Comment _ -> false | _ -> true) tokens
in
let state = make_state tokens in
parse_impl state