commit d33023f4351ef7cdc5c6036c12b4156c3b321628
Author: Moritz Gmeiner
Date:   Fri Aug 2 00:10:48 2024 +0200

    init commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f527c17
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+# Dune artifacts
+_build/
+dune.lock
+
+# Local OPAM switch
+_opam/
diff --git a/.ocamlformat b/.ocamlformat
new file mode 100644
index 0000000..a32dae4
--- /dev/null
+++ b/.ocamlformat
@@ -0,0 +1,4 @@
+profile = default
+# profile = ocamlformat
+# profile = janestreet
+margin = 100
diff --git a/MlLox.opam b/MlLox.opam
new file mode 100644
index 0000000..b296c11
--- /dev/null
+++ b/MlLox.opam
@@ -0,0 +1,31 @@
+# This file is generated by dune, edit dune-project instead
+opam-version: "2.0"
+synopsis: "A short synopsis"
+description: "A longer description"
+maintainer: ["Maintainer Name"]
+authors: ["Author Name"]
+license: "LICENSE"
+tags: ["topics" "to describe" "your" "project"]
+homepage: "https://github.com/username/reponame"
+doc: "https://url/to/documentation"
+bug-reports: "https://github.com/username/reponame/issues"
+depends: [
+  "ocaml"
+  "dune" {>= "3.16"}
+  "odoc" {with-doc}
+]
+build: [
+  ["dune" "subst"] {dev}
+  [
+    "dune"
+    "build"
+    "-p"
+    name
+    "-j"
+    jobs
+    "@install"
+    "@runtest" {with-test}
+    "@doc" {with-doc}
+  ]
+]
+dev-repo: "git+https://github.com/username/reponame.git"
diff --git a/bin/dune b/bin/dune
new file mode 100644
index 0000000..cb59a81
--- /dev/null
+++ b/bin/dune
@@ -0,0 +1,4 @@
+(executable
+ (public_name MlLox)
+ (libraries Lox)
+ (name main))
diff --git a/bin/main.ml b/bin/main.ml
new file mode 100644
index 0000000..2dca889
--- /dev/null
+++ b/bin/main.ml
@@ -0,0 +1,18 @@
+(** Print the command-line usage message and exit with status 64. *)
+let printUsage () =
+  print_endline "Usage: jlox [script]";
+  exit 64
+
+(* Entry point: no argument starts the REPL, one argument runs that script. *)
+let () =
+  let argc = Array.length Sys.argv in
+  match argc - 1 with
+  | 0 -> Lox.runRepl ()
+  | 1 ->
+      let path = Sys.argv.(1) in
+      Printf.printf "Running script %s\n" path;
+      (* [with_open_text] closes the channel even if reading raises *)
+      let source = In_channel.with_open_text path In_channel.input_all in
+      let result = Lox.run source in
+      Result.iter_error Lox.Error.print_error result
+  | _ -> printUsage ()
diff --git a/dune-project b/dune-project
new file mode 100644
index 0000000..a0e4244
--- /dev/null
+++ b/dune-project
@@ -0,0 +1,26 @@
+(lang dune 3.16)
+
+(name MlLox)
+
+(generate_opam_files true)
+
+(source
+ (github username/reponame))
+
+(authors "Author Name")
+
+(maintainers "Maintainer Name")
+
+(license LICENSE)
+
+(documentation https://url/to/documentation)
+
+(package
+ (name MlLox)
+ (synopsis "A short synopsis")
+ (description "A longer description")
+ (depends ocaml dune)
+ (tags
+  (topics "to describe" your project)))
+
+; See the complete stanza docs at https://dune.readthedocs.io/en/stable/reference/dune-project/index.html
diff --git a/lib/dune b/lib/dune
new file mode 100644
index 0000000..52c8dad
--- /dev/null
+++ b/lib/dune
@@ -0,0 +1,4 @@
+(library
+ (name Lox)
+ (preprocess
+  (pps ppx_deriving.show)))
diff --git a/lib/error.ml b/lib/error.ml
new file mode 100644
index 0000000..d7a4471
--- /dev/null
+++ b/lib/error.ml
@@ -0,0 +1,34 @@
+(** A position in the source text, as a line and column number. *)
+type code_pos = { line : int; col : int }
+
+(** A lexing failure: where it happened and a human-readable message. *)
+type lexer_error = { pos : code_pos; msg : string }
+
+module LexerError = struct
+  type t = lexer_error
+
+  (** [make pos msg] builds a lexer error for position [pos]. *)
+  let make (pos : code_pos) (msg : string) : lexer_error = { pos; msg }
+
+  (** [print e] writes [e] to standard output on a single line. *)
+  let print (e : lexer_error) =
+    Printf.printf "LexerError at line %d, column %d: %s\n" e.pos.line e.pos.col e.msg
+end
+
+(** All errors the interpreter can report; currently only lexer errors. *)
+type lox_error = LexerError of lexer_error list
+
+(** [print_error e] prints an error count header followed by each error.
+    The error list is expected to be non-empty (checked with [assert]). *)
+let print_error (e : lox_error) =
+  match e with
+  | LexerError es ->
+      let num_errors = List.length es in
+      (* use structural [<>]; [!=] is physical inequality *)
+      assert (num_errors <> 0);
+      Printf.printf "found %d %s:\n" num_errors
+        (if num_errors = 1 then "LexerError" else "LexerErrors");
+      List.iter LexerError.print es
+
+(** [of_lexer_error r] lifts the error side of a lexer result into {!lox_error}. *)
+let of_lexer_error e = Result.map_error (fun e -> LexerError e) e
diff --git a/lib/lexer.ml b/lib/lexer.ml
new file mode 100644
index 0000000..a15fb4a
--- /dev/null
+++ b/lib/lexer.ml
@@ -0,0 +1,218 @@
+open Error
+
+[@@@ocamlformat "disable"]
+
+(** Every kind of token the lexer can produce. *)
+type token_type =
+  | LeftParen | RightParen | LeftBrace | RightBrace
+
+  | Plus | Minus | Star | Slash | Bang
+
+  | Dot | Comma | Semicolon | Equal
+
+  | EqualEqual | BangEqual | Greater | Less | GreaterEqual | LessEqual
+
+  | Identifier of string
+  | String of string
+  | Number of float
+
+  | And | Class | Else | False | Fun | For | If | Nil | Or | Print | Return | Super | This | True
+  | Var | While
+
+  | Comment of string
+
+  | Eof
+[@@deriving show { with_path = false }]
+[@@@ocamlformat "enable"]
+
+(** A token together with the source position where it starts. *)
+type token = { token_type : token_type; pos : code_pos }
+
+(** [show_token token] renders [token] as [<Kind at line:col>]. *)
+let show_token (token : token) =
+  let { line; col } = token.pos in
+  Printf.sprintf "<%s at %d:%d>" (show_token_type token.token_type) line col
+
+(** Result of lexing: all tokens on success, all collected errors otherwise. *)
+type lexer_result = (token list, lexer_error list) result
+
+(** Immutable lexer state that is threaded through every scanning step. *)
+type state = {
+  (* source code *)
+  source : string;
+  (* index in [source] where the token being scanned starts *)
+  start_pos : int;
+  (* index in [source] of the next unread character *)
+  cur_pos : int;
+  (* store tokens and errors in reverse to make building the list more efficient *)
+  tokens_rev : token list;
+  errors_rev : lexer_error list;
+  (* position of current char in source *)
+  line : int;
+  col : int;
+}
+
+module State = struct
+  type t = state
+
+  (* character classes *)
+  let is_digit c = match c with '0' .. '9' -> true | _ -> false
+  let is_alpha c = match c with 'a' .. 'z' | 'A' .. 'Z' -> true | _ -> false
+  let is_alphanum c = is_digit c || is_alpha c
+  let is_identifier c = is_alphanum c || c = '_'
+
+  (** [is_at_end state] is [true] once every character has been consumed. *)
+  let is_at_end (state : state) : bool = state.cur_pos = String.length state.source
+
+  (** [get_lexeme state first last] is the source text from index [first]
+      (inclusive) to [last] (exclusive). *)
+  let get_lexeme (state : state) (first : int) (last : int) =
+    String.sub state.source first (last - first)
+
+  (** [advance state] consumes one character and returns it with the updated
+      state. A tab counts as four columns; a newline starts a new line at
+      column 0. Must not be called at end of input (the index would be out of bounds). *)
+  let advance (state : state) : char * state =
+    let c = state.source.[state.cur_pos] in
+    let state = { state with cur_pos = state.cur_pos + 1 } in
+    let state =
+      match c with
+      | '\t' -> { state with col = state.col + 4 }
+      | '\n' -> { state with line = state.line + 1; col = 0 }
+      | _ -> { state with col = state.col + 1 }
+    in
+    (c, state)
+
+  (** [peek state] is the next character without consuming it, or [None] at
+      end of input. *)
+  let peek (state : state) : char option =
+    if not (is_at_end state) then Some state.source.[state.cur_pos] else None
+
+  (** [advance_if c state] consumes the next character only if it is [c]; the
+      boolean tells whether it did. *)
+  let advance_if (c : char) (state : state) : bool * state =
+    if peek state = Some c then (true, snd (advance state)) else (false, state)
+
+  (** [advance_until c state] consumes characters up to and including the next
+      [c]. The boolean is [false] if the input ended before [c] was found. *)
+  let rec advance_until (c : char) (state : state) : bool * state =
+    if is_at_end state then (false, state)
+    else
+      let c', state = advance state in
+      if c' = c then (true, state) else advance_until c state
+
+  (** [advance_while f state] consumes characters for as long as [f] accepts
+      the next one. *)
+  let rec advance_while (f : char -> bool) (state : state) : state =
+    match peek state with
+    | Some c when f c -> advance_while f (snd (advance state))
+    | _ -> state (* EOF or no match *)
+
+  (** [last_char state] is the most recently consumed character. At least one
+      character must have been consumed (checked with [assert]). *)
+  let last_char (state : state) =
+    assert (state.cur_pos > 0);
+    state.source.[state.cur_pos - 1]
+
+  (** [append_token pos state token_type] records a token located at [pos]. *)
+  let append_token pos state token_type =
+    { state with tokens_rev = { token_type; pos } :: state.tokens_rev }
+
+  (** [append_error pos state msg] records a lexer error located at [pos]. *)
+  let append_error pos state msg =
+    { state with errors_rev = LexerError.make pos msg :: state.errors_rev }
+
+  (** [parse_number ?pos state] scans the rest of a number literal whose first
+      digit has already been consumed (the lexeme starts at [state.start_pos])
+      and records either a [Number] token or an error. [pos] is the position
+      to report; it defaults to the position [state] is at when called. *)
+  let parse_number ?pos (state : state) =
+    let skip c state = snd @@ advance_if c state in
+    let code_pos =
+      match pos with Some p -> p | None -> { line = state.line; col = state.col }
+    in
+    let state =
+      state |> advance_while is_digit |> skip '.' |> advance_while is_digit |> skip 'e'
+      |> advance_while is_digit
+    in
+    let lexeme = get_lexeme state state.start_pos state.cur_pos in
+    match Float.of_string_opt lexeme with
+    | None -> append_error code_pos state (Printf.sprintf "Invalid float literal %s" lexeme)
+    | Some f -> append_token code_pos state (Number f)
+
+  (** [tokenize_rec state] scans tokens until the end of input, then appends
+      [Eof]. Errors are collected in the state instead of stopping the scan. *)
+  let rec tokenize_rec (state : state) : state =
+    (* every token and error is reported at the position where it starts *)
+    let pos = { line = state.line; col = state.col } in
+    let append_token = append_token pos in
+    let append_error = append_error pos in
+    if is_at_end state then append_token state Eof
+    else
+      let state = { state with start_pos = state.cur_pos } in
+      let c, state = advance state in
+      let state =
+        match c with
+        | '(' -> append_token state LeftParen
+        | ')' -> append_token state RightParen
+        | '{' -> append_token state LeftBrace
+        | '}' -> append_token state RightBrace
+        | ',' -> append_token state Comma
+        | ';' -> append_token state Semicolon
+        | '.' -> append_token state Dot
+        | '+' -> append_token state Plus
+        | '-' -> append_token state Minus
+        | '*' -> append_token state Star
+        | '!' ->
+            let b, state = advance_if '=' state in
+            append_token state (if b then BangEqual else Bang)
+        | '=' ->
+            let b, state = advance_if '=' state in
+            append_token state (if b then EqualEqual else Equal)
+        | '<' ->
+            let b, state = advance_if '=' state in
+            append_token state (if b then LessEqual else Less)
+        | '>' ->
+            let b, state = advance_if '=' state in
+            append_token state (if b then GreaterEqual else Greater)
+        | '/' ->
+            let found, state = advance_if '/' state in
+            if not found then append_token state Slash
+            else
+              (* line comment: runs to the end of the line *)
+              let start_pos = state.cur_pos in
+              let _, state = advance_until '\n' state in
+              let lexeme = String.trim @@ get_lexeme state start_pos state.cur_pos in
+              append_token state (Comment lexeme)
+        | '"' ->
+            let found, state = advance_until '"' state in
+            if not found then append_error state "Unterminated string literal"
+            else
+              (* drop the surrounding quotes *)
+              let lexeme = get_lexeme state (state.start_pos + 1) (state.cur_pos - 1) in
+              append_token state (String lexeme)
+        (* pass the start position: the first digit is already consumed here *)
+        | '0' .. '9' -> parse_number ~pos state
+        (* whitespace separates tokens and produces nothing *)
+        | ' ' | '\t' | '\r' | '\n' -> state
+        | c -> append_error state (String.escaped @@ Printf.sprintf "Unexpected character '%c'" c)
+      in
+      tokenize_rec state
+end
+
+(** [tokenize source] lexes [source] into tokens ending with [Eof]. If any
+    lexer error occurred, all errors are returned instead of the tokens. The
+    source is also echoed to standard output. *)
+let tokenize (source : string) : lexer_result =
+  print_endline "Scanning source";
+  print_endline "---";
+  print_endline source;
+  print_endline "---";
+  let state =
+    State.tokenize_rec
+      { source; start_pos = 0; cur_pos = 0; tokens_rev = []; errors_rev = []; line = 1; col = 0 }
+  in
+  (* reverse the reversed tokens/errors *)
+  match state.errors_rev with
+  | [] -> Ok (List.rev state.tokens_rev)
+  | errors_rev -> Error (List.rev errors_rev)
diff --git a/lib/lox.ml b/lib/lox.ml
new file mode 100644
index 0000000..5195f48
--- /dev/null
+++ b/lib/lox.ml
@@ -0,0 +1,30 @@
+let ( let* ) = Result.bind
+
+module Lexer = Lexer
+module Error = Error
+
+type token = Lexer.token
+type lox_error = Error.lox_error
+type lox_value = Nil
+
+(** [run source] lexes [source] and prints the resulting tokens to standard
+    output. Returns the lexer errors if there were any. *)
+let run (source : string) : (unit, lox_error) result =
+  let* tokens = Error.of_lexer_error (Lexer.tokenize source) in
+  let f token = Printf.printf "%s " (Lexer.show_token token) in
+  Printf.printf "Got %d tokens\n" (List.length tokens);
+  List.iter f tokens;
+  print_endline "";
+  Ok ()
+
+(** [runRepl ()] reads lines from standard input and runs each one, printing
+    any errors, until end of input. *)
+let runRepl () : unit =
+  try
+    while true do
+      print_string "> ";
+      let line = read_line () in
+      let result = run line in
+      Result.iter_error Error.print_error result
+    done
+  with End_of_file -> ()
diff --git a/lox/test.lox b/lox/test.lox
new file mode 100644
index 0000000..44ddf16
--- /dev/null
+++ b/lox/test.lox
@@ -0,0 +1,3 @@
+// test comment
+"string"
+{}(
diff --git a/test/dune b/test/dune
new file mode 100644
index 0000000..c3a90fb
--- /dev/null
+++ b/test/dune
@@ -0,0 +1,2 @@
+(test
+ (name test_MlLox))
diff --git a/test/test_MlLox.ml b/test/test_MlLox.ml
new file mode 100644
index 0000000..e69de29