diff --git a/library/init/lean/parser/reader/basic.lean b/library/init/lean/parser/reader/basic.lean index ff1c10f097..58fa0d668b 100644 --- a/library/init/lean/parser/reader/basic.lean +++ b/library/init/lean/parser/reader/basic.lean @@ -18,7 +18,11 @@ namespace parser structure token_config := («prefix» : string) -- reading a token should not need any state -(token_reader : option (position → parser syntax) := none) +/- An optional parser that is activated after matching `prefix`. + It should return a syntax tree with a "hole" for the + `source_info` surrounding the token, which will be supplied + by the `token` reader. -/ +(token_reader : option (parser (source_info → syntax)) := none) structure reader_state := (tokens : list token_config) @@ -36,7 +40,7 @@ structure reader := (read : read_m syntax) (tokens : list token_config := []) -namespace reader +namespace read_m local attribute [reducible] read_m instance : monad read_m := infer_instance instance : alternative read_m := infer_instance @@ -45,15 +49,19 @@ instance : monad_state reader_state read_m := infer_instance instance : monad_parser read_m := infer_instance --TODO(Sebastian): expose `reader_state.errors` +protected def run {α : Type} (cfg : reader_config) (st : reader_state) (s : string) (r : read_m α) : + except parser.message α := +prod.fst <$> ((r.run cfg).run st).parse_with_eoi s +end read_m + +namespace reader +open monad_parser + protected def parse (cfg : reader_config) (s : string) (r : reader) : except parser.message syntax := -- the only hardcoded tokens, because they are never directly mentioned by a `reader` let tokens : list token_config := [⟨"/-", none⟩, ⟨"--", none⟩] in -prod.fst <$> ((r.read.run cfg).run ⟨r.tokens ++ tokens, ff, []⟩).parse_with_eoi s -end reader - -namespace reader -open monad_parser +r.read.run cfg ⟨r.tokens ++ tokens, ff, []⟩ s def node (m : macro) (ps : list reader) : reader := { read := do { diff --git a/library/init/lean/parser/reader/token.lean b/library/init/lean/parser/reader/token.lean index 09eb7ebf4e..013de518a4 100644 --- a/library/init/lean/parser/reader/token.lean +++ b/library/init/lean/parser/reader/token.lean @@ -46,8 +46,7 @@ do r ← remaining, private def whitespace_aux : nat → read_m unit | (n+1) := -do start ← pos, - tk ← whitespace *> match_token, +do tk ← whitespace *> match_token, (match tk with | some ⟨"--", _⟩ := str "--" *> take_while' (= '\n') *> whitespace_aux n | some ⟨"/-", _⟩ := str "/-" *> finish_comment_block *> whitespace_aux n @@ -55,44 +54,51 @@ do start ← pos, | 0 := error "unreachable" /-- Skip whitespace and comments. -/ ---TODO(Sebastian): store whitespace prefix and suffix in syntax objects -def whitespace : read_m unit := +def whitespace : read_m substring := -- every `whitespace_aux` loop reads at least one char -do r ← remaining, - whitespace_aux (r+1) +do start ← left_over, + whitespace_aux (start.remaining+1), + stop ← left_over, + pure ⟨start, stop⟩ + +def with_source_info {α : Type} (r : read_m α) : read_m (α × source_info) := +do leading ← whitespace, + p ← pos, + a ← r, + -- TODO(Sebastian): less greedy, more natural whitespace assignement + -- E.g. only read up to the next line break + trailing ← whitespace, + pure (a, ⟨leading, p, trailing⟩) /-- Match a string literally without consulting the token table. -/ def raw_symbol (sym : string) : reader := { tokens := [], -- no additional tokens - read := do - start ← pos, - try (whitespace *> str sym), - stop ← pos, - pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string sym⟩ } + read := try $ do + (_, info) ← with_source_info $ str sym, + pure $ syntax.atom ⟨info, atomic_val.string sym⟩ } --TODO(Sebastian): other bases -private def number' (start : position) : read_m syntax := +private def number' : read_m (source_info → syntax) := do num ← take_while1 char.is_digit, - stop ← pos, - pure $ syntax.node ⟨`base10_lit, [syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string num⟩]⟩ + pure $ λ i, syntax.node ⟨`base10_lit, [syntax.atom ⟨i, atomic_val.string num⟩]⟩ -private def ident' (start : position) : read_m syntax := +private def ident' : read_m (source_info → syntax) := do n ← identifier, - stop ← pos, - pure $ syntax.ident ⟨some ⟨start, stop⟩, n, none, none⟩ + pure $ λ i, syntax.ident ⟨i, n, none, none⟩ def token : read_m syntax := -do start ← pos, - tk ← whitespace *> match_token, - match tk with - -- constant-length token - | some ⟨tk, none⟩ := - do str tk, - stop ← pos, - pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string tk⟩ - -- variable-length token - | some ⟨tk, some r⟩ := str tk *> monad_parser.lift (r start) - | none := number' start <|> ident' start +do (r, i) ← with_source_info $ do { + tk ← match_token, + match tk with + -- constant-length token + | some ⟨tk, none⟩ := + do str tk, + pure $ λ i, syntax.atom ⟨some i, atomic_val.string tk⟩ + -- variable-length token + | some ⟨tk, some r⟩ := str tk *> monad_parser.lift r + | none := number' <|> ident' + }, + pure (r i) --TODO(Sebastian): error messages def symbol (sym : string) : reader := diff --git a/library/init/lean/parser/syntax.lean b/library/init/lean/parser/syntax.lean index c93abdbc1b..08078a97a8 100644 --- a/library/init/lean/parser/syntax.lean +++ b/library/init/lean/parser/syntax.lean @@ -13,9 +13,15 @@ namespace parser @[reducible] def var_offset := ℕ @[reducible] def macro_scope_id := ℕ -structure span := -(left : position) -(right : position) +--TODO(Sebastian): move +structure substring := +(start : string.iterator) +(stop : string.iterator) + +structure source_info := +(leading : substring) +(pos : position) +(trailing : substring) structure resolved := -- local or (overloaded) global @@ -27,15 +33,14 @@ structure resolved := instance resolved.has_to_format : has_to_format resolved := ⟨λ r, to_fmt (r.decl, r.prefix)⟩ structure syntax_ident := -(span : option span) (name : name) (msc : option macro_scope_id) (res : option resolved) +(info : option source_info) (name : name) (msc : option macro_scope_id) (res : option resolved) inductive atomic_val | string (s : string) -| number (n : nat) | name (n : name) structure syntax_atom := -(span : option span) (val : atomic_val) +(info : option source_info) (val : atomic_val) structure syntax_node (syntax : Type) := (macro : name) (args : list syntax) @@ -46,14 +51,12 @@ inductive syntax | atom (val : syntax_atom) | node (val : syntax_node syntax) +def substring.to_string (s : substring) : string := +(s.start.extract s.stop).get_or_else "" + namespace syntax open format -protected def span : syntax → option span -| (ident val) := val.span -| (atom val) := val.span -| (node val) := none -- should perhaps be the 'join' of all sub-spans? - protected mutual def to_format, to_format_lst with to_format : syntax → format | (ident id) := @@ -76,7 +79,6 @@ with to_format : syntax → format | none := "") in n | (atom ⟨_, atomic_val.string s⟩) := to_fmt $ repr s -| (atom ⟨_, atomic_val.number n⟩) := to_fmt n | (atom ⟨_, atomic_val.name n⟩) := to_fmt "`" ++ to_fmt n | (node {macro := name.anonymous, args := args, ..}) := sbracket $ join_sep (to_format_lst args) line diff --git a/tests/lean/lisp.lean b/tests/lean/lisp.lean deleted file mode 100644 index 6166ec082e..0000000000 --- a/tests/lean/lisp.lean +++ /dev/null @@ -1,207 +0,0 @@ -import init.lean.parser.macro init.lean.parser.identifier system.io -import .macro1 - -set_option eqn_compiler.lemmas false - -attribute [instance] lean.name.has_lt_quick -open lean.parser -open lean.parser.monad_parser - -def test {α} [decidable_eq α] (p : parser α) (s : string) (e : α) : io unit := -match parser_t.parse p s with -| except.ok a := if a = e then return () else io.print_ln "unexpected result" -| except.error e := io.print_ln (e.to_string s) - -def test_failure {α} (p : parser α) (s : string) : io unit := -match parser_t.parse p s with -| except.ok a := io.print_ln "unexpected success" -| except.error e := return () - -def show_result {α} [has_to_string α] (s : string) (p : except message α) : io unit := -match p with -| except.ok a := io.print_ln "result: " >> io.print_ln (to_string a) -| except.error e := io.print_ln (e.to_string s) - -namespace lean.parser.syntax.combinators -variables {m : Type → Type} [monad m] - -def ident : parser_t m syntax := -do start ← pos, - n ← identifier, - stop ← pos, - whitespace, - pure $ syntax.ident ⟨some ⟨start, stop⟩, n, none, none⟩ - -def sym (sym : string) : parser_t m syntax := -do start ← pos, - str sym, - stop ← pos, - whitespace, - pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string sym⟩ - -def num : parser_t m syntax := -do start ← pos, - n ← monad_parser.num, - stop ← pos, - whitespace, - pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.number n⟩ - -def node (n : name) (ps : list (parser_t m syntax)) : parser_t m syntax := -do args ← ps.mmap id, - pure $ syntax.node ⟨n, args⟩ - -def many (p : parser_t m syntax) : parser_t m syntax := -do args ← many p, - pure $ syntax.node ⟨name.anonymous, args⟩ - -def wrap_with_node (n : name) (stx : syntax) : syntax := -syntax.node ⟨n, [stx]⟩ - -end lean.parser.syntax.combinators - -namespace lisp -open lean.parser -open lean.parser.syntax.combinators - -abbreviation read_m := parser syntax -abbreviation ws {m} [monad m] : parser_t m unit := whitespace - -def parse_list (parse : read_m) : read_m := -(node `list [sym "(", many parse, sym ")"] <|> - node `list [sym "[", many parse, sym "]"]) "list" - -def parse : ℕ → read_m -| 0 := unexpected "nesting limit exceeded" -| (n+1) := ident <|> num <|> parse_list (parse n) - -def let_macro := {macro . - name := `let, - expand := some $ λ node, - match node.args with - | [syntax.node ⟨name.anonymous, clauses⟩, body] := - do clauses ← clauses.mmap (λ cl, match cl with - | syntax.node ⟨name.anonymous, [v, e]⟩ := some (v, e) - | stx := none), - let ⟨vars, exprs⟩ := clauses.unzip, - syntax.node {macro := `let_core, args := [ - syntax.node ⟨name.anonymous, exprs⟩, - syntax.node {macro := `bind, args := [ - syntax.node {macro := name.anonymous, args := vars}, - body - ]} - ]} - | _ := none} - -def mk_list_stx (args : list syntax) : syntax := -syntax.node ⟨`list, [ - syntax.atom ⟨none, atomic_val.string "("⟩, - syntax.node ⟨name.anonymous, args⟩, - syntax.atom ⟨none, atomic_val.string ")"⟩]⟩ - -def mk_ident (n : name) : syntax := -syntax.ident ⟨none, n, none, none⟩ - -def match_list_stx : syntax → option (list syntax) -| (syntax.node ⟨`list, [lp, syntax.node ⟨name.anonymous, args⟩, rp]⟩) := some args -| _ := none - -/- From "Macros that Work Together" -/ -def or_macro := {macro . - name := `or, - expand := some $ λ n, - -- '(or e1 e2)' - do [e1, e2] ← pure n.args | none, - -- '(let ([tmp e1]) (if tmp tmp e2))' - mk_list_stx [ - mk_ident `let, - mk_list_stx [mk_list_stx [mk_ident `tmp, e1]], - mk_list_stx [mk_ident `if, mk_ident `tmp, mk_ident `tmp, e2]]} - -/-- Recognizes and translates calls to built-in macros -/ -def list_macro := {macro . - name := `list, - expand := some $ λ n, - match match_list_stx (syntax.node n) with - | some [syntax.ident {name := `lambda, ..}, vars, body] := - do vars ← match_list_stx vars, - syntax.node ⟨`lambda, [syntax.node ⟨name.anonymous, vars⟩, body]⟩ - | some (syntax.ident {name := `or, ..}::args) := - syntax.node ⟨`or, args⟩ - | some (syntax.ident {name := `if, ..}::args) := - syntax.node ⟨`if, args⟩ - | some [syntax.ident {name := `let, ..}, clauses, body] := - do clauses ← match_list_stx clauses, - clauses ← clauses.mmap (λ cl, match match_list_stx cl with - | some cl := some $ syntax.node ⟨name.anonymous, cl⟩ - | none := none), - syntax.node ⟨`let, [syntax.node ⟨name.anonymous, clauses⟩, body]⟩ - | _ := none} - -inductive value -| number (n : nat) -| list (l : list value) -| closure (env : list value) (code : syntax) - --- TODO(Sebastian): shouldn't be meta -meta def value.to_string : value → string -| (value.number n) := to_string n -| (value.list l) := (l.map (λ v, value.to_string v)).to_string -| (value.closure _ _) := "#" - -meta def eval : nat → syntax → list value → except string value -| _ (syntax.atom ⟨_, atomic_val.number n⟩) env := except.ok $ value.number n -| _ (syntax.ident {res := some {decl := sum.inl idx, ..}, ..}) env := -(match env.nth idx with -| some v := except.ok v -| none := throw "aaah") -| _ (syntax.node ⟨`lambda_core, [syntax.node ⟨`bind, [_, body]⟩]⟩) env := - except.ok $ value.closure env body -| (fuel+1) (syntax.node ⟨`let_core, [syntax.node ⟨_, [expr]⟩, syntax.node ⟨`bind, [_, body]⟩]⟩) env := -do v ← eval fuel expr env, - eval fuel body (v::env) -| (fuel+1) (syntax.node ⟨`if, [cond, t, e]⟩) env := -do cond ← eval fuel cond env, - eval fuel (match cond with - | value.number 0 := e - | value.list [] := e - | _ := t) env -| (fuel+1) (syntax.node ⟨`list, [lp, syntax.node ⟨_, f::args⟩, rp]⟩) env := -do f ← eval fuel f env, - args.mfoldl (λ f arg, - do value.closure cenv code ← pure f | - throw $ "not a function: " ++ f.to_string, - arg ← eval fuel arg env, - eval fuel code (arg::cenv)) f -| _ stx env := throw $ "cannot evaluate " ++ to_string stx - -def cfg := {macro1.cfg with macros := - ((macro1.cfg.macros.insert `list list_macro) - .insert `let let_macro) - .insert `or or_macro} -meta def go (s) : tactic unit := -match ((parse 1000).parse_with_eoi s).run with -| except.error e := tactic.trace (e.to_string s) -| except.ok stx := -tactic.trace (to_fmt "reader: " ++ to_fmt stx) >> -match (expand' stx).run' cfg () with -| except.error e := tactic.fail e -| except.ok stx := -tactic.trace (to_fmt "expander: " ++ to_fmt stx) >> -match (resolve' stx).run' cfg () with -| except.error e := tactic.fail e -| except.ok stx := -tactic.trace (to_fmt "resolver: " ++ to_fmt stx) >> -match eval 1000 stx [] with -| except.error e := tactic.fail e -| except.ok val := -tactic.trace (to_fmt "evaluator: " ++ val.to_string) - - -#eval go "()" -#eval go "((lambda [x] x) 1)" -#eval go "((lambda [x y] x) 1 2)" -#eval go "(let ([x 1]) x)" - -#eval go "(let ([tmp 5]) (or 0 tmp))" - -end lisp diff --git a/tests/lean/macro1.lean b/tests/lean/macro1.lean index 9a52f745d6..04724833e7 100644 --- a/tests/lean/macro1.lean +++ b/tests/lean/macro1.lean @@ -3,7 +3,7 @@ attribute [instance] lean.name.has_lt_quick namespace macro1 open lean.parser -def sp : option span := none +def info : option source_info := none def lambda_macro := {macro . name := `lambda, @@ -25,7 +25,7 @@ def lambda_macro := {macro . def intro_x_macro := {macro . name := "intro_x", expand := some $ λ node, - syntax.node ⟨`lambda, syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, "x", none, none⟩]⟩ :: node.args⟩} + syntax.node ⟨`lambda, syntax.node ⟨name.anonymous, [syntax.ident ⟨info, "x", none, none⟩]⟩ :: node.args⟩} def macros : name → option macro | `lambda := some lambda_macro @@ -42,27 +42,27 @@ match (expand' stx >>= resolve').run' cfg () with | except.ok stx := tactic.trace stx run_cmd test $ syntax.node ⟨`lambda, [ - syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x, none, none⟩]⟩, - syntax.ident ⟨sp, `x, none, none⟩ + syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x, none, none⟩]⟩, + syntax.ident ⟨info, `x, none, none⟩ ]⟩ run_cmd test $ syntax.node ⟨`lambda, [ - syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x, none, none⟩]⟩, - syntax.ident ⟨sp, `y, none, none⟩ + syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x, none, none⟩]⟩, + syntax.ident ⟨info, `y, none, none⟩ ]⟩ -- test macro shadowing run_cmd test $ syntax.node ⟨`lambda, [ - syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x, none, none⟩]⟩, + syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x, none, none⟩]⟩, syntax.node ⟨`intro_x, [ - syntax.ident ⟨sp, `x, none, none⟩ + syntax.ident ⟨info, `x, none, none⟩ ]⟩ ]⟩ -- test field notation run_cmd test $ syntax.node ⟨`lambda, [ - syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x.y, none, none⟩]⟩, - syntax.ident ⟨sp, `x.y.z, none, none⟩ + syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x.y, none, none⟩]⟩, + syntax.ident ⟨info, `x.y.z, none, none⟩ ]⟩ end macro1 diff --git a/tests/lean/reader1.lean.expected.out b/tests/lean/reader1.lean.expected.out index 5615e18e66..81e0d59aad 100644 --- a/tests/lean/reader1.lean.expected.out +++ b/tests/lean/reader1.lean.expected.out @@ -9,4 +9,4 @@ result: (import "import" [(import_path [] c)])]) error at line 10, column 9: unexpected '`' -expected identifier +expected end of input