feat(library/init/lean/parser/{syntax,reader}): store whitespace around tokens
This commit is contained in:
parent
67d93ff051
commit
b2f9b2c180
6 changed files with 74 additions and 265 deletions
|
|
@ -18,7 +18,11 @@ namespace parser
|
|||
structure token_config :=
|
||||
(«prefix» : string)
|
||||
-- reading a token should not need any state
|
||||
(token_reader : option (position → parser syntax) := none)
|
||||
/- An optional parser that is activated after matching `prefix`.
|
||||
It should return a syntax tree with a "hole" for the
|
||||
`source_info` surrounding the token, which will be supplied
|
||||
by the `token` reader. -/
|
||||
(token_reader : option (parser (source_info → syntax)) := none)
|
||||
|
||||
structure reader_state :=
|
||||
(tokens : list token_config)
|
||||
|
|
@ -36,7 +40,7 @@ structure reader :=
|
|||
(read : read_m syntax)
|
||||
(tokens : list token_config := [])
|
||||
|
||||
namespace reader
|
||||
namespace read_m
|
||||
local attribute [reducible] read_m
|
||||
instance : monad read_m := infer_instance
|
||||
instance : alternative read_m := infer_instance
|
||||
|
|
@ -45,15 +49,19 @@ instance : monad_state reader_state read_m := infer_instance
|
|||
instance : monad_parser read_m := infer_instance
|
||||
|
||||
--TODO(Sebastian): expose `reader_state.errors`
|
||||
protected def run {α : Type} (cfg : reader_config) (st : reader_state) (s : string) (r : read_m α) :
|
||||
except parser.message α :=
|
||||
prod.fst <$> ((r.run cfg).run st).parse_with_eoi s
|
||||
end read_m
|
||||
|
||||
namespace reader
|
||||
open monad_parser
|
||||
|
||||
protected def parse (cfg : reader_config) (s : string) (r : reader) :
|
||||
except parser.message syntax :=
|
||||
-- the only hardcoded tokens, because they are never directly mentioned by a `reader`
|
||||
let tokens : list token_config := [⟨"/-", none⟩, ⟨"--", none⟩] in
|
||||
prod.fst <$> ((r.read.run cfg).run ⟨r.tokens ++ tokens, ff, []⟩).parse_with_eoi s
|
||||
end reader
|
||||
|
||||
namespace reader
|
||||
open monad_parser
|
||||
r.read.run cfg ⟨r.tokens ++ tokens, ff, []⟩ s
|
||||
|
||||
def node (m : macro) (ps : list reader) : reader :=
|
||||
{ read := do {
|
||||
|
|
|
|||
|
|
@ -46,8 +46,7 @@ do r ← remaining,
|
|||
|
||||
private def whitespace_aux : nat → read_m unit
|
||||
| (n+1) :=
|
||||
do start ← pos,
|
||||
tk ← whitespace *> match_token,
|
||||
do tk ← whitespace *> match_token,
|
||||
(match tk with
|
||||
| some ⟨"--", _⟩ := str "--" *> take_while' (= '\n') *> whitespace_aux n
|
||||
| some ⟨"/-", _⟩ := str "/-" *> finish_comment_block *> whitespace_aux n
|
||||
|
|
@ -55,44 +54,51 @@ do start ← pos,
|
|||
| 0 := error "unreachable"
|
||||
|
||||
/-- Skip whitespace and comments. -/
|
||||
--TODO(Sebastian): store whitespace prefix and suffix in syntax objects
|
||||
def whitespace : read_m unit :=
|
||||
def whitespace : read_m substring :=
|
||||
-- every `whitespace_aux` loop reads at least one char
|
||||
do r ← remaining,
|
||||
whitespace_aux (r+1)
|
||||
do start ← left_over,
|
||||
whitespace_aux (start.remaining+1),
|
||||
stop ← left_over,
|
||||
pure ⟨start, stop⟩
|
||||
|
||||
def with_source_info {α : Type} (r : read_m α) : read_m (α × source_info) :=
|
||||
do leading ← whitespace,
|
||||
p ← pos,
|
||||
a ← r,
|
||||
-- TODO(Sebastian): less greedy, more natural whitespace assignement
|
||||
-- E.g. only read up to the next line break
|
||||
trailing ← whitespace,
|
||||
pure (a, ⟨leading, p, trailing⟩)
|
||||
|
||||
/-- Match a string literally without consulting the token table. -/
|
||||
def raw_symbol (sym : string) : reader :=
|
||||
{ tokens := [], -- no additional tokens
|
||||
read := do
|
||||
start ← pos,
|
||||
try (whitespace *> str sym),
|
||||
stop ← pos,
|
||||
pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string sym⟩ }
|
||||
read := try $ do
|
||||
(_, info) ← with_source_info $ str sym,
|
||||
pure $ syntax.atom ⟨info, atomic_val.string sym⟩ }
|
||||
|
||||
--TODO(Sebastian): other bases
|
||||
private def number' (start : position) : read_m syntax :=
|
||||
private def number' : read_m (source_info → syntax) :=
|
||||
do num ← take_while1 char.is_digit,
|
||||
stop ← pos,
|
||||
pure $ syntax.node ⟨`base10_lit, [syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string num⟩]⟩
|
||||
pure $ λ i, syntax.node ⟨`base10_lit, [syntax.atom ⟨i, atomic_val.string num⟩]⟩
|
||||
|
||||
private def ident' (start : position) : read_m syntax :=
|
||||
private def ident' : read_m (source_info → syntax) :=
|
||||
do n ← identifier,
|
||||
stop ← pos,
|
||||
pure $ syntax.ident ⟨some ⟨start, stop⟩, n, none, none⟩
|
||||
pure $ λ i, syntax.ident ⟨i, n, none, none⟩
|
||||
|
||||
def token : read_m syntax :=
|
||||
do start ← pos,
|
||||
tk ← whitespace *> match_token,
|
||||
match tk with
|
||||
-- constant-length token
|
||||
| some ⟨tk, none⟩ :=
|
||||
do str tk,
|
||||
stop ← pos,
|
||||
pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string tk⟩
|
||||
-- variable-length token
|
||||
| some ⟨tk, some r⟩ := str tk *> monad_parser.lift (r start)
|
||||
| none := number' start <|> ident' start
|
||||
do (r, i) ← with_source_info $ do {
|
||||
tk ← match_token,
|
||||
match tk with
|
||||
-- constant-length token
|
||||
| some ⟨tk, none⟩ :=
|
||||
do str tk,
|
||||
pure $ λ i, syntax.atom ⟨some i, atomic_val.string tk⟩
|
||||
-- variable-length token
|
||||
| some ⟨tk, some r⟩ := str tk *> monad_parser.lift r
|
||||
| none := number' <|> ident'
|
||||
},
|
||||
pure (r i)
|
||||
|
||||
--TODO(Sebastian): error messages
|
||||
def symbol (sym : string) : reader :=
|
||||
|
|
|
|||
|
|
@ -13,9 +13,15 @@ namespace parser
|
|||
@[reducible] def var_offset := ℕ
|
||||
@[reducible] def macro_scope_id := ℕ
|
||||
|
||||
structure span :=
|
||||
(left : position)
|
||||
(right : position)
|
||||
--TODO(Sebastian): move
|
||||
structure substring :=
|
||||
(start : string.iterator)
|
||||
(stop : string.iterator)
|
||||
|
||||
structure source_info :=
|
||||
(leading : substring)
|
||||
(pos : position)
|
||||
(trailing : substring)
|
||||
|
||||
structure resolved :=
|
||||
-- local or (overloaded) global
|
||||
|
|
@ -27,15 +33,14 @@ structure resolved :=
|
|||
instance resolved.has_to_format : has_to_format resolved := ⟨λ r, to_fmt (r.decl, r.prefix)⟩
|
||||
|
||||
structure syntax_ident :=
|
||||
(span : option span) (name : name) (msc : option macro_scope_id) (res : option resolved)
|
||||
(info : option source_info) (name : name) (msc : option macro_scope_id) (res : option resolved)
|
||||
|
||||
inductive atomic_val
|
||||
| string (s : string)
|
||||
| number (n : nat)
|
||||
| name (n : name)
|
||||
|
||||
structure syntax_atom :=
|
||||
(span : option span) (val : atomic_val)
|
||||
(info : option source_info) (val : atomic_val)
|
||||
|
||||
structure syntax_node (syntax : Type) :=
|
||||
(macro : name) (args : list syntax)
|
||||
|
|
@ -46,14 +51,12 @@ inductive syntax
|
|||
| atom (val : syntax_atom)
|
||||
| node (val : syntax_node syntax)
|
||||
|
||||
def substring.to_string (s : substring) : string :=
|
||||
(s.start.extract s.stop).get_or_else ""
|
||||
|
||||
namespace syntax
|
||||
open format
|
||||
|
||||
protected def span : syntax → option span
|
||||
| (ident val) := val.span
|
||||
| (atom val) := val.span
|
||||
| (node val) := none -- should perhaps be the 'join' of all sub-spans?
|
||||
|
||||
protected mutual def to_format, to_format_lst
|
||||
with to_format : syntax → format
|
||||
| (ident id) :=
|
||||
|
|
@ -76,7 +79,6 @@ with to_format : syntax → format
|
|||
| none := "") in
|
||||
n
|
||||
| (atom ⟨_, atomic_val.string s⟩) := to_fmt $ repr s
|
||||
| (atom ⟨_, atomic_val.number n⟩) := to_fmt n
|
||||
| (atom ⟨_, atomic_val.name n⟩) := to_fmt "`" ++ to_fmt n
|
||||
| (node {macro := name.anonymous, args := args, ..}) :=
|
||||
sbracket $ join_sep (to_format_lst args) line
|
||||
|
|
|
|||
|
|
@ -1,207 +0,0 @@
|
|||
import init.lean.parser.macro init.lean.parser.identifier system.io
|
||||
import .macro1
|
||||
|
||||
set_option eqn_compiler.lemmas false
|
||||
|
||||
attribute [instance] lean.name.has_lt_quick
|
||||
open lean.parser
|
||||
open lean.parser.monad_parser
|
||||
|
||||
def test {α} [decidable_eq α] (p : parser α) (s : string) (e : α) : io unit :=
|
||||
match parser_t.parse p s with
|
||||
| except.ok a := if a = e then return () else io.print_ln "unexpected result"
|
||||
| except.error e := io.print_ln (e.to_string s)
|
||||
|
||||
def test_failure {α} (p : parser α) (s : string) : io unit :=
|
||||
match parser_t.parse p s with
|
||||
| except.ok a := io.print_ln "unexpected success"
|
||||
| except.error e := return ()
|
||||
|
||||
def show_result {α} [has_to_string α] (s : string) (p : except message α) : io unit :=
|
||||
match p with
|
||||
| except.ok a := io.print_ln "result: " >> io.print_ln (to_string a)
|
||||
| except.error e := io.print_ln (e.to_string s)
|
||||
|
||||
namespace lean.parser.syntax.combinators
|
||||
variables {m : Type → Type} [monad m]
|
||||
|
||||
def ident : parser_t m syntax :=
|
||||
do start ← pos,
|
||||
n ← identifier,
|
||||
stop ← pos,
|
||||
whitespace,
|
||||
pure $ syntax.ident ⟨some ⟨start, stop⟩, n, none, none⟩
|
||||
|
||||
def sym (sym : string) : parser_t m syntax :=
|
||||
do start ← pos,
|
||||
str sym,
|
||||
stop ← pos,
|
||||
whitespace,
|
||||
pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string sym⟩
|
||||
|
||||
def num : parser_t m syntax :=
|
||||
do start ← pos,
|
||||
n ← monad_parser.num,
|
||||
stop ← pos,
|
||||
whitespace,
|
||||
pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.number n⟩
|
||||
|
||||
def node (n : name) (ps : list (parser_t m syntax)) : parser_t m syntax :=
|
||||
do args ← ps.mmap id,
|
||||
pure $ syntax.node ⟨n, args⟩
|
||||
|
||||
def many (p : parser_t m syntax) : parser_t m syntax :=
|
||||
do args ← many p,
|
||||
pure $ syntax.node ⟨name.anonymous, args⟩
|
||||
|
||||
def wrap_with_node (n : name) (stx : syntax) : syntax :=
|
||||
syntax.node ⟨n, [stx]⟩
|
||||
|
||||
end lean.parser.syntax.combinators
|
||||
|
||||
namespace lisp
|
||||
open lean.parser
|
||||
open lean.parser.syntax.combinators
|
||||
|
||||
abbreviation read_m := parser syntax
|
||||
abbreviation ws {m} [monad m] : parser_t m unit := whitespace
|
||||
|
||||
def parse_list (parse : read_m) : read_m :=
|
||||
(node `list [sym "(", many parse, sym ")"] <|>
|
||||
node `list [sym "[", many parse, sym "]"]) <?> "list"
|
||||
|
||||
def parse : ℕ → read_m
|
||||
| 0 := unexpected "nesting limit exceeded"
|
||||
| (n+1) := ident <|> num <|> parse_list (parse n)
|
||||
|
||||
def let_macro := {macro .
|
||||
name := `let,
|
||||
expand := some $ λ node,
|
||||
match node.args with
|
||||
| [syntax.node ⟨name.anonymous, clauses⟩, body] :=
|
||||
do clauses ← clauses.mmap (λ cl, match cl with
|
||||
| syntax.node ⟨name.anonymous, [v, e]⟩ := some (v, e)
|
||||
| stx := none),
|
||||
let ⟨vars, exprs⟩ := clauses.unzip,
|
||||
syntax.node {macro := `let_core, args := [
|
||||
syntax.node ⟨name.anonymous, exprs⟩,
|
||||
syntax.node {macro := `bind, args := [
|
||||
syntax.node {macro := name.anonymous, args := vars},
|
||||
body
|
||||
]}
|
||||
]}
|
||||
| _ := none}
|
||||
|
||||
def mk_list_stx (args : list syntax) : syntax :=
|
||||
syntax.node ⟨`list, [
|
||||
syntax.atom ⟨none, atomic_val.string "("⟩,
|
||||
syntax.node ⟨name.anonymous, args⟩,
|
||||
syntax.atom ⟨none, atomic_val.string ")"⟩]⟩
|
||||
|
||||
def mk_ident (n : name) : syntax :=
|
||||
syntax.ident ⟨none, n, none, none⟩
|
||||
|
||||
def match_list_stx : syntax → option (list syntax)
|
||||
| (syntax.node ⟨`list, [lp, syntax.node ⟨name.anonymous, args⟩, rp]⟩) := some args
|
||||
| _ := none
|
||||
|
||||
/- From "Macros that Work Together" -/
|
||||
def or_macro := {macro .
|
||||
name := `or,
|
||||
expand := some $ λ n,
|
||||
-- '(or e1 e2)'
|
||||
do [e1, e2] ← pure n.args | none,
|
||||
-- '(let ([tmp e1]) (if tmp tmp e2))'
|
||||
mk_list_stx [
|
||||
mk_ident `let,
|
||||
mk_list_stx [mk_list_stx [mk_ident `tmp, e1]],
|
||||
mk_list_stx [mk_ident `if, mk_ident `tmp, mk_ident `tmp, e2]]}
|
||||
|
||||
/-- Recognizes and translates calls to built-in macros -/
|
||||
def list_macro := {macro .
|
||||
name := `list,
|
||||
expand := some $ λ n,
|
||||
match match_list_stx (syntax.node n) with
|
||||
| some [syntax.ident {name := `lambda, ..}, vars, body] :=
|
||||
do vars ← match_list_stx vars,
|
||||
syntax.node ⟨`lambda, [syntax.node ⟨name.anonymous, vars⟩, body]⟩
|
||||
| some (syntax.ident {name := `or, ..}::args) :=
|
||||
syntax.node ⟨`or, args⟩
|
||||
| some (syntax.ident {name := `if, ..}::args) :=
|
||||
syntax.node ⟨`if, args⟩
|
||||
| some [syntax.ident {name := `let, ..}, clauses, body] :=
|
||||
do clauses ← match_list_stx clauses,
|
||||
clauses ← clauses.mmap (λ cl, match match_list_stx cl with
|
||||
| some cl := some $ syntax.node ⟨name.anonymous, cl⟩
|
||||
| none := none),
|
||||
syntax.node ⟨`let, [syntax.node ⟨name.anonymous, clauses⟩, body]⟩
|
||||
| _ := none}
|
||||
|
||||
inductive value
|
||||
| number (n : nat)
|
||||
| list (l : list value)
|
||||
| closure (env : list value) (code : syntax)
|
||||
|
||||
-- TODO(Sebastian): shouldn't be meta
|
||||
meta def value.to_string : value → string
|
||||
| (value.number n) := to_string n
|
||||
| (value.list l) := (l.map (λ v, value.to_string v)).to_string
|
||||
| (value.closure _ _) := "#<function>"
|
||||
|
||||
meta def eval : nat → syntax → list value → except string value
|
||||
| _ (syntax.atom ⟨_, atomic_val.number n⟩) env := except.ok $ value.number n
|
||||
| _ (syntax.ident {res := some {decl := sum.inl idx, ..}, ..}) env :=
|
||||
(match env.nth idx with
|
||||
| some v := except.ok v
|
||||
| none := throw "aaah")
|
||||
| _ (syntax.node ⟨`lambda_core, [syntax.node ⟨`bind, [_, body]⟩]⟩) env :=
|
||||
except.ok $ value.closure env body
|
||||
| (fuel+1) (syntax.node ⟨`let_core, [syntax.node ⟨_, [expr]⟩, syntax.node ⟨`bind, [_, body]⟩]⟩) env :=
|
||||
do v ← eval fuel expr env,
|
||||
eval fuel body (v::env)
|
||||
| (fuel+1) (syntax.node ⟨`if, [cond, t, e]⟩) env :=
|
||||
do cond ← eval fuel cond env,
|
||||
eval fuel (match cond with
|
||||
| value.number 0 := e
|
||||
| value.list [] := e
|
||||
| _ := t) env
|
||||
| (fuel+1) (syntax.node ⟨`list, [lp, syntax.node ⟨_, f::args⟩, rp]⟩) env :=
|
||||
do f ← eval fuel f env,
|
||||
args.mfoldl (λ f arg,
|
||||
do value.closure cenv code ← pure f |
|
||||
throw $ "not a function: " ++ f.to_string,
|
||||
arg ← eval fuel arg env,
|
||||
eval fuel code (arg::cenv)) f
|
||||
| _ stx env := throw $ "cannot evaluate " ++ to_string stx
|
||||
|
||||
def cfg := {macro1.cfg with macros :=
|
||||
((macro1.cfg.macros.insert `list list_macro)
|
||||
.insert `let let_macro)
|
||||
.insert `or or_macro}
|
||||
meta def go (s) : tactic unit :=
|
||||
match ((parse 1000).parse_with_eoi s).run with
|
||||
| except.error e := tactic.trace (e.to_string s)
|
||||
| except.ok stx :=
|
||||
tactic.trace (to_fmt "reader: " ++ to_fmt stx) >>
|
||||
match (expand' stx).run' cfg () with
|
||||
| except.error e := tactic.fail e
|
||||
| except.ok stx :=
|
||||
tactic.trace (to_fmt "expander: " ++ to_fmt stx) >>
|
||||
match (resolve' stx).run' cfg () with
|
||||
| except.error e := tactic.fail e
|
||||
| except.ok stx :=
|
||||
tactic.trace (to_fmt "resolver: " ++ to_fmt stx) >>
|
||||
match eval 1000 stx [] with
|
||||
| except.error e := tactic.fail e
|
||||
| except.ok val :=
|
||||
tactic.trace (to_fmt "evaluator: " ++ val.to_string)
|
||||
|
||||
|
||||
#eval go "()"
|
||||
#eval go "((lambda [x] x) 1)"
|
||||
#eval go "((lambda [x y] x) 1 2)"
|
||||
#eval go "(let ([x 1]) x)"
|
||||
|
||||
#eval go "(let ([tmp 5]) (or 0 tmp))"
|
||||
|
||||
end lisp
|
||||
|
|
@ -3,7 +3,7 @@ attribute [instance] lean.name.has_lt_quick
|
|||
namespace macro1
|
||||
open lean.parser
|
||||
|
||||
def sp : option span := none
|
||||
def info : option source_info := none
|
||||
|
||||
def lambda_macro := {macro .
|
||||
name := `lambda,
|
||||
|
|
@ -25,7 +25,7 @@ def lambda_macro := {macro .
|
|||
def intro_x_macro := {macro .
|
||||
name := "intro_x",
|
||||
expand := some $ λ node,
|
||||
syntax.node ⟨`lambda, syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, "x", none, none⟩]⟩ :: node.args⟩}
|
||||
syntax.node ⟨`lambda, syntax.node ⟨name.anonymous, [syntax.ident ⟨info, "x", none, none⟩]⟩ :: node.args⟩}
|
||||
|
||||
def macros : name → option macro
|
||||
| `lambda := some lambda_macro
|
||||
|
|
@ -42,27 +42,27 @@ match (expand' stx >>= resolve').run' cfg () with
|
|||
| except.ok stx := tactic.trace stx
|
||||
|
||||
run_cmd test $ syntax.node ⟨`lambda, [
|
||||
syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x, none, none⟩]⟩,
|
||||
syntax.ident ⟨sp, `x, none, none⟩
|
||||
syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x, none, none⟩]⟩,
|
||||
syntax.ident ⟨info, `x, none, none⟩
|
||||
]⟩
|
||||
|
||||
run_cmd test $ syntax.node ⟨`lambda, [
|
||||
syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x, none, none⟩]⟩,
|
||||
syntax.ident ⟨sp, `y, none, none⟩
|
||||
syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x, none, none⟩]⟩,
|
||||
syntax.ident ⟨info, `y, none, none⟩
|
||||
]⟩
|
||||
|
||||
-- test macro shadowing
|
||||
run_cmd test $ syntax.node ⟨`lambda, [
|
||||
syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x, none, none⟩]⟩,
|
||||
syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x, none, none⟩]⟩,
|
||||
syntax.node ⟨`intro_x, [
|
||||
syntax.ident ⟨sp, `x, none, none⟩
|
||||
syntax.ident ⟨info, `x, none, none⟩
|
||||
]⟩
|
||||
]⟩
|
||||
|
||||
-- test field notation
|
||||
run_cmd test $ syntax.node ⟨`lambda, [
|
||||
syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x.y, none, none⟩]⟩,
|
||||
syntax.ident ⟨sp, `x.y.z, none, none⟩
|
||||
syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x.y, none, none⟩]⟩,
|
||||
syntax.ident ⟨info, `x.y.z, none, none⟩
|
||||
]⟩
|
||||
|
||||
end macro1
|
||||
|
|
|
|||
|
|
@ -9,4 +9,4 @@ result:
|
|||
(import "import" [(import_path [] c)])])
|
||||
error at line 10, column 9:
|
||||
unexpected '`'
|
||||
expected identifier
|
||||
expected end of input
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue