feat(library/init/lean/parser/{syntax,reader}): store whitespace around tokens

This commit is contained in:
Sebastian Ullrich 2018-07-06 15:58:32 +02:00
parent 67d93ff051
commit b2f9b2c180
6 changed files with 74 additions and 265 deletions

View file

@ -18,7 +18,11 @@ namespace parser
structure token_config :=
(«prefix» : string)
-- reading a token should not need any state
(token_reader : option (position → parser syntax) := none)
/- An optional parser that is activated after matching `prefix`.
It should return a syntax tree with a "hole" for the
`source_info` surrounding the token, which will be supplied
by the `token` reader. -/
(token_reader : option (parser (source_info → syntax)) := none)
structure reader_state :=
(tokens : list token_config)
@ -36,7 +40,7 @@ structure reader :=
(read : read_m syntax)
(tokens : list token_config := [])
namespace reader
namespace read_m
local attribute [reducible] read_m
instance : monad read_m := infer_instance
instance : alternative read_m := infer_instance
@ -45,15 +49,19 @@ instance : monad_state reader_state read_m := infer_instance
instance : monad_parser read_m := infer_instance
--TODO(Sebastian): expose `reader_state.errors`
protected def run {α : Type} (cfg : reader_config) (st : reader_state) (s : string) (r : read_m α) :
except parser.message α :=
prod.fst <$> ((r.run cfg).run st).parse_with_eoi s
end read_m
namespace reader
open monad_parser
protected def parse (cfg : reader_config) (s : string) (r : reader) :
except parser.message syntax :=
-- the only hardcoded tokens, because they are never directly mentioned by a `reader`
let tokens : list token_config := [⟨"/-", none⟩, ⟨"--", none⟩] in
prod.fst <$> ((r.read.run cfg).run ⟨r.tokens ++ tokens, ff, []⟩).parse_with_eoi s
end reader
namespace reader
open monad_parser
r.read.run cfg ⟨r.tokens ++ tokens, ff, []⟩ s
def node (m : macro) (ps : list reader) : reader :=
{ read := do {

View file

@ -46,8 +46,7 @@ do r ← remaining,
private def whitespace_aux : nat → read_m unit
| (n+1) :=
do start ← pos,
tk ← whitespace *> match_token,
do tk ← whitespace *> match_token,
(match tk with
| some ⟨"--", _⟩ := str "--" *> take_while' (= '\n') *> whitespace_aux n
| some ⟨"/-", _⟩ := str "/-" *> finish_comment_block *> whitespace_aux n
@ -55,44 +54,51 @@ do start ← pos,
| 0 := error "unreachable"
/-- Skip whitespace and comments. -/
--TODO(Sebastian): store whitespace prefix and suffix in syntax objects
def whitespace : read_m unit :=
def whitespace : read_m substring :=
-- every `whitespace_aux` loop reads at least one char
do r ← remaining,
whitespace_aux (r+1)
do start ← left_over,
whitespace_aux (start.remaining+1),
stop ← left_over,
pure ⟨start, stop⟩
def with_source_info {α : Type} (r : read_m α) : read_m (α × source_info) :=
do leading ← whitespace,
p ← pos,
a ← r,
-- TODO(Sebastian): less greedy, more natural whitespace assignement
-- E.g. only read up to the next line break
trailing ← whitespace,
pure (a, ⟨leading, p, trailing⟩)
/-- Match a string literally without consulting the token table. -/
def raw_symbol (sym : string) : reader :=
{ tokens := [], -- no additional tokens
read := do
start ← pos,
try (whitespace *> str sym),
stop ← pos,
pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string sym⟩ }
read := try $ do
(_, info) ← with_source_info $ str sym,
pure $ syntax.atom ⟨info, atomic_val.string sym⟩ }
--TODO(Sebastian): other bases
private def number' (start : position) : read_m syntax :=
private def number' : read_m (source_info → syntax) :=
do num ← take_while1 char.is_digit,
stop ← pos,
pure $ syntax.node ⟨`base10_lit, [syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string num⟩]⟩
pure $ λ i, syntax.node ⟨`base10_lit, [syntax.atom ⟨i, atomic_val.string num⟩]⟩
private def ident' (start : position) : read_m syntax :=
private def ident' : read_m (source_info → syntax) :=
do n ← identifier,
stop ← pos,
pure $ syntax.ident ⟨some ⟨start, stop⟩, n, none, none⟩
pure $ λ i, syntax.ident ⟨i, n, none, none⟩
def token : read_m syntax :=
do start ← pos,
tk ← whitespace *> match_token,
match tk with
-- constant-length token
| some ⟨tk, none⟩ :=
do str tk,
stop ← pos,
pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string tk⟩
-- variable-length token
| some ⟨tk, some r⟩ := str tk *> monad_parser.lift (r start)
| none := number' start <|> ident' start
do (r, i) ← with_source_info $ do {
tk ← match_token,
match tk with
-- constant-length token
| some ⟨tk, none⟩ :=
do str tk,
pure $ λ i, syntax.atom ⟨some i, atomic_val.string tk⟩
-- variable-length token
| some ⟨tk, some r⟩ := str tk *> monad_parser.lift r
| none := number' <|> ident'
},
pure (r i)
--TODO(Sebastian): error messages
def symbol (sym : string) : reader :=

View file

@ -13,9 +13,15 @@ namespace parser
@[reducible] def var_offset :=
@[reducible] def macro_scope_id :=
structure span :=
(left : position)
(right : position)
--TODO(Sebastian): move
structure substring :=
(start : string.iterator)
(stop : string.iterator)
structure source_info :=
(leading : substring)
(pos : position)
(trailing : substring)
structure resolved :=
-- local or (overloaded) global
@ -27,15 +33,14 @@ structure resolved :=
instance resolved.has_to_format : has_to_format resolved := ⟨λ r, to_fmt (r.decl, r.prefix)⟩
structure syntax_ident :=
(span : option span) (name : name) (msc : option macro_scope_id) (res : option resolved)
(info : option source_info) (name : name) (msc : option macro_scope_id) (res : option resolved)
inductive atomic_val
| string (s : string)
| number (n : nat)
| name (n : name)
structure syntax_atom :=
(span : option span) (val : atomic_val)
(info : option source_info) (val : atomic_val)
structure syntax_node (syntax : Type) :=
(macro : name) (args : list syntax)
@ -46,14 +51,12 @@ inductive syntax
| atom (val : syntax_atom)
| node (val : syntax_node syntax)
def substring.to_string (s : substring) : string :=
(s.start.extract s.stop).get_or_else ""
namespace syntax
open format
protected def span : syntax → option span
| (ident val) := val.span
| (atom val) := val.span
| (node val) := none -- should perhaps be the 'join' of all sub-spans?
protected mutual def to_format, to_format_lst
with to_format : syntax → format
| (ident id) :=
@ -76,7 +79,6 @@ with to_format : syntax → format
| none := "") in
n
| (atom ⟨_, atomic_val.string s⟩) := to_fmt $ repr s
| (atom ⟨_, atomic_val.number n⟩) := to_fmt n
| (atom ⟨_, atomic_val.name n⟩) := to_fmt "`" ++ to_fmt n
| (node {macro := name.anonymous, args := args, ..}) :=
sbracket $ join_sep (to_format_lst args) line

View file

@ -1,207 +0,0 @@
import init.lean.parser.macro init.lean.parser.identifier system.io
import .macro1
set_option eqn_compiler.lemmas false
attribute [instance] lean.name.has_lt_quick
open lean.parser
open lean.parser.monad_parser
def test {α} [decidable_eq α] (p : parser α) (s : string) (e : α) : io unit :=
match parser_t.parse p s with
| except.ok a := if a = e then return () else io.print_ln "unexpected result"
| except.error e := io.print_ln (e.to_string s)
def test_failure {α} (p : parser α) (s : string) : io unit :=
match parser_t.parse p s with
| except.ok a := io.print_ln "unexpected success"
| except.error e := return ()
def show_result {α} [has_to_string α] (s : string) (p : except message α) : io unit :=
match p with
| except.ok a := io.print_ln "result: " >> io.print_ln (to_string a)
| except.error e := io.print_ln (e.to_string s)
namespace lean.parser.syntax.combinators
variables {m : Type → Type} [monad m]
def ident : parser_t m syntax :=
do start ← pos,
n ← identifier,
stop ← pos,
whitespace,
pure $ syntax.ident ⟨some ⟨start, stop⟩, n, none, none⟩
def sym (sym : string) : parser_t m syntax :=
do start ← pos,
str sym,
stop ← pos,
whitespace,
pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.string sym⟩
def num : parser_t m syntax :=
do start ← pos,
n ← monad_parser.num,
stop ← pos,
whitespace,
pure $ syntax.atom ⟨some ⟨start, stop⟩, atomic_val.number n⟩
def node (n : name) (ps : list (parser_t m syntax)) : parser_t m syntax :=
do args ← ps.mmap id,
pure $ syntax.node ⟨n, args⟩
def many (p : parser_t m syntax) : parser_t m syntax :=
do args ← many p,
pure $ syntax.node ⟨name.anonymous, args⟩
def wrap_with_node (n : name) (stx : syntax) : syntax :=
syntax.node ⟨n, [stx]⟩
end lean.parser.syntax.combinators
namespace lisp
open lean.parser
open lean.parser.syntax.combinators
abbreviation read_m := parser syntax
abbreviation ws {m} [monad m] : parser_t m unit := whitespace
def parse_list (parse : read_m) : read_m :=
(node `list [sym "(", many parse, sym ")"] <|>
node `list [sym "[", many parse, sym "]"]) <?> "list"
def parse : → read_m
| 0 := unexpected "nesting limit exceeded"
| (n+1) := ident <|> num <|> parse_list (parse n)
def let_macro := {macro .
name := `let,
expand := some $ λ node,
match node.args with
| [syntax.node ⟨name.anonymous, clauses⟩, body] :=
do clauses ← clauses.mmap (λ cl, match cl with
| syntax.node ⟨name.anonymous, [v, e]⟩ := some (v, e)
| stx := none),
let ⟨vars, exprs⟩ := clauses.unzip,
syntax.node {macro := `let_core, args := [
syntax.node ⟨name.anonymous, exprs⟩,
syntax.node {macro := `bind, args := [
syntax.node {macro := name.anonymous, args := vars},
body
]}
]}
| _ := none}
def mk_list_stx (args : list syntax) : syntax :=
syntax.node ⟨`list, [
syntax.atom ⟨none, atomic_val.string "("⟩,
syntax.node ⟨name.anonymous, args⟩,
syntax.atom ⟨none, atomic_val.string ")"⟩]⟩
def mk_ident (n : name) : syntax :=
syntax.ident ⟨none, n, none, none⟩
def match_list_stx : syntax → option (list syntax)
| (syntax.node ⟨`list, [lp, syntax.node ⟨name.anonymous, args⟩, rp]⟩) := some args
| _ := none
/- From "Macros that Work Together" -/
def or_macro := {macro .
name := `or,
expand := some $ λ n,
-- '(or e1 e2)'
do [e1, e2] ← pure n.args | none,
-- '(let ([tmp e1]) (if tmp tmp e2))'
mk_list_stx [
mk_ident `let,
mk_list_stx [mk_list_stx [mk_ident `tmp, e1]],
mk_list_stx [mk_ident `if, mk_ident `tmp, mk_ident `tmp, e2]]}
/-- Recognizes and translates calls to built-in macros -/
def list_macro := {macro .
name := `list,
expand := some $ λ n,
match match_list_stx (syntax.node n) with
| some [syntax.ident {name := `lambda, ..}, vars, body] :=
do vars ← match_list_stx vars,
syntax.node ⟨`lambda, [syntax.node ⟨name.anonymous, vars⟩, body]⟩
| some (syntax.ident {name := `or, ..}::args) :=
syntax.node ⟨`or, args⟩
| some (syntax.ident {name := `if, ..}::args) :=
syntax.node ⟨`if, args⟩
| some [syntax.ident {name := `let, ..}, clauses, body] :=
do clauses ← match_list_stx clauses,
clauses ← clauses.mmap (λ cl, match match_list_stx cl with
| some cl := some $ syntax.node ⟨name.anonymous, cl⟩
| none := none),
syntax.node ⟨`let, [syntax.node ⟨name.anonymous, clauses⟩, body]⟩
| _ := none}
inductive value
| number (n : nat)
| list (l : list value)
| closure (env : list value) (code : syntax)
-- TODO(Sebastian): shouldn't be meta
meta def value.to_string : value → string
| (value.number n) := to_string n
| (value.list l) := (l.map (λ v, value.to_string v)).to_string
| (value.closure _ _) := "#<function>"
meta def eval : nat → syntax → list value → except string value
| _ (syntax.atom ⟨_, atomic_val.number n⟩) env := except.ok $ value.number n
| _ (syntax.ident {res := some {decl := sum.inl idx, ..}, ..}) env :=
(match env.nth idx with
| some v := except.ok v
| none := throw "aaah")
| _ (syntax.node ⟨`lambda_core, [syntax.node ⟨`bind, [_, body]⟩]⟩) env :=
except.ok $ value.closure env body
| (fuel+1) (syntax.node ⟨`let_core, [syntax.node ⟨_, [expr]⟩, syntax.node ⟨`bind, [_, body]⟩]⟩) env :=
do v ← eval fuel expr env,
eval fuel body (v::env)
| (fuel+1) (syntax.node ⟨`if, [cond, t, e]⟩) env :=
do cond ← eval fuel cond env,
eval fuel (match cond with
| value.number 0 := e
| value.list [] := e
| _ := t) env
| (fuel+1) (syntax.node ⟨`list, [lp, syntax.node ⟨_, f::args⟩, rp]⟩) env :=
do f ← eval fuel f env,
args.mfoldl (λ f arg,
do value.closure cenv code ← pure f |
throw $ "not a function: " ++ f.to_string,
arg ← eval fuel arg env,
eval fuel code (arg::cenv)) f
| _ stx env := throw $ "cannot evaluate " ++ to_string stx
def cfg := {macro1.cfg with macros :=
((macro1.cfg.macros.insert `list list_macro)
.insert `let let_macro)
.insert `or or_macro}
meta def go (s) : tactic unit :=
match ((parse 1000).parse_with_eoi s).run with
| except.error e := tactic.trace (e.to_string s)
| except.ok stx :=
tactic.trace (to_fmt "reader: " ++ to_fmt stx) >>
match (expand' stx).run' cfg () with
| except.error e := tactic.fail e
| except.ok stx :=
tactic.trace (to_fmt "expander: " ++ to_fmt stx) >>
match (resolve' stx).run' cfg () with
| except.error e := tactic.fail e
| except.ok stx :=
tactic.trace (to_fmt "resolver: " ++ to_fmt stx) >>
match eval 1000 stx [] with
| except.error e := tactic.fail e
| except.ok val :=
tactic.trace (to_fmt "evaluator: " ++ val.to_string)
#eval go "()"
#eval go "((lambda [x] x) 1)"
#eval go "((lambda [x y] x) 1 2)"
#eval go "(let ([x 1]) x)"
#eval go "(let ([tmp 5]) (or 0 tmp))"
end lisp

View file

@ -3,7 +3,7 @@ attribute [instance] lean.name.has_lt_quick
namespace macro1
open lean.parser
def sp : option span := none
def info : option source_info := none
def lambda_macro := {macro .
name := `lambda,
@ -25,7 +25,7 @@ def lambda_macro := {macro .
def intro_x_macro := {macro .
name := "intro_x",
expand := some $ λ node,
syntax.node ⟨`lambda, syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, "x", none, none⟩]⟩ :: node.args⟩}
syntax.node ⟨`lambda, syntax.node ⟨name.anonymous, [syntax.ident ⟨info, "x", none, none⟩]⟩ :: node.args⟩}
def macros : name → option macro
| `lambda := some lambda_macro
@ -42,27 +42,27 @@ match (expand' stx >>= resolve').run' cfg () with
| except.ok stx := tactic.trace stx
run_cmd test $ syntax.node ⟨`lambda, [
syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x, none, none⟩]⟩,
syntax.ident ⟨sp, `x, none, none⟩
syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x, none, none⟩]⟩,
syntax.ident ⟨info, `x, none, none⟩
]⟩
run_cmd test $ syntax.node ⟨`lambda, [
syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x, none, none⟩]⟩,
syntax.ident ⟨sp, `y, none, none⟩
syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x, none, none⟩]⟩,
syntax.ident ⟨info, `y, none, none⟩
]⟩
-- test macro shadowing
run_cmd test $ syntax.node ⟨`lambda, [
syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x, none, none⟩]⟩,
syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x, none, none⟩]⟩,
syntax.node ⟨`intro_x, [
syntax.ident ⟨sp, `x, none, none⟩
syntax.ident ⟨info, `x, none, none⟩
]⟩
]⟩
-- test field notation
run_cmd test $ syntax.node ⟨`lambda, [
syntax.node ⟨name.anonymous, [syntax.ident ⟨sp, `x.y, none, none⟩]⟩,
syntax.ident ⟨sp, `x.y.z, none, none⟩
syntax.node ⟨name.anonymous, [syntax.ident ⟨info, `x.y, none, none⟩]⟩,
syntax.ident ⟨info, `x.y.z, none, none⟩
]⟩
end macro1

View file

@ -9,4 +9,4 @@ result:
(import "import" [(import_path [] c)])])
error at line 10, column 9:
unexpected '`'
expected identifier
expected end of input