diff --git a/library/init/lean/parser/parser.lean b/library/init/lean/parser/parser.lean index 0e0e8836f1..937b2dec81 100644 --- a/library/init/lean/parser/parser.lean +++ b/library/init/lean/parser/parser.lean @@ -77,15 +77,6 @@ match p {pos := {fname := fname}, input := s.mk_iterator} with | error msg _ := except.error msg end -def merge (msg₁ msg₂ : message) : message := -{ expected := msg₁.expected ++ msg₂.expected, ..msg₁ } - -def merge_error (msg₁ msg₂ : message) : result α := -error (merge msg₁ msg₂) ff - -def merge_ok_epsilon (a : α) (s : state) (msg₁ msg₂ : message) := -ok_eps a s (merge msg₁ msg₂) - @[inline] def mk_eps_result (a : α) (s : state) : result α := ok_eps a s { pos := s.pos } @@ -95,6 +86,24 @@ protected def pure (a : α) : parser α := def eps : parser unit := parser.pure () +protected def failure : parser α := +λ s, error { unexpected := "failure", pos := s.pos } ff + +def merge (msg₁ msg₂ : message) : message := +{ expected := msg₁.expected ++ msg₂.expected, ..msg₁ } + +def merge' (msg₁ msg₂ : message) : message := +{ expected := msg₁.expected ++ msg₂.expected, ..msg₂ } + +def merge_error (msg₁ msg₂ : message) : result α := +error (merge msg₁ msg₂) ff + +def merge_error' (msg₁ msg₂ : message) : result α := +error (merge' msg₁ msg₂) ff + +def merge_ok_epsilon (a : α) (s : state) (msg₁ msg₂ : message) := +ok_eps a s (merge msg₁ msg₂) + /-- The `bind p q` combinator behaves as follows: 1- If `p` fails, then it fails. @@ -113,7 +122,7 @@ protected def bind (p : parser α) (q : α → parser β) : parser β := | ok_eps a s msg₁ := match q a s with | ok_eps b s msg₂ := merge_ok_epsilon b s msg₁ msg₂ - | error msg₂ ff := merge_error msg₂ msg₁ + | error msg₂ ff := merge_error' msg₁ msg₂ | other := other end | error msg c := error msg c @@ -189,8 +198,9 @@ protected def orelse (p q : parser α) : parser α := | other := other end -instance : has_orelse parser := -{ orelse := @parser.orelse } +instance : alternative parser := +{ orelse := @parser.orelse, + failure := @parser.failure } @[inline] def next_pos (c : char) (p : position) : position := if c = '\n' @@ -388,6 +398,20 @@ def sep_by1 (p : parser α) (sep : parser β) : parser (list α) := def sep_by (p : parser α) (sep : parser β) : parser (list α) := sep_by1 p sep <|> return [] +def fix_aux (f : parser α → parser α) : nat → parser α +| 0 := failure +| (n+1) := f (fix_aux n) + +def fix (f : parser α → parser α) : parser α := +do n ← remaining, fix_aux f (n+1) + +/- Parse `p` without consuming any input. -/ +def lookahead (p : parser α) : parser α := +λ s, match p s with + | ok a s' := ok_eps a s { pos := s.pos } + | other := other + end + def parse (p : parser α) (s : string) (fname := "") : except message α := run p s fname diff --git a/tests/lean/parser1.lean b/tests/lean/parser1.lean new file mode 100644 index 0000000000..704b7d2427 --- /dev/null +++ b/tests/lean/parser1.lean @@ -0,0 +1,119 @@ +import system.io init.lean.parser.parser +open lean.parser + +def test {α} [decidable_eq α] (p : parser α) (s : string) (e : α) : io unit := +match parse p s with +| except.ok a := if a = e then return () else io.print_ln "unexpected result" +| except.error e := io.print_ln (to_string e) +end + +def test_failure {α} (p : parser α) (s : string) : io unit := +match parse p s with +| except.ok a := io.print_ln "unexpected success" +| except.error e := return () +end + +#eval test (ch 'a') "a" 'a' +#eval test (str "foo" <|> str "bla" <|> str "boo") "bla" "bla" +#eval test ((str "foo" >> str "foo") <|> str "bla" <|> str "boo") "bla" "bla" +#eval test_failure ((str "foo" >> str "foo") <|> str "foo2" <|> str "boo") "foo2" +#eval test (try (str "foo" >> str "foo") <|> str "foo2" <|> str "boo") "foo2" "foo2" +#eval test num "1000" 1000 +#eval test (do n ← num, whitespace, m ← num, return (n, m)) "1000 200" (1000, 200) +#eval test (do n ← num, whitespace, m ← num, return (n, m)) "1000 200" (1000, 200) +#eval test (do n ← lexeme num, m ← num, return (n, m)) "1000 200" (1000, 200) +#eval test (whitespace >> prod.mk <$> (lexeme num) <*> (lexeme num)) " 1000 200 " (1000, 200) +#eval test (whitespace >> prod.mk <$> (lexeme num) <*> (lexeme num) <* eoi) " 1000 200 " (1000, 200) +#eval test_failure (whitespace >> prod.mk <$> (lexeme num) <*> num <* eoi) " 1000 200 " + +#eval test_failure ((ch 'a' >> ch 'b') <|> (ch 'a' >> ch 'c')) "ac" +#eval test ((lookahead (str "ab") >> ch 'a' >> ch 'b') <|> (ch 'a' >> ch 'c')) "ac" 'c' +#eval test (str "ab" >> eps <|> (ch 'a' >> ch 'c' >> eps)) "ac" () +#eval test (try (ch 'a' >> ch 'b') <|> (ch 'a' >> ch 'c')) "ac" 'c' + +def symbol (c : char) : parser char := +lexeme (ch c) repr c + +def paren {α} (p : parser α) : parser α := +symbol '(' >> lexeme p <* symbol ')' + +#eval test (paren num) "( 10 )" 10 +#eval test (paren num) "(12)" 12 +#eval test (paren num) "(0)" 0 +#eval test (paren num) "(0 )" 0 + +def var : parser string := +do c ← satisfy (λ a, a.is_alpha || a = '_'), + r ← lexeme $ take_while (λ a, a.is_digit || a.is_alpha || a = '_'), + return (c.to_string ++ r) + +#eval test var "abc" "abc" +#eval test var "_a_1bc" "_a_1bc" +#eval test (paren var) "(_a_1bc )" "_a_1bc" +#eval test_failure var "1_a_1bc" +#eval test_failure var "*_a_1bc" +#eval test var "abc$" "abc" + +inductive Expr +| Add : Expr → Expr → Expr +| Num : nat → Expr +| Var : string → Expr + +open Expr + +instance eq_expr : decidable_eq Expr +| (Var x) (Var y) := if h : x = y then is_true (h ▸ rfl) + else is_false (λ h', Expr.no_confusion h' (λ h', absurd h' h)) +| (Var x) (Num n) := is_false (λ h, Expr.no_confusion h) +| (Var x) (Add e₁ e₂) := is_false (λ h, Expr.no_confusion h) +| (Num n) (Num m) := if h : n = m then is_true (h ▸ rfl) + else is_false (λ h', Expr.no_confusion h' (λ h', absurd h' h)) +| (Num n) (Var y) := is_false (λ h, Expr.no_confusion h) +| (Num n) (Add e₁ e₂) := is_false (λ h, Expr.no_confusion h) +| (Add e₁ e₂) (Num n) := is_false (λ h, Expr.no_confusion h) +| (Add e₁ e₂) (Var y) := is_false (λ h, Expr.no_confusion h) +| (Add e₁ e₂) (Add e₃ e₄) := + match eq_expr e₁ e₃ with + | is_true h := match eq_expr e₂ e₄ with + | is_true h' := is_true (h ▸ h' ▸ rfl) + | is_false h' := is_false (λ he, Expr.no_confusion he (λ h₁ h₂, absurd h₂ h')) + end + | is_false h := is_false (λ he, Expr.no_confusion he (λ h₁ h₂, absurd h₁ h)) + end + +def parse_atom (p : parser Expr) : parser Expr := +(Var <$> lexeme var "variable") +<|> +(Num <$> lexeme num "numeral") +<|> +(paren p) + +def parse_add (p : parser Expr) : parser Expr := +do l ← parse_atom p, + (do symbol '+', r ← p, return $ Add l r) <|> return l + +def parse_expr : parser Expr := +whitespace >> fix (λ F, parse_add F) <* eoi + +#eval test parse_expr "10" (Num 10) +#eval test parse_expr "(20)" (Num 20) +#eval test parse_expr "a" (Var "a") +#eval test parse_expr "(20 + a)" (Add (Num 20) (Var "a")) +#eval test parse_expr " (20 + (a) + 2 ) " (Add (Num 20) (Add (Var "a") (Num 2))) + +/- Failures -/ +#print "Failure 1" +#eval test parse_expr "(20 +" (Num 0) +#print "---------" +#print "Failure 2" +#eval test parse_expr "" (Num 0) +#print "---------" + +namespace paper_ex +#print "Failure 3" +def digit : parser char := lean.parser.digit "digit" +def letter : parser char := lean.parser.alpha "letter" +def tst : parser char := (digit <|> return '0') >> letter +#eval test tst "*" 'a' +#print "---------" +end paper_ex diff --git a/tests/lean/parser1.lean.expected.out b/tests/lean/parser1.lean.expected.out new file mode 100644 index 0000000000..45b58e29dd --- /dev/null +++ b/tests/lean/parser1.lean.expected.out @@ -0,0 +1,15 @@ +Failure 1 +error at (line : 1, column: 5) +unexpected end of input +expected variable, numeral or '(' +--------- +Failure 2 +error at (line : 1, column: 0) +unexpected end of input +expected variable, numeral or '(' +--------- +Failure 3 +error at (line : 1, column: 0) +unexpected '*' +expected digit or letter +---------