feat: add xml parser.
in order to generate the LLVM extern declarations we want to use a generator that spits out XML. Hence adding a small XML parser.
This commit is contained in:
parent
b5d8bc1b8f
commit
0d41fd03f7
7 changed files with 733 additions and 112 deletions
|
|
@ -195,6 +195,9 @@ structure Iterator where
|
|||
|
||||
def mkIterator (s : String) : Iterator :=
|
||||
⟨s, 0⟩
|
||||
|
||||
instance : DecidableEq Iterator := fun ⟨s₁, i₁⟩ ⟨s₂, i₂⟩ =>
|
||||
if h₁ : i₁ = i₂ ∧ s₁ = s₂ then isTrue (by simp_all) else isFalse (λ h₂ => by simp_all)
|
||||
|
||||
namespace Iterator
|
||||
def toString : Iterator → String
|
||||
|
|
|
|||
|
|
@ -4,7 +4,9 @@ Released under Apache 2.0 license as described in the file LICENSE.
|
|||
Authors: Sebastian Ullrich
|
||||
-/
|
||||
import Lean.Data.Format
|
||||
import Lean.Data.Parsec
|
||||
import Lean.Data.Json
|
||||
import Lean.Data.Xml
|
||||
import Lean.Data.JsonRpc
|
||||
import Lean.Data.KVMap
|
||||
import Lean.Data.LBool
|
||||
|
|
|
|||
|
|
@ -5,104 +5,19 @@ Released under Apache 2.0 license as described in the file LICENSE.
|
|||
Authors: Gabriel Ebner, Marc Huisinga
|
||||
-/
|
||||
import Lean.Data.Json.Basic
|
||||
import Lean.Data.Parsec
|
||||
|
||||
namespace Lean
|
||||
|
||||
open Std (RBNode RBNode.singleton RBNode.leaf)
|
||||
|
||||
inductive Quickparse.Result (α : Type) where
|
||||
| success (pos : String.Iterator) (res : α) : Result α
|
||||
| error (pos : String.Iterator) (err : String) : Result α
|
||||
|
||||
def Quickparse (α : Type) : Type := String.Iterator → Lean.Quickparse.Result α
|
||||
|
||||
instance (α : Type) : Inhabited (Quickparse α) :=
|
||||
⟨fun it => Quickparse.Result.error it ""⟩
|
||||
|
||||
namespace Quickparse
|
||||
|
||||
open Result
|
||||
|
||||
partial def skipWs (it : String.Iterator) : String.Iterator :=
|
||||
if it.hasNext then
|
||||
let c := it.curr
|
||||
if c = '\u0009' ∨ c = '\u000a' ∨ c = '\u000d' ∨ c = '\u0020' then
|
||||
skipWs it.next
|
||||
else
|
||||
it
|
||||
else
|
||||
it
|
||||
|
||||
@[inline]
|
||||
protected def pure (a : α) : Quickparse α := fun it =>
|
||||
success it a
|
||||
|
||||
@[inline]
|
||||
protected def bind {α β : Type} (f : Quickparse α) (g : α → Quickparse β) : Quickparse β := fun it =>
|
||||
match f it with
|
||||
| success rem a => g a rem
|
||||
| error pos msg => error pos msg
|
||||
|
||||
@[inline]
|
||||
def fail (msg : String) : Quickparse α := fun it =>
|
||||
error it msg
|
||||
|
||||
@[inline]
|
||||
instance : Monad Quickparse :=
|
||||
{ pure := @Quickparse.pure, bind := @Quickparse.bind }
|
||||
|
||||
def unexpectedEndOfInput := "unexpected end of input"
|
||||
|
||||
@[inline]
|
||||
def peek? : Quickparse (Option Char) := fun it =>
|
||||
if it.hasNext then
|
||||
success it it.curr
|
||||
else
|
||||
success it none
|
||||
|
||||
@[inline]
|
||||
def peek! : Quickparse Char := do
|
||||
let some c ← peek? | fail unexpectedEndOfInput
|
||||
c
|
||||
|
||||
@[inline]
|
||||
def skip : Quickparse Unit := fun it =>
|
||||
success it.next ()
|
||||
|
||||
@[inline]
|
||||
def next : Quickparse Char := do
|
||||
let c ← peek!
|
||||
skip
|
||||
c
|
||||
|
||||
def expect (s : String) : Quickparse Unit := fun it =>
|
||||
if it.extract (it.forward s.length) = s then
|
||||
success (it.forward s.length) ()
|
||||
else
|
||||
error it s!"expected: {s}"
|
||||
|
||||
@[inline]
|
||||
def ws : Quickparse Unit := fun it =>
|
||||
success (skipWs it) ()
|
||||
|
||||
def expectedEndOfInput := "expected end of input"
|
||||
|
||||
@[inline]
|
||||
def eoi : Quickparse Unit := fun it =>
|
||||
if it.hasNext then
|
||||
error it expectedEndOfInput
|
||||
else
|
||||
success it ()
|
||||
|
||||
end Quickparse
|
||||
|
||||
namespace Json.Parser
|
||||
|
||||
open Quickparse
|
||||
open Lean.Parsec
|
||||
|
||||
@[inline]
|
||||
def hexChar : Quickparse Nat := do
|
||||
let c ← next
|
||||
def hexChar : Parsec Nat := do
|
||||
let c ← anyChar
|
||||
if '0' ≤ c ∧ c ≤ '9' then
|
||||
pure $ c.val.toNat - '0'.val.toNat
|
||||
else if 'a' ≤ c ∧ c ≤ 'f' then
|
||||
|
|
@ -112,8 +27,8 @@ def hexChar : Quickparse Nat := do
|
|||
else
|
||||
fail "invalid hex character"
|
||||
|
||||
def escapedChar : Quickparse Char := do
|
||||
let c ← next
|
||||
def escapedChar : Parsec Char := do
|
||||
let c ← anyChar
|
||||
match c with
|
||||
| '\\' => '\\'
|
||||
| '"' => '"'
|
||||
|
|
@ -128,13 +43,13 @@ def escapedChar : Quickparse Char := do
|
|||
Char.ofNat $ 4096*u1 + 256*u2 + 16*u3 + u4
|
||||
| _ => fail "illegal \\u escape"
|
||||
|
||||
partial def strCore (acc : String) : Quickparse String := do
|
||||
partial def strCore (acc : String) : Parsec String := do
|
||||
let c ← peek!
|
||||
if c = '"' then -- "
|
||||
skip
|
||||
acc
|
||||
else
|
||||
let c ← next
|
||||
let c ← anyChar
|
||||
let ec ←
|
||||
if c = '\\' then
|
||||
escapedChar
|
||||
|
|
@ -147,9 +62,9 @@ partial def strCore (acc : String) : Quickparse String := do
|
|||
fail "unexpected character in string"
|
||||
strCore (acc.push ec)
|
||||
|
||||
def str : Quickparse String := strCore ""
|
||||
def str : Parsec String := strCore ""
|
||||
|
||||
partial def natCore (acc digits : Nat) : Quickparse (Nat × Nat) := do
|
||||
partial def natCore (acc digits : Nat) : Parsec (Nat × Nat) := do
|
||||
let some c ← peek? | (acc, digits)
|
||||
if '0' ≤ c ∧ c ≤ '9' then
|
||||
skip
|
||||
|
|
@ -159,7 +74,7 @@ partial def natCore (acc digits : Nat) : Quickparse (Nat × Nat) := do
|
|||
(acc, digits)
|
||||
|
||||
@[inline]
|
||||
def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Quickparse Unit := do
|
||||
def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Parsec Unit := do
|
||||
let c ← peek!
|
||||
if p c then
|
||||
()
|
||||
|
|
@ -167,22 +82,22 @@ def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Quickparse
|
|||
fail $ "expected " ++ desc
|
||||
|
||||
@[inline]
|
||||
def natNonZero : Quickparse Nat := do
|
||||
def natNonZero : Parsec Nat := do
|
||||
lookahead (fun c => '1' ≤ c ∧ c ≤ '9') "1-9"
|
||||
let (n, _) ← natCore 0 0
|
||||
n
|
||||
|
||||
@[inline]
|
||||
def natNumDigits : Quickparse (Nat × Nat) := do
|
||||
def natNumDigits : Parsec (Nat × Nat) := do
|
||||
lookahead (fun c => '0' ≤ c ∧ c ≤ '9') "digit"
|
||||
natCore 0 0
|
||||
|
||||
@[inline]
|
||||
def natMaybeZero : Quickparse Nat := do
|
||||
def natMaybeZero : Parsec Nat := do
|
||||
let (n, _) ← natNumDigits
|
||||
n
|
||||
|
||||
def num : Quickparse JsonNumber := do
|
||||
def num : Parsec JsonNumber := do
|
||||
let c ← peek!
|
||||
let sign : Int ←
|
||||
if c = '-' then
|
||||
|
|
@ -224,10 +139,10 @@ def num : Quickparse JsonNumber := do
|
|||
else
|
||||
res
|
||||
|
||||
partial def arrayCore (anyCore : Unit → Quickparse Json) (acc : Array Json) : Quickparse (Array Json) := do
|
||||
partial def arrayCore (anyCore : Unit → Parsec Json) (acc : Array Json) : Parsec (Array Json) := do
|
||||
let hd ← anyCore ()
|
||||
let acc' := acc.push hd
|
||||
let c ← next
|
||||
let c ← anyChar
|
||||
if c = ']' then
|
||||
ws
|
||||
acc'
|
||||
|
|
@ -237,12 +152,12 @@ partial def arrayCore (anyCore : Unit → Quickparse Json) (acc : Array Json) :
|
|||
else
|
||||
fail "unexpected character in array"
|
||||
|
||||
partial def objectCore (anyCore : Unit → Quickparse Json) : Quickparse (RBNode String (fun _ => Json)) := do
|
||||
partial def objectCore (anyCore : Unit → Parsec Json) : Parsec (RBNode String (fun _ => Json)) := do
|
||||
lookahead (fun c => c = '"') "\""; skip; -- "
|
||||
let k ← strCore ""; ws
|
||||
lookahead (fun c => c = ':') ":"; skip; ws
|
||||
let v ← anyCore ()
|
||||
let c ← next
|
||||
let c ← anyChar
|
||||
if c = '}' then
|
||||
ws
|
||||
RBNode.singleton k v
|
||||
|
|
@ -255,7 +170,7 @@ partial def objectCore (anyCore : Unit → Quickparse Json) : Quickparse (RBNode
|
|||
|
||||
-- takes a unit parameter so that
|
||||
-- we can use the equation compiler and recursion
|
||||
partial def anyCore (u : Unit) : Quickparse Json := do
|
||||
partial def anyCore (u : Unit) : Parsec Json := do
|
||||
let c ← peek!
|
||||
if c = '[' then
|
||||
skip; ws
|
||||
|
|
@ -281,13 +196,13 @@ partial def anyCore (u : Unit) : Quickparse Json := do
|
|||
ws
|
||||
Json.str s
|
||||
else if c = 'f' then
|
||||
expect "false"; ws
|
||||
skipString "false"; ws
|
||||
Json.bool false
|
||||
else if c = 't' then
|
||||
expect "true"; ws
|
||||
skipString "true"; ws
|
||||
Json.bool true
|
||||
else if c = 'n' then
|
||||
expect "null"; ws
|
||||
skipString "null"; ws
|
||||
Json.null
|
||||
else if c = '-' ∨ ('0' ≤ c ∧ c ≤ '9') then
|
||||
let n ← num
|
||||
|
|
@ -297,10 +212,10 @@ partial def anyCore (u : Unit) : Quickparse Json := do
|
|||
fail "unexpected input"
|
||||
|
||||
|
||||
def any : Quickparse Json := do
|
||||
def any : Parsec Json := do
|
||||
ws
|
||||
let res ← anyCore ()
|
||||
eoi
|
||||
eof
|
||||
res
|
||||
|
||||
end Json.Parser
|
||||
|
|
@ -309,8 +224,8 @@ namespace Json
|
|||
|
||||
def parse (s : String) : Except String Lean.Json :=
|
||||
match Json.Parser.any s.mkIterator with
|
||||
| Quickparse.Result.success _ res => Except.ok res
|
||||
| Quickparse.Result.error it err => Except.error s!"offset {it.i.repr}: {err}"
|
||||
| Parsec.ParseResult.success _ res => Except.ok res
|
||||
| Parsec.ParseResult.error it err => Except.error s!"offset {it.i.repr}: {err}"
|
||||
|
||||
end Json
|
||||
|
||||
|
|
|
|||
170
src/Lean/Data/Parsec.lean
Normal file
170
src/Lean/Data/Parsec.lean
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
|
||||
/-
|
||||
Copyright (c) 2021 Microsoft Corporation. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Author: Dany Fabian
|
||||
-/
|
||||
|
||||
namespace Lean
|
||||
|
||||
namespace Parsec
|
||||
inductive ParseResult (α : Type) where
|
||||
| success (pos : String.Iterator) (res : α)
|
||||
| error (pos : String.Iterator) (err : String)
|
||||
deriving Repr
|
||||
end Parsec
|
||||
|
||||
def Parsec (α : Type) : Type := String.Iterator → Lean.Parsec.ParseResult α
|
||||
|
||||
namespace Parsec
|
||||
|
||||
open ParseResult
|
||||
|
||||
instance (α : Type) : Inhabited (Parsec α) :=
|
||||
⟨λ it => error it ""⟩
|
||||
|
||||
@[inline]
|
||||
protected def pure (a : α) : Parsec α := λ it =>
|
||||
success it a
|
||||
|
||||
@[inline]
|
||||
def bind {α β : Type} (f : Parsec α) (g : α → Parsec β) : Parsec β := λ it =>
|
||||
match f it with
|
||||
| success rem a => g a rem
|
||||
| error pos msg => error pos msg
|
||||
|
||||
instance : Monad Parsec :=
|
||||
{ pure := Parsec.pure, bind }
|
||||
|
||||
@[inline]
|
||||
def fail (msg : String) : Parsec α := fun it =>
|
||||
error it msg
|
||||
|
||||
@[inline]
|
||||
def orElse (p q : Parsec α) : Parsec α := fun it =>
|
||||
match p it with
|
||||
| success rem a => success rem a
|
||||
| error rem err =>
|
||||
if it = rem then q it else error rem err
|
||||
|
||||
@[inline]
|
||||
def attempt (p : Parsec α) : Parsec α := λ it =>
|
||||
match p it with
|
||||
| success rem res => success rem res
|
||||
| error _ err => error it err
|
||||
|
||||
instance : Alternative Parsec :=
|
||||
{ failure := fail "", orElse }
|
||||
|
||||
def expectedEndOfInput := "expected end of input"
|
||||
|
||||
@[inline]
|
||||
def eof : Parsec Unit := fun it =>
|
||||
if it.hasNext then
|
||||
error it expectedEndOfInput
|
||||
else
|
||||
success it ()
|
||||
|
||||
@[inline]
|
||||
partial def manyCore (p : Parsec α) (acc : Array α) : Parsec $ Array α :=
|
||||
(do manyCore p (acc.push $ ←p))
|
||||
<|> pure acc
|
||||
|
||||
@[inline]
|
||||
def many (p : Parsec α) : Parsec $ Array α := manyCore p #[]
|
||||
|
||||
@[inline]
|
||||
def many1 (p : Parsec α) : Parsec $ Array α := do manyCore p #[←p]
|
||||
|
||||
@[inline]
|
||||
partial def manyCharsCore (p : Parsec Char) (acc : String) : Parsec String :=
|
||||
(do manyCharsCore p (acc.push $ ←p))
|
||||
<|> pure acc
|
||||
|
||||
@[inline]
|
||||
def manyChars (p : Parsec Char) : Parsec String := manyCharsCore p ""
|
||||
|
||||
@[inline]
|
||||
def many1Chars (p : Parsec Char) : Parsec String := do manyCharsCore p (←p).toString
|
||||
|
||||
def pstring (s : String) : Parsec String := λ it =>
|
||||
let substr := it.extract (it.forward s.length)
|
||||
if substr = s then
|
||||
success (it.forward s.length) substr
|
||||
else
|
||||
error it s!"expected: {s}"
|
||||
|
||||
@[inline]
|
||||
def skipString (s : String) : Parsec Unit := pstring s *> pure ()
|
||||
|
||||
def unexpectedEndOfInput := "unexpected end of input"
|
||||
|
||||
@[inline]
|
||||
def anyChar : Parsec Char := λ it =>
|
||||
if it.hasNext then success it.next it.curr else error it unexpectedEndOfInput
|
||||
|
||||
@[inline]
|
||||
def pchar (c : Char) : Parsec Char := attempt do
|
||||
if (←anyChar) = c then pure c else fail s!"expected: '{c}'"
|
||||
|
||||
@[inline]
|
||||
def skipChar (c : Char) : Parsec Unit := pchar c *> pure ()
|
||||
|
||||
@[inline]
|
||||
def digit : Parsec Char := attempt do
|
||||
let c ← anyChar
|
||||
if '0' ≤ c ∧ c ≤ '9' then c else fail s!"digit expected"
|
||||
|
||||
@[inline]
|
||||
def hexDigit : Parsec Char := attempt do
|
||||
let c ← anyChar
|
||||
if ('0' ≤ c ∧ c ≤ '9')
|
||||
∨ ('a' ≤ c ∧ c ≤ 'a')
|
||||
∨ ('A' ≤ c ∧ c ≤ 'A') then c else fail s!"hex digit expected"
|
||||
|
||||
@[inline]
|
||||
def asciiLetter : Parsec Char := attempt do
|
||||
let c ← anyChar
|
||||
if ('A' ≤ c ∧ c ≤ 'Z') ∨ ('a' ≤ c ∧ c ≤ 'z') then c else fail s!"ASCII letter expected"
|
||||
|
||||
@[inline]
|
||||
def satisfy (p : Char → Bool) : Parsec Char := attempt do
|
||||
let c ← anyChar
|
||||
if p c then c else fail "condition not satisfied"
|
||||
|
||||
@[inline]
|
||||
def notFollowedBy (p : Parsec α) : Parsec Unit := λ it =>
|
||||
match p it with
|
||||
| success _ _ => error it ""
|
||||
| error _ _ => success it ()
|
||||
|
||||
partial def skipWs (it : String.Iterator) : String.Iterator :=
|
||||
if it.hasNext then
|
||||
let c := it.curr
|
||||
if c = '\u0009' ∨ c = '\u000a' ∨ c = '\u000d' ∨ c = '\u0020' then
|
||||
skipWs it.next
|
||||
else
|
||||
it
|
||||
else
|
||||
it
|
||||
|
||||
@[inline]
|
||||
def peek? : Parsec (Option Char) := fun it =>
|
||||
if it.hasNext then
|
||||
success it it.curr
|
||||
else
|
||||
success it none
|
||||
|
||||
@[inline]
|
||||
def peek! : Parsec Char := do
|
||||
let some c ← peek? | fail unexpectedEndOfInput
|
||||
c
|
||||
|
||||
@[inline]
|
||||
def skip : Parsec Unit := fun it =>
|
||||
success it.next ()
|
||||
|
||||
@[inline]
|
||||
def ws : Parsec Unit := fun it =>
|
||||
success (skipWs it) ()
|
||||
end Parsec
|
||||
2
src/Lean/Data/Xml.lean
Normal file
2
src/Lean/Data/Xml.lean
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
import Lean.Data.Xml.Basic
|
||||
import Lean.Data.Xml.Parser
|
||||
41
src/Lean/Data/Xml/Basic.lean
Normal file
41
src/Lean/Data/Xml/Basic.lean
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
/-
|
||||
Copyright (c) 2021 Microsoft Corporation. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Author: Dany Fabian
|
||||
-/
|
||||
|
||||
import Std.Data.RBMap
|
||||
namespace Lean
|
||||
namespace Xml
|
||||
|
||||
def Attributes := Std.RBMap String String compare
|
||||
instance : ToString Attributes := ⟨λ as => as.fold (λ s n v => s ++ s!" {n}=\"{v}\"") ""⟩
|
||||
|
||||
mutual
|
||||
inductive Element
|
||||
| Element
|
||||
(name : String)
|
||||
(attributes : Attributes)
|
||||
(content : Array Content)
|
||||
|
||||
inductive Content
|
||||
| Element (element : Element)
|
||||
| Comment (comment : String)
|
||||
| Character (content : String)
|
||||
deriving Inhabited
|
||||
end
|
||||
|
||||
mutual
|
||||
private partial def eToString : Element → String
|
||||
| Element.Element n a c => s!"<{n}{a}>{c.map cToString |>.foldl (· ++ ·) ""}</{n}>"
|
||||
|
||||
private partial def cToString : Content → String
|
||||
| Content.Element e => eToString e
|
||||
| Content.Comment c => s!"<!--{c}-->"
|
||||
| Content.Character c => c
|
||||
|
||||
end
|
||||
instance : ToString Element := ⟨eToString⟩
|
||||
instance : ToString Content := ⟨cToString⟩
|
||||
|
||||
488
src/Lean/Data/Xml/Parser.lean
Normal file
488
src/Lean/Data/Xml/Parser.lean
Normal file
|
|
@ -0,0 +1,488 @@
|
|||
/-
|
||||
Copyright (c) 2021 Microsoft Corporation. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Author: Dany Fabian
|
||||
-/
|
||||
|
||||
import Lean.Data.Parsec
|
||||
import Lean.Data.Xml.Basic
|
||||
open IO
|
||||
open System
|
||||
open Lean
|
||||
|
||||
namespace Lean
|
||||
namespace Xml
|
||||
|
||||
namespace Parser
|
||||
open Lean.Parsec
|
||||
open Parsec.ParseResult
|
||||
|
||||
abbrev LeanChar := Char
|
||||
|
||||
/-- consume a newline character sequence pretending, that we read '\n'. As per spec:
|
||||
https://www.w3.org/TR/xml/#sec-line-ends -/
|
||||
def endl : Parsec LeanChar := (skipString "\r\n" <|> skipChar '\r' <|> skipChar '\n') *> pure '\n'
|
||||
|
||||
def quote (p : Parsec α) : Parsec α :=
|
||||
skipChar '\'' *> p <* skipChar '\''
|
||||
<|> skipChar '"' *> p <* skipChar '"'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Char -/
|
||||
def Char : Parsec LeanChar :=
|
||||
(attempt do
|
||||
let c ← anyChar
|
||||
let cNat ← c.toNat
|
||||
if (0x20 ≤ cNat ∧ cNat ≤ 0xD7FF)
|
||||
∨ (0xE000 ≤ cNat ∧ cNat ≤ 0xFFFD)
|
||||
∨ (0x10000 ≤ cNat ∧ cNat ≤ 0x10FFFF) then c else fail "expected xml char")
|
||||
<|> pchar '\t' <|> endl
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-S -/
|
||||
def S : Parsec String :=
|
||||
many1Chars (pchar ' ' <|> endl <|> pchar '\t')
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Eq -/
|
||||
def Eq : Parsec Unit :=
|
||||
optional S *> skipChar '=' <* optional S
|
||||
|
||||
private def nameStartCharRanges : Array (Nat × Nat) :=
|
||||
#[(0xC0, 0xD6),
|
||||
(0xD8, 0xF6),
|
||||
(0xF8, 0x2FF),
|
||||
(0x370, 0x37D),
|
||||
(0x37F, 0x1FFF),
|
||||
(0x200C, 0x200D),
|
||||
(0x2070, 0x218F),
|
||||
(0x2C00, 0x2FEF),
|
||||
(0x3001, 0xD7FF),
|
||||
(0xF900, 0xFDCF),
|
||||
(0xFDF0, 0xFFFD),
|
||||
(0x10000, 0xEFFFF)]
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-NameStartChar -/
|
||||
def NameStartChar : Parsec LeanChar := attempt do
|
||||
let c ← anyChar
|
||||
if ('A' ≤ c ∧ c ≤ 'Z') ∨ ('a' ≤ c ∧ c ≤ 'z') then c
|
||||
else if c = ':' ∨ c = '_' then c
|
||||
else
|
||||
let cNum := c.toNat
|
||||
if nameStartCharRanges.any (fun (lo, hi) => lo ≤ cNum ∧ cNum ≤ hi) then c
|
||||
else fail "expected a name character"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-NameChar -/
|
||||
def NameChar : Parsec LeanChar :=
|
||||
NameStartChar <|> digit <|> pchar '-' <|> pchar '.' <|> pchar '\xB7'
|
||||
<|> satisfy (λ c => ('\u0300' ≤ c ∧ c ≤ '\u036F') ∨ ('\u203F' ≤ c ∧ c ≤ '\u2040'))
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Name -/
|
||||
def Name : Parsec String := do
|
||||
let x ← NameStartChar
|
||||
manyCharsCore NameChar x.toString
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-VersionNum -/
|
||||
def VersionNum : Parsec Unit :=
|
||||
skipString "1." <* (many1 digit)
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-VersionInfo -/
|
||||
def VersionInfo : Parsec Unit := do
|
||||
S *>
|
||||
skipString "version"
|
||||
Eq
|
||||
quote VersionNum
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EncName -/
|
||||
def EncName : Parsec String := do
|
||||
let x ← asciiLetter
|
||||
manyCharsCore (asciiLetter <|> digit <|> pchar '-' <|> pchar '_' <|> pchar '.') x.toString
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EncodingDecl -/
|
||||
def EncodingDecl : Parsec String := do
|
||||
S *>
|
||||
skipString "encoding"
|
||||
Eq
|
||||
quote EncName
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-SDDecl -/
|
||||
def SDDecl : Parsec String := do
|
||||
S *> skipString "standalone" *> Eq *> quote (pstring "yes" <|> pstring "no")
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-XMLDecl -/
|
||||
def XMLdecl : Parsec Unit := do
|
||||
skipString "<?xml"
|
||||
VersionInfo
|
||||
optional EncodingDecl *>
|
||||
optional SDDecl *>
|
||||
optional S *>
|
||||
skipString "?>"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Comment -/
|
||||
def Comment : Parsec String :=
|
||||
let notDash := Char.toString <$> satisfy (λ c => c ≠ '-')
|
||||
skipString "<!--" *>
|
||||
Array.foldl String.append "" <$> many (notDash <|> (do
|
||||
let d ← pchar '-'
|
||||
let c ← notDash
|
||||
pure $ d.toString ++ c))
|
||||
<* skipString "-->"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PITarget -/
|
||||
def PITarget : Parsec String :=
|
||||
Name <* (skipChar 'X' <|> skipChar 'x') <* (skipChar 'M' <|> skipChar 'm') <* (skipChar 'L' <|> skipChar 'l')
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PI -/
|
||||
def PI : Parsec Unit := do
|
||||
skipString "<?"
|
||||
<* PITarget <*
|
||||
optional (S *> manyChars (notFollowedBy (skipString "?>") *> Char))
|
||||
skipString "?>"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Misc -/
|
||||
def Misc : Parsec Unit :=
|
||||
Comment *> pure () <|> PI <|> S *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-SystemLiteral -/
|
||||
def SystemLiteral : Parsec String :=
|
||||
pchar '"' *> manyChars (satisfy λ c => c ≠ '"') <* pchar '"'
|
||||
<|> pchar '\'' *> manyChars (satisfy λ c => c ≠ '\'') <* '\''
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PubidChar -/
|
||||
def PubidChar : Parsec LeanChar :=
|
||||
asciiLetter <|> digit <|> endl <|> attempt do
|
||||
let c ← anyChar
|
||||
if "-'()+,./:=?;!*#@$_%".contains c then c else fail "PublidChar expected"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PubidLiteral -/
|
||||
def PubidLiteral : Parsec String :=
|
||||
pchar '"' *> manyChars PubidChar <* pchar '"'
|
||||
<|> pchar '\'' *> manyChars (attempt do
|
||||
let c ← PubidChar
|
||||
if c = '\'' then fail "'\\'' not expected" else c) <* pchar '\''
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-ExternalID -/
|
||||
def ExternalID : Parsec Unit :=
|
||||
skipString "SYSTEM" *> S *> SystemLiteral *> pure ()
|
||||
<|> skipString "PUBLIC" *> S *> PubidLiteral *> S *> SystemLiteral *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Mixed -/
|
||||
def Mixed : Parsec Unit :=
|
||||
(do
|
||||
skipChar '('
|
||||
optional S *>
|
||||
skipString "#PCDATA" *>
|
||||
many (optional S *> skipChar '|' *> optional S *> Name) *>
|
||||
optional S *>
|
||||
skipString ")*")
|
||||
<|> skipChar '(' *> (optional S) *> skipString "#PCDATA" <* (optional S) <* skipChar ')'
|
||||
|
||||
mutual
|
||||
/-- https://www.w3.org/TR/xml/#NT-cp -/
|
||||
partial def cp : Parsec Unit :=
|
||||
(Name *> pure () <|> choice <|> seq) <* optional (skipChar '?' <|> skipChar '*' <|> skipChar '+')
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-choice -/
|
||||
partial def choice : Parsec Unit := do
|
||||
skipChar '('
|
||||
optional S *>
|
||||
cp
|
||||
many1 (optional S *> skipChar '|' *> optional S *> cp) *>
|
||||
optional S *>
|
||||
skipChar ')'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-seq -/
|
||||
partial def seq : Parsec Unit := do
|
||||
skipChar '('
|
||||
optional S *>
|
||||
cp
|
||||
many (optional S *> skipChar ',' *> optional S *> cp) *>
|
||||
optional S *>
|
||||
skipChar ')'
|
||||
end
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-children -/
|
||||
def children : Parsec Unit :=
|
||||
(choice <|> seq) <* optional (skipChar '?' <|> skipChar '*' <|> skipChar '+')
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-contentspec -/
|
||||
def contentspec : Parsec Unit := do
|
||||
skipString "EMPTY" <|> skipString "ANY" <|> Mixed <|> children
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-elementdecl -/
|
||||
def elementDecl : Parsec Unit := do
|
||||
skipString "<!ELEMENT"
|
||||
S *>
|
||||
Name *>
|
||||
contentspec *>
|
||||
optional S *>
|
||||
skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-StringType -/
|
||||
def StringType : Parsec Unit :=
|
||||
skipString "CDATA"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-TokenizedType -/
|
||||
def TokenizedType : Parsec Unit :=
|
||||
skipString "ID"
|
||||
<|> skipString "IDREF"
|
||||
<|> skipString "IDREFS"
|
||||
<|> skipString "ENTITY"
|
||||
<|> skipString "ENTITIES"
|
||||
<|> skipString "NMTOKEN"
|
||||
<|> skipString "NMTOKENS"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-NotationType -/
|
||||
def NotationType : Parsec Unit := do
|
||||
skipString "NOTATION"
|
||||
S *>
|
||||
skipChar '(' <*
|
||||
optional S
|
||||
Name *> many (optional S *> skipChar '|' *> optional S *> Name) *>
|
||||
optional S *>
|
||||
skipChar ')'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Nmtoken -/
|
||||
def Nmtoken : Parsec String := do
|
||||
many1Chars NameChar
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Enumeration -/
|
||||
def Enumeration : Parsec Unit := do
|
||||
skipChar '('
|
||||
optional S *>
|
||||
Nmtoken *> many (optional S *> skipChar '|' *> optional S *> Nmtoken) *>
|
||||
optional S *>
|
||||
skipChar ')'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EnumeratedType -/
|
||||
def EnumeratedType : Parsec Unit :=
|
||||
NotationType <|> Enumeration
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-AttType -/
|
||||
def AttType : Parsec Unit :=
|
||||
StringType <|> TokenizedType <|> EnumeratedType
|
||||
|
||||
def predefinedEntityToChar : String → Option LeanChar
|
||||
| "lt" => some '<'
|
||||
| "gt" => some '>'
|
||||
| "amp" => some '&'
|
||||
| "apos" => some '\''
|
||||
| "quot" => some '"'
|
||||
| _ => none
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EntityRef -/
|
||||
def EntityRef : Parsec $ Option LeanChar := attempt $
|
||||
skipChar '&' *> predefinedEntityToChar <$> Name <* skipChar ';'
|
||||
|
||||
@[inline]
|
||||
def hexDigitToNat (c : LeanChar) : Nat :=
|
||||
if '0' ≤ c ∧ c ≤ '9' then c.toNat - '0'.toNat
|
||||
else if 'a' ≤ c ∧ c ≤ 'f' then c.toNat - 'a'.toNat + 10
|
||||
else c.toNat - 'A'.toNat + 10
|
||||
|
||||
def digitsToNat (base : Nat) (digits : Array Nat) : Nat :=
|
||||
digits.foldl (λ r d => r * base + d) 0
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CharRef -/
|
||||
def CharRef : Parsec LeanChar := do
|
||||
skipString "&#"
|
||||
let charCode ←
|
||||
digitsToNat 10 <$> many1 (hexDigitToNat <$> digit)
|
||||
<|> skipChar 'x' *> digitsToNat 16 <$> many1 (hexDigitToNat <$> hexDigit)
|
||||
skipChar ';'
|
||||
Char.ofNat charCode
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Reference -/
|
||||
def Reference : Parsec $ Option LeanChar :=
|
||||
EntityRef <|> some <$> CharRef
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-AttValue -/
|
||||
def AttValue : Parsec String := do
|
||||
let chars ←
|
||||
(do
|
||||
skipChar '"'
|
||||
many (some <$> satisfy (λ c => c ≠ '<' ∧ c ≠ '&' ∧ c ≠ '"') <|> Reference) <*
|
||||
skipChar '"')
|
||||
<|> (do
|
||||
skipChar '\''
|
||||
many (some <$> satisfy (λ c => c ≠ '<' ∧ c ≠ '&' ∧ c ≠ '\'') <|> Reference) <*
|
||||
skipChar '\'')
|
||||
return chars.foldl (λ s c => if let some c := c then s.push c else s) ""
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-DefaultDecl -/
|
||||
def DefaultDecl : Parsec Unit :=
|
||||
skipString "#REQUIRED"
|
||||
<|> skipString "#IMPLIED"
|
||||
<|> optional (skipString "#FIXED" <* S) *> AttValue *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-AttDef -/
|
||||
def AttDef : Parsec Unit :=
|
||||
S *> Name *> S *> AttType *> S *> DefaultDecl
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-AttlistDecl -/
|
||||
def AttlistDecl : Parsec Unit :=
|
||||
skipString "<!ATTLIST" *> S *> Name *> many AttDef *> optional S *> skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PEReference -/
|
||||
def PEReference : Parsec Unit :=
|
||||
skipChar '%' *> Name *> skipChar ';'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EntityValue -/
|
||||
def EntityValue : Parsec String := do
|
||||
let chars ←
|
||||
(do
|
||||
skipChar '"'
|
||||
many (some <$> satisfy (λ c => c ≠ '%' ∧ c ≠ '&' ∧ c ≠ '"') <|> PEReference *> pure none <|> Reference) <*
|
||||
skipChar '"')
|
||||
<|> (do
|
||||
skipChar '\''
|
||||
many (some <$> satisfy (λ c => c ≠ '%' ∧ c ≠ '&' ∧ c ≠ '\'') <|> PEReference *> pure none <|> Reference) <*
|
||||
skipChar '\'')
|
||||
return chars.foldl (λ s c => if let some c := c then s.push c else s) ""
|
||||
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-NDataDecl -/
|
||||
def NDataDecl : Parsec Unit :=
|
||||
S *> skipString "NDATA" <* S <* Name
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EntityDef -/
|
||||
def EntityDef : Parsec Unit :=
|
||||
EntityValue *> pure () <|> (ExternalID <* optional NDataDecl)
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-GEDecl -/
|
||||
def GEDecl : Parsec Unit :=
|
||||
skipString "<!ENTITY" *> S *> Name *> S *> EntityDef *> optional S *> skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PEDef -/
|
||||
def PEDef : Parsec Unit :=
|
||||
EntityValue *> pure () <|> ExternalID
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PEDecl -/
|
||||
def PEDecl : Parsec Unit :=
|
||||
skipString "<!ENTITY" *> S *> skipChar '%' *> S *> Name *> PEDef *> optional S *> skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EntityDecl -/
|
||||
def EntityDecl : Parsec Unit :=
|
||||
GEDecl <|> PEDecl
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PublicID -/
|
||||
def PublicID : Parsec Unit :=
|
||||
skipString "PUBLIC" <* S <* PubidLiteral
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-NotationDecl -/
|
||||
def NotationDecl : Parsec Unit :=
|
||||
skipString "<!NOTATION" *> S *> Name *> (ExternalID <|> PublicID) *> optional S *> skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-markupdecl -/
|
||||
def markupDecl : Parsec Unit :=
|
||||
elementDecl <|> AttlistDecl <|> EntityDecl <|> NotationDecl <|> PI <|> (Comment *> pure ())
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-DeclSep -/
|
||||
def DeclSep : Parsec Unit :=
|
||||
PEReference <|> S *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-intSubset -/
|
||||
def intSubset : Parsec Unit :=
|
||||
many (markupDecl <|> DeclSep) *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-doctypedecl -/
|
||||
def doctypedecl : Parsec Unit := do
|
||||
skipString "<!DOCTYPE"
|
||||
S *>
|
||||
Name *>
|
||||
optional (S *> ExternalID) *> pure ()
|
||||
<* optional S
|
||||
optional (skipChar '[' *> intSubset <* skipChar ']' <* optional S) *>
|
||||
skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-prolog -/
|
||||
def prolog : Parsec Unit :=
|
||||
optional XMLdecl *>
|
||||
many Misc *>
|
||||
optional (doctypedecl <* many Misc) *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Attribute -/
|
||||
def Attribute : Parsec (String × String) := do
|
||||
let name ← Name
|
||||
Eq
|
||||
let value ← AttValue
|
||||
(name, value)
|
||||
|
||||
protected def elementPrefix : Parsec (Array Content → Element) := do
|
||||
skipChar '<'
|
||||
let name ← Name
|
||||
let attributes ← many (S *> Attribute)
|
||||
optional S *> pure ()
|
||||
Element.Element name (Std.RBMap.fromList attributes.toList compare)
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EmptyElemTag -/
|
||||
def EmptyElemTag (elem : Array Content → Element) : Parsec Element := do
|
||||
skipString "/>" *> pure (elem #[])
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-STag -/
|
||||
def STag (elem : Array Content → Element) : Parsec (Array Content → Element) := do
|
||||
skipChar '>' *> pure elem
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-ETag -/
|
||||
def ETag : Parsec Unit :=
|
||||
skipString "</" *> Name *> optional S *> skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CDStart -/
|
||||
def CDStart : Parsec Unit :=
|
||||
skipString "<![CDATA["
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CDEnd -/
|
||||
def CDEnd : Parsec Unit :=
|
||||
skipString "]]>"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CData -/
|
||||
def CData : Parsec String :=
|
||||
manyChars (notFollowedBy (skipString "]]>") *> anyChar)
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CDSect -/
|
||||
def CDSect : Parsec String :=
|
||||
CDStart *> CData <* CDEnd
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CharData -/
|
||||
def CharData : Parsec String :=
|
||||
notFollowedBy (skipString "]]>") *> manyChars (satisfy λ c => c ≠ '<' ∧ c ≠ '&')
|
||||
|
||||
mutual
|
||||
/-- https://www.w3.org/TR/xml/#NT-content -/
|
||||
partial def content : Parsec (Array Content) := do
|
||||
let x ← optional (Content.Character <$> CharData)
|
||||
let xs ← many do
|
||||
let y ←
|
||||
attempt (some <$> Content.Element <$> element)
|
||||
<|> (do let c ← Reference; c.map (Content.Character ∘ Char.toString))
|
||||
<|> some <$> Content.Character <$> CDSect
|
||||
<|> PI *> none
|
||||
<|> some <$> Content.Comment <$> Comment
|
||||
|
||||
let z ← optional (Content.Character <$> CharData)
|
||||
#[y, z]
|
||||
let xs := #[x] ++ xs.concatMap id |>.filterMap id
|
||||
let mut res := #[]
|
||||
for x in xs do
|
||||
if res.size > 0 then
|
||||
match res.back, x with
|
||||
| Content.Character x, Content.Character y => res ← res.set! (res.size - 1) (Content.Character $ x ++ y)
|
||||
| _, x => res ← res.push x
|
||||
else res ← res.push x
|
||||
res
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-element -/
|
||||
partial def element : Parsec Element := do
|
||||
let elem ← Parser.elementPrefix
|
||||
EmptyElemTag elem <|> STag elem <*> content <* ETag
|
||||
|
||||
end
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-document -/
|
||||
def document : Parsec Element := prolog *> element <* many Misc <* eof
|
||||
|
||||
end Parser
|
||||
|
||||
def parse (s : String) : Except String Element :=
|
||||
match Xml.Parser.document s.mkIterator with
|
||||
| Parsec.ParseResult.success _ res => Except.ok res
|
||||
| Parsec.ParseResult.error it err => Except.error s!"offset {it.i.repr}: {err}\n{(it.prevn 10).extract it}"
|
||||
|
||||
end Xml
|
||||
Loading…
Add table
Reference in a new issue