From 1ed3ce20056c56a91d035b4a9fac7f9b49b40dbf Mon Sep 17 00:00:00 2001 From: Sebastian Ullrich Date: Wed, 4 Feb 2026 10:54:25 +0100 Subject: [PATCH] chore: remove unused `Lean.Data.Xml` (#12302) NB: This module is used in doc-gen for, uh, HTML parsing but should be moved there or into a separate package for that purpose --- src/Lean/Data.lean | 1 - src/Lean/Data/Xml.lean | 10 - src/Lean/Data/Xml/Basic.lean | 45 --- src/Lean/Data/Xml/Parser.lean | 492 ------------------------ tests/lean/run/PPTopDownAnalyze.lean | 1 - tests/lean/xmlParsing.lean | 10 - tests/lean/xmlParsing.lean.expected.out | 2 - 7 files changed, 561 deletions(-) delete mode 100644 src/Lean/Data/Xml.lean delete mode 100644 src/Lean/Data/Xml/Basic.lean delete mode 100644 src/Lean/Data/Xml/Parser.lean delete mode 100644 tests/lean/xmlParsing.lean delete mode 100644 tests/lean/xmlParsing.lean.expected.out diff --git a/src/Lean/Data.lean b/src/Lean/Data.lean index 543f9e699c..9bd413c38f 100644 --- a/src/Lean/Data.lean +++ b/src/Lean/Data.lean @@ -25,7 +25,6 @@ public import Lean.Data.Position public import Lean.Data.PrefixTree public import Lean.Data.SMap public import Lean.Data.Trie -public import Lean.Data.Xml public import Lean.Data.NameTrie public import Lean.Data.RBTree public import Lean.Data.RBMap diff --git a/src/Lean/Data/Xml.lean b/src/Lean/Data/Xml.lean deleted file mode 100644 index e0d1808bba..0000000000 --- a/src/Lean/Data/Xml.lean +++ /dev/null @@ -1,10 +0,0 @@ -/- -Copyright (c) 2021 Daniel Fabian. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. -Authors: Daniel Fabian --/ -module - -prelude -public import Lean.Data.Xml.Basic -public import Lean.Data.Xml.Parser diff --git a/src/Lean/Data/Xml/Basic.lean b/src/Lean/Data/Xml/Basic.lean deleted file mode 100644 index e00965eddc..0000000000 --- a/src/Lean/Data/Xml/Basic.lean +++ /dev/null @@ -1,45 +0,0 @@ -/- -Copyright (c) 2021 Microsoft Corporation. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. -Author: Dany Fabian --/ -module - -prelude -public import Std.Data.TreeMap.Basic -public import Init.Data.Ord.String - -public section - -namespace Lean -namespace Xml - -@[expose] def Attributes := Std.TreeMap String String -instance : ToString Attributes := ⟨λ as => as.foldl (λ s n v => s ++ s!" {n}=\"{v}\"") ""⟩ - -mutual -inductive Element -| Element - (name : String) - (attributes : Attributes) - (content : Array Content) - -inductive Content -| Element (element : Element) -| Comment (comment : String) -| Character (content : String) -deriving Inhabited -end - -mutual -private partial def eToString : Element → String -| Element.Element n a c => s!"<{n}{a}>{c.map cToString |>.foldl (· ++ ·) ""}" - -private partial def cToString : Content → String -| Content.Element e => eToString e -| Content.Comment c => s!"" -| Content.Character c => c - -end -instance : ToString Element := ⟨private_decl% eToString⟩ -instance : ToString Content := ⟨private_decl% cToString⟩ diff --git a/src/Lean/Data/Xml/Parser.lean b/src/Lean/Data/Xml/Parser.lean deleted file mode 100644 index c258e43b88..0000000000 --- a/src/Lean/Data/Xml/Parser.lean +++ /dev/null @@ -1,492 +0,0 @@ -/- -Copyright (c) 2021 Microsoft Corporation. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. -Author: Dany Fabian --/ -module - -prelude -public import Std.Internal.Parsec -public import Lean.Data.Xml.Basic -import Init.Data.String.Search - -public section - -open System -open Lean - -namespace Lean -namespace Xml - -open Std.Internal.Parsec -open Std.Internal.Parsec.String - -namespace Parser - -abbrev LeanChar := Char - -/-- consume a newline character sequence pretending, that we read '\n'. As per spec: - https://www.w3.org/TR/xml/#sec-line-ends -/ -def endl : Parser LeanChar := (skipString "\r\n" <|> skipChar '\r' <|> skipChar '\n') *> pure '\n' - -def quote (p : Parser α) : Parser α := - skipChar '\'' *> p <* skipChar '\'' - <|> skipChar '"' *> p <* skipChar '"' - -/-- https://www.w3.org/TR/xml/#NT-Char -/ -def Char : Parser LeanChar := - (attempt do - let c ← any - let cNat := c.toNat - if (0x20 ≤ cNat ∧ cNat ≤ 0xD7FF) - ∨ (0xE000 ≤ cNat ∧ cNat ≤ 0xFFFD) - ∨ (0x10000 ≤ cNat ∧ cNat ≤ 0x10FFFF) then pure c else fail "expected xml char") - <|> pchar '\t' <|> endl - -/-- https://www.w3.org/TR/xml/#NT-S -/ -def S : Parser String := - many1Chars (pchar ' ' <|> endl <|> pchar '\t') - -/-- https://www.w3.org/TR/xml/#NT-Eq -/ -def Eq : Parser Unit := - optional S *> skipChar '=' <* optional S - -private def nameStartCharRanges : Array (Nat × Nat) := - #[(0xC0, 0xD6), - (0xD8, 0xF6), - (0xF8, 0x2FF), - (0x370, 0x37D), - (0x37F, 0x1FFF), - (0x200C, 0x200D), - (0x2070, 0x218F), - (0x2C00, 0x2FEF), - (0x3001, 0xD7FF), - (0xF900, 0xFDCF), - (0xFDF0, 0xFFFD), - (0x10000, 0xEFFFF)] - -/-- https://www.w3.org/TR/xml/#NT-NameStartChar -/ -def NameStartChar : Parser LeanChar := attempt do - let c ← any - if ('A' ≤ c ∧ c ≤ 'Z') ∨ ('a' ≤ c ∧ c ≤ 'z') then pure c - else if c = ':' ∨ c = '_' then pure c - else - let cNum := c.toNat - if nameStartCharRanges.any (fun (lo, hi) => lo ≤ cNum ∧ cNum ≤ hi) then pure c - else fail "expected a name character" - -/-- https://www.w3.org/TR/xml/#NT-NameChar -/ -def NameChar : Parser LeanChar := - NameStartChar <|> digit <|> pchar '-' <|> pchar '.' <|> pchar '\xB7' - <|> satisfy (λ c => ('\u0300' ≤ c ∧ c ≤ '\u036F') ∨ ('\u203F' ≤ c ∧ c ≤ '\u2040')) - -/-- https://www.w3.org/TR/xml/#NT-Name -/ -def Name : Parser String := do - let x ← NameStartChar - manyCharsCore NameChar x.toString - -/-- https://www.w3.org/TR/xml/#NT-VersionNum -/ -def VersionNum : Parser Unit := - skipString "1." <* (many1 digit) - -/-- https://www.w3.org/TR/xml/#NT-VersionInfo -/ -def VersionInfo : Parser Unit := do - S *> - skipString "version" - Eq - quote VersionNum - -/-- https://www.w3.org/TR/xml/#NT-EncName -/ -def EncName : Parser String := do - let x ← asciiLetter - manyCharsCore (asciiLetter <|> digit <|> pchar '-' <|> pchar '_' <|> pchar '.') x.toString - -/-- https://www.w3.org/TR/xml/#NT-EncodingDecl -/ -def EncodingDecl : Parser String := do - S *> - skipString "encoding" - Eq - quote EncName - -/-- https://www.w3.org/TR/xml/#NT-SDDecl -/ -def SDDecl : Parser String := do - S *> skipString "standalone" *> Eq *> quote (pstring "yes" <|> pstring "no") - -/-- https://www.w3.org/TR/xml/#NT-XMLDecl -/ -def XMLdecl : Parser Unit := do - skipString " - optional SDDecl *> - optional S *> - skipString "?>" - -/-- https://www.w3.org/TR/xml/#NT-Comment -/ -def Comment : Parser String := - let notDash := Char.toString <$> satisfy (λ c => c ≠ '-') - skipString "" - -/-- https://www.w3.org/TR/xml/#NT-PITarget -/ -def PITarget : Parser String := - Name <* (skipChar 'X' <|> skipChar 'x') <* (skipChar 'M' <|> skipChar 'm') <* (skipChar 'L' <|> skipChar 'l') - -/-- https://www.w3.org/TR/xml/#NT-PI -/ -def PI : Parser Unit := do - skipString " manyChars (notFollowedBy (skipString "?>") *> Char)) - skipString "?>" - -/-- https://www.w3.org/TR/xml/#NT-Misc -/ -def Misc : Parser Unit := - Comment *> pure () <|> PI <|> S *> pure () - -/-- https://www.w3.org/TR/xml/#NT-SystemLiteral -/ -def SystemLiteral : Parser String := - pchar '"' *> manyChars (satisfy λ c => c ≠ '"') <* pchar '"' - <|> pchar '\'' *> manyChars (satisfy λ c => c ≠ '\'') <* pure '\'' - -/-- https://www.w3.org/TR/xml/#NT-PubidChar -/ -def PubidChar : Parser LeanChar := - asciiLetter <|> digit <|> endl <|> attempt do - let c : _root_.Char := ← any - if "-'()+,./:=?;!*#@$_%".contains c then pure c else fail "PublidChar expected" - -/-- https://www.w3.org/TR/xml/#NT-PubidLiteral -/ -def PubidLiteral : Parser String := - pchar '"' *> manyChars PubidChar <* pchar '"' - <|> pchar '\'' *> manyChars (attempt do - let c ← PubidChar - if c = '\'' then fail "'\\'' not expected" else pure c) <* pchar '\'' - -/-- https://www.w3.org/TR/xml/#NT-ExternalID -/ -def ExternalID : Parser Unit := - skipString "SYSTEM" *> S *> SystemLiteral *> pure () - <|> skipString "PUBLIC" *> S *> PubidLiteral *> S *> SystemLiteral *> pure () - -/-- https://www.w3.org/TR/xml/#NT-Mixed -/ -def Mixed : Parser Unit := - (do - skipChar '(' - optional S *> - skipString "#PCDATA" *> - many (optional S *> skipChar '|' *> optional S *> Name) *> - optional S *> - skipString ")*") - <|> skipChar '(' *> (optional S) *> skipString "#PCDATA" <* (optional S) <* skipChar ')' - -mutual - /-- https://www.w3.org/TR/xml/#NT-cp -/ - partial def cp : Parser Unit := - (Name *> pure () <|> choice <|> seq) <* optional (skipChar '?' <|> skipChar '*' <|> skipChar '+') - - /-- https://www.w3.org/TR/xml/#NT-choice -/ - partial def choice : Parser Unit := do - skipChar '(' - optional S *> - cp - many1 (optional S *> skipChar '|' *> optional S *> cp) *> - optional S *> - skipChar ')' - - /-- https://www.w3.org/TR/xml/#NT-seq -/ - partial def seq : Parser Unit := do - skipChar '(' - optional S *> - cp - many (optional S *> skipChar ',' *> optional S *> cp) *> - optional S *> - skipChar ')' -end - -/-- https://www.w3.org/TR/xml/#NT-children -/ -def children : Parser Unit := - (choice <|> seq) <* optional (skipChar '?' <|> skipChar '*' <|> skipChar '+') - -/-- https://www.w3.org/TR/xml/#NT-contentspec -/ -def contentspec : Parser Unit := do - skipString "EMPTY" <|> skipString "ANY" <|> Mixed <|> children - -/-- https://www.w3.org/TR/xml/#NT-elementdecl -/ -def elementDecl : Parser Unit := do - skipString " - Name *> - contentspec *> - optional S *> - skipChar '>' - -/-- https://www.w3.org/TR/xml/#NT-StringType -/ -def StringType : Parser Unit := - skipString "CDATA" - -/-- https://www.w3.org/TR/xml/#NT-TokenizedType -/ -def TokenizedType : Parser Unit := - skipString "ID" - <|> skipString "IDREF" - <|> skipString "IDREFS" - <|> skipString "ENTITY" - <|> skipString "ENTITIES" - <|> skipString "NMTOKEN" - <|> skipString "NMTOKENS" - -/-- https://www.w3.org/TR/xml/#NT-NotationType -/ -def NotationType : Parser Unit := do - skipString "NOTATION" - S *> - skipChar '(' <* - optional S - Name *> many (optional S *> skipChar '|' *> optional S *> Name) *> - optional S *> - skipChar ')' - -/-- https://www.w3.org/TR/xml/#NT-Nmtoken -/ -def Nmtoken : Parser String := do - many1Chars NameChar - -/-- https://www.w3.org/TR/xml/#NT-Enumeration -/ -def Enumeration : Parser Unit := do - skipChar '(' - optional S *> - Nmtoken *> many (optional S *> skipChar '|' *> optional S *> Nmtoken) *> - optional S *> - skipChar ')' - -/-- https://www.w3.org/TR/xml/#NT-EnumeratedType -/ -def EnumeratedType : Parser Unit := - NotationType <|> Enumeration - -/-- https://www.w3.org/TR/xml/#NT-AttType -/ -def AttType : Parser Unit := - StringType <|> TokenizedType <|> EnumeratedType - -def predefinedEntityToChar : String → Option LeanChar -| "lt" => some '<' -| "gt" => some '>' -| "amp" => some '&' -| "apos" => some '\'' -| "quot" => some '"' -| _ => none - -/-- https://www.w3.org/TR/xml/#NT-EntityRef -/ -def EntityRef : Parser $ Option LeanChar := attempt $ - skipChar '&' *> predefinedEntityToChar <$> Name <* skipChar ';' - -@[inline] -def hexDigitToNat (c : LeanChar) : Nat := - if '0' ≤ c ∧ c ≤ '9' then c.toNat - '0'.toNat - else if 'a' ≤ c ∧ c ≤ 'f' then c.toNat - 'a'.toNat + 10 - else c.toNat - 'A'.toNat + 10 - -def digitsToNat (base : Nat) (digits : Array Nat) : Nat := - digits.foldl (λ r d => r * base + d) 0 - -/-- https://www.w3.org/TR/xml/#NT-CharRef -/ -def CharRef : Parser LeanChar := do - skipString "&#" - let charCode ← - digitsToNat 10 <$> many1 (hexDigitToNat <$> digit) - <|> skipChar 'x' *> digitsToNat 16 <$> many1 (hexDigitToNat <$> hexDigit) - skipChar ';' - return Char.ofNat charCode - -/-- https://www.w3.org/TR/xml/#NT-Reference -/ -def Reference : Parser $ Option LeanChar := - EntityRef <|> some <$> CharRef - -/-- https://www.w3.org/TR/xml/#NT-AttValue -/ -def AttValue : Parser String := do - let chars ← - (do - skipChar '"' - many (some <$> satisfy (λ c => c ≠ '<' ∧ c ≠ '&' ∧ c ≠ '"') <|> Reference) <* - skipChar '"') - <|> (do - skipChar '\'' - many (some <$> satisfy (λ c => c ≠ '<' ∧ c ≠ '&' ∧ c ≠ '\'') <|> Reference) <* - skipChar '\'') - return chars.foldl (λ s c => if let some c := c then s.push c else s) "" - -/-- https://www.w3.org/TR/xml/#NT-DefaultDecl -/ -def DefaultDecl : Parser Unit := - skipString "#REQUIRED" - <|> skipString "#IMPLIED" - <|> optional (skipString "#FIXED" <* S) *> AttValue *> pure () - -/-- https://www.w3.org/TR/xml/#NT-AttDef -/ -def AttDef : Parser Unit := - S *> Name *> S *> AttType *> S *> DefaultDecl - -/-- https://www.w3.org/TR/xml/#NT-AttlistDecl -/ -def AttlistDecl : Parser Unit := - skipString " S *> Name *> many AttDef *> optional S *> skipChar '>' - -/-- https://www.w3.org/TR/xml/#NT-PEReference -/ -def PEReference : Parser Unit := - skipChar '%' *> Name *> skipChar ';' - -/-- https://www.w3.org/TR/xml/#NT-EntityValue -/ -def EntityValue : Parser String := do - let chars ← - (do - skipChar '"' - many (some <$> satisfy (λ c => c ≠ '%' ∧ c ≠ '&' ∧ c ≠ '"') <|> PEReference *> pure none <|> Reference) <* - skipChar '"') - <|> (do - skipChar '\'' - many (some <$> satisfy (λ c => c ≠ '%' ∧ c ≠ '&' ∧ c ≠ '\'') <|> PEReference *> pure none <|> Reference) <* - skipChar '\'') - return chars.foldl (λ s c => if let some c := c then s.push c else s) "" - - -/-- https://www.w3.org/TR/xml/#NT-NDataDecl -/ -def NDataDecl : Parser Unit := - S *> skipString "NDATA" <* S <* Name - -/-- https://www.w3.org/TR/xml/#NT-EntityDef -/ -def EntityDef : Parser Unit := - EntityValue *> pure () <|> (ExternalID <* optional NDataDecl) - -/-- https://www.w3.org/TR/xml/#NT-GEDecl -/ -def GEDecl : Parser Unit := - skipString " S *> Name *> S *> EntityDef *> optional S *> skipChar '>' - -/-- https://www.w3.org/TR/xml/#NT-PEDef -/ -def PEDef : Parser Unit := - EntityValue *> pure () <|> ExternalID - -/-- https://www.w3.org/TR/xml/#NT-PEDecl -/ -def PEDecl : Parser Unit := - skipString " S *> skipChar '%' *> S *> Name *> PEDef *> optional S *> skipChar '>' - -/-- https://www.w3.org/TR/xml/#NT-EntityDecl -/ -def EntityDecl : Parser Unit := - GEDecl <|> PEDecl - -/-- https://www.w3.org/TR/xml/#NT-PublicID -/ -def PublicID : Parser Unit := - skipString "PUBLIC" <* S <* PubidLiteral - -/-- https://www.w3.org/TR/xml/#NT-NotationDecl -/ -def NotationDecl : Parser Unit := - skipString " S *> Name *> (ExternalID <|> PublicID) *> optional S *> skipChar '>' - -/-- https://www.w3.org/TR/xml/#NT-markupdecl -/ -def markupDecl : Parser Unit := - elementDecl <|> AttlistDecl <|> EntityDecl <|> NotationDecl <|> PI <|> (Comment *> pure ()) - -/-- https://www.w3.org/TR/xml/#NT-DeclSep -/ -def DeclSep : Parser Unit := - PEReference <|> S *> pure () - -/-- https://www.w3.org/TR/xml/#NT-intSubset -/ -def intSubset : Parser Unit := - many (markupDecl <|> DeclSep) *> pure () - -/-- https://www.w3.org/TR/xml/#NT-doctypedecl -/ -def doctypedecl : Parser Unit := do - skipString " - Name *> - optional (S *> ExternalID) *> pure () - <* optional S - optional (skipChar '[' *> intSubset <* skipChar ']' <* optional S) *> - skipChar '>' - -/-- https://www.w3.org/TR/xml/#NT-prolog -/ -def prolog : Parser Unit := - optional XMLdecl *> - many Misc *> - optional (doctypedecl <* many Misc) *> pure () - -/-- https://www.w3.org/TR/xml/#NT-Attribute -/ -def Attribute : Parser (String × String) := do - let name ← Name - Eq - let value ← AttValue - return (name, value) - -protected def elementPrefix : Parser (Array Content → Element) := do - skipChar '<' - let name ← Name - let attributes ← many (attempt <| S *> Attribute) - optional S *> pure () - return Element.Element name (Std.TreeMap.ofList attributes.toList compare) - -/-- https://www.w3.org/TR/xml/#NT-EmptyElemTag -/ -def EmptyElemTag (elem : Array Content → Element) : Parser Element := do - skipString "/>" *> pure (elem #[]) - -/-- https://www.w3.org/TR/xml/#NT-STag -/ -def STag (elem : Array Content → Element) : Parser (Array Content → Element) := do - skipChar '>' *> pure elem - -/-- https://www.w3.org/TR/xml/#NT-ETag -/ -def ETag : Parser Unit := - skipString " Name *> optional S *> skipChar '>' - -/-- https://www.w3.org/TR/xml/#NT-CDStart -/ -def CDStart : Parser Unit := - skipString "" - -/-- https://www.w3.org/TR/xml/#NT-CData -/ -def CData : Parser String := - manyChars (notFollowedBy (skipString "]]>") *> any) - -/-- https://www.w3.org/TR/xml/#NT-CDSect -/ -def CDSect : Parser String := - CDStart *> CData <* CDEnd - -/-- https://www.w3.org/TR/xml/#NT-CharData -/ -def CharData : Parser String := - notFollowedBy (skipString "]]>") *> manyChars (satisfy λ c => c ≠ '<' ∧ c ≠ '&') - -mutual - /-- https://www.w3.org/TR/xml/#NT-content -/ - partial def content : Parser (Array Content) := do - let x ← optional (Content.Character <$> CharData) - let xs ← many do - let y ← - attempt (some <$> Content.Element <$> element) - <|> (do let c ← Reference; pure <| c.map (Content.Character ∘ Char.toString)) - <|> some <$> Content.Character <$> CDSect - <|> PI *> pure none - <|> some <$> Content.Comment <$> Comment - - let z ← optional (Content.Character <$> CharData) - pure #[y, z] - let xs := #[x] ++ xs.flatMap id |>.filterMap id - let mut res := #[] - for x in xs do - if res.size > 0 then - match res.back!, x with - | Content.Character x, Content.Character y => res := res.set! (res.size - 1) (Content.Character $ x ++ y) - | _, x => res := res.push x - else res := res.push x - return res - - /-- https://www.w3.org/TR/xml/#NT-element -/ - partial def element : Parser Element := do - let elem ← Parser.elementPrefix - EmptyElemTag elem <|> STag elem <*> content <* ETag - -end - -/-- https://www.w3.org/TR/xml/#NT-document -/ -def document : Parser Element := prolog *> element <* many Misc <* eof - -end Parser - -def parse (s : String) : Except String Element := - Parser.run Xml.Parser.document s - -end Xml diff --git a/tests/lean/run/PPTopDownAnalyze.lean b/tests/lean/run/PPTopDownAnalyze.lean index c78d571a3f..ac4dfbae87 100644 --- a/tests/lean/run/PPTopDownAnalyze.lean +++ b/tests/lean/run/PPTopDownAnalyze.lean @@ -343,7 +343,6 @@ set_option pp.analyze.trustSubtypeMk true in #testDelabN StateT.modifyGet #testDelabN Nat.gcd_one_left #testDelabN List.decidableLT -#testDelabN Lean.Xml.parse #testDelabN Add.noConfusionType #testDelabN List.filterMapM.loop #testDelabN instMonadReaderOfOfMonadLift diff --git a/tests/lean/xmlParsing.lean b/tests/lean/xmlParsing.lean deleted file mode 100644 index f0947cdaf5..0000000000 --- a/tests/lean/xmlParsing.lean +++ /dev/null @@ -1,10 +0,0 @@ -import Lean.Data.Xml -open Lean.Xml - -/-! Test XML parsing. -/ - -/-! Test whether trailing whitespace in opening tags is handled correctly. -/ -#eval parse "" - -/-! Test whether comments are parsed correctly. -/ -#eval parse "" diff --git a/tests/lean/xmlParsing.lean.expected.out b/tests/lean/xmlParsing.lean.expected.out deleted file mode 100644 index bede94e914..0000000000 --- a/tests/lean/xmlParsing.lean.expected.out +++ /dev/null @@ -1,2 +0,0 @@ -ok: -ok: