70 lines
2.5 KiB
Text
70 lines
2.5 KiB
Text
/-
|
|
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
|
|
Released under Apache 2.0 license as described in the file LICENSE.
|
|
Authors: Leonardo de Moura
|
|
-/
|
|
prelude
|
|
import init.data.char.basic init.lean.parser.parsec
|
|
|
|
namespace lean
|
|
|
|
/-- Returns `tt` exactly when the code point of `c` lies in the inclusive
    range `0x391`–`0x3dd`. -/
def is_greek (c : char) : bool :=
let v := c.val in
0x391 ≤ v && v ≤ 0x3dd
|
|
|
|
/-- Returns `tt` when the code point of `c` falls into any of the ranges
    listed in the body. Lambda (`0x3bb`), Pi (`0x3A0`) and Sigma (`0x3A3`)
    are explicitly excluded from the Greek ranges that would otherwise
    contain them. -/
def is_letter_like (c : char) : bool :=
let v := c.val in
-- lowercase Greek, except lambda
(0x3b1 ≤ v && v ≤ 0x3c9 && v ≠ 0x3bb) ||
-- uppercase Greek, except Pi and Sigma
(0x391 ≤ v && v ≤ 0x3A9 && v ≠ 0x3A0 && v ≠ 0x3A3) ||
-- Coptic letters
(0x3ca ≤ v && v ≤ 0x3fb) ||
-- polytonic Greek (Greek Extended)
(0x1f00 ≤ v && v ≤ 0x1ffe) ||
-- letter-like symbols block
(0x2100 ≤ v && v ≤ 0x214f) ||
-- Latin letters: Script, Double-struck, Fraktur
(0x1d49c ≤ v && v ≤ 0x1d59f)
|
|
|
|
/-- Returns `tt` when the code point of `c` lies in one of three inclusive
    ranges of superscript/subscript characters (see the per-range notes in
    the body). -/
def is_sub_script_alnum (c : char) : bool :=
let v := c.val in
-- superscript n and the numeric subscripts
(0x207f ≤ v && v ≤ 0x2089) ||
-- Latin subscript small letters (per the Unicode charts)
(0x2090 ≤ v && v ≤ 0x209c) ||
-- subscript small letters i, r, u, v and Greek beta..chi (per the Unicode charts)
(0x1d62 ≤ v && v ≤ 0x1d6a)
|
|
|
|
/-- Decides whether `c` is accepted as the first character of an
    identifier part: an alphabetic character (`char.is_alpha`), an
    underscore, or a character accepted by `is_letter_like`. -/
def is_id_first (c : char) : bool :=
char.is_alpha c || (c = '_') || is_letter_like c
|
|
|
|
/-- Decides whether `c` is accepted after the first character of an
    identifier part: an alphanumeric character (`char.is_alphanum`), an
    underscore, an apostrophe, or a character accepted by `is_letter_like`
    or `is_sub_script_alnum`. -/
def is_id_rest (c : char) : bool :=
char.is_alphanum c || (c = '_') || (c = '\'') ||
is_letter_like c || is_sub_script_alnum c
|
|
|
|
/-- Character that opens an escaped identifier part. -/
def id_begin_escape : char := '«'
/-- Character that closes an escaped identifier part. -/
def id_end_escape : char := '»'
|
|
/-- Returns `tt` exactly when `c` equals `id_begin_escape`. -/
def is_id_begin_escape (c : char) : bool :=
to_bool (c = id_begin_escape)
|
|
/-- Returns `tt` exactly when `c` equals `id_end_escape`. -/
def is_id_end_escape (c : char) : bool :=
to_bool (c = id_end_escape)
|
|
|
|
namespace parser
|
|
variables {m : Type → Type} [monad m] [monad_parsec m] [alternative m]
|
|
open monad_parsec
|
|
|
|
/-- Parses an unescaped identifier part.
    Reads one character accepted by `is_id_first`, then passes that
    character (as a string) to `take_while_cont is_id_rest`, which
    presumably extends it with the following characters accepted by
    `is_id_rest` (semantics of `take_while_cont` are defined elsewhere). -/
def id_part_default : m string :=
satisfy is_id_first >>= λ first,
  take_while_cont is_id_rest (to_string first)
|
|
|
|
/-- Parses an escaped identifier part: the `id_begin_escape` character,
    then the result of `take_until1 is_id_end_escape`, then the
    `id_end_escape` character. Only the middle result is returned; the
    two delimiters are discarded. -/
def id_part_escaped : m string :=
do ch id_begin_escape,
   take_until1 is_id_end_escape <* ch id_end_escape
|
|
|
|
/-- Parses one identifier part, choosing via `cond is_id_begin_escape`
    between `id_part_escaped` (first alternative) and `id_part_default`
    (second alternative). Presumably `cond` tests the next input
    character without consuming it; confirm against its definition. -/
def id_part : m string :=
cond is_id_begin_escape id_part_escaped id_part_default
|
|
|
|
/-- Parses a hierarchical identifier into a `name`.
    The first part seeds the name via `mk_simple_name`; `foldl` then
    applies `name.mk_string` to the results of the parser
    `ch '.' >> id_part`. The whole parser is wrapped in `try` and
    labelled "identifier" with `<?>`. -/
def identifier : m name :=
try (do
  first ← id_part,
  foldl name.mk_string (mk_simple_name first) (ch '.' >> id_part))
<?> "identifier"
|
|
|
|
/-- Parses a C-style identifier.
    The first character must be alphabetic (`char.is_alpha`) or `_`; it
    is then passed as a string to `take_while_cont` with a predicate
    accepting alphanumeric characters (`char.is_alphanum`) and `_`.
    The whole parser is wrapped in `try` and labelled "C identifier". -/
def c_identifier : m string :=
try (do
  first ← satisfy (λ x : char, x.is_alpha || x = '_'),
  take_while_cont (λ x : char, x.is_alphanum || x = '_') (to_string first))
<?> "C identifier"
|
|
|
|
/-- Parses a C++-style qualified identifier.
    Reads one `c_identifier`, then `many` repetitions of `"::"` followed
    by another `c_identifier` (each repetition yields the separator
    concatenated with the segment), and returns all pieces joined with
    `string.join`. The whole parser is wrapped in `try` and labelled
    "C++ identifier". -/
def cpp_identifier : m string :=
try (do
  head ← c_identifier,
  rest ← many ((++) <$> str "::" <*> c_identifier),
  return (string.join (head :: rest)))
<?> "C++ identifier"
|
|
|
|
end parser
|
|
end lean
|