feat(frontends/lean): escape identifiers when pretty-printing
This commit is contained in:
parent
16fe494736
commit
f53fa97c4a
11 changed files with 168 additions and 94 deletions
|
|
@ -3853,7 +3853,7 @@ static expr resolve_local_name(environment const & env, local_context const & lc
|
|||
} catch (exception &) {}
|
||||
}
|
||||
if (!r)
|
||||
throw elaborator_exception(src, format("unknown identifier '") + format(id) + format("'"));
|
||||
throw elaborator_exception(src, format("unknown identifier '") + format(id.escape()) + format("'"));
|
||||
return *r;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1988,7 +1988,7 @@ expr parser::id_to_expr(name const & id, pos_info const & p, bool resolve_only,
|
|||
} catch (exception &) {}
|
||||
}
|
||||
if (!r) {
|
||||
return parser_error_or_expr({sstream() << "unknown identifier '" << id << "'", p});
|
||||
return parser_error_or_expr({sstream() << "unknown identifier '" << id.escape() << "'", p});
|
||||
}
|
||||
return *r;
|
||||
}
|
||||
|
|
@ -2043,7 +2043,7 @@ list<name> parser::to_constants(name const & id, char const * msg, pos_info cons
|
|||
}
|
||||
|
||||
if (rs.empty()) {
|
||||
throw parser_error(sstream() << "unknown identifier '" << id << "'", p);
|
||||
throw parser_error(sstream() << "unknown identifier '" << id.escape() << "'", p);
|
||||
}
|
||||
|
||||
return to_list(rs);
|
||||
|
|
|
|||
|
|
@ -669,6 +669,10 @@ optional<name> pretty_fn::is_aliased(name const & n) const {
|
|||
}
|
||||
}
|
||||
|
||||
static format escape(name const & n) {
|
||||
return format(n.escape());
|
||||
}
|
||||
|
||||
auto pretty_fn::pp_const(expr const & e, optional<unsigned> const & num_ref_univ_params) -> result {
|
||||
if (is_neutral_expr(e) && m_unicode)
|
||||
return format("◾");
|
||||
|
|
@ -720,11 +724,11 @@ auto pretty_fn::pp_const(expr const & e, optional<unsigned> const & num_ref_univ
|
|||
to_buffer(const_levels(e), ls);
|
||||
if (num_ref_univ_params) {
|
||||
if (ls.size() <= *num_ref_univ_params)
|
||||
return result(format(n));
|
||||
return result(escape(n));
|
||||
else
|
||||
first_idx = *num_ref_univ_params;
|
||||
}
|
||||
format r = compose(format(n), format(".{"));
|
||||
format r = compose(escape(n), format(".{"));
|
||||
bool first = true;
|
||||
for (unsigned i = first_idx; i < ls.size(); i++) {
|
||||
level const & l = ls[i];
|
||||
|
|
@ -740,7 +744,7 @@ auto pretty_fn::pp_const(expr const & e, optional<unsigned> const & num_ref_univ
|
|||
r += format("}");
|
||||
return result(group(r));
|
||||
} else {
|
||||
return result(format(n));
|
||||
return result(escape(n));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -765,7 +769,7 @@ auto pretty_fn::pp_local(expr const & e) -> result {
|
|||
if (m_locals_full_names)
|
||||
return result(format("<") + format(n + mlocal_name(e)) + format(">"));
|
||||
else
|
||||
return format(n);
|
||||
return format(escape(n));
|
||||
}
|
||||
|
||||
bool pretty_fn::has_implicit_args(expr const & f) {
|
||||
|
|
@ -893,7 +897,7 @@ format pretty_fn::pp_binder(expr const & local) {
|
|||
auto bi = local_info(local);
|
||||
if (bi != binder_info())
|
||||
r += format(open_binder_string(bi, m_unicode));
|
||||
r += format(local_pp_name(local));
|
||||
r += escape(local_pp_name(local));
|
||||
if (m_binder_types) {
|
||||
r += space();
|
||||
r += compose(colon(), nest(m_indent, compose(line(), pp_child(mlocal_type(local), 0).fmt())));
|
||||
|
|
@ -908,7 +912,7 @@ format pretty_fn::pp_binder_block(buffer<name> const & names, expr const & type,
|
|||
if (m_binder_types || bi != binder_info())
|
||||
r += format(open_binder_string(bi, m_unicode));
|
||||
for (name const & n : names) {
|
||||
r += format(n);
|
||||
r += escape(n);
|
||||
}
|
||||
if (m_binder_types) {
|
||||
r += space();
|
||||
|
|
@ -1009,7 +1013,7 @@ auto pretty_fn::pp_have(expr const & e) -> result {
|
|||
format proof_fmt = pp_child(proof, 0).fmt();
|
||||
format body_fmt = pp_child(body, 0).fmt();
|
||||
format head_fmt = *g_have_fmt;
|
||||
format r = head_fmt + space() + format(n) + space();
|
||||
format r = head_fmt + space() + escape(n) + space();
|
||||
r += colon() + nest(m_indent, line() + type_fmt + comma() + space() + *g_from_fmt);
|
||||
r = group(r);
|
||||
r += nest(m_indent, line() + proof_fmt + comma());
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ Author: Leonardo de Moura
|
|||
#include <cctype>
|
||||
#include <string>
|
||||
#include "util/exception.h"
|
||||
#include "util/name.h"
|
||||
#include "util/utf8.h"
|
||||
#include "library/type_context.h"
|
||||
#include "library/message_builder.h"
|
||||
|
|
@ -25,50 +26,6 @@ unsigned scanner::get_utf8_size(uchar c) {
|
|||
return r;
|
||||
}
|
||||
|
||||
static unsigned utf8_to_unicode(uchar const * begin, uchar const * end) {
|
||||
unsigned result = 0;
|
||||
if (begin == end)
|
||||
return result;
|
||||
auto it = begin;
|
||||
unsigned c = *it;
|
||||
++it;
|
||||
if (c < 128)
|
||||
return c;
|
||||
unsigned mask = (1u << 6) -1;
|
||||
unsigned hmask = mask;
|
||||
unsigned shift = 0;
|
||||
unsigned num_bits = 0;
|
||||
while ((c & 0xC0) == 0xC0) {
|
||||
c <<= 1;
|
||||
c &= 0xff;
|
||||
num_bits += 6;
|
||||
hmask >>= 1;
|
||||
shift++;
|
||||
result <<= 6;
|
||||
if (it == end)
|
||||
return 0;
|
||||
result |= *it & mask;
|
||||
++it;
|
||||
}
|
||||
result |= ((c >> shift) & hmask) << num_bits;
|
||||
return result;
|
||||
}
|
||||
|
||||
bool is_greek_unicode(unsigned u) { return 0x391 <= u && u <= 0x3DD; }
|
||||
bool is_letter_like_unicode(unsigned u) {
|
||||
return
|
||||
(0x3b1 <= u && u <= 0x3c9 && u != 0x3bb) || // Lower greek, but lambda
|
||||
(0x391 <= u && u <= 0x3A9 && u != 0x3A0 && u != 0x3A3) || // Upper greek, but Pi and Sigma
|
||||
(0x3ca <= u && u <= 0x3fb) || // Coptic letters
|
||||
(0x1f00 <= u && u <= 0x1ffe) || // Polytonic Greek Extended Character Set
|
||||
(0x2100 <= u && u <= 0x214f); // Letter like block
|
||||
}
|
||||
bool is_sub_script_alnum_unicode(unsigned u) {
|
||||
return
|
||||
(0x207f <= u && u <= 0x2089) || // n superscript and numberic subscripts
|
||||
(0x2090 <= u && u <= 0x209c); // letter-like subscripts
|
||||
}
|
||||
|
||||
void scanner::fetch_line() {
|
||||
m_curr_line.clear();
|
||||
if (std::getline(*m_stream, m_curr_line)) {
|
||||
|
|
@ -465,23 +422,6 @@ void scanner::next_utf(buffer<uchar> & cs) {
|
|||
next_utf_core(curr(), cs);
|
||||
}
|
||||
|
||||
constexpr char16_t id_begin_escape = u'«';
|
||||
constexpr char16_t id_end_escape = u'»';
|
||||
|
||||
static bool is_id_first(uchar const * begin, uchar const * end) {
|
||||
if (std::isalpha(*begin) || *begin == '_')
|
||||
return true;
|
||||
unsigned u = utf8_to_unicode(begin, end);
|
||||
return u == id_begin_escape || is_letter_like_unicode(u);
|
||||
}
|
||||
|
||||
bool is_id_rest(uchar const * begin, uchar const * end) {
|
||||
if (std::isalnum(*begin) || *begin == '_' || *begin == '\'')
|
||||
return true;
|
||||
unsigned u = utf8_to_unicode(begin, end);
|
||||
return is_letter_like_unicode(u) || is_sub_script_alnum_unicode(u);
|
||||
}
|
||||
|
||||
static char const * g_error_key_msg = "unexpected token";
|
||||
|
||||
void scanner::read_field_idx() {
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ Author: Leonardo de Moura
|
|||
#include "kernel/pos_info_provider.h"
|
||||
#include "util/name.h"
|
||||
#include "util/flet.h"
|
||||
#include "util/utf8.h"
|
||||
#include "util/numerics/mpq.h"
|
||||
#include "kernel/environment.h"
|
||||
#include "library/io_state.h"
|
||||
|
|
@ -20,8 +21,6 @@ enum class token_kind {Keyword, CommandKeyword, Identifier, Numeral, Decimal,
|
|||
String, Char, QuotedSymbol,
|
||||
DocBlock, ModDocBlock, FieldNum, FieldName, Eof};
|
||||
|
||||
using uchar = unsigned char;
|
||||
|
||||
/**
|
||||
\brief Scanner. The behavior of the scanner is controlled using a token set.
|
||||
|
||||
|
|
@ -119,11 +118,6 @@ public:
|
|||
};
|
||||
};
|
||||
std::ostream & operator<<(std::ostream & out, token_kind k);
|
||||
bool is_id_rest(uchar const * begin, uchar const * end);
|
||||
inline bool is_id_rest(char const * begin, char const * end) {
|
||||
return is_id_rest(reinterpret_cast<uchar const *>(begin),
|
||||
reinterpret_cast<uchar const *>(end));
|
||||
}
|
||||
|
||||
class token {
|
||||
token_kind m_kind;
|
||||
|
|
|
|||
|
|
@ -23,23 +23,71 @@ Author: Leonardo de Moura
|
|||
namespace lean {
|
||||
constexpr char const * anonymous_str = "[anonymous]";
|
||||
|
||||
void name::imp::display_core(std::ostream & out, imp * p, char const * sep) {
|
||||
lean_assert(p != nullptr);
|
||||
if (p->m_prefix) {
|
||||
display_core(out, p->m_prefix, sep);
|
||||
out << sep;
|
||||
}
|
||||
if (p->m_is_string)
|
||||
out << p->m_str;
|
||||
else
|
||||
out << p->m_k;
|
||||
bool is_greek_unicode(unsigned u) { return 0x391 <= u && u <= 0x3DD; }
|
||||
bool is_letter_like_unicode(unsigned u) {
|
||||
return
|
||||
(0x3b1 <= u && u <= 0x3c9 && u != 0x3bb) || // Lower greek, but lambda
|
||||
(0x391 <= u && u <= 0x3A9 && u != 0x3A0 && u != 0x3A3) || // Upper greek, but Pi and Sigma
|
||||
(0x3ca <= u && u <= 0x3fb) || // Coptic letters
|
||||
(0x1f00 <= u && u <= 0x1ffe) || // Polytonic Greek Extended Character Set
|
||||
(0x2100 <= u && u <= 0x214f); // Letter like block
|
||||
}
|
||||
bool is_sub_script_alnum_unicode(unsigned u) {
|
||||
return
|
||||
(0x207f <= u && u <= 0x2089) || // n superscript and numberic subscripts
|
||||
(0x2090 <= u && u <= 0x209c); // letter-like subscripts
|
||||
}
|
||||
|
||||
void name::imp::display(std::ostream & out, imp * p, char const * sep) {
|
||||
bool is_id_first(char const * begin, char const * end) {
|
||||
if (std::isalpha(*begin) || *begin == '_')
|
||||
return true;
|
||||
unsigned u = utf8_to_unicode(begin, end);
|
||||
return u == id_begin_escape || is_letter_like_unicode(u);
|
||||
}
|
||||
|
||||
bool is_id_rest(char const * begin, char const * end) {
|
||||
if (std::isalnum(*begin) || *begin == '_' || *begin == '\'')
|
||||
return true;
|
||||
unsigned u = utf8_to_unicode(begin, end);
|
||||
return is_letter_like_unicode(u) || is_sub_script_alnum_unicode(u);
|
||||
}
|
||||
|
||||
void name::imp::display_core(std::ostream & out, imp * p, bool escape, char const * sep) {
|
||||
lean_assert(p != nullptr);
|
||||
if (p->m_prefix) {
|
||||
display_core(out, p->m_prefix, escape, sep);
|
||||
out << sep;
|
||||
}
|
||||
if (p->m_is_string) {
|
||||
size_t sz = strlen(p->m_str);
|
||||
bool must_escape = false;
|
||||
if (escape && *p->m_str) {
|
||||
if (!is_id_first(p->m_str, p->m_str + sz))
|
||||
must_escape = true;
|
||||
// don't escape names produced by server::display_decl
|
||||
if (must_escape && p->m_str[0] == '?')
|
||||
must_escape = false;
|
||||
if (escape) {
|
||||
for (char * s = p->m_str + get_utf8_size(p->m_str[0]); !must_escape && *s; s += get_utf8_size(*s)) {
|
||||
if (!is_id_rest(s, p->m_str + sz))
|
||||
must_escape = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (must_escape)
|
||||
out << "«" << p->m_str << "»";
|
||||
else
|
||||
out << p->m_str;
|
||||
} else {
|
||||
out << p->m_k;
|
||||
}
|
||||
}
|
||||
|
||||
void name::imp::display(std::ostream & out, imp * p, bool escape, char const * sep) {
|
||||
if (p == nullptr)
|
||||
out << anonymous_str;
|
||||
else
|
||||
display_core(out, p, sep);
|
||||
display_core(out, p, escape, sep);
|
||||
}
|
||||
|
||||
void copy_limbs(name::imp * p, buffer<name::imp *> & limbs) {
|
||||
|
|
@ -297,12 +345,18 @@ name name::get_root() const {
|
|||
|
||||
std::string name::to_string(char const * sep) const {
|
||||
std::ostringstream s;
|
||||
imp::display(s, m_ptr, sep);
|
||||
imp::display(s, m_ptr, false, sep);
|
||||
return s.str();
|
||||
}
|
||||
|
||||
std::string name::escape(char const * sep) const {
|
||||
std::ostringstream s;
|
||||
imp::display(s, m_ptr, true, sep);
|
||||
return s.str();
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & out, name const & n) {
|
||||
name::imp::display(out, n.m_ptr);
|
||||
name::imp::display(out, n.m_ptr, false);
|
||||
return out;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,21 @@ Author: Leonardo de Moura
|
|||
|
||||
namespace lean {
|
||||
constexpr char const * lean_name_separator = ".";
|
||||
constexpr char16_t id_begin_escape = u'«';
|
||||
constexpr char16_t id_end_escape = u'»';
|
||||
|
||||
bool is_id_first(char const * begin, char const * end);
|
||||
inline bool is_id_first(unsigned char const * begin, unsigned char const * end) {
|
||||
return is_id_first(reinterpret_cast<char const *>(begin),
|
||||
reinterpret_cast<char const *>(end));
|
||||
}
|
||||
|
||||
bool is_id_rest(char const * begin, char const * end);
|
||||
inline bool is_id_rest(unsigned char const * begin, unsigned char const * end) {
|
||||
return is_id_rest(reinterpret_cast<char const *>(begin),
|
||||
reinterpret_cast<char const *>(end));
|
||||
}
|
||||
|
||||
enum class name_kind { ANONYMOUS, STRING, NUMERAL };
|
||||
/**
|
||||
\brief Hierarchical names.
|
||||
|
|
@ -36,8 +51,8 @@ public:
|
|||
};
|
||||
void dealloc();
|
||||
imp(bool s, imp * p):m_rc(1), m_is_string(s), m_hash(0), m_prefix(p) { if (p) p->inc_ref(); }
|
||||
static void display_core(std::ostream & out, imp * p, char const * sep);
|
||||
static void display(std::ostream & out, imp * p, char const * sep = lean_name_separator);
|
||||
static void display_core(std::ostream & out, imp * p, bool escape, char const * sep);
|
||||
static void display(std::ostream & out, imp * p, bool escape, char const * sep = lean_name_separator);
|
||||
friend void copy_limbs(imp * p, buffer<name::imp *> & limbs);
|
||||
};
|
||||
private:
|
||||
|
|
@ -125,6 +140,7 @@ public:
|
|||
name get_root() const;
|
||||
/** \brief Convert this hierarchical name into a string. */
|
||||
std::string to_string(char const * sep = lean_name_separator) const;
|
||||
std::string escape(char const * sep = lean_name_separator) const;
|
||||
/** \brief Size of the this name (in characters). */
|
||||
size_t size() const;
|
||||
/** \brief Size of the this name in unicode. */
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ Author: Leonardo de Moura
|
|||
#include <string>
|
||||
#include "util/debug.h"
|
||||
#include "util/optional.h"
|
||||
#include "util/utf8.h"
|
||||
|
||||
namespace lean {
|
||||
bool is_utf8_next(unsigned char c) { return (c & 0xC0) == 0x80; }
|
||||
|
|
@ -81,4 +82,33 @@ std::string utf8_trim(std::string const & s) {
|
|||
stop = s.size();
|
||||
return s.substr(start, stop - start);
|
||||
}
|
||||
|
||||
unsigned utf8_to_unicode(uchar const * begin, uchar const * end) {
|
||||
unsigned result = 0;
|
||||
if (begin == end)
|
||||
return result;
|
||||
auto it = begin;
|
||||
unsigned c = *it;
|
||||
++it;
|
||||
if (c < 128)
|
||||
return c;
|
||||
unsigned mask = (1u << 6) -1;
|
||||
unsigned hmask = mask;
|
||||
unsigned shift = 0;
|
||||
unsigned num_bits = 0;
|
||||
while ((c & 0xC0) == 0xC0) {
|
||||
c <<= 1;
|
||||
c &= 0xff;
|
||||
num_bits += 6;
|
||||
hmask >>= 1;
|
||||
shift++;
|
||||
result <<= 6;
|
||||
if (it == end)
|
||||
return 0;
|
||||
result |= *it & mask;
|
||||
++it;
|
||||
}
|
||||
result |= ((c >> shift) & hmask) << num_bits;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,10 +9,17 @@ Author: Leonardo de Moura
|
|||
#include "util/optional.h"
|
||||
|
||||
namespace lean {
|
||||
using uchar = unsigned char;
|
||||
|
||||
bool is_utf8_next(unsigned char c);
|
||||
unsigned get_utf8_size(unsigned char c);
|
||||
size_t utf8_strlen(char const * str);
|
||||
optional<size_t> utf8_char_pos(char const * str, size_t char_idx);
|
||||
char const * get_utf8_last_char(char const * str);
|
||||
std::string utf8_trim(std::string const & s);
|
||||
unsigned utf8_to_unicode(uchar const * begin, uchar const * end);
|
||||
inline unsigned utf8_to_unicode(char const * begin, char const * end) {
|
||||
return utf8_to_unicode(reinterpret_cast<uchar const *>(begin),
|
||||
reinterpret_cast<uchar const *>(end));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
17
tests/lean/escape_id.lean
Normal file
17
tests/lean/escape_id.lean
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
def «def» := 1
|
||||
#check «def»
|
||||
|
||||
def «[ ]» := 1
|
||||
#check «[ ]»
|
||||
#check «[»
|
||||
|
||||
def a.«b.c» := 1
|
||||
#check a.«b.c»
|
||||
#check a.b.c
|
||||
#check «a.b.c»
|
||||
|
||||
#check [1].«length»
|
||||
|
||||
#check ««»
|
||||
#check «
|
||||
»
|
||||
12
tests/lean/escape_id.lean.expected.out
Normal file
12
tests/lean/escape_id.lean.expected.out
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
def : ℕ
|
||||
«[ ]» : ℕ
|
||||
escape_id.lean:6:7: error: unknown identifier '«[»'
|
||||
a.«b.c» : ℕ
|
||||
escape_id.lean:10:7: error: unknown identifier 'a.b.c'
|
||||
escape_id.lean:11:7: error: unknown identifier '«a.b.c»'
|
||||
list.length [1] : ℕ
|
||||
escape_id.lean:15:8: error: illegal character in escaped identifier
|
||||
escape_id.lean:16:0: error: invalid expression, unexpected token
|
||||
escape_id.lean:16:8: error: illegal character in escaped identifier
|
||||
escape_id.lean:17:0: error: unexpected token
|
||||
escape_id.lean:17:1: error: invalid expression, unexpected token
|
||||
Loading…
Add table
Reference in a new issue