feat(frontends/lean): escape identifiers when pretty-printing

This commit is contained in:
Sebastian Ullrich 2016-08-26 17:26:11 -04:00 committed by Leonardo de Moura
parent 16fe494736
commit f53fa97c4a
11 changed files with 168 additions and 94 deletions

View file

@ -3853,7 +3853,7 @@ static expr resolve_local_name(environment const & env, local_context const & lc
} catch (exception &) {}
}
if (!r)
throw elaborator_exception(src, format("unknown identifier '") + format(id) + format("'"));
throw elaborator_exception(src, format("unknown identifier '") + format(id.escape()) + format("'"));
return *r;
}

View file

@ -1988,7 +1988,7 @@ expr parser::id_to_expr(name const & id, pos_info const & p, bool resolve_only,
} catch (exception &) {}
}
if (!r) {
return parser_error_or_expr({sstream() << "unknown identifier '" << id << "'", p});
return parser_error_or_expr({sstream() << "unknown identifier '" << id.escape() << "'", p});
}
return *r;
}
@ -2043,7 +2043,7 @@ list<name> parser::to_constants(name const & id, char const * msg, pos_info cons
}
if (rs.empty()) {
throw parser_error(sstream() << "unknown identifier '" << id << "'", p);
throw parser_error(sstream() << "unknown identifier '" << id.escape() << "'", p);
}
return to_list(rs);

View file

@ -669,6 +669,10 @@ optional<name> pretty_fn::is_aliased(name const & n) const {
}
}
static format escape(name const & n) {
return format(n.escape());
}
auto pretty_fn::pp_const(expr const & e, optional<unsigned> const & num_ref_univ_params) -> result {
if (is_neutral_expr(e) && m_unicode)
return format("");
@ -720,11 +724,11 @@ auto pretty_fn::pp_const(expr const & e, optional<unsigned> const & num_ref_univ
to_buffer(const_levels(e), ls);
if (num_ref_univ_params) {
if (ls.size() <= *num_ref_univ_params)
return result(format(n));
return result(escape(n));
else
first_idx = *num_ref_univ_params;
}
format r = compose(format(n), format(".{"));
format r = compose(escape(n), format(".{"));
bool first = true;
for (unsigned i = first_idx; i < ls.size(); i++) {
level const & l = ls[i];
@ -740,7 +744,7 @@ auto pretty_fn::pp_const(expr const & e, optional<unsigned> const & num_ref_univ
r += format("}");
return result(group(r));
} else {
return result(format(n));
return result(escape(n));
}
}
@ -765,7 +769,7 @@ auto pretty_fn::pp_local(expr const & e) -> result {
if (m_locals_full_names)
return result(format("<") + format(n + mlocal_name(e)) + format(">"));
else
return format(n);
return format(escape(n));
}
bool pretty_fn::has_implicit_args(expr const & f) {
@ -893,7 +897,7 @@ format pretty_fn::pp_binder(expr const & local) {
auto bi = local_info(local);
if (bi != binder_info())
r += format(open_binder_string(bi, m_unicode));
r += format(local_pp_name(local));
r += escape(local_pp_name(local));
if (m_binder_types) {
r += space();
r += compose(colon(), nest(m_indent, compose(line(), pp_child(mlocal_type(local), 0).fmt())));
@ -908,7 +912,7 @@ format pretty_fn::pp_binder_block(buffer<name> const & names, expr const & type,
if (m_binder_types || bi != binder_info())
r += format(open_binder_string(bi, m_unicode));
for (name const & n : names) {
r += format(n);
r += escape(n);
}
if (m_binder_types) {
r += space();
@ -1009,7 +1013,7 @@ auto pretty_fn::pp_have(expr const & e) -> result {
format proof_fmt = pp_child(proof, 0).fmt();
format body_fmt = pp_child(body, 0).fmt();
format head_fmt = *g_have_fmt;
format r = head_fmt + space() + format(n) + space();
format r = head_fmt + space() + escape(n) + space();
r += colon() + nest(m_indent, line() + type_fmt + comma() + space() + *g_from_fmt);
r = group(r);
r += nest(m_indent, line() + proof_fmt + comma());

View file

@ -7,6 +7,7 @@ Author: Leonardo de Moura
#include <cctype>
#include <string>
#include "util/exception.h"
#include "util/name.h"
#include "util/utf8.h"
#include "library/type_context.h"
#include "library/message_builder.h"
@ -25,50 +26,6 @@ unsigned scanner::get_utf8_size(uchar c) {
return r;
}
static unsigned utf8_to_unicode(uchar const * begin, uchar const * end) {
unsigned result = 0;
if (begin == end)
return result;
auto it = begin;
unsigned c = *it;
++it;
if (c < 128)
return c;
unsigned mask = (1u << 6) -1;
unsigned hmask = mask;
unsigned shift = 0;
unsigned num_bits = 0;
while ((c & 0xC0) == 0xC0) {
c <<= 1;
c &= 0xff;
num_bits += 6;
hmask >>= 1;
shift++;
result <<= 6;
if (it == end)
return 0;
result |= *it & mask;
++it;
}
result |= ((c >> shift) & hmask) << num_bits;
return result;
}
bool is_greek_unicode(unsigned u) { return 0x391 <= u && u <= 0x3DD; }
bool is_letter_like_unicode(unsigned u) {
return
(0x3b1 <= u && u <= 0x3c9 && u != 0x3bb) || // Lower greek, but lambda
(0x391 <= u && u <= 0x3A9 && u != 0x3A0 && u != 0x3A3) || // Upper greek, but Pi and Sigma
(0x3ca <= u && u <= 0x3fb) || // Coptic letters
(0x1f00 <= u && u <= 0x1ffe) || // Polytonic Greek Extended Character Set
(0x2100 <= u && u <= 0x214f); // Letter like block
}
bool is_sub_script_alnum_unicode(unsigned u) {
return
(0x207f <= u && u <= 0x2089) || // n superscript and numberic subscripts
(0x2090 <= u && u <= 0x209c); // letter-like subscripts
}
void scanner::fetch_line() {
m_curr_line.clear();
if (std::getline(*m_stream, m_curr_line)) {
@ -465,23 +422,6 @@ void scanner::next_utf(buffer<uchar> & cs) {
next_utf_core(curr(), cs);
}
constexpr char16_t id_begin_escape = u'«';
constexpr char16_t id_end_escape = u'»';
static bool is_id_first(uchar const * begin, uchar const * end) {
if (std::isalpha(*begin) || *begin == '_')
return true;
unsigned u = utf8_to_unicode(begin, end);
return u == id_begin_escape || is_letter_like_unicode(u);
}
bool is_id_rest(uchar const * begin, uchar const * end) {
if (std::isalnum(*begin) || *begin == '_' || *begin == '\'')
return true;
unsigned u = utf8_to_unicode(begin, end);
return is_letter_like_unicode(u) || is_sub_script_alnum_unicode(u);
}
static char const * g_error_key_msg = "unexpected token";
void scanner::read_field_idx() {

View file

@ -10,6 +10,7 @@ Author: Leonardo de Moura
#include "kernel/pos_info_provider.h"
#include "util/name.h"
#include "util/flet.h"
#include "util/utf8.h"
#include "util/numerics/mpq.h"
#include "kernel/environment.h"
#include "library/io_state.h"
@ -20,8 +21,6 @@ enum class token_kind {Keyword, CommandKeyword, Identifier, Numeral, Decimal,
String, Char, QuotedSymbol,
DocBlock, ModDocBlock, FieldNum, FieldName, Eof};
using uchar = unsigned char;
/**
\brief Scanner. The behavior of the scanner is controlled using a token set.
@ -119,11 +118,6 @@ public:
};
};
std::ostream & operator<<(std::ostream & out, token_kind k);
bool is_id_rest(uchar const * begin, uchar const * end);
inline bool is_id_rest(char const * begin, char const * end) {
return is_id_rest(reinterpret_cast<uchar const *>(begin),
reinterpret_cast<uchar const *>(end));
}
class token {
token_kind m_kind;

View file

@ -23,23 +23,71 @@ Author: Leonardo de Moura
namespace lean {
constexpr char const * anonymous_str = "[anonymous]";
void name::imp::display_core(std::ostream & out, imp * p, char const * sep) {
lean_assert(p != nullptr);
if (p->m_prefix) {
display_core(out, p->m_prefix, sep);
out << sep;
}
if (p->m_is_string)
out << p->m_str;
else
out << p->m_k;
bool is_greek_unicode(unsigned u) { return 0x391 <= u && u <= 0x3DD; }
bool is_letter_like_unicode(unsigned u) {
return
(0x3b1 <= u && u <= 0x3c9 && u != 0x3bb) || // Lower greek, but lambda
(0x391 <= u && u <= 0x3A9 && u != 0x3A0 && u != 0x3A3) || // Upper greek, but Pi and Sigma
(0x3ca <= u && u <= 0x3fb) || // Coptic letters
(0x1f00 <= u && u <= 0x1ffe) || // Polytonic Greek Extended Character Set
(0x2100 <= u && u <= 0x214f); // Letter like block
}
bool is_sub_script_alnum_unicode(unsigned u) {
return
(0x207f <= u && u <= 0x2089) || // n superscript and numberic subscripts
(0x2090 <= u && u <= 0x209c); // letter-like subscripts
}
void name::imp::display(std::ostream & out, imp * p, char const * sep) {
bool is_id_first(char const * begin, char const * end) {
if (std::isalpha(*begin) || *begin == '_')
return true;
unsigned u = utf8_to_unicode(begin, end);
return u == id_begin_escape || is_letter_like_unicode(u);
}
bool is_id_rest(char const * begin, char const * end) {
if (std::isalnum(*begin) || *begin == '_' || *begin == '\'')
return true;
unsigned u = utf8_to_unicode(begin, end);
return is_letter_like_unicode(u) || is_sub_script_alnum_unicode(u);
}
void name::imp::display_core(std::ostream & out, imp * p, bool escape, char const * sep) {
lean_assert(p != nullptr);
if (p->m_prefix) {
display_core(out, p->m_prefix, escape, sep);
out << sep;
}
if (p->m_is_string) {
size_t sz = strlen(p->m_str);
bool must_escape = false;
if (escape && *p->m_str) {
if (!is_id_first(p->m_str, p->m_str + sz))
must_escape = true;
// don't escape names produced by server::display_decl
if (must_escape && p->m_str[0] == '?')
must_escape = false;
if (escape) {
for (char * s = p->m_str + get_utf8_size(p->m_str[0]); !must_escape && *s; s += get_utf8_size(*s)) {
if (!is_id_rest(s, p->m_str + sz))
must_escape = true;
}
}
}
if (must_escape)
out << "«" << p->m_str << "»";
else
out << p->m_str;
} else {
out << p->m_k;
}
}
void name::imp::display(std::ostream & out, imp * p, bool escape, char const * sep) {
if (p == nullptr)
out << anonymous_str;
else
display_core(out, p, sep);
display_core(out, p, escape, sep);
}
void copy_limbs(name::imp * p, buffer<name::imp *> & limbs) {
@ -297,12 +345,18 @@ name name::get_root() const {
std::string name::to_string(char const * sep) const {
std::ostringstream s;
imp::display(s, m_ptr, sep);
imp::display(s, m_ptr, false, sep);
return s.str();
}
std::string name::escape(char const * sep) const {
std::ostringstream s;
imp::display(s, m_ptr, true, sep);
return s.str();
}
std::ostream & operator<<(std::ostream & out, name const & n) {
name::imp::display(out, n.m_ptr);
name::imp::display(out, n.m_ptr, false);
return out;
}

View file

@ -18,6 +18,21 @@ Author: Leonardo de Moura
namespace lean {
constexpr char const * lean_name_separator = ".";
constexpr char16_t id_begin_escape = u'«';
constexpr char16_t id_end_escape = u'»';
bool is_id_first(char const * begin, char const * end);
inline bool is_id_first(unsigned char const * begin, unsigned char const * end) {
return is_id_first(reinterpret_cast<char const *>(begin),
reinterpret_cast<char const *>(end));
}
bool is_id_rest(char const * begin, char const * end);
inline bool is_id_rest(unsigned char const * begin, unsigned char const * end) {
return is_id_rest(reinterpret_cast<char const *>(begin),
reinterpret_cast<char const *>(end));
}
enum class name_kind { ANONYMOUS, STRING, NUMERAL };
/**
\brief Hierarchical names.
@ -36,8 +51,8 @@ public:
};
void dealloc();
imp(bool s, imp * p):m_rc(1), m_is_string(s), m_hash(0), m_prefix(p) { if (p) p->inc_ref(); }
static void display_core(std::ostream & out, imp * p, char const * sep);
static void display(std::ostream & out, imp * p, char const * sep = lean_name_separator);
static void display_core(std::ostream & out, imp * p, bool escape, char const * sep);
static void display(std::ostream & out, imp * p, bool escape, char const * sep = lean_name_separator);
friend void copy_limbs(imp * p, buffer<name::imp *> & limbs);
};
private:
@ -125,6 +140,7 @@ public:
name get_root() const;
/** \brief Convert this hierarchical name into a string. */
std::string to_string(char const * sep = lean_name_separator) const;
std::string escape(char const * sep = lean_name_separator) const;
/** \brief Size of the this name (in characters). */
size_t size() const;
/** \brief Size of the this name in unicode. */

View file

@ -8,6 +8,7 @@ Author: Leonardo de Moura
#include <string>
#include "util/debug.h"
#include "util/optional.h"
#include "util/utf8.h"
namespace lean {
bool is_utf8_next(unsigned char c) { return (c & 0xC0) == 0x80; }
@ -81,4 +82,33 @@ std::string utf8_trim(std::string const & s) {
stop = s.size();
return s.substr(start, stop - start);
}
unsigned utf8_to_unicode(uchar const * begin, uchar const * end) {
unsigned result = 0;
if (begin == end)
return result;
auto it = begin;
unsigned c = *it;
++it;
if (c < 128)
return c;
unsigned mask = (1u << 6) -1;
unsigned hmask = mask;
unsigned shift = 0;
unsigned num_bits = 0;
while ((c & 0xC0) == 0xC0) {
c <<= 1;
c &= 0xff;
num_bits += 6;
hmask >>= 1;
shift++;
result <<= 6;
if (it == end)
return 0;
result |= *it & mask;
++it;
}
result |= ((c >> shift) & hmask) << num_bits;
return result;
}
}

View file

@ -9,10 +9,17 @@ Author: Leonardo de Moura
#include "util/optional.h"
namespace lean {
using uchar = unsigned char;
bool is_utf8_next(unsigned char c);
unsigned get_utf8_size(unsigned char c);
size_t utf8_strlen(char const * str);
optional<size_t> utf8_char_pos(char const * str, size_t char_idx);
char const * get_utf8_last_char(char const * str);
std::string utf8_trim(std::string const & s);
unsigned utf8_to_unicode(uchar const * begin, uchar const * end);
inline unsigned utf8_to_unicode(char const * begin, char const * end) {
return utf8_to_unicode(reinterpret_cast<uchar const *>(begin),
reinterpret_cast<uchar const *>(end));
}
}

17
tests/lean/escape_id.lean Normal file
View file

@ -0,0 +1,17 @@
def «def» := 1
#check «def»
def «[ ]» := 1
#check «[ ]»
#check «[»
def a.«b.c» := 1
#check a.«b.c»
#check a.b.c
#check «a.b.c»
#check [1].«length»
#check ««»
#check «
»

View file

@ -0,0 +1,12 @@
def :
«[ ]» :
escape_id.lean:6:7: error: unknown identifier '«[»'
a.«b.c» :
escape_id.lean:10:7: error: unknown identifier 'a.b.c'
escape_id.lean:11:7: error: unknown identifier '«a.b.c»'
list.length [1] :
escape_id.lean:15:8: error: illegal character in escaped identifier
escape_id.lean:16:0: error: invalid expression, unexpected token
escape_id.lean:16:8: error: illegal character in escaped identifier
escape_id.lean:17:0: error: unexpected token
escape_id.lean:17:1: error: invalid expression, unexpected token