lean4-htt/src/util/name.cpp
2024-08-12 14:14:42 +02:00

433 lines
13 KiB
C++

/*
Copyright (c) 2013 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
#include <cctype>
#include <cstring>
#include <vector>
#include <algorithm>
#include <sstream>
#include <string>
#include "runtime/thread.h"
#include "runtime/debug.h"
#include "runtime/sstream.h"
#include "runtime/utf8.h"
#include "runtime/hash.h"
#include "runtime/buffer.h"
#include "util/name.h"
#include "util/ascii.h"
namespace lean {
// Name-building entry points with C linkage that are defined outside this file.
// Both are called below with the prefix object first and the new component
// (a string object or a numeral object) second.
extern "C" obj_res lean_name_mk_string(obj_arg p, obj_arg s);
extern "C" obj_res lean_name_mk_numeral(obj_arg p, obj_arg n);
/* Build the name object `p.s`, where the string component is supplied as a C string.
   The C string is first turned into a string object with mk_string. */
static inline obj_res name_mk_string_of_cstr(obj_arg p, char const * s) {
    obj_arg component = mk_string(s);
    return lean_name_mk_string(p, component);
}
// Text used for the anonymous name: display_name prints it and
// name::size_core reports its length.
constexpr char const * anonymous_str = "[anonymous]";
/* Return true iff the code point `u` lies in the closed interval [0x391, 0x3DD]. */
bool is_greek_unicode(unsigned u) {
    if (u < 0x391)
        return false;
    return u <= 0x3DD;
}
/* Return true iff the code point `u` is treated as "letter like".
   The accepted code point ranges are pairwise disjoint, so each one can be
   decided on its own. */
bool is_letter_like_unicode(unsigned u) {
    auto between = [u](unsigned lo, unsigned hi) { return lo <= u && u <= hi; };
    // Lower greek, but lambda
    if (between(0x3b1, 0x3c9))
        return u != 0x3bb;
    // Upper greek, but Pi and Sigma
    if (between(0x391, 0x3A9))
        return u != 0x3A0 && u != 0x3A3;
    return
        between(0x3ca, 0x3fb)   ||   // Coptic letters
        between(0x1f00, 0x1ffe) ||   // Polytonic Greek Extended Character Set
        between(0x2100, 0x214f) ||   // Letter like block
        between(0x1d49c, 0x1d59f);   // Latin letters, Script, Double-struck, Fractur
}
/* Return true iff the code point `u` falls in one of the three
   superscript/subscript ranges accepted inside identifiers. */
bool is_sub_script_alnum_unicode(unsigned u) {
    // n superscript and numeric subscripts
    if (u >= 0x207f && u <= 0x2089)
        return true;
    // letter-like subscripts
    if (u >= 0x2090 && u <= 0x209c)
        return true;
    // letter-like subscripts
    return u >= 0x1d62 && u <= 0x1d6a;
}
/*
  Return true iff the character starting at `begin` may open an identifier.

  ASCII letters and `_` are accepted directly. Otherwise the bytes in
  [begin, end) are decoded with utf8_to_unicode, and the result is accepted when
  it equals `id_begin_escape` or satisfies is_letter_like_unicode.

  An empty range (begin == end) is rejected up front, so no byte at or past
  `end` is ever inspected.
*/
bool is_id_first(unsigned char const * begin, unsigned char const * end) {
    if (begin == end)
        return false;
    if (std::isalpha(*begin) || *begin == '_')
        return true;
    unsigned u = utf8_to_unicode(begin, end);
    return u == id_begin_escape || is_letter_like_unicode(u);
}
/*
  Return true iff the character starting at `begin` may continue an identifier.

  ASCII letters, digits, `_`, `'`, `?` and `!` are accepted directly. Otherwise
  the bytes in [begin, end) are decoded with utf8_to_unicode, and the result is
  accepted when it satisfies is_letter_like_unicode or
  is_sub_script_alnum_unicode.

  An empty range (begin == end) is rejected up front, so no byte at or past
  `end` is ever inspected.
*/
bool is_id_rest(unsigned char const * begin, unsigned char const * end) {
    if (begin == end)
        return false;
    if (std::isalnum(*begin) || *begin == '_' || *begin == '\'' || *begin == '?' || *begin == '!')
        return true;
    unsigned u = utf8_to_unicode(begin, end);
    return is_letter_like_unicode(u) || is_sub_script_alnum_unicode(u);
}
static void display_name_core(std::ostream & out, name const & n, bool escape, char const * sep) {
lean_assert(!n.is_anonymous());
name pre = n.get_prefix();
if (pre) {
display_name_core(out, pre, escape, sep);
out << sep;
}
if (n.is_string()) {
std::string str = n.get_string().to_std_string();
size_t sz = str.size();
bool must_escape = false;
if (escape) {
if (sz == 0)
must_escape = true;
if (!is_id_first(str.data(), str.data() + sz))
must_escape = true;
// don't escape names produced by server::display_decl
if (must_escape && str[0] == '?') // TODO(Kha, Leo): do we need this hack
must_escape = false;
for (size_t i = get_utf8_size(str[0]); !must_escape && i < sz; i += get_utf8_size(str[i])) {
if (!is_id_rest(str.data() + i, str.data() + sz))
must_escape = true;
}
}
if (must_escape || sz == 0)
out << "«" << str << "»";
else
out << str;
} else {
out << n.get_numeral().to_std_string();
}
}
/* Write `n` to `out`: the anonymous name is printed as `anonymous_str`,
   every other name is delegated to display_name_core. */
static void display_name(std::ostream & out, name const & n, bool escape, char const * sep) {
    if (!n.is_anonymous()) {
        display_name_core(out, n, escape, sep);
        return;
    }
    out << anonymous_str;
}
// Construct the name `prefix.n`, where the new string component is given as a C string.
// The raw prefix pointer is handed to name_mk_string_of_cstr and the prefix's
// reference count is incremented afterwards.
// NOTE(review): this presumes the callee takes ownership of its prefix argument — confirm.
name::name(name const & prefix, char const * n):
object_ref(name_mk_string_of_cstr(prefix.raw(), n)) {
inc(prefix.raw());
}
// Construct the name `prefix.k` with a numeral component built from the
// machine integer `k` via mk_nat_obj.
// The prefix's reference count is incremented after its raw pointer has been
// passed to lean_name_mk_numeral.
// NOTE(review): this presumes the callee takes ownership of its prefix argument — confirm.
name::name(name const & prefix, unsigned k):
object_ref(lean_name_mk_numeral(prefix.raw(), mk_nat_obj(k))) {
inc(prefix.raw());
}
// Construct the name `prefix.s` from an existing string object.
// Both raw pointers are passed to lean_name_mk_string, and the reference counts
// of `prefix` and `s` are each incremented afterwards.
// NOTE(review): this presumes the callee takes ownership of both arguments — confirm.
name::name(name const & prefix, string_ref const & s):
object_ref(lean_name_mk_string(prefix.raw(), s.raw())) {
inc(prefix.raw());
inc(s.raw());
}
// Construct the name `prefix.k` from an existing `nat` object.
// Both raw pointers are passed to lean_name_mk_numeral, and the reference counts
// of `prefix` and `k` are each incremented afterwards.
// NOTE(review): this presumes the callee takes ownership of both arguments — confirm.
name::name(name const & prefix, nat const & k):
object_ref(lean_name_mk_numeral(prefix.raw(), k.raw())) {
inc(prefix.raw());
inc(k.raw());
}
/* Construct a hierarchical name from a list of string components, outermost
   first. An empty list leaves the name default-constructed. */
name::name(std::initializer_list<char const *> const & l):name() {
    bool at_head = true;
    for (char const * part : l) {
        if (at_head) {
            // The first component starts a fresh name.
            *this = name(part);
            at_head = false;
        } else {
            // Every later component extends what has been built so far.
            *this = name(*this, part);
        }
    }
}
// Shared anonymous name; assigned in initialize_name and deleted in finalize_name.
static name * g_anonymous = nullptr;
/* Return a reference to the shared anonymous name held in g_anonymous. */
name const & name::anonymous() {
    name const & shared = *g_anonymous;
    lean_assert(shared.is_anonymous());
    return shared;
}
/* Replace the contents of `limbs` with the components of the name object `p`,
   ordered from the outermost prefix down to `p` itself. The walk follows
   name::get_prefix until it reaches a scalar object. */
static void copy_limbs(object * p, buffer<object *> & limbs) {
    limbs.clear();
    for (object * cur = p; !is_scalar(cur); cur = name::get_prefix(cur))
        limbs.push_back(cur);
    // Components were collected innermost-first; flip them.
    std::reverse(limbs.begin(), limbs.end());
}
/*
  Return true iff the components of `n1` are an initial segment of the
  components of `n2`.

  When `n2` is atomic the answer is simply `n1 == n2`.
  NOTE(review): because of that shortcut the general component-wise path is
  never reached for an atomic `n2`; confirm this is the intended answer when
  `n1` is anonymous.
*/
bool is_prefix_of(name const & n1, name const & n2) {
    if (n2.is_atomic())
        return n1 == n2;
    buffer<object*> parts1, parts2;
    copy_limbs(n1.raw(), parts1);
    copy_limbs(n2.raw(), parts2);
    unsigned const len1 = parts1.size();
    unsigned const len2 = parts2.size();
    // A longer name cannot be a prefix of a shorter one.
    if (len1 > len2)
        return false;
    // Same length: differing hashes rule out equality without looking at components.
    if (len1 == len2 && n1.hash() != n2.hash())
        return false;
    auto it2 = parts2.begin();
    for (auto it1 = parts1.begin(); it1 != parts1.end(); ++it1, ++it2) {
        object * a = *it1;
        object * b = *it2;
        if (cnstr_tag(a) != cnstr_tag(b))
            return false;
        bool const is_str = static_cast<name_kind>(cnstr_tag(a)) == name_kind::STRING;
        bool const differs = is_str
            ? (name::get_string(a) != name::get_string(b))
            : (name::get_numeral(a) != name::get_numeral(b));
        if (differs)
            return false;
    }
    return true;
}
/* Return true iff `a` is a single string component (its prefix is a scalar
   object) whose text compares equal to `b`. */
bool operator==(name const & a, char const * b) {
    if (a.kind() != name_kind::STRING)
        return false;
    if (!is_scalar(name::get_prefix(a.raw())))
        return false;
    return name::get_string(a.raw()) == b;
}
/*
  Three-way comparison of two name objects; returns -1, 0 or 1.

  Components are compared pairwise starting from the outermost one. When two
  components differ in kind, the string component is the greater one. When all
  shared positions are equal, the name with fewer components is the smaller.
*/
int name::cmp_core(object * i1, object * i2) {
    buffer<object*> parts1, parts2;
    copy_limbs(i1, parts1);
    copy_limbs(i2, parts2);
    auto p1 = parts1.begin();
    auto p2 = parts2.begin();
    while (p1 != parts1.end() && p2 != parts2.end()) {
        object * a = *p1;
        object * b = *p2;
        name_kind const ka = static_cast<name_kind>(cnstr_tag(a));
        name_kind const kb = static_cast<name_kind>(cnstr_tag(b));
        if (ka != kb)
            return ka == name_kind::STRING ? 1 : -1;
        if (ka == name_kind::STRING) {
            if (get_string(a) < get_string(b))
                return -1;
            if (get_string(b) < get_string(a))
                return 1;
        } else {
            if (get_numeral(a) < get_numeral(b))
                return -1;
            if (get_numeral(b) < get_numeral(a))
                return 1;
        }
        ++p1;
        ++p2;
    }
    bool const exhausted1 = p1 == parts1.end();
    bool const exhausted2 = p2 == parts2.end();
    if (exhausted1 && exhausted2)
        return 0;
    return exhausted1 ? -1 : 1;
}
/* Return the number of decimal digits needed to write `k`; zero needs one digit. */
static unsigned num_digits(nat k) {
    if (k == 0u)
        return 1;
    int count = 0;
    // `k` is non-zero here, so at least one digit is counted.
    do {
        k = k / nat(10);
        count++;
    } while (k != 0u);
    return count;
}
size_t name::size_core(bool unicode) const {
if (is_scalar(raw())) {
return strlen(anonymous_str);
} else {
object * i = raw();
size_t sep_sz = strlen(lean_name_separator);
size_t r = 0;
while (true) {
lean_assert(!is_scalar(i));
if (kind(i) == name_kind::STRING) {
r += unicode ? get_string(i).length() : get_string(i).num_bytes();
} else {
// TODO(Leo): we are ignoring the case the numeral is not small.
r += num_digits(get_numeral(i));
}
i = get_prefix(i);
if (is_scalar(i))
break;
r += sep_sz;
}
return r;
}
}
/* Size of the textual form with string components measured by `num_bytes()`
   (size_core with `unicode` off). */
size_t name::size() const {
    bool const unicode = false;
    return size_core(unicode);
}
/* Size of the textual form with string components measured by `length()`
   (size_core with `unicode` on). */
size_t name::utf8_size() const {
    bool const unicode = true;
    return size_core(unicode);
}
bool name::is_safe_ascii() const {
object * i = raw();
while (!is_scalar(i)) {
if (kind(i) == name_kind::STRING) {
if (!::lean::is_safe_ascii(get_string(i).data(), get_string(i).num_bytes()))
return false;
}
i = get_prefix(i);
}
return true;
}
/* Follow get_prefix() upwards and return the last name whose prefix tests false. */
name name::get_root() const {
    name cur = *this;
    for (;;) {
        name parent = cur.get_prefix();
        if (!parent)
            return cur;
        cur = parent;
    }
}
/* Render this name with `sep` between components and with escaping turned off. */
std::string name::to_string(char const * sep) const {
    std::ostringstream buf;
    bool const with_escaping = false;
    display_name(buf, *this, with_escaping, sep);
    return buf.str();
}
/* Render this name with `sep` between components and with escaping turned on. */
std::string name::escape(char const * sep) const {
    std::ostringstream buf;
    bool const with_escaping = true;
    display_name(buf, *this, with_escaping, sep);
    return buf.str();
}
/* Stream `n` using `lean_name_separator` between components, with escaping off. */
LEAN_EXPORT std::ostream & operator<<(std::ostream & out, name const & n) {
    bool const with_escaping = false;
    display_name(out, n, with_escaping, lean_name_separator);
    return out;
}
/*
  Concatenate two names: the components of `n2` are appended after those of
  `n1`. If either operand is anonymous, the other one is returned unchanged.
*/
name operator+(name const & n1, name const & n2) {
    if (n2.is_anonymous())
        return n1;
    if (n1.is_anonymous())
        return n2;
    // Everything of `n2` except its last component is appended recursively.
    name prefix = n2.is_atomic() ? n1 : n1 + n2.get_prefix();
    if (!n2.is_string())
        return name(prefix, n2.get_numeral()); // <<< TODO(Leo): ignoring the case the numeral is not small.
    return name(prefix, n2.get_string());
}
// Name-editing entry points with C linkage that are defined outside this file.
// They back name::append_before and the two name::append_after overloads below.
extern "C" obj_res lean_name_append_after(obj_arg n, obj_arg s);
extern "C" obj_res lean_name_append_before(obj_arg n, obj_arg s);
extern "C" obj_res lean_name_append_index_after(obj_arg n, obj_arg i);
/* Return the result of lean_name_append_before applied to this name and the
   string object built from `p`. */
name name::append_before(char const * p) const {
    obj_arg self = to_obj_arg();
    obj_arg affix = lean_mk_string(p);
    return name(lean_name_append_before(self, affix));
}
/* Return the result of lean_name_append_after applied to this name and the
   string object built from `s`. */
name name::append_after(char const * s) const {
    obj_arg self = to_obj_arg();
    obj_arg affix = lean_mk_string(s);
    return name(lean_name_append_after(self, affix));
}
/* Return this name when it ends in a string component; otherwise return this
   name extended with an empty string component. */
name name::get_subscript_base() const {
    if (!is_string())
        return name(*this, "");
    return *this;
}
/* Return the result of lean_name_append_index_after applied to this name and
   the natural-number object built from `i`. */
name name::append_after(unsigned i) const {
    obj_arg self = to_obj_arg();
    obj_arg index = lean_unsigned_to_nat(i);
    return name(lean_name_append_index_after(self, index));
}
/*
  Recognize names whose last component is a string of the form `<base>_<digits>`.

  On success the result holds the name with the same prefix and `<base>` as its
  last component, together with the numeric value of `<digits>`.

  The empty optional is returned when:
    - the last component is not a string,
    - it contains no '_',
    - nothing follows the last '_', or the suffix starts with '0',
    - the suffix contains a non-digit character,
    - the suffix does not fit in an `unsigned` (previously the value silently
      wrapped around and a bogus index was reported).
*/
optional<pair<name, unsigned>> name::is_subscripted() const {
    optional<pair<name, unsigned>> none;
    if (!is_string()) return none;
    std::string s = get_string().to_std_string();
    auto underscore_pos = s.find_last_of('_');
    if (underscore_pos == std::string::npos) return none;
    std::string::iterator it = s.begin() + underscore_pos + 1;
    if (it == s.end() || *it == '0') return none;
    unsigned const max_idx = ~0u;
    unsigned idx = 0;
    for (; it != s.end() && '0' <= *it && *it <= '9'; ++it) {
        unsigned const digit = static_cast<unsigned>(*it - '0');
        // 10*idx + digit <= max_idx  <=>  idx <= (max_idx - digit) / 10
        if (idx > (max_idx - digit) / 10) return none;
        idx = 10*idx + digit;
    }
    if (it != s.end()) return none;
    name prefix(get_prefix(), string_ref(s.substr(0, underscore_pos)));
    return optional<pair<name, unsigned>>(prefix, idx);
}
/*
  Return this name with the leading segment `prefix` replaced by `new_prefix`.
  If `prefix` does not occur as a leading segment, the name is returned as is.

  The recursion rewrites the prefix of this name first. If that yields the very
  same object as the original prefix, nothing changed and `*this` is returned
  without building a new name.

  The "unchanged" test compares against the original prefix object. It used to
  compare against this name's own object, so it never detected an untouched
  prefix, and it returned `*this` by mistake when `new_prefix` was this very
  name (e.g. `a.b`.replace_prefix(`a`, `a.b`) yielded `a.b`, not `a.b.b`).
*/
name name::replace_prefix(name const & prefix, name const & new_prefix) const {
    if (*this == prefix)
        return new_prefix;
    if (is_anonymous())
        return *this;
    name p = get_prefix().replace_prefix(prefix, new_prefix);
    // This name is not anonymous here, so it has a prefix slot to compare with.
    if (p.raw() == get_prefix(raw()))
        return *this;
    if (is_string())
        return name(p, get_string());
    else
        return name(p, get_numeral());
}
/* Return true iff `p` occurs as a substring of some string component of `n`.
   Components are visited from the last one towards the root. */
bool is_part_of(std::string const & p, name n) {
    for (;;) {
        if (n.is_string()) {
            std::string component = n.get_string().to_std_string();
            bool const found = component.find(p) != std::string::npos;
            if (found)
                return true;
        }
        // No further prefix to look at.
        if (n.is_atomic() || n.is_anonymous())
            return false;
        n = n.get_prefix();
    }
}
/* Split `str` at every occurrence of the one-character `lean_name_separator`
   and build a name with one string component per piece. The text after the
   last separator (possibly empty) always becomes the final component. */
name string_to_name(std::string const & str) {
    static_assert(*(lean_name_separator+1) == 0, "this function assumes the length of lean_name_separator is 1");
    name result;
    std::string id_part;
    for (char c : str) {
        if (c != lean_name_separator[0]) {
            id_part.push_back(c);
            continue;
        }
        // Separator reached: close the current piece and start the next one.
        result = name(result, id_part.c_str());
        id_part.clear();
    }
    return name(result, id_part.c_str());
}
/* Return true iff some string component of `n` starts with '_'. */
bool is_internal_name(name const & n) {
    for (name it = n; !it.is_anonymous(); it = it.get_prefix()) {
        if (it.is_string() && it.get_string().data() && it.get_string().data()[0] == '_')
            return true;
    }
    return false;
}
// Counter behind name::mk_internal_unique_name; assigned in initialize_name
// and deleted in finalize_name.
static atomic<unsigned> * g_next_id = nullptr;
/* Return a name made of a single numeral component under a default-constructed
   prefix. The numeral is the current value of the global counter, which is
   post-incremented on every call. */
name name::mk_internal_unique_name() {
    atomic<unsigned> & counter = *g_next_id;
    unsigned fresh = counter++;
    name root;
    return name(root, fresh);
}
// Allocate this module's globals: the shared anonymous name returned by
// name::anonymous and the counter used by name::mk_internal_unique_name
// (starting at 0).
void initialize_name() {
g_anonymous = new name();
// The anonymous name's underlying object is passed to mark_persistent.
mark_persistent(g_anonymous->raw());
g_next_id = new atomic<unsigned>(0);
}
// Release the globals allocated by initialize_name.
// The pointers themselves are not reset to nullptr afterwards.
void finalize_name() {
delete g_next_id;
delete g_anonymous;
}
}
/* Write `n` to standard output, followed by a newline and a flush. */
void print(lean::name const & n) {
    std::cout << n;
    std::cout << std::endl;
}