refactor(util/name): implement name using object

This commit is contained in:
Leonardo de Moura 2018-05-20 13:22:34 -07:00
parent 4de9b8c177
commit 256a1f720c
2 changed files with 146 additions and 217 deletions

View file

@ -54,105 +54,52 @@ bool is_id_rest(unsigned char const * begin, unsigned char const * end) {
return is_letter_like_unicode(u) || is_sub_script_alnum_unicode(u);
}
void name::imp::display_core(std::ostream & out, imp * p, bool escape, char const * sep) {
lean_assert(p != nullptr);
if (p->m_prefix) {
display_core(out, p->m_prefix, escape, sep);
static void display_name_core(std::ostream & out, name const & n, bool escape, char const * sep) {
lean_assert(!n.is_anonymous());
name pre = n.get_prefix();
if (pre) {
display_name_core(out, pre, escape, sep);
out << sep;
}
if (p->m_is_string) {
size_t sz = strlen(p->m_str);
if (n.is_string()) {
char const * str = n.get_string();
size_t sz = strlen(str);
bool must_escape = false;
if (escape && *p->m_str) {
if (!is_id_first(p->m_str, p->m_str + sz))
if (escape && *str) {
if (!is_id_first(str, str + sz))
must_escape = true;
// don't escape names produced by server::display_decl
if (must_escape && p->m_str[0] == '?')
if (must_escape && str[0] == '?')
must_escape = false;
if (escape) {
for (char * s = p->m_str + get_utf8_size(p->m_str[0]); !must_escape && *s; s += get_utf8_size(*s)) {
if (!is_id_rest(s, p->m_str + sz))
for (char const * s = str + get_utf8_size(str[0]); !must_escape && *s; s += get_utf8_size(*s)) {
if (!is_id_rest(s, str + sz))
must_escape = true;
}
}
}
if (must_escape || sz == 0)
out << "«" << p->m_str << "»";
out << "«" << str << "»";
else
out << p->m_str;
out << str;
} else {
out << p->m_k;
out << n.get_numeral();
}
}
void name::imp::display(std::ostream & out, imp * p, bool escape, char const * sep) {
if (p == nullptr)
out << anonymous_str;
else
display_core(out, p, escape, sep);
name::name(name const & prefix, char const * n):
object_ref(mk_cnstr(static_cast<unsigned>(name_kind::STRING),
prefix.raw(), mk_string(n), sizeof(unsigned))) {
size_t sz = strlen(n);
unsigned h = hash_str(static_cast<unsigned>(sz), n, prefix.hash());
cnstr_set_scalar<unsigned>(raw(), 2*sizeof(object*), h);
}
void copy_limbs(name::imp * p, buffer<name::imp *> & limbs) {
limbs.clear();
while (p != nullptr) {
limbs.push_back(p);
p = p->m_prefix;
}
std::reverse(limbs.begin(), limbs.end());
}
void name::imp::dealloc() {
imp * curr = this;
while (true) {
lean_assert(curr->get_rc() == 0);
imp * p = curr->m_prefix;
if (curr->m_is_string)
delete[] reinterpret_cast<char*>(curr);
else
delete curr;
curr = p;
if (!curr || !curr->dec_ref_core())
break;
}
}
name::name(imp * p) {
m_ptr = p;
if (m_ptr)
m_ptr->inc_ref();
}
name::name() {
m_ptr = nullptr;
}
name::name(name const & prefix, char const * nam) {
size_t sz = strlen(nam);
lean_assert(sz < (1u << 31));
char * mem = new char[sizeof(imp) + sz + 1];
m_ptr = new (mem) imp(true, prefix.m_ptr);
std::memcpy(mem + sizeof(imp), nam, sz + 1);
m_ptr->m_str = mem + sizeof(imp);
// Emscripten easily breaks with small changes here. The main fix seems to be to use m_ptr->m_str instead of name.
m_ptr->m_hash = hash_str(static_cast<unsigned>(sz), m_ptr->m_str, prefix.hash());
}
name::name(name const & prefix, unsigned k, bool) {
m_ptr = new imp(false, prefix.m_ptr);
m_ptr->m_k = k;
if (m_ptr->m_prefix)
m_ptr->m_hash = ::lean::hash(m_ptr->m_prefix->m_hash, k);
else
m_ptr->m_hash = k;
}
name::name(name const & prefix, unsigned k):name(prefix, k, true) {
}
name::name(char const * n):name(name(), n) {
}
name::name(unsigned k):name(name(), k, true) {
name::name(name const & prefix, unsigned k):
object_ref(mk_cnstr(static_cast<unsigned>(name_kind::NUMERAL),
prefix.raw(), mk_nat_obj(k), sizeof(unsigned))) {
unsigned h = ::lean::hash(k, prefix.hash());
cnstr_set_scalar<unsigned>(raw(), 2*sizeof(object*), h);
}
name::name(std::initializer_list<char const *> const & l):name() {
@ -174,47 +121,38 @@ name const & name::anonymous() {
return *g_anonymous;
}
static atomic<unsigned> * g_next_id = nullptr;
name name::mk_internal_unique_name() {
unsigned id = (*g_next_id)++;
return name(id);
}
name & name::operator=(name const & other) { LEAN_COPY_REF(other); }
name & name::operator=(name && other) { LEAN_MOVE_REF(other); }
name_kind name::kind() const {
if (m_ptr == nullptr)
return name_kind::ANONYMOUS;
else
return m_ptr->m_is_string ? name_kind::STRING : name_kind::NUMERAL;
static void copy_limbs(object * p, buffer<object *> & limbs) {
limbs.clear();
while (!is_scalar(p)) {
limbs.push_back(p);
p = name::get_prefix(p);
}
std::reverse(limbs.begin(), limbs.end());
}
/* Equality test core procedure, it is invoked by operator== */
bool eq_core(name::imp * i1, name::imp * i2) {
bool name::eq_core(object * i1, object * i2) {
while (true) {
lean_assert(i1 != nullptr);
lean_assert(i2 != nullptr);
lean_assert(!is_scalar(i1));
lean_assert(!is_scalar(i2));
lean_assert(i1 && i2);
lean_assert(i1->m_hash == i2->m_hash);
if (i1->m_is_string != i2->m_is_string)
lean_assert(hash(i1) == hash(i2));
if (cnstr_tag(i1) != cnstr_tag(i2))
return false;
if (i1->m_is_string) {
if (strcmp(i1->m_str, i2->m_str) != 0)
if (static_cast<name_kind>(cnstr_tag(i1)) == name_kind::STRING) {
if (strcmp(get_string(i1), get_string(i2)) != 0)
return false;
} else {
if (i1->m_k != i2->m_k)
if (!nat_eq(get_num_obj(i1), get_num_obj(i2)))
return false;
}
i1 = i1->m_prefix;
i2 = i2->m_prefix;
i1 = get_prefix(i1);
i2 = get_prefix(i2);
if (i1 == i2)
return true;
if ((i1 == nullptr) != (i2 == nullptr))
if (is_scalar(i1) != is_scalar(i2))
return false;
if (i1->m_hash != i2->m_hash)
if (hash(i1) != hash(i2))
return false;
}
}
@ -222,9 +160,9 @@ bool eq_core(name::imp * i1, name::imp * i2) {
bool is_prefix_of(name const & n1, name const & n2) {
if (n2.is_atomic())
return n1 == n2;
buffer<name::imp*> limbs1, limbs2;
name::imp* i1 = n1.m_ptr;
name::imp* i2 = n2.m_ptr;
buffer<object*> limbs1, limbs2;
object* i1 = n1.raw();
object* i2 = n2.raw();
copy_limbs(i1, limbs1);
copy_limbs(i2, limbs2);
unsigned sz1 = limbs1.size();
@ -238,12 +176,12 @@ bool is_prefix_of(name const & n1, name const & n2) {
for (; it1 != limbs1.end(); ++it1, ++it2) {
i1 = *it1;
i2 = *it2;
if (i1->m_is_string != i2->m_is_string)
if (cnstr_tag(i1) != cnstr_tag(i2))
return false;
if (i1->m_is_string) {
if (strcmp(i1->m_str, i2->m_str) != 0)
if (static_cast<name_kind>(cnstr_tag(i1)) == name_kind::STRING) {
if (strcmp(name::get_string(i1), name::get_string(i2)) != 0)
return false;
} else if (i1->m_k != i2->m_k) {
} else if (!nat_eq(name::get_num_obj(i1), name::get_num_obj(i2))) {
return false;
}
}
@ -251,11 +189,14 @@ bool is_prefix_of(name const & n1, name const & n2) {
}
bool operator==(name const & a, char const * b) {
return a.m_ptr && a.m_ptr->m_is_string && strcmp(a.m_ptr->m_str, b) == 0;
return
a.kind() == name_kind::STRING &&
is_scalar(name::get_prefix(a.raw())) &&
strcmp(a.get_string(), b) == 0;
}
int cmp(name::imp * i1, name::imp * i2) {
buffer<name::imp *> limbs1, limbs2;
int name::cmp(object * i1, object * i2) {
buffer<object*> limbs1, limbs2;
copy_limbs(i1, limbs1);
copy_limbs(i2, limbs2);
auto it1 = limbs1.begin();
@ -263,16 +204,20 @@ int cmp(name::imp * i1, name::imp * i2) {
for (; it1 != limbs1.end() && it2 != limbs2.end(); ++it1, ++it2) {
i1 = *it1;
i2 = *it2;
name_kind k1 = static_cast<name_kind>(cnstr_tag(i1));
name_kind k2 = static_cast<name_kind>(cnstr_tag(i2));
if (k1 != k2)
return k1 == name_kind::STRING ? 1 : -1;
if (i1->m_is_string != i2->m_is_string)
return i1->m_is_string ? 1 : -1;
if (i1->m_is_string) {
int c = strcmp(i1->m_str, i2->m_str);
if (k1 == name_kind::STRING) {
int c = strcmp(get_string(i1), get_string(i2));
if (c != 0)
return c;
} else if (i1->m_k != i2->m_k) {
return i1->m_k < i2->m_k ? -1 : 1;
} else {
object * n1 = get_num_obj(i1);
object * n2 = get_num_obj(i2);
if (nat_ne(n1, n2))
return nat_lt(n1, n2) ? -1 : 1;
}
}
if (it1 == limbs1.end() && it2 == limbs2.end())
@ -292,24 +237,24 @@ static unsigned num_digits(unsigned k) {
}
size_t name::size_core(bool unicode) const {
if (m_ptr == nullptr) {
if (is_scalar(raw())) {
return strlen(anonymous_str);
} else {
imp * i = m_ptr;
object * i = raw();
size_t sep_sz = strlen(lean_name_separator);
size_t r = 0;
while (true) {
if (i->m_is_string) {
r += unicode ? utf8_strlen(i->m_str) : strlen(i->m_str);
lean_assert(!is_scalar(i));
if (kind(i) == name_kind::STRING) {
r += unicode ? string_len(get_string_obj(i)) : strlen(get_string(i));
} else {
r += num_digits(i->m_k);
// TODO(Leo): we are ignoring the case the numeral is not small.
r += num_digits(get_numeral(i));
}
if (i->m_prefix) {
r += sep_sz;
i = i->m_prefix;
} else {
i = get_prefix(i);
if (is_scalar(i))
break;
}
r += sep_sz;
}
return r;
}
@ -324,13 +269,13 @@ size_t name::utf8_size() const {
}
bool name::is_safe_ascii() const {
imp * i = m_ptr;
while (i) {
if (i->m_is_string) {
if (!::lean::is_safe_ascii(i->m_str))
object * i = raw();
while (!is_scalar(i)) {
if (kind(i) == name_kind::STRING) {
if (!::lean::is_safe_ascii(get_string(i)))
return false;
}
i = i->m_prefix;
i = get_prefix(i);
}
return true;
}
@ -345,18 +290,18 @@ name name::get_root() const {
std::string name::to_string(char const * sep) const {
std::ostringstream s;
imp::display(s, m_ptr, false, sep);
display_name_core(s, *this, false, sep);
return s.str();
}
std::string name::escape(char const * sep) const {
std::ostringstream s;
imp::display(s, m_ptr, true, sep);
display_name_core(s, *this, true, sep);
return s.str();
}
std::ostream & operator<<(std::ostream & out, name const & n) {
name::imp::display(out, n.m_ptr, false);
display_name_core(out, n, false, lean_name_separator);
return out;
}
@ -368,13 +313,13 @@ name operator+(name const & n1, name const & n2) {
} else {
name prefix;
if (!n2.is_atomic())
prefix = n1 + name(n2.m_ptr->m_prefix);
prefix = n1 + n2.get_prefix();
else
prefix = n1;
if (n2.m_ptr->m_is_string)
return name(prefix, n2.m_ptr->m_str);
if (n2.is_string())
return name(prefix, n2.get_string());
else
return name(prefix, n2.m_ptr->m_k);
return name(prefix, n2.get_numeral()); // <<< TODO(Leo): ignoring the case the numeral is not small.
}
}
@ -438,7 +383,7 @@ name name::replace_prefix(name const & prefix, name const & new_prefix) const {
if (is_anonymous())
return *this;
name p = get_prefix().replace_prefix(prefix, new_prefix);
if (p.m_ptr == m_ptr)
if (p.raw() == raw())
return *this;
if (is_string())
return name(p, get_string());
@ -512,7 +457,7 @@ public:
switch (k) {
case LL_ANON: return name();
case LL_STRING: return name(d.read_string().c_str());
case LL_INT: return name(name(), d.read_unsigned(), true);
case LL_INT: return name(name(), d.read_unsigned());
case LL_STRING_PREFIX: {
name prefix = read();
return name(prefix, d.read_string().c_str());
@ -555,6 +500,13 @@ bool is_internal_name(name const & n) {
return false;
}
static atomic<unsigned> * g_next_id = nullptr;
name name::mk_internal_unique_name() {
unsigned id = (*g_next_id)++;
return name(name(), id);
}
void initialize_name() {
g_anonymous = new name();
g_name_sd = new name_sd();

View file

@ -15,6 +15,7 @@ Author: Leonardo de Moura
#include "util/rc.h"
#include "util/pair.h"
#include "util/list.h"
#include "util/object_ref.h"
namespace lean {
constexpr char const * lean_name_separator = ".";
@ -42,49 +43,36 @@ enum class name_kind { ANONYMOUS, STRING, NUMERAL };
/**
\brief Hierarchical names.
*/
class name {
class name : object_ref {
public:
/** \brief Actual implementation of hierarchical names. */
struct imp {
MK_LEAN_RC()
bool m_is_string;
unsigned m_hash;
imp * m_prefix;
union {
char * m_str;
unsigned m_k;
};
void dealloc();
imp(bool s, imp * p):m_rc(1), m_is_string(s), m_hash(0), m_prefix(p) { if (p) p->inc_ref(); }
static void display_core(std::ostream & out, imp * p, bool escape, char const * sep);
static void display(std::ostream & out, imp * p, bool escape, char const * sep = lean_name_separator);
friend void copy_limbs(imp * p, buffer<name::imp *> & limbs);
};
private:
friend int cmp(imp * i1, imp * i2);
friend class name_deserializer;
imp * m_ptr;
explicit name(imp * p);
explicit name(unsigned k);
// the parameter bool is only used to distinguish this constructor from the public one.
name(name const & prefix, unsigned k, bool);
/* Low level primitives */
static name_kind kind(object * o) { return static_cast<name_kind>(obj_tag(o)); }
static bool is_anonymous(object * o) { return is_scalar(o); }
static object * get_prefix(object * o) { return cnstr_obj(o, 0); }
static object * get_string_obj(object * o) { return cnstr_obj(o, 1); }
static char const * get_string(object * o) { return c_str(get_string_obj(o)); }
static object * get_num_obj(object * o) { return cnstr_obj(o, 1); }
static unsigned get_numeral(object * o) { return unbox(cnstr_obj(o, 1)); }
static unsigned hash(object * o) { return cnstr_scalar<unsigned>(o, 2*sizeof(object*)); }
static bool eq_core(object * o1, object * o2);
static int cmp(object * o1, object * o2);
size_t size_core(bool unicode) const;
private:
name(object_ref const & r):object_ref(r) {}
public:
name();
name(char const * name);
name(std::string const & s):name(s.c_str()) {}
name():object_ref(box(static_cast<unsigned>(name_kind::ANONYMOUS))) {}
name(name const & prefix, char const * name);
name(name const & prefix, unsigned k);
name(name const & other):m_ptr(other.m_ptr) { if (m_ptr) m_ptr->inc_ref(); }
name(name && other):m_ptr(other.m_ptr) { other.m_ptr = nullptr; }
name(char const * n):name(name(), n) {}
name(std::string const & s):name(s.c_str()) {}
name(name const & other):object_ref(other) {}
name(name && other):object_ref(other) {}
/**
\brief Create a hierarchical name using the given strings.
Example: <code>name{"foo", "bla", "tst"}</code> creates the hierarchical
name <tt>foo::bla::tst</tt>.
*/
name(std::initializer_list<char const *> const & l);
~name() { if (m_ptr) m_ptr->dec_ref(); }
static name const & anonymous();
/**
\brief Create a unique internal name that is not meant to exposed
@ -100,47 +88,41 @@ public:
</code>
*/
static name mk_internal_unique_name();
name & operator=(name const & other);
name & operator=(name && other);
name & operator=(name const & other) { object_ref::operator=(other); return *this; }
name & operator=(name && other) { object_ref::operator=(other); return *this; }
unsigned hash() const { return is_scalar(raw()) ? 11 : hash(raw()); }
/** \brief Return true iff \c n1 is a prefix of \c n2. */
friend bool is_prefix_of(name const & n1, name const & n2);
friend bool eq_core(name::imp * i1, name::imp * i2);
friend bool operator==(name const & a, name const & b) {
if (a.m_ptr == b.m_ptr)
if (a.raw() == b.raw())
return true;
if ((a.m_ptr == nullptr) != (b.m_ptr == nullptr))
if (is_scalar(a.raw()) != is_scalar(b.raw()))
return false;
if (a.m_ptr->m_hash != b.m_ptr->m_hash)
if (a.hash() != b.hash())
return false;
return eq_core(a.m_ptr, b.m_ptr);
return eq_core(a.raw(), b.raw());
}
friend bool operator!=(name const & a, name const & b) { return !(a == b); }
friend bool operator==(name const & a, char const * b);
friend bool operator!=(name const & a, char const * b) { return !(a == b); }
/**
\brief Total order on hierarchical names.
*/
friend int cmp(name const & a, name const & b) { return cmp(a.m_ptr, b.m_ptr); }
/** \brief Total order on hierarchical names. */
friend int cmp(name const & a, name const & b) { return cmp(a.raw(), b.raw()); }
friend bool operator<(name const & a, name const & b) { return cmp(a, b) < 0; }
friend bool operator>(name const & a, name const & b) { return cmp(a, b) > 0; }
friend bool operator<=(name const & a, name const & b) { return cmp(a, b) <= 0; }
friend bool operator>=(name const & a, name const & b) { return cmp(a, b) >= 0; }
name_kind kind() const;
bool is_anonymous() const { return m_ptr == nullptr; }
bool is_string() const { return m_ptr != nullptr && m_ptr->m_is_string; }
bool is_numeral() const { return m_ptr != nullptr && !m_ptr->m_is_string; }
explicit operator bool() const { return m_ptr != nullptr; }
unsigned get_numeral() const { lean_assert(is_numeral()); return m_ptr->m_k; }
/**
\brief If the tail of the given hierarchical name is a string, then it returns this string.
\pre is_string()
*/
char const * get_string() const { lean_assert(is_string()); return m_ptr->m_str; }
bool is_atomic() const { return m_ptr == nullptr || m_ptr->m_prefix == nullptr; }
/**
\brief Return the prefix of a hierarchical name
*/
name get_prefix() const { return is_atomic() ? name() : name(m_ptr->m_prefix); }
name_kind kind() const { return kind(raw()); }
bool is_anonymous() const { return kind() == name_kind::ANONYMOUS; }
bool is_string() const { return kind() == name_kind::STRING; }
bool is_numeral() const { return kind() == name_kind::NUMERAL; }
explicit operator bool() const { return kind() != name_kind::ANONYMOUS; }
unsigned get_numeral() const { lean_assert(is_numeral()); return get_numeral(raw()); }
char const * get_string() const { lean_assert(is_string()); return get_string(raw()); }
name const & get_prefix() const {
if (is_anonymous()) return *this;
else return static_cast<name const &>(cnstr_obj_ref(*this, 0));
}
bool is_atomic() const { return is_anonymous() || kind(get_prefix(raw())) == name_kind::ANONYMOUS; }
/** \brief Given a name of the form a_1.a_2. ... .a_k, return a_1 if k >= 1, or the empty name otherwise. */
name get_root() const;
/** \brief Convert this hierarchical name into a string. */
@ -150,7 +132,6 @@ public:
size_t size() const;
/** \brief Size of the this name in unicode. */
size_t utf8_size() const;
unsigned hash() const { return m_ptr ? m_ptr->m_hash : 11; }
/** \brief Return true iff the name contains only safe ASCII chars */
bool is_safe_ascii() const;
friend std::ostream & operator<<(std::ostream & out, name const & n);
@ -195,17 +176,14 @@ public:
*/
name replace_prefix(name const & prefix, name const & new_prefix) const;
friend void swap(name & a, name & b) {
std::swap(a.m_ptr, b.m_ptr);
}
friend void swap(name & a, name & b) { object_ref::swap(a, b); }
/**
\brief Quicker version of \c cmp that uses the hashcode.
Remark: we should not use it when we want to order names using
lexicographical order.
*/
friend int quick_cmp(name const & a, name const & b) {
if (a.m_ptr == b.m_ptr)
if (a.raw() == b.raw())
return 0;
unsigned h1 = a.hash();
unsigned h2 = b.hash();
@ -217,9 +195,8 @@ public:
return cmp(a, b);
}
}
struct ptr_hash { unsigned operator()(name const & n) const { return std::hash<imp*>()(n.m_ptr); } };
struct ptr_eq { bool operator()(name const & n1, name const & n2) const { return n1.m_ptr == n2.m_ptr; } };
struct ptr_hash { unsigned operator()(name const & n) const { return std::hash<object*>()(n.raw()); } };
struct ptr_eq { bool operator()(name const & n1, name const & n2) const { return n1.raw() == n2.raw(); } };
};
name string_to_name(std::string const & str);