- Lean strings (like std::string) may contain null characters. The codebase was ignoring this issue. - We now have a wrapper `string_ref` for wrapping Lean string objects in C++. This wrapper also correctly implements the coercions std::string <-> string_ref. Remark: I also found a few places where the code relies on the following property, which is not true: forall s : std::string, std::string(s.c_str()) == s. - The `name` object wrapper was assuming that all numerals were small `nat` values. This is true in most cases, but the system would crash when processing a numeral that is a big number. - The commit tries to make sure runtime/util/kernel are correct. Modules that will be deleted contain many `TODO` comments indicating they may crash and/or produce incorrect results when strings contain null characters and numerals are big. cc @kha @kha: I thought about using `string` instead of `string_ref`. We consistently use `std::string`, so it should be fine, but I was concerned about code readability. After we bootstrap Lean4, we will be able to delete the `lean::list` template and rename `lean::list_ref` to `lean::list`. I am going to add `pair_ref` for wrapping Lean pair objects. If we use `lean::string` instead of `lean::string_ref`, then we should also use `lean::pair` instead of `lean::pair_ref`. But there is a problem in this case, since we have https://github.com/leanprover/lean4/blob/master/src/util/pair.h#L13 :(
52 lines
1.4 KiB
C++
52 lines
1.4 KiB
C++
/*
Copyright (c) 2013 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.

Author: Leonardo de Moura
*/
|
|
#include <initializer_list>
|
|
#include <cstring>
|
|
namespace lean {
/**
   Lookup table indexed by the unsigned value of a character.
   An entry is true iff the character is considered "safe ASCII".
   The table has static storage, so every entry is false until
   initialize_ascii() has been executed.
*/
static bool g_safe_ascii[256];

/** Mark the character with code \c i as safe (\c v == true) or unsafe. */
static void set(int i, bool v) { g_safe_ascii[static_cast<unsigned char>(i)] = v; }

/**
   Populate the safe-ASCII table: digits, ASCII letters, and a fixed
   set of punctuation/whitespace characters are safe; everything else
   (including the null character and all codes >= 128) is unsafe.
*/
void initialize_ascii() {
    // Start from a clean slate so that calling this function again is harmless.
    for (int i = 0; i <= 255; i++)
        set(i, false);
    // digits and characters are safe
    for (int i = '0'; i <= '9'; i++) set(i, true);
    for (int i = 'a'; i <= 'z'; i++) set(i, true);
    for (int i = 'A'; i <= 'Z'; i++) set(i, true);
    // the following characters are also safe
    for (unsigned char b : {'_', ' ', '\t', '\r', '\n', '(', ')', '{', '}', ':', '.', ',', '\"', '\'', '`', '!', '#',
                '=', '<', '>', '@', '^', '|', '&', '~', '+', '-', '*', '/', '\\', '$', '%', '?', ';', '[', ']'})
        set(b, true);
}

/** Counterpart of initialize_ascii(); there is nothing to release. */
void finalize_ascii() {
}

/** Return true iff \c c is marked as safe in the table. */
bool is_safe_ascii(char c) {
    // Go through unsigned char so that a negative char cannot index out of bounds.
    return g_safe_ascii[static_cast<unsigned char>(c)];
}

/**
   Return true iff every character of the null-terminated string \c str is safe.
   A null pointer is treated like the empty string and yields true.
*/
bool is_safe_ascii(char const * str) {
    if (str) {
        while (*str != 0) {
            if (!is_safe_ascii(*str))
                return false;
            str++;
        }
    }
    return true;
}

/**
   Return true iff each of the first \c sz characters of \c str is safe.
   Unlike the null-terminated overload, embedded null characters are
   inspected (and are unsafe). A null \c str is treated like the empty
   string and yields true, mirroring the null-terminated overload.
*/
bool is_safe_ascii(char const * str, size_t sz) {
    if (str == nullptr)
        return true;
    for (size_t i = 0; i < sz; i++) {
        if (!is_safe_ascii(str[i]))
            return false;
    }
    return true;
}
}
|