fix: compiled string literals containing null bytes

This commit is contained in:
Sebastian Ullrich 2022-05-16 13:23:37 +02:00 committed by Leonardo de Moura
parent 1e271c3432
commit eb170d1f43
5 changed files with 18 additions and 24 deletions

View file

@ -501,7 +501,7 @@ def emitLit (z : VarId) (t : IRType) (v : LitVal) : M Unit := do
emitLhs z;
match v with
| LitVal.num v => emitNumLit t v; emitLn ";"
| LitVal.str v => emit "lean_mk_string("; emit (quoteString v); emitLn ");"
| LitVal.str v => emit "lean_mk_string_from_bytes("; emit (quoteString v); emit ", "; emit v.utf8ByteSize; emitLn ");"
def emitVDecl (z : VarId) (t : IRType) (v : Expr) : M Unit :=
match v with

View file

@ -994,6 +994,7 @@ static inline size_t lean_string_capacity(lean_object * o) { return lean_to_stri
/* Number of bytes occupied by the string object `o`: the size of the
   `lean_string_object` header plus the value reported by `lean_string_capacity(o)`. */
static inline size_t lean_string_byte_size(lean_object * o) {
    size_t const header_bytes = sizeof(lean_string_object);
    return header_bytes + lean_string_capacity(o);
}
/* instance : inhabited char := ⟨'A'⟩ */
/* Returns the default character value, the code of 'A'.
   The parameter list is spelled `(void)` so that the declaration is a real
   prototype if this header is compiled as C; in C++ it means the same as `()`. */
static inline uint32_t lean_char_default_value(void) { return 'A'; }
/* Create a string object from the `sz` bytes starting at `s`. The byte count is
   passed explicitly instead of being found with `strlen`, so the data may
   contain NUL bytes. */
LEAN_SHARED lean_obj_res lean_mk_string_from_bytes(char const * s, size_t sz);
/* Create a string object from the NUL-terminated C string `s`. */
LEAN_SHARED lean_obj_res lean_mk_string(char const * s);
static inline char const * lean_string_cstr(b_lean_obj_arg o) {
assert(lean_is_string(o));

View file

@ -1570,23 +1570,24 @@ static object * string_ensure_capacity(object * o, size_t extra) {
}
}
/* Allocate a string object holding a copy of the `sz` bytes at `s`.

   - `s`:   source bytes; exactly `sz` bytes are read, so the buffer does not
            have to be NUL-terminated.
   - `sz`:  number of bytes to copy (terminator not included).
   - `len`: passed through unchanged as the third argument of
            `lean_alloc_string`; the callers in this file compute it with
            `utf8_strlen`.

   The terminating NUL is written explicitly after the copy instead of being
   copied from `s`, which would read one byte past the `sz` bytes supplied. */
extern "C" LEAN_EXPORT object * lean_mk_string_core(char const * s, size_t sz, size_t len) {
    size_t rsz = sz + 1; // one extra byte for the terminating NUL
    object * r = lean_alloc_string(rsz, rsz, len);
    memcpy(w_string_cstr(r), s, sz);
    w_string_cstr(r)[sz] = 0;
    return r;
}
/* Build a string object from the `sz` bytes at `s`. The length handed to
   `lean_mk_string_core` is obtained from `utf8_strlen(s, sz)`. */
extern "C" LEAN_EXPORT object * lean_mk_string_from_bytes(char const * s, size_t sz) {
    size_t const len = utf8_strlen(s, sz);
    return lean_mk_string_core(s, sz, len);
}
/* Build a string object from the NUL-terminated C string `s`. The byte count
   is measured with `strlen`, so the result ends at the first NUL byte. */
extern "C" LEAN_EXPORT object * lean_mk_string(char const * s) {
    size_t const nbytes = strlen(s);
    return lean_mk_string_from_bytes(s, nbytes);
}
/* Build a string object from the bytes stored in the scalar array `a`.
   The array's data pointer and size are handed to `lean_mk_string_from_bytes`
   rather than repeating the allocate/copy/terminate sequence inline, so every
   string constructor in this file goes through the same code path. */
extern "C" LEAN_EXPORT obj_res lean_string_from_utf8_unchecked(b_obj_arg a) {
    return lean_mk_string_from_bytes(reinterpret_cast<char *>(lean_sarray_cptr(a)), lean_sarray_size(a));
}
extern "C" LEAN_EXPORT obj_res lean_string_to_utf8(b_obj_arg s) {
@ -1597,13 +1598,7 @@ extern "C" LEAN_EXPORT obj_res lean_string_to_utf8(b_obj_arg s) {
}
/* Convert a `std::string` into a string object.
   Pointer and size are both taken from `s` and passed to
   `lean_mk_string_from_bytes`, so all `s.size()` bytes are used even if some
   of them are NUL. */
object * mk_string(std::string const & s) {
    return lean_mk_string_from_bytes(s.data(), s.size());
}
std::string string_to_std(b_obj_arg o) {
@ -1828,11 +1823,7 @@ extern "C" LEAN_EXPORT obj_res lean_string_utf8_extract(b_obj_arg s, b_obj_arg b
if (e < sz && !is_utf8_first_byte(str[e])) e = sz;
usize new_sz = e - b;
lean_assert(new_sz > 0);
obj_res r = lean_alloc_string(new_sz+1, new_sz+1, 0);
memcpy(w_string_cstr(r), lean_string_cstr(s) + b, new_sz);
w_string_cstr(r)[new_sz] = 0;
lean_to_string(r)->m_length = utf8_strlen(w_string_cstr(r), new_sz);
return r;
return lean_mk_string_from_bytes(lean_string_cstr(s) + b, new_sz);
}
extern "C" LEAN_EXPORT obj_res lean_string_utf8_prev(b_obj_arg s, b_obj_arg i0) {

View file

@ -25,4 +25,5 @@ IO.println ("".isPrefixOf "") *>
IO.println ("ab".isPrefixOf "cb") *>
IO.println ("ab".isPrefixOf "a") *>
IO.println ("αb".isPrefixOf "αbc") *>
IO.println ("\x00a").length *>
pure 0

View file

@ -28,3 +28,4 @@ true
false
false
true
2