From eb170d1f43e8d30d0149aef0ede2d7c31d5f2a08 Mon Sep 17 00:00:00 2001 From: Sebastian Ullrich Date: Mon, 16 May 2022 13:23:37 +0200 Subject: [PATCH] fix: compiled string literals containing null bytes --- src/Lean/Compiler/IR/EmitC.lean | 2 +- src/include/lean/lean.h | 1 + src/runtime/object.cpp | 37 +++++++++++----------------- tests/compiler/str.lean | 1 + tests/compiler/str.lean.expected.out | 1 + 5 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/Lean/Compiler/IR/EmitC.lean b/src/Lean/Compiler/IR/EmitC.lean index f6377307b1..6a11e7bd3a 100644 --- a/src/Lean/Compiler/IR/EmitC.lean +++ b/src/Lean/Compiler/IR/EmitC.lean @@ -501,7 +501,7 @@ def emitLit (z : VarId) (t : IRType) (v : LitVal) : M Unit := do emitLhs z; match v with | LitVal.num v => emitNumLit t v; emitLn ";" - | LitVal.str v => emit "lean_mk_string("; emit (quoteString v); emitLn ");" + | LitVal.str v => emit "lean_mk_string_from_bytes("; emit (quoteString v); emit ", "; emit v.utf8ByteSize; emitLn ");" def emitVDecl (z : VarId) (t : IRType) (v : Expr) : M Unit := match v with diff --git a/src/include/lean/lean.h b/src/include/lean/lean.h index 3739b88e46..93b23d9bca 100644 --- a/src/include/lean/lean.h +++ b/src/include/lean/lean.h @@ -994,6 +994,7 @@ static inline size_t lean_string_capacity(lean_object * o) { return lean_to_stri static inline size_t lean_string_byte_size(lean_object * o) { return sizeof(lean_string_object) + lean_string_capacity(o); } /* instance : inhabited char := ⟨'A'⟩ */ static inline uint32_t lean_char_default_value() { return 'A'; } +LEAN_SHARED lean_obj_res lean_mk_string_from_bytes(char const * s, size_t sz); LEAN_SHARED lean_obj_res lean_mk_string(char const * s); static inline char const * lean_string_cstr(b_lean_obj_arg o) { assert(lean_is_string(o)); diff --git a/src/runtime/object.cpp b/src/runtime/object.cpp index fdb17220d2..bdfb0fe3e2 100644 --- a/src/runtime/object.cpp +++ b/src/runtime/object.cpp @@ -1570,23 +1570,24 @@ static object * string_ensure_capacity(object * o, size_t extra) { } } -extern "C" LEAN_EXPORT object * lean_mk_string(char const * s) { - size_t sz = strlen(s); - size_t len = utf8_strlen(s); +extern "C" LEAN_EXPORT object * lean_mk_string_core(char const * s, size_t sz, size_t len) { size_t rsz = sz + 1; object * r = lean_alloc_string(rsz, rsz, len); - memcpy(w_string_cstr(r), s, sz+1); + memcpy(w_string_cstr(r), s, sz); + w_string_cstr(r)[sz] = 0; return r; } +extern "C" LEAN_EXPORT object * lean_mk_string_from_bytes(char const * s, size_t sz) { + return lean_mk_string_core(s, sz, utf8_strlen(s, sz)); +} + +extern "C" LEAN_EXPORT object * lean_mk_string(char const * s) { + return lean_mk_string_from_bytes(s, strlen(s)); +} + extern "C" LEAN_EXPORT obj_res lean_string_from_utf8_unchecked(b_obj_arg a) { - size_t sz = lean_sarray_size(a); - size_t len = utf8_strlen(reinterpret_cast(lean_sarray_cptr(a)), sz); - size_t rsz = sz + 1; - obj_res r = lean_alloc_string(rsz, rsz, len); - memcpy(w_string_cstr(r), lean_sarray_cptr(a), sz); - w_string_cstr(r)[sz] = 0; - return r; + return lean_mk_string_from_bytes(reinterpret_cast(lean_sarray_cptr(a)), lean_sarray_size(a)); } extern "C" LEAN_EXPORT obj_res lean_string_to_utf8(b_obj_arg s) { @@ -1597,13 +1598,7 @@ extern "C" LEAN_EXPORT obj_res lean_string_to_utf8(b_obj_arg s) { } object * mk_string(std::string const & s) { - size_t sz = s.size(); - size_t len = utf8_strlen(s); - size_t rsz = sz + 1; - object * r = lean_alloc_string(rsz, rsz, len); - memcpy(w_string_cstr(r), s.data(), sz); - w_string_cstr(r)[sz] = 0; - return r; + return lean_mk_string_from_bytes(s.data(), s.size()); } std::string string_to_std(b_obj_arg o) { @@ -1828,11 +1823,7 @@ extern "C" LEAN_EXPORT obj_res lean_string_utf8_extract(b_obj_arg s, b_obj_arg b if (e < sz && !is_utf8_first_byte(str[e])) e = sz; usize new_sz = e - b; lean_assert(new_sz > 0); - obj_res r = lean_alloc_string(new_sz+1, new_sz+1, 0); - memcpy(w_string_cstr(r), lean_string_cstr(s) + b, new_sz); - w_string_cstr(r)[new_sz] = 0; - lean_to_string(r)->m_length = utf8_strlen(w_string_cstr(r), new_sz); - return r; + return lean_mk_string_from_bytes(lean_string_cstr(s) + b, new_sz); } extern "C" LEAN_EXPORT obj_res lean_string_utf8_prev(b_obj_arg s, b_obj_arg i0) { diff --git a/tests/compiler/str.lean b/tests/compiler/str.lean index 265e52d4a1..23900624f9 100644 --- a/tests/compiler/str.lean +++ b/tests/compiler/str.lean @@ -25,4 +25,5 @@ IO.println ("".isPrefixOf "") *> IO.println ("ab".isPrefixOf "cb") *> IO.println ("ab".isPrefixOf "a") *> IO.println ("αb".isPrefixOf "αbc") *> +IO.println ("\x00a").length *> pure 0 diff --git a/tests/compiler/str.lean.expected.out b/tests/compiler/str.lean.expected.out index 89847d6c72..ebfe76f9ea 100644 --- a/tests/compiler/str.lean.expected.out +++ b/tests/compiler/str.lean.expected.out @@ -28,3 +28,4 @@ true false false true +2