feat: add Lean.CompactedRegion.save/read supporting cross-file object sharing (#13185)
This PR adds new incremental module serialization functions that save/load a single module at a time with explicit sharing via dep regions and compactor state, generalizing the existing batch saveModuleDataParts API. Two sharing mechanisms that can be mixed: - `CompactedRegion` dep regions for sharing with loaded regions - `CompactorState` for same-process chaining (pre-loaded `m_obj_table`)
This commit is contained in:
parent
1b8f1f140c
commit
895752dc2e
13 changed files with 602 additions and 270 deletions
81
src/Lean/CompactedRegion.lean
Normal file
81
src/Lean/CompactedRegion.lean
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
/-
|
||||
Copyright (c) 2026 Lean FRO, LLC. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Authors: Sebastian Ullrich
|
||||
-/
|
||||
module
|
||||
|
||||
prelude
|
||||
public import Init.System.IO
|
||||
public import Lean.Data.Name
|
||||
|
||||
namespace Lean
|
||||
|
||||
/--
|
||||
A compacted region holds multiple Lean objects in a contiguous memory region, which can be
|
||||
read/written to/from disk. Objects inside the region do not have reference counters and cannot be
|
||||
freed individually. The contents of `.olean` files are compacted regions.
|
||||
-/
|
||||
@[expose] public def CompactedRegion := USize
|
||||
|
||||
@[extern "lean_compacted_region_is_memory_mapped"]
|
||||
public opaque CompactedRegion.isMemoryMapped : CompactedRegion → Bool
|
||||
|
||||
/-- Size in bytes. -/
|
||||
@[extern "lean_compacted_region_size"]
|
||||
public opaque CompactedRegion.size : CompactedRegion → USize
|
||||
|
||||
/--
|
||||
Frees a compacted region and its contents. No live references to the contents may exist at the
|
||||
time of invocation.
|
||||
-/
|
||||
@[extern "lean_compacted_region_free"]
|
||||
public unsafe opaque CompactedRegion.free : CompactedRegion → IO Unit
|
||||
|
||||
opaque CompactorSpec : NonemptyType.{0}
|
||||
/--
|
||||
Holds an opaque compactor handle returned by `CompactedRegion.save`, used to chain subsequent saves
|
||||
so that objects shared between parts are emitted exactly once.
|
||||
|
||||
Not thread-safe: a `Compactor` value must not be used concurrently from multiple threads. The
|
||||
`CompactedRegion`s passed as `depRegions` when the `Compactor` was first created must outlive the
|
||||
`Compactor`; if any are freed (via `CompactedRegion.free`) while the `Compactor` is still in use,
|
||||
subsequent saves will dereference dangling pointers.
|
||||
-/
|
||||
public def Compactor := CompactorSpec.type
|
||||
|
||||
/--
|
||||
Saves arbitrary data to a compacted region on disk.
|
||||
|
||||
The `α` type parameter is erased at the runtime/extern boundary: the compactor walks the live
|
||||
object graph rooted at `data` regardless of its Lean type. `α` is purely a hint for the caller to
|
||||
align save and load sites. Mismatched types between save and load yield undefined behavior on use.
|
||||
|
||||
`key` is hashed to derive a deterministic mmap base address; for module saves, pass the module
|
||||
`Name`. `depRegions` are loaded compacted regions (typically from imports) whose objects should
|
||||
not be re-serialized. `prev`, when present, likewise allows for reuse of objects from prior saves
|
||||
in the same session.
|
||||
|
||||
Returns a `Compactor` that may be passed as `prev` to subsequent saves. Unsafe because the
|
||||
returned `Compactor` carries thread-safety and `depRegions` lifetime contracts the type system
|
||||
cannot enforce; see `Compactor`.
|
||||
-/
|
||||
@[extern "lean_compacted_region_save"]
|
||||
public unsafe opaque CompactedRegion.save {α : Type} (fname : @& System.FilePath) (key : @& Name)
|
||||
(data : @& α) (depRegions : @& Array CompactedRegion) (prev : Option Compactor) :
|
||||
IO Compactor
|
||||
|
||||
/--
|
||||
Reads a compacted region from disk.
|
||||
|
||||
`depRegions` are existing compacted regions whose address ranges are needed for cross-region
|
||||
pointer fixup. The result is the root object reinterpreted at type `α` paired with the new
|
||||
`CompactedRegion`. Unsafe because `α` is type-erased at the extern boundary: it is the caller's
|
||||
responsibility to ensure `α` matches the type the data was saved at; mismatched types yield
|
||||
undefined behavior on use.
|
||||
-/
|
||||
@[extern "lean_compacted_region_read"]
|
||||
public unsafe opaque CompactedRegion.read {α : Type} (fname : @& System.FilePath)
|
||||
(depRegions : @& Array CompactedRegion) : IO (α × CompactedRegion)
|
||||
|
||||
end Lean
|
||||
|
|
@ -20,6 +20,7 @@ public import Lean.Util.InstantiateLevelParams
|
|||
public import Lean.Util.FoldConsts
|
||||
public import Lean.PrivateName
|
||||
public import Lean.LoadDynlib
|
||||
public import Lean.CompactedRegion
|
||||
public import Init.Dynamic
|
||||
import Init.Data.Slice
|
||||
import Init.Data.String.TakeDrop
|
||||
|
|
@ -98,23 +99,6 @@ instance : GetElem? (Array α) ModuleIdx α (fun a i => i.toNat < a.size) where
|
|||
|
||||
abbrev ConstMap := SMap Name ConstantInfo
|
||||
|
||||
/--
|
||||
A compacted region holds multiple Lean objects in a contiguous memory region, which can be read/written to/from disk.
|
||||
Objects inside the region do not have reference counters and cannot be freed individually. The contents of .olean
|
||||
files are compacted regions. -/
|
||||
@[expose] def CompactedRegion := USize
|
||||
|
||||
@[extern "lean_compacted_region_is_memory_mapped"]
|
||||
opaque CompactedRegion.isMemoryMapped : CompactedRegion → Bool
|
||||
|
||||
/-- Size in bytes. -/
|
||||
@[extern "lean_compacted_region_size"]
|
||||
opaque CompactedRegion.size : CompactedRegion → USize
|
||||
|
||||
/-- Free a compacted region and its contents. No live references to the contents may exist at the time of invocation. -/
|
||||
@[extern "lean_compacted_region_free"]
|
||||
unsafe opaque CompactedRegion.free : CompactedRegion → IO Unit
|
||||
|
||||
/-- Opaque persistent environment extension entry. -/
|
||||
opaque EnvExtensionEntrySpec : NonemptyType.{0}
|
||||
@[expose] def EnvExtensionEntry : Type := EnvExtensionEntrySpec.type
|
||||
|
|
@ -1752,24 +1736,31 @@ duplicated. Thus the data cannot be loaded with individual `readModuleData` call
|
|||
passing (a prefix of) the file names to `readModuleDataParts`. `mod` is used to determine an
|
||||
arbitrary but deterministic base address for `mmap`.
|
||||
-/
|
||||
@[extern "lean_save_module_data_parts"]
|
||||
opaque saveModuleDataParts (mod : @& Name) (parts : @& Array (System.FilePath × ModuleData)) : IO Unit
|
||||
def saveModuleDataParts (mod : Name) (parts : Array (System.FilePath × ModuleData)) : IO Unit := do
|
||||
let mut cs : Option Compactor := none
|
||||
for h : i in [:parts.size] do
|
||||
let (fname, data) := parts[i]
|
||||
cs := some (← unsafe CompactedRegion.save fname mod data #[] cs)
|
||||
|
||||
/--
|
||||
Loads the module data from the given file names. The files must be (a prefix of) the result of a
|
||||
`saveModuleDataParts` call.
|
||||
-/
|
||||
@[extern "lean_read_module_data_parts"]
|
||||
opaque readModuleDataParts (fnames : @& Array System.FilePath) : IO (Array (ModuleData × CompactedRegion))
|
||||
def readModuleDataParts (fnames : Array System.FilePath) :
|
||||
IO (Array (ModuleData × CompactedRegion)) := do
|
||||
let mut depRegions : Array CompactedRegion := #[]
|
||||
let mut result : Array (ModuleData × CompactedRegion) := #[]
|
||||
for fname in fnames do
|
||||
let part ← unsafe CompactedRegion.read (α := ModuleData) fname depRegions
|
||||
result := result.push part
|
||||
depRegions := depRegions.push part.2
|
||||
return result
|
||||
|
||||
def saveModuleData (fname : System.FilePath) (mod : Name) (data : ModuleData) : IO Unit :=
|
||||
saveModuleDataParts mod #[(fname, data)]
|
||||
def saveModuleData (fname : System.FilePath) (mod : Name) (data : ModuleData) : IO Unit := do
|
||||
let _ ← unsafe CompactedRegion.save fname mod data #[] none
|
||||
|
||||
def readModuleData (fname : @& System.FilePath) : IO (ModuleData × CompactedRegion) := do
|
||||
let parts ← readModuleDataParts #[fname]
|
||||
assert! parts.size == 1
|
||||
let some part := parts[0]? | unreachable!
|
||||
return part
|
||||
def readModuleData (fname : @& System.FilePath) : IO (ModuleData × CompactedRegion) :=
|
||||
unsafe CompactedRegion.read fname #[]
|
||||
|
||||
/--
|
||||
Free compacted regions of imports. No live references to imported objects may exist at the time of invocation; in
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
|
|||
#include "runtime/compact.h"
|
||||
#include "runtime/buffer.h"
|
||||
#include "runtime/array_ref.h"
|
||||
#include "runtime/option_ref.h"
|
||||
#include "util/io.h"
|
||||
#include "util/name_map.h"
|
||||
#include "library/module.h"
|
||||
|
|
@ -42,6 +43,13 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
|
|||
#include <fcntl.h>
|
||||
#endif
|
||||
|
||||
// `MAP_FIXED_NOREPLACE` turns out to improve mmap hits in the snapshot use case but is exclusive to
|
||||
// Linux 4.17+ and glibc 2.28+; as this is not a core use case and older kernels will silently
|
||||
// ignore the flag, we simply define it conditionally.
|
||||
#if defined(__linux__) && !defined(MAP_FIXED_NOREPLACE)
|
||||
#define MAP_FIXED_NOREPLACE 0x100000
|
||||
#endif
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(address_sanitizer)
|
||||
#include <sanitizer/lsan_interface.h>
|
||||
|
|
@ -125,275 +133,276 @@ struct olean_header {
|
|||
// make sure we don't have any padding bytes, which also ensures `data` is properly aligned
|
||||
static_assert(sizeof(olean_header) == 5 + 1 + 1 + 33 + 40 + sizeof(size_t), "olean_header must be packed");
|
||||
|
||||
extern "C" LEAN_EXPORT object * lean_save_module_data_parts(b_obj_arg mod, b_obj_arg oparts, object *) {
|
||||
|
||||
// Compactor external object: wraps a live `object_compactor` for incremental compaction.
|
||||
// Keeping the full compactor alive preserves both the `obj_table` (pointer-identity dedup)
|
||||
// and the `max_sharing_table` (structural dedup) across sequential `lean_compacted_region_save`
|
||||
// calls so that objects shared between parts are emitted exactly once.
|
||||
|
||||
static lean_external_class * g_compactor_class = nullptr;
|
||||
|
||||
static void compactor_finalizer(void * data) {
|
||||
delete static_cast<object_compactor *>(data);
|
||||
}
|
||||
|
||||
static void compactor_foreach(void *, b_lean_obj_arg) {
|
||||
// no Lean object references to trace
|
||||
}
|
||||
|
||||
static void ensure_compactor_class() {
|
||||
if (!g_compactor_class) {
|
||||
g_compactor_class = lean_register_external_class(compactor_finalizer, compactor_foreach);
|
||||
}
|
||||
}
|
||||
|
||||
static lean_object * mk_compactor(void * base_addr, std::vector<compacted_region *> dep_regions) {
|
||||
ensure_compactor_class();
|
||||
return lean_alloc_external(g_compactor_class,
|
||||
new object_compactor(base_addr, std::move(dep_regions)));
|
||||
}
|
||||
|
||||
static object_compactor * to_compactor(lean_object * o) {
|
||||
return static_cast<object_compactor *>(lean_get_external_data(o));
|
||||
}
|
||||
|
||||
// Extract `compacted_region *` pointers from an `Array CompactedRegion`.
|
||||
static std::vector<compacted_region *> extract_dep_regions(b_obj_arg odep_regions) {
|
||||
std::vector<compacted_region *> result;
|
||||
size_t n = lean_array_size(odep_regions);
|
||||
result.reserve(n);
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
result.push_back(reinterpret_cast<compacted_region *>(
|
||||
lean_unbox_usize(lean_array_get_core(odep_regions, i))));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
extern "C" LEAN_EXPORT object * lean_compacted_region_save(b_obj_arg ofname, b_obj_arg mod, b_obj_arg odata,
|
||||
b_obj_arg odep_regions, obj_arg oprev, object *) {
|
||||
// `mmap` addresses must be page-aligned. The default (non-huge) page size on x86-64 is 4KB;
|
||||
// `MapViewOfFileEx` addresses must be aligned to the "memory allocation granularity" (64KB).
|
||||
const size_t ALIGN = 1LL<<16;
|
||||
|
||||
option_ref<object_ref> prev(oprev);
|
||||
object_ref cs_obj;
|
||||
if (prev) {
|
||||
cs_obj = prev.get_val();
|
||||
} else {
|
||||
// Derive a base address that is uniformly distributed but deterministic, and should most
|
||||
// likely work for `mmap` on all interesting platforms.
|
||||
// NOTE: an overlapping/non-compatible base address does not prevent the module from being
|
||||
// imported, merely from using `mmap` for that.
|
||||
// Start with a hash of the module name. While our string hash is a dubious 32-bit
|
||||
// algorithm, the mixing of multiple `Name` parts seems to result in a nicely distributed
|
||||
// 64-bit output.
|
||||
size_t base_addr = name(mod, true).hash();
|
||||
// x86-64 user space is currently limited to the lower 47 bits
|
||||
// (https://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details).
|
||||
// On Linux at least, the stack grows down from ~0x7fff... followed by shared libraries,
|
||||
// so reserve a bit of space for them (0x7fff...-0x7f00... = 1TB).
|
||||
base_addr = base_addr % 0x7f0000000000;
|
||||
base_addr = base_addr & ~(ALIGN - 1);
|
||||
std::vector<compacted_region *> dep_regions = extract_dep_regions(odep_regions);
|
||||
cs_obj = object_ref(mk_compactor(reinterpret_cast<void *>(base_addr), std::move(dep_regions)));
|
||||
}
|
||||
|
||||
object_compactor & compactor = *to_compactor(cs_obj.raw());
|
||||
|
||||
char const * olean_fn = lean_string_cstr(ofname);
|
||||
// We first write to a temp file and then move it to the correct path (possibly deleting an
|
||||
// older file) so that we neither expose partially-written files nor modify possibly
|
||||
// memory-mapped files.
|
||||
#ifdef LEAN_WINDOWS
|
||||
uint32_t pid = GetCurrentProcessId();
|
||||
#else
|
||||
uint32_t pid = getpid();
|
||||
#endif
|
||||
// Derive a base address that is uniformly distributed by deterministic, and should most likely
|
||||
// work for `mmap` on all interesting platforms
|
||||
// NOTE: an overlapping/non-compatible base address does not prevent the module from being imported,
|
||||
// merely from using `mmap` for that
|
||||
std::string olean_tmp_fn = std::string(olean_fn) + ".tmp." + std::to_string(pid);
|
||||
|
||||
// Let's start with a hash of the module name. Note that while our string hash is a dubious 32-bit
|
||||
// algorithm, the mixing of multiple `Name` parts seems to result in a nicely distributed 64-bit
|
||||
// output
|
||||
size_t base_addr = name(mod, true).hash();
|
||||
// x86-64 user space is currently limited to the lower 47 bits
|
||||
// https://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
|
||||
// On Linux at least, the stack grows down from ~0x7fff... followed by shared libraries, so reserve
|
||||
// a bit of space for them (0x7fff...-0x7f00... = 1TB)
|
||||
base_addr = base_addr % 0x7f0000000000;
|
||||
// `mmap` addresses must be page-aligned. The default (non-huge) page size on x86-64 is 4KB.
|
||||
// `MapViewOfFileEx` addresses must be aligned to the "memory allocation granularity", which is 64KB.
|
||||
const size_t ALIGN = 1LL<<16;
|
||||
base_addr = base_addr & ~(ALIGN - 1);
|
||||
try {
|
||||
std::ofstream out(olean_tmp_fn, std::ios_base::binary);
|
||||
|
||||
object_compactor compactor(reinterpret_cast<void *>(base_addr));
|
||||
|
||||
array_ref<pair_ref<string_ref, object_ref>> parts(oparts, true);
|
||||
std::vector<std::string> tmp_fnames;
|
||||
for (auto const & part : parts) {
|
||||
std::string olean_fn = part.fst().to_std_string();
|
||||
try {
|
||||
// we first write to a temp file and then move it to the correct path (possibly deleting an older file)
|
||||
// so that we neither expose partially-written files nor modify possibly memory-mapped files
|
||||
std::string olean_tmp_fn = olean_fn + ".tmp." + std::to_string(pid);
|
||||
tmp_fnames.push_back(olean_tmp_fn);
|
||||
|
||||
std::ofstream out(olean_tmp_fn, std::ios_base::binary);
|
||||
|
||||
if (compactor.size() % ALIGN != 0) {
|
||||
compactor.alloc(ALIGN - (compactor.size() % ALIGN));
|
||||
}
|
||||
size_t file_offset = compactor.size();
|
||||
|
||||
compactor.alloc(sizeof(olean_header));
|
||||
olean_header header = {};
|
||||
// see/sync with file format description above
|
||||
header.base_addr = base_addr + file_offset;
|
||||
strncpy(header.lean_version, get_short_version_string().c_str(), sizeof(header.lean_version));
|
||||
strncpy(header.githash, LEAN_GITHASH, sizeof(header.githash));
|
||||
out.write(reinterpret_cast<char *>(&header), sizeof(header));
|
||||
|
||||
compactor(part.snd().raw());
|
||||
|
||||
if (out.fail()) {
|
||||
throw exception((sstream() << "failed to create file '" << olean_fn << "'").str());
|
||||
}
|
||||
out.write(static_cast<char const *>(compactor.data()) + file_offset + sizeof(olean_header), compactor.size() - file_offset - sizeof(olean_header));
|
||||
out.close();
|
||||
} catch (exception & ex) {
|
||||
return io_result_mk_error((sstream() << "failed to write '" << olean_fn << "': " << ex.what()).str());
|
||||
if (compactor.size() % ALIGN != 0) {
|
||||
compactor.alloc(ALIGN - (compactor.size() % ALIGN));
|
||||
}
|
||||
size_t file_offset = compactor.size();
|
||||
|
||||
// Reserve space in the compactor buffer for the header so subsequent objects'
|
||||
// `base_addr`-relative offsets land in the right spot. The header is written directly
|
||||
// to `out` below; these reserved bytes are never read back.
|
||||
compactor.alloc(sizeof(olean_header));
|
||||
olean_header header = {};
|
||||
header.base_addr = reinterpret_cast<size_t>(compactor.base_addr()) + file_offset;
|
||||
strncpy(header.lean_version, get_short_version_string().c_str(), sizeof(header.lean_version));
|
||||
strncpy(header.githash, LEAN_GITHASH, sizeof(header.githash));
|
||||
out.write(reinterpret_cast<char *>(&header), sizeof(header));
|
||||
|
||||
compactor(odata);
|
||||
|
||||
if (out.fail()) {
|
||||
throw exception((sstream() << "failed to create file '" << olean_fn << "'").str());
|
||||
}
|
||||
out.write(static_cast<char const *>(compactor.data()) + file_offset + sizeof(olean_header),
|
||||
compactor.size() - file_offset - sizeof(olean_header));
|
||||
out.close();
|
||||
} catch (exception & ex) {
|
||||
std::remove(olean_tmp_fn.c_str());
|
||||
return io_result_mk_error((sstream() << "failed to write '" << olean_fn << "': " << ex.what()).str());
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < parts.size(); i++) {
|
||||
std::string olean_fn = parts[i].fst().to_std_string();
|
||||
while (std::rename(tmp_fnames[i].c_str(), olean_fn.c_str()) != 0) {
|
||||
while (std::rename(olean_tmp_fn.c_str(), olean_fn) != 0) {
|
||||
#ifdef LEAN_WINDOWS
|
||||
if (errno == EEXIST) {
|
||||
// Memory-mapped files can be deleted starting with Windows 10 using "POSIX semantics"
|
||||
HANDLE h_olean_fn = CreateFile(olean_fn.c_str(), GENERIC_READ | DELETE, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
||||
if (h_olean_fn == INVALID_HANDLE_VALUE) {
|
||||
return io_result_mk_error((sstream() << "failed to open '" << olean_fn << "': " << GetLastError()).str());
|
||||
}
|
||||
|
||||
FILE_DISPOSITION_INFO_EX fdi = { FILE_DISPOSITION_FLAG_DELETE | FILE_DISPOSITION_FLAG_POSIX_SEMANTICS };
|
||||
if (SetFileInformationByHandle(h_olean_fn, static_cast<FILE_INFO_BY_HANDLE_CLASS>(21) /* FileDispositionInfoEx */, &fdi, sizeof(fdi)) != 0) {
|
||||
lean_always_assert(CloseHandle(h_olean_fn));
|
||||
continue;
|
||||
} else {
|
||||
return io_result_mk_error((sstream() << "failed to delete '" << olean_fn << "': " << GetLastError()).str());
|
||||
}
|
||||
if (errno == EEXIST) {
|
||||
// Memory-mapped files can be deleted starting with Windows 10 using "POSIX semantics".
|
||||
HANDLE h_olean_fn = CreateFile(olean_fn, GENERIC_READ | DELETE, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
||||
if (h_olean_fn == INVALID_HANDLE_VALUE) {
|
||||
return io_result_mk_error((sstream() << "failed to open '" << olean_fn << "': " << GetLastError()).str());
|
||||
}
|
||||
FILE_DISPOSITION_INFO_EX fdi = { FILE_DISPOSITION_FLAG_DELETE | FILE_DISPOSITION_FLAG_POSIX_SEMANTICS };
|
||||
if (SetFileInformationByHandle(h_olean_fn, static_cast<FILE_INFO_BY_HANDLE_CLASS>(21) /* FileDispositionInfoEx */, &fdi, sizeof(fdi)) != 0) {
|
||||
lean_always_assert(CloseHandle(h_olean_fn));
|
||||
continue;
|
||||
} else {
|
||||
return io_result_mk_error((sstream() << "failed to delete '" << olean_fn << "': " << GetLastError()).str());
|
||||
}
|
||||
#endif
|
||||
return io_result_mk_error((sstream() << "failed to write '" << olean_fn << "': " << errno << " " << strerror(errno)).str());
|
||||
}
|
||||
#endif
|
||||
return io_result_mk_error((sstream() << "failed to write '" << olean_fn << "': " << errno << " " << strerror(errno)).str());
|
||||
}
|
||||
return io_result_mk_ok(box(0));
|
||||
|
||||
return io_result_mk_ok(cs_obj.steal());
|
||||
}
|
||||
|
||||
struct module_file {
|
||||
std::string m_fname;
|
||||
file_descriptor m_fd;
|
||||
// Implements `Lean.CompactedRegion.read`. Loads a compacted region from disk. `odep_regions`
|
||||
// carries `CompactedRegion`s whose address ranges must be known to resolve cross-region pointers
|
||||
// in this file. Returns `(α × CompactedRegion)`, where `α` is the type the Lean caller asks the
|
||||
// root to be interpreted as — the C side does no type checking and the caller is responsible for
|
||||
// using a type compatible with what was saved (see `CompactedRegion.read`).
|
||||
extern "C" LEAN_EXPORT object * lean_compacted_region_read(b_obj_arg ofname, b_obj_arg odep_regions, object *) {
|
||||
std::string olean_fn(lean_string_cstr(ofname));
|
||||
std::vector<compacted_region *> dep_regions = extract_dep_regions(odep_regions);
|
||||
try {
|
||||
#ifdef LEAN_WINDOWS
|
||||
HANDLE m_handle; // store the original Windows for mmap
|
||||
#endif
|
||||
char * m_base_addr;
|
||||
size_t m_size;
|
||||
char * m_buffer;
|
||||
std::function<void()> m_free_data;
|
||||
};
|
||||
|
||||
extern "C" LEAN_EXPORT object * lean_read_module_data_parts(b_obj_arg ofnames, object *) {
|
||||
array_ref<string_ref> fnames(ofnames, true);
|
||||
|
||||
// first read in all headers
|
||||
std::vector<module_file> files;
|
||||
for (auto const & fname : fnames) {
|
||||
std::string olean_fn = fname.to_std_string();
|
||||
try {
|
||||
#ifdef LEAN_WINDOWS
|
||||
// Use CreateFile with proper sharing flags, then convert to POSIX fd for shared code
|
||||
// `FILE_SHARE_DELETE` is necessary to allow the file to (be marked to) be deleted while in use
|
||||
HANDLE h_file = CreateFile(olean_fn.c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
||||
if (h_file == INVALID_HANDLE_VALUE) {
|
||||
return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "': " << GetLastError()).str());
|
||||
}
|
||||
int raw_fd = _open_osfhandle((intptr_t)h_file, _O_RDONLY);
|
||||
if (raw_fd == -1) {
|
||||
CloseHandle(h_file);
|
||||
return io_result_mk_error((sstream() << "failed to convert handle to fd for '" << olean_fn << "'").str());
|
||||
}
|
||||
file_descriptor fd(raw_fd);
|
||||
HANDLE h_file = CreateFile(olean_fn.c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
||||
if (h_file == INVALID_HANDLE_VALUE) {
|
||||
return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "': " << GetLastError()).str());
|
||||
}
|
||||
int raw_fd = _open_osfhandle((intptr_t)h_file, _O_RDONLY);
|
||||
if (raw_fd == -1) {
|
||||
CloseHandle(h_file);
|
||||
return io_result_mk_error((sstream() << "failed to convert handle to fd for '" << olean_fn << "'").str());
|
||||
}
|
||||
file_descriptor fd(raw_fd);
|
||||
#else
|
||||
file_descriptor fd(open(olean_fn.c_str(), O_RDONLY));
|
||||
if (!fd) {
|
||||
return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "': " << strerror(errno)).str());
|
||||
}
|
||||
file_descriptor fd(open(olean_fn.c_str(), O_RDONLY));
|
||||
if (!fd) {
|
||||
return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "': " << strerror(errno)).str());
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Get file size */
|
||||
struct stat st;
|
||||
if (fstat(fd.get(), &st) == -1) {
|
||||
return io_result_mk_error((sstream() << "failed to stat file '" << olean_fn << "': " << strerror(errno)).str());
|
||||
}
|
||||
size_t size = st.st_size;
|
||||
struct stat st;
|
||||
if (fstat(fd.get(), &st) == -1) {
|
||||
return io_result_mk_error((sstream() << "failed to stat file '" << olean_fn << "': " << strerror(errno)).str());
|
||||
}
|
||||
size_t size = st.st_size;
|
||||
|
||||
olean_header default_header = {};
|
||||
olean_header header;
|
||||
ssize_t read_size = readn(fd.get(), &header, sizeof(header));
|
||||
if (read_size < 0) {
|
||||
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "': " << strerror(errno)).str());
|
||||
}
|
||||
if (read_size != sizeof(header)
|
||||
|| memcmp(header.marker, default_header.marker, sizeof(header.marker)) != 0) {
|
||||
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str());
|
||||
}
|
||||
lseek(fd.get(), 0, SEEK_SET);
|
||||
if (header.version != default_header.version || header.flags != default_header.flags
|
||||
olean_header default_header = {};
|
||||
olean_header header;
|
||||
ssize_t read_size = readn(fd.get(), &header, sizeof(header));
|
||||
if (read_size < 0) {
|
||||
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "': " << strerror(errno)).str());
|
||||
}
|
||||
if (read_size != sizeof(header)
|
||||
|| memcmp(header.marker, default_header.marker, sizeof(header.marker)) != 0) {
|
||||
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str());
|
||||
}
|
||||
if (header.version != default_header.version || header.flags != default_header.flags
|
||||
#ifdef LEAN_CHECK_OLEAN_VERSION
|
||||
|| strncmp(header.githash, LEAN_GITHASH, sizeof(header.githash)) != 0
|
||||
|| strncmp(header.githash, LEAN_GITHASH, sizeof(header.githash)) != 0
|
||||
#endif
|
||||
) {
|
||||
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', incompatible header").str());
|
||||
}
|
||||
char * base_addr = reinterpret_cast<char *>(header.base_addr);
|
||||
#ifdef LEAN_WINDOWS
|
||||
files.push_back({olean_fn, std::move(fd), h_file, base_addr, size, nullptr, nullptr});
|
||||
#else
|
||||
files.push_back({olean_fn, std::move(fd), base_addr, size, nullptr, nullptr});
|
||||
#endif
|
||||
} catch (exception & ex) {
|
||||
return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str());
|
||||
) {
|
||||
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', incompatible header").str());
|
||||
}
|
||||
}
|
||||
char * base_addr = reinterpret_cast<char *>(header.base_addr);
|
||||
|
||||
#ifndef LEAN_MMAP
|
||||
bool is_mmap = false;
|
||||
#else
|
||||
// now try mmapping *all* files
|
||||
bool is_mmap = true;
|
||||
for (auto & file : files) {
|
||||
std::string const & olean_fn = file.m_fname;
|
||||
char * base_addr = file.m_base_addr;
|
||||
try {
|
||||
char * buffer = nullptr;
|
||||
bool is_mmap = false;
|
||||
std::function<void()> free_data;
|
||||
|
||||
// Map file COW-writable. The fallback walk in `compacted_region::read()` writes
|
||||
// fixed pointers back into the region's memory when any dep region didn't land at its
|
||||
// saved `base_addr`, so the mapping must be writable. Unwritten pages remain shared
|
||||
// with the page cache under `MAP_PRIVATE` regardless of protection, so `PROT_WRITE`
|
||||
// is free when no fixup happens.
|
||||
#ifdef LEAN_MMAP
|
||||
#ifdef LEAN_WINDOWS
|
||||
// Use the stored handle that was created with proper sharing flags
|
||||
HANDLE h_olean_fn = file.m_handle;
|
||||
HANDLE h_map = CreateFileMapping(h_olean_fn, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||
if (h_map == NULL) {
|
||||
return io_result_mk_error((sstream() << "failed to map '" << olean_fn << "': " << GetLastError()).str());
|
||||
}
|
||||
char * buffer = static_cast<char *>(MapViewOfFileEx(h_map, FILE_MAP_READ, 0, 0, 0, base_addr));
|
||||
HANDLE h_map = CreateFileMapping(h_file, NULL, PAGE_WRITECOPY, 0, 0, NULL);
|
||||
if (h_map != NULL) {
|
||||
// `FILE_MAP_COPY` already implies read+copy-on-write; OR'ing with `FILE_MAP_READ`
|
||||
// silently degrades the mapping to read-only and turns the fix-up walk's pointer
|
||||
// writes into access violations.
|
||||
buffer = static_cast<char *>(MapViewOfFileEx(h_map, FILE_MAP_COPY, 0, 0, 0, base_addr));
|
||||
lean_always_assert(CloseHandle(h_map));
|
||||
// NOTE: no need to close `h_olean_fn` as it's owned by `file.m_fd`
|
||||
if (!buffer) {
|
||||
is_mmap = false;
|
||||
break;
|
||||
if (buffer && buffer == base_addr) {
|
||||
is_mmap = true;
|
||||
free_data = [=]() { lean_always_assert(UnmapViewOfFile(base_addr)); };
|
||||
} else if (buffer) {
|
||||
lean_always_assert(UnmapViewOfFile(buffer));
|
||||
buffer = nullptr;
|
||||
}
|
||||
file.m_free_data = [=]() {
|
||||
lean_always_assert(UnmapViewOfFile(base_addr));
|
||||
};
|
||||
}
|
||||
#else
|
||||
int fd = file.m_fd.get();
|
||||
// NOTE: `file.m_fd` does NOT need to outlive `buffer` after this call
|
||||
char * buffer = static_cast<char *>(mmap(base_addr, file.m_size, PROT_READ, MAP_PRIVATE, fd, 0));
|
||||
if (buffer == MAP_FAILED) {
|
||||
is_mmap = false;
|
||||
break;
|
||||
}
|
||||
size_t size = file.m_size;
|
||||
file.m_free_data = [=]() {
|
||||
lean_always_assert(munmap(buffer, size) == 0);
|
||||
};
|
||||
// On Linux 4.17+ kernels, `MAP_FIXED_NOREPLACE` makes the kernel atomically reject the
|
||||
// mapping if `base_addr` is already taken; older kernels silently ignore the bit. macOS
|
||||
// doesn't expose the flag at all (see top-of-file fallback). The post-check + cleanup
|
||||
// `munmap` handles every "did not land at `base_addr`" case uniformly.
|
||||
int mmap_flags = MAP_PRIVATE
|
||||
#ifdef MAP_FIXED_NOREPLACE
|
||||
| MAP_FIXED_NOREPLACE
|
||||
#endif
|
||||
if (buffer == base_addr) {
|
||||
file.m_buffer = buffer;
|
||||
} else {
|
||||
is_mmap = false;
|
||||
break;
|
||||
}
|
||||
} catch (exception & ex) {
|
||||
return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str());
|
||||
;
|
||||
buffer = static_cast<char *>(mmap(base_addr, size, PROT_READ | PROT_WRITE,
|
||||
mmap_flags, fd.get(), 0));
|
||||
if (buffer != MAP_FAILED && buffer == base_addr) {
|
||||
is_mmap = true;
|
||||
free_data = [=]() { lean_always_assert(munmap(buffer, size) == 0); };
|
||||
} else {
|
||||
if (buffer != MAP_FAILED) munmap(buffer, size);
|
||||
buffer = nullptr;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// if *any* file failed to mmap, read all of them into a single big allocation so that offsets
|
||||
// between them are unchanged
|
||||
if (!is_mmap && !files.empty()) {
|
||||
for (auto & file : files) {
|
||||
if (file.m_free_data) {
|
||||
file.m_free_data();
|
||||
file.m_free_data = {};
|
||||
if (!buffer) {
|
||||
buffer = static_cast<char *>(malloc(size));
|
||||
lseek(fd.get(), 0, SEEK_SET);
|
||||
ssize_t r = readn(fd.get(), buffer, size);
|
||||
if (r < 0 || r != static_cast<ssize_t>(size)) {
|
||||
free(buffer);
|
||||
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "'").str());
|
||||
}
|
||||
char * buf = buffer;
|
||||
size_t sz = size;
|
||||
free_data = [=]() { free_sized(buf, sz); };
|
||||
}
|
||||
|
||||
size_t big_size = files[files.size()-1].m_base_addr + files[files.size()-1].m_size - files[0].m_base_addr;
|
||||
char * big_buffer = static_cast<char *>(malloc(big_size));
|
||||
if (!big_buffer) {
|
||||
return io_result_mk_error(decode_io_error(ENOMEM, nullptr));
|
||||
}
|
||||
for (auto & file : files) {
|
||||
std::string const & olean_fn = file.m_fname;
|
||||
try {
|
||||
file.m_buffer = big_buffer + (file.m_base_addr - files[0].m_base_addr);
|
||||
ssize_t read_size = readn(file.m_fd.get(), file.m_buffer, file.m_size);
|
||||
if (read_size < 0) {
|
||||
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "': " << strerror(errno)).str());
|
||||
}
|
||||
if (read_size != static_cast<ssize_t>(file.m_size)) {
|
||||
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "': unexpected EOF").str());
|
||||
}
|
||||
} catch (exception & ex) {
|
||||
return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str());
|
||||
}
|
||||
}
|
||||
files[0].m_free_data = [=]() {
|
||||
free_sized(big_buffer, big_size);
|
||||
};
|
||||
}
|
||||
|
||||
std::vector<object_ref> res;
|
||||
for (auto & file : files) {
|
||||
compacted_region * region =
|
||||
new compacted_region(file.m_size - sizeof(olean_header), file.m_buffer + sizeof(olean_header), static_cast<char *>(file.m_base_addr) + sizeof(olean_header), is_mmap, file.m_free_data);
|
||||
compacted_region * region = new compacted_region(
|
||||
size - sizeof(olean_header), buffer + sizeof(olean_header),
|
||||
base_addr + sizeof(olean_header), is_mmap, free_data,
|
||||
std::move(dep_regions));
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(address_sanitizer)
|
||||
// do not report as leak
|
||||
__lsan_ignore_object(region);
|
||||
#endif
|
||||
#endif
|
||||
object * mod = region->read();
|
||||
object * mod_region = alloc_cnstr(0, 2, 0);
|
||||
cnstr_set(mod_region, 0, mod);
|
||||
cnstr_set(mod_region, 1, box_size_t(reinterpret_cast<size_t>(region)));
|
||||
|
||||
res.push_back(object_ref(mod_region));
|
||||
object * pair = alloc_cnstr(0, 2, 0);
|
||||
cnstr_set(pair, 0, mod);
|
||||
cnstr_set(pair, 1, box_size_t(reinterpret_cast<size_t>(region)));
|
||||
return io_result_mk_ok(pair);
|
||||
} catch (exception & ex) {
|
||||
return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str());
|
||||
}
|
||||
return io_result_mk_ok(to_array(res));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,12 +57,25 @@ struct object_compactor::max_sharing_table {
|
|||
}
|
||||
};
|
||||
|
||||
object_compactor::object_compactor(void * base_addr):
|
||||
object_compactor::object_compactor(void * base_addr, std::vector<compacted_region *> dep_regions):
|
||||
m_max_sharing_table(new max_sharing_table(this)),
|
||||
m_dep_regions(std::move(dep_regions)),
|
||||
m_base_addr(base_addr),
|
||||
m_begin(malloc(LEAN_COMPACTOR_INIT_SZ)),
|
||||
m_end(m_begin),
|
||||
m_capacity(static_cast<char*>(m_begin) + LEAN_COMPACTOR_INIT_SZ) {
|
||||
// Sort dep regions by `begin` address for binary search in `to_offset`.
|
||||
// Extract keys into a flat array first to avoid pointer-chasing TLB misses through
|
||||
// thousands of scattered `compacted_region` heap objects during sort comparisons.
|
||||
size_t n = m_dep_regions.size();
|
||||
std::vector<std::pair<void *, compacted_region *>> keyed(n);
|
||||
for (size_t i = 0; i < n; i++)
|
||||
keyed[i] = {m_dep_regions[i]->begin(), m_dep_regions[i]};
|
||||
std::sort(keyed.begin(), keyed.end(),
|
||||
[](std::pair<void *, compacted_region *> const & a,
|
||||
std::pair<void *, compacted_region *> const & b) { return a.first < b.first; });
|
||||
for (size_t i = 0; i < n; i++)
|
||||
m_dep_regions[i] = keyed[i].second;
|
||||
}
|
||||
|
||||
object_compactor::~object_compactor() {
|
||||
|
|
@ -118,12 +131,30 @@ object_offset object_compactor::to_offset(object * o) {
|
|||
return o;
|
||||
} else {
|
||||
auto it = m_obj_table.find(o);
|
||||
if (it == m_obj_table.end()) {
|
||||
m_todo.push_back(o);
|
||||
return g_null_offset;
|
||||
} else {
|
||||
if (it != m_obj_table.end()) {
|
||||
return it->second;
|
||||
}
|
||||
// Only check dep regions for non-heap objects
|
||||
if (!m_dep_regions.empty() && !lean_has_rc(o)) {
|
||||
// Binary search dep regions (sorted by `begin`)
|
||||
char * addr = reinterpret_cast<char *>(o);
|
||||
std::vector<compacted_region *>::iterator upper = std::upper_bound(
|
||||
m_dep_regions.begin(), m_dep_regions.end(), addr,
|
||||
[](char * a, compacted_region * r) { return a < static_cast<char *>(r->begin()); });
|
||||
if (upper != m_dep_regions.begin()) {
|
||||
compacted_region * region = *(upper - 1);
|
||||
char * region_end = static_cast<char *>(region->begin()) + region->size();
|
||||
if (addr < region_end) {
|
||||
// Object is in this dep region; compute its `base_addr`-relative pointer
|
||||
object_offset off = reinterpret_cast<object_offset>(
|
||||
reinterpret_cast<size_t>(region->base_addr()) + (addr - static_cast<char *>(region->begin())));
|
||||
m_obj_table.insert(std::make_pair(o, off));
|
||||
return off;
|
||||
}
|
||||
}
|
||||
}
|
||||
m_todo.push_back(o);
|
||||
return g_null_offset;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -384,14 +415,40 @@ void object_compactor::operator()(object * o) {
|
|||
*root = to_offset(o);
|
||||
}
|
||||
|
||||
compacted_region::compacted_region(size_t sz, void * data, void * base_addr, bool is_mmap, std::function<void()> free_data):
|
||||
compacted_region::compacted_region(size_t sz, void * data, void * base_addr, bool is_mmap, std::function<void()> free_data,
|
||||
std::vector<compacted_region *> dep_regions):
|
||||
m_size(sz),
|
||||
m_base_addr(base_addr),
|
||||
m_is_mmap(is_mmap),
|
||||
m_free_data(free_data),
|
||||
m_begin(data),
|
||||
m_next(data),
|
||||
m_end(static_cast<char*>(data)+sz) {
|
||||
m_end(static_cast<char*>(data)+sz),
|
||||
m_dep_regions(std::move(dep_regions)) {
|
||||
// Sort dep regions by `base_addr` for binary search in `fix_object_ptr`
|
||||
std::sort(m_dep_regions.begin(), m_dep_regions.end(),
|
||||
[](compacted_region * a, compacted_region * b) { return a->base_addr() < b->base_addr(); });
|
||||
// Reject overlapping saved address ranges: `fix_object_ptr` resolves cross-region pointers
|
||||
// by binary-searching `base_addr`s, and would silently translate via the wrong region if
|
||||
// two deps overlap (or if a dep overlaps our own range). This should not happen with regular
|
||||
// .olean use as we use only use `read`'s `prev` instead of `dep_regions` there.
|
||||
for (size_t i = 1; i < m_dep_regions.size(); i++) {
|
||||
compacted_region * prev = m_dep_regions[i - 1];
|
||||
compacted_region * curr = m_dep_regions[i];
|
||||
if (reinterpret_cast<size_t>(prev->base_addr()) + prev->size()
|
||||
> reinterpret_cast<size_t>(curr->base_addr())) {
|
||||
throw exception("compacted_region: dep regions have overlapping `base_addr` ranges");
|
||||
}
|
||||
}
|
||||
size_t self_base = reinterpret_cast<size_t>(m_base_addr);
|
||||
size_t self_end = self_base + m_size;
|
||||
for (compacted_region * dep : m_dep_regions) {
|
||||
size_t dep_base = reinterpret_cast<size_t>(dep->base_addr());
|
||||
size_t dep_end = dep_base + dep->size();
|
||||
if (self_base < dep_end && dep_base < self_end) {
|
||||
throw exception("compacted_region: own region overlaps a dep region's `base_addr` range");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
compacted_region::~compacted_region() {
|
||||
|
|
@ -402,7 +459,25 @@ compacted_region::~compacted_region() {
|
|||
|
||||
inline object * compacted_region::fix_object_ptr(object * o) {
|
||||
if (lean_is_scalar(o)) return o;
|
||||
return reinterpret_cast<object*>(static_cast<char*>(m_begin) - reinterpret_cast<char*>(m_base_addr) + reinterpret_cast<ptrdiff_t>(o));
|
||||
size_t addr = reinterpret_cast<size_t>(o);
|
||||
size_t self_base = reinterpret_cast<size_t>(m_base_addr);
|
||||
// Check own region first (most common case)
|
||||
if (addr >= self_base && addr < self_base + m_size) {
|
||||
return reinterpret_cast<object*>(static_cast<char*>(m_begin) + (addr - self_base));
|
||||
}
|
||||
// Binary search dep regions (sorted by `base_addr`)
|
||||
char * addr_ptr = reinterpret_cast<char *>(addr);
|
||||
std::vector<compacted_region *>::iterator upper = std::upper_bound(
|
||||
m_dep_regions.begin(), m_dep_regions.end(), addr_ptr,
|
||||
[](char * a, compacted_region * r) { return a < static_cast<char *>(r->base_addr()); });
|
||||
if (upper != m_dep_regions.begin()) {
|
||||
compacted_region * dep = *(upper - 1);
|
||||
size_t dep_base = reinterpret_cast<size_t>(dep->base_addr());
|
||||
if (addr < dep_base + dep->size()) {
|
||||
return reinterpret_cast<object*>(static_cast<char*>(dep->begin()) + (addr - dep_base));
|
||||
}
|
||||
}
|
||||
lean_unreachable();
|
||||
}
|
||||
|
||||
inline void compacted_region::move(size_t d) {
|
||||
|
|
@ -475,11 +550,19 @@ object * compacted_region::read() {
|
|||
object * root = fix_object_ptr(*static_cast<object_offset *>(m_next));
|
||||
move(sizeof(object_offset));
|
||||
if (m_begin == m_base_addr) {
|
||||
// no relocations needed
|
||||
m_end = m_next;
|
||||
return root;
|
||||
// Own-region pointers are already correct (this region landed at its saved address).
|
||||
// But if any dep region missed its `base_addr`, cross-region pointers in this region
|
||||
// still hold the dep's saved address and need fixup, so fall through to the walk in
|
||||
// that case.
|
||||
bool needs_dep_reloc = false;
|
||||
for (compacted_region * dep : m_dep_regions) {
|
||||
if (dep->begin() != dep->base_addr()) { needs_dep_reloc = true; break; }
|
||||
}
|
||||
if (!needs_dep_reloc) {
|
||||
m_end = m_next;
|
||||
return root;
|
||||
}
|
||||
}
|
||||
lean_assert(!m_is_mmap);
|
||||
|
||||
while (m_next < m_end) {
|
||||
object * curr = reinterpret_cast<object*>(m_next);
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ Author: Leonardo de Moura
|
|||
*/
|
||||
#pragma once
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "runtime/object.h"
|
||||
#include "util/alloc.h"
|
||||
|
|
@ -13,6 +14,8 @@ Author: Leonardo de Moura
|
|||
namespace lean {
|
||||
typedef lean_object * object_offset;
|
||||
|
||||
class compacted_region;
|
||||
|
||||
class LEAN_EXPORT object_compactor {
|
||||
struct max_sharing_table;
|
||||
friend struct max_sharing_hash;
|
||||
|
|
@ -21,6 +24,8 @@ class LEAN_EXPORT object_compactor {
|
|||
std::unique_ptr<max_sharing_table> m_max_sharing_table;
|
||||
std::vector<object*> m_todo;
|
||||
std::vector<object_offset> m_tmp;
|
||||
// Dependency regions sorted by `begin` address for binary search in `to_offset`
|
||||
std::vector<compacted_region *> m_dep_regions;
|
||||
// On-disk base address used for `mmap`ing compacted regions without relocations
|
||||
// References within the compacted region are rewritten by subtracting `m_begin` and adding `m_base_addr`
|
||||
// In the simplest case `base_addr == nullptr`, we get region-relative pointers
|
||||
|
|
@ -44,7 +49,7 @@ class LEAN_EXPORT object_compactor {
|
|||
bool insert_ref(object * o);
|
||||
void insert_mpz(object * o);
|
||||
public:
|
||||
object_compactor(void * base_addr = nullptr);
|
||||
object_compactor(void * base_addr = nullptr, std::vector<compacted_region *> dep_regions = {});
|
||||
object_compactor(object_compactor const &) = delete;
|
||||
object_compactor(object_compactor &&) = delete;
|
||||
~object_compactor();
|
||||
|
|
@ -55,6 +60,8 @@ public:
|
|||
void const * data() const { return m_begin; }
|
||||
// Allocate `sz` bytes of zeroed memory.
|
||||
void * alloc(size_t sz);
|
||||
void const * base_addr() const { return m_base_addr; }
|
||||
std::vector<compacted_region *> const & dep_regions() const { return m_dep_regions; }
|
||||
};
|
||||
|
||||
class LEAN_EXPORT compacted_region {
|
||||
|
|
@ -66,6 +73,8 @@ class LEAN_EXPORT compacted_region {
|
|||
void * m_begin;
|
||||
void * m_next;
|
||||
void * m_end;
|
||||
// Dependency regions for cross-region pointer fixup
|
||||
std::vector<compacted_region *> m_dep_regions;
|
||||
void move(size_t d);
|
||||
void move(object * o);
|
||||
object * fix_object_ptr(object * o);
|
||||
|
|
@ -79,7 +88,8 @@ class LEAN_EXPORT compacted_region {
|
|||
public:
|
||||
/* Creates a compacted object region using the given region in memory.
|
||||
This object takes ownership of the region. */
|
||||
compacted_region(size_t sz, void * data, void * base_addr, bool is_mmap, std::function<void()> free_data);
|
||||
compacted_region(size_t sz, void * data, void * base_addr, bool is_mmap, std::function<void()> free_data,
|
||||
std::vector<compacted_region *> dep_regions = {});
|
||||
/* Creates a compacted object region using the object_compactor current state.
|
||||
It creates a copy of the compacted region generated by the object compactor. */
|
||||
explicit compacted_region(object_compactor const & c);
|
||||
|
|
@ -91,5 +101,7 @@ public:
|
|||
object * read();
|
||||
bool is_memory_mapped() const { return m_is_mmap; }
|
||||
size_t size() const { return m_size; }
|
||||
void * base_addr() const { return m_base_addr; }
|
||||
void * begin() const { return m_begin; }
|
||||
};
|
||||
}
|
||||
|
|
|
|||
5
tests/compile/.gitignore
vendored
Normal file
5
tests/compile/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Tests creating temporary output files should prefix them with `_tmp_` so they're all
|
||||
# covered by this one pattern. The tests delete them on success; this entry catches stragglers
|
||||
# from failed runs.
|
||||
/_tmp_*
|
||||
|
||||
47
tests/compile/compactor_chain.lean
Normal file
47
tests/compile/compactor_chain.lean
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
import Lean.CompactedRegion
|
||||
|
||||
open Lean
|
||||
|
||||
/-!
|
||||
Regression test for `CompactedRegion.save` / `CompactedRegion.read` chained-`Compactor`
|
||||
object sharing.
|
||||
|
||||
Saves three parts where parts 1 and 2 both reference the same `Array Nat`. The chained
|
||||
`Compactor` should emit that array's bytes exactly once (in part 1), with part 2 holding only
|
||||
a cross-region reference. We verify both halves:
|
||||
|
||||
* **dedup:** part 2's file size should be substantially smaller than part 1's, since part 2
|
||||
doesn't re-serialize the shared array.
|
||||
* **round-trip:** loading via `CompactedRegion.read` with the prior regions as `depRegions`
|
||||
reproduces the original arrays, exercising `fix_object_ptr`'s dep-region lookup.
|
||||
-/
|
||||
unsafe def main : IO UInt32 := do
|
||||
let part0 : System.FilePath := "./_tmp_compactor_chain_0.olean"
|
||||
let part1 : System.FilePath := "./_tmp_compactor_chain_1.olean"
|
||||
let part2 : System.FilePath := "./_tmp_compactor_chain_2.olean"
|
||||
|
||||
let shared : Array Nat := Array.range 256
|
||||
let cs ← CompactedRegion.save part0 `Test (0 : Nat) #[] none
|
||||
let cs ← CompactedRegion.save part1 `Test shared #[] (some cs)
|
||||
let _ ← CompactedRegion.save part2 `Test shared #[] (some cs)
|
||||
|
||||
let s1 := (← IO.FS.readBinFile part1).size
|
||||
let s2 := (← IO.FS.readBinFile part2).size
|
||||
-- Part 2 should be much smaller than part 1: the chained compactor sees the shared array
|
||||
-- was already emitted to part 1 and writes only a cross-region pointer. A factor of 4 is
|
||||
-- generous — in practice part 2 is dominated by the olean header + a single root pointer.
|
||||
unless 4 * s2 < s1 do
|
||||
throw <| IO.userError s!"dedup did not take effect: part1={s1}, part2={s2}"
|
||||
|
||||
let (_, r0) ← CompactedRegion.read (α := Nat) part0 #[]
|
||||
let (a1, r1) ← CompactedRegion.read (α := Array Nat) part1 #[r0]
|
||||
let (a2, _ ) ← CompactedRegion.read (α := Array Nat) part2 #[r0, r1]
|
||||
unless a1 = shared do
|
||||
throw <| IO.userError "part 1 round-trip mismatch"
|
||||
unless a2 = shared do
|
||||
throw <| IO.userError "part 2 round-trip mismatch"
|
||||
|
||||
IO.FS.removeFile part0
|
||||
IO.FS.removeFile part1
|
||||
IO.FS.removeFile part2
|
||||
return 0
|
||||
0
tests/compile/compactor_chain.lean.no_compile
Normal file
0
tests/compile/compactor_chain.lean.no_compile
Normal file
42
tests/compile/dep_regions.lean
Normal file
42
tests/compile/dep_regions.lean
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import Lean.CompactedRegion
|
||||
|
||||
open Lean
|
||||
|
||||
/-!
|
||||
Regression test for the `depRegions` argument to `CompactedRegion.save` and
|
||||
`CompactedRegion.read`.
|
||||
|
||||
Saves a standalone olean (`part0`) holding some data, loads it back, then saves a second
|
||||
olean (`part1`) that *references* the loaded data via the `depRegions` argument. The
|
||||
compactor should recognize the dep-region object and emit a cross-region pointer rather than
|
||||
re-serializing it; the reader, given the same dep region, should resolve that pointer via
|
||||
`fix_object_ptr`'s dep-region lookup.
|
||||
-/
|
||||
unsafe def main : IO UInt32 := do
|
||||
let part0 : System.FilePath := "./_tmp_dep_regions_0.olean"
|
||||
let part1 : System.FilePath := "./_tmp_dep_regions_1.olean"
|
||||
|
||||
-- File 0: standalone payload.
|
||||
let payload0 : Array Nat := Array.range 256
|
||||
let _ ← CompactedRegion.save part0 `A payload0 #[] none
|
||||
|
||||
let (loaded0, r0) ← CompactedRegion.read (α := Array Nat) part0 #[]
|
||||
unless loaded0 = payload0 do
|
||||
throw <| IO.userError "file 0 round-trip mismatch"
|
||||
|
||||
-- File 1: payload references `loaded0` (an object from `r0`'s region) and adds its own data.
|
||||
let extra : Array Nat := Array.range 32
|
||||
let payload1 : Array Nat × Array Nat := (loaded0, extra)
|
||||
let _ ← CompactedRegion.save part1 `B payload1 #[r0] none
|
||||
|
||||
-- Loading `part1` without supplying `r0` would either segfault or hit `lean_unreachable`
|
||||
-- when `fix_object_ptr` tries to resolve the cross-region pointer; we supply it.
|
||||
let (loaded1, _r1) ← CompactedRegion.read (α := Array Nat × Array Nat) part1 #[r0]
|
||||
unless loaded1.1 = payload0 do
|
||||
throw <| IO.userError "cross-region reference did not round-trip"
|
||||
unless loaded1.2 = extra do
|
||||
throw <| IO.userError "own-region data did not round-trip"
|
||||
|
||||
IO.FS.removeFile part0
|
||||
IO.FS.removeFile part1
|
||||
return 0
|
||||
0
tests/compile/dep_regions.lean.no_compile
Normal file
0
tests/compile/dep_regions.lean.no_compile
Normal file
62
tests/compile/dep_regions_miss.lean
Normal file
62
tests/compile/dep_regions_miss.lean
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
import Lean.CompactedRegion
|
||||
|
||||
open Lean
|
||||
|
||||
/-!
|
||||
Regression test for the dep-aware fast-path skip in `compacted_region::read()`.
|
||||
|
||||
Saves two distinct payloads to two different files with the same `mod` `Name`, so both files
|
||||
derive the same saved `base_addr`. Loading the first mmaps successfully at that address;
|
||||
loading the second finds the address occupied and falls back to `malloc`+`read` —
|
||||
`m_begin ≠ m_base_addr`. We then save a third file whose data references the malloc'd
|
||||
payload via `depRegions := #[r_B]`, and load it back.
|
||||
|
||||
When the dependent file (which itself mmaps at a different `base_addr`) reaches
|
||||
`compacted_region::read()`, the fast path must check **all** dep regions for
|
||||
`m_begin == m_base_addr` and fall through to the full pointer-fixup walk if any didn't land
|
||||
at their saved address. If the fast path only checks `m_begin == m_base_addr` for the
|
||||
current region (master's behavior before this fix), the cross-region pointer still holds
|
||||
the dep's saved address — which in this process happens to be mapped, but mapped to a
|
||||
*different file's data* (the first load's). The test catches that by asserting the loaded
|
||||
data matches `payload_b`, not `payload_a`.
|
||||
-/
|
||||
unsafe def main : IO UInt32 := do
|
||||
let part_a : System.FilePath := "./_tmp_dep_regions_miss_a.olean"
|
||||
let part_b : System.FilePath := "./_tmp_dep_regions_miss_b.olean"
|
||||
let part_c : System.FilePath := "./_tmp_dep_regions_miss_c.olean"
|
||||
|
||||
let payload_a : Array Nat := (Array.range 256).map (· + 1000)
|
||||
let payload_b : Array Nat := (Array.range 256).map (· + 9000)
|
||||
let _ ← CompactedRegion.save part_a `Same payload_a #[] none
|
||||
let _ ← CompactedRegion.save part_b `Same payload_b #[] none
|
||||
|
||||
-- First load: mmap succeeds at the saved `base_addr` derived from `Same`.
|
||||
let (_, _r_a) ← CompactedRegion.read (α := Array Nat) part_a #[]
|
||||
-- Second load: same `mod` → same saved `base_addr`, but it's already occupied by `_r_a`,
|
||||
-- so mmap fails and the reader falls back to malloc. `m_begin ≠ m_base_addr`.
|
||||
let (loaded_b, r_b) ← CompactedRegion.read (α := Array Nat) part_b #[]
|
||||
unless loaded_b = payload_b do
|
||||
throw <| IO.userError "malloc-fallback load did not round-trip identically"
|
||||
|
||||
-- Save part_c referencing the malloc-loaded `loaded_b`. The compactor recognizes
|
||||
-- objects in `r_b`'s range and emits a cross-region pointer using `r_b`'s saved
|
||||
-- `base_addr` (which equals `r_a`'s saved `base_addr`).
|
||||
let extra : Array Nat := Array.range 32
|
||||
let payload_c : Array Nat × Array Nat := (loaded_b, extra)
|
||||
let _ ← CompactedRegion.save part_c `Other payload_c #[r_b] none
|
||||
|
||||
-- Load part_c. Its own region mmaps at a different `base_addr` (mod `Other`), so the
|
||||
-- own-region check passes — but `r_b` has `m_begin ≠ m_base_addr`, so the fast path must
|
||||
-- fall through to the full walk to translate the cross-region pointer to `r_b`'s
|
||||
-- malloc'd memory. If the walk is skipped, the unfixed pointer instead reads into
|
||||
-- `_r_a`'s mmap (which holds `payload_a`, not `payload_b`).
|
||||
let (loaded_c, _r_c) ← CompactedRegion.read (α := Array Nat × Array Nat) part_c #[r_b]
|
||||
unless loaded_c.1 = payload_b do
|
||||
throw <| IO.userError "fast path skipped fixup: cross-region pointer resolved to wrong dep"
|
||||
unless loaded_c.2 = extra do
|
||||
throw <| IO.userError "own-region data didn't round-trip"
|
||||
|
||||
IO.FS.removeFile part_a
|
||||
IO.FS.removeFile part_b
|
||||
IO.FS.removeFile part_c
|
||||
return 0
|
||||
0
tests/compile/dep_regions_miss.lean.no_compile
Normal file
0
tests/compile/dep_regions_miss.lean.no_compile
Normal file
|
|
@ -7,7 +7,7 @@ elif [[ -f "$1.no_compile" ]]; then DO_COMPILE=
|
|||
else DO_COMPILE=1
|
||||
fi
|
||||
|
||||
rm -f "$1.measurements.jsonl"
|
||||
: > "$1.measurements.jsonl" # always create (possibly empty) so `combine.py` finds it
|
||||
|
||||
if [[ -n $DO_COMPILE ]]; then
|
||||
run_before "$1"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue