From 895752dc2eb1516bd0884083cb25cc7e34de6b98 Mon Sep 17 00:00:00 2001 From: Sebastian Ullrich Date: Sat, 16 May 2026 14:53:20 +0200 Subject: [PATCH] feat: add `Lean.CompactedRegion.save`/`read` supporting cross-file object sharing (#13185) This PR adds new incremental module serialization functions that save/load a single module at a time with explicit sharing via dep regions and compactor state, generalizing the existing batch saveModuleDataParts API. Two sharing mechanisms that can be mixed: - `CompactedRegion` dep regions for sharing with loaded regions - `CompactorState` for same-process chaining (pre-loaded `m_obj_table`) --- src/Lean/CompactedRegion.lean | 81 +++ src/Lean/Environment.lean | 47 +- src/library/module.cpp | 463 +++++++++--------- src/runtime/compact.cpp | 107 +++- src/runtime/compact.h | 16 +- tests/compile/.gitignore | 5 + tests/compile/compactor_chain.lean | 47 ++ tests/compile/compactor_chain.lean.no_compile | 0 tests/compile/dep_regions.lean | 42 ++ tests/compile/dep_regions.lean.no_compile | 0 tests/compile/dep_regions_miss.lean | 62 +++ .../compile/dep_regions_miss.lean.no_compile | 0 tests/compile/run_bench.sh | 2 +- 13 files changed, 602 insertions(+), 270 deletions(-) create mode 100644 src/Lean/CompactedRegion.lean create mode 100644 tests/compile/.gitignore create mode 100644 tests/compile/compactor_chain.lean create mode 100644 tests/compile/compactor_chain.lean.no_compile create mode 100644 tests/compile/dep_regions.lean create mode 100644 tests/compile/dep_regions.lean.no_compile create mode 100644 tests/compile/dep_regions_miss.lean create mode 100644 tests/compile/dep_regions_miss.lean.no_compile diff --git a/src/Lean/CompactedRegion.lean b/src/Lean/CompactedRegion.lean new file mode 100644 index 0000000000..11ef54e70a --- /dev/null +++ b/src/Lean/CompactedRegion.lean @@ -0,0 +1,81 @@ +/- +Copyright (c) 2026 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Sebastian Ullrich +-/ +module + +prelude +public import Init.System.IO +public import Lean.Data.Name + +namespace Lean + +/-- +A compacted region holds multiple Lean objects in a contiguous memory region, which can be +read/written to/from disk. Objects inside the region do not have reference counters and cannot be +freed individually. The contents of `.olean` files are compacted regions. +-/ +@[expose] public def CompactedRegion := USize + +@[extern "lean_compacted_region_is_memory_mapped"] +public opaque CompactedRegion.isMemoryMapped : CompactedRegion → Bool + +/-- Size in bytes. -/ +@[extern "lean_compacted_region_size"] +public opaque CompactedRegion.size : CompactedRegion → USize + +/-- +Frees a compacted region and its contents. No live references to the contents may exist at the +time of invocation. +-/ +@[extern "lean_compacted_region_free"] +public unsafe opaque CompactedRegion.free : CompactedRegion → IO Unit + +opaque CompactorSpec : NonemptyType.{0} +/-- +Holds an opaque compactor handle returned by `CompactedRegion.save`, used to chain subsequent saves +so that objects shared between parts are emitted exactly once. + +Not thread-safe: a `Compactor` value must not be used concurrently from multiple threads. The +`CompactedRegion`s passed as `depRegions` when the `Compactor` was first created must outlive the +`Compactor`; if any are freed (via `CompactedRegion.free`) while the `Compactor` is still in use, +subsequent saves will dereference dangling pointers. +-/ +public def Compactor := CompactorSpec.type + +/-- +Saves arbitrary data to a compacted region on disk. + +The `α` type parameter is erased at the runtime/extern boundary: the compactor walks the live +object graph rooted at `data` regardless of its Lean type. `α` is purely a hint for the caller to +align save and load sites. Mismatched types between save and load yield undefined behavior on use. + +`key` is hashed to derive a deterministic mmap base address; for module saves, pass the module +`Name`. `depRegions` are loaded compacted regions (typically from imports) whose objects should +not be re-serialized. `prev`, when present, likewise allows for reuse of objects from prior saves +in the same session. + +Returns a `Compactor` that may be passed as `prev` to subsequent saves. Unsafe because the +returned `Compactor` carries thread-safety and `depRegions` lifetime contracts the type system +cannot enforce; see `Compactor`. +-/ +@[extern "lean_compacted_region_save"] +public unsafe opaque CompactedRegion.save {α : Type} (fname : @& System.FilePath) (key : @& Name) + (data : @& α) (depRegions : @& Array CompactedRegion) (prev : Option Compactor) : + IO Compactor + +/-- +Reads a compacted region from disk. + +`depRegions` are existing compacted regions whose address ranges are needed for cross-region +pointer fixup. The result is the root object reinterpreted at type `α` paired with the new +`CompactedRegion`. Unsafe because `α` is type-erased at the extern boundary: it is the caller's +responsibility to ensure `α` matches the type the data was saved at; mismatched types yield +undefined behavior on use. +-/ +@[extern "lean_compacted_region_read"] +public unsafe opaque CompactedRegion.read {α : Type} (fname : @& System.FilePath) + (depRegions : @& Array CompactedRegion) : IO (α × CompactedRegion) + +end Lean diff --git a/src/Lean/Environment.lean b/src/Lean/Environment.lean index e6ecda2e9d..bc11918643 100644 --- a/src/Lean/Environment.lean +++ b/src/Lean/Environment.lean @@ -20,6 +20,7 @@ public import Lean.Util.InstantiateLevelParams public import Lean.Util.FoldConsts public import Lean.PrivateName public import Lean.LoadDynlib +public import Lean.CompactedRegion public import Init.Dynamic import Init.Data.Slice import Init.Data.String.TakeDrop @@ -98,23 +99,6 @@ instance : GetElem? (Array α) ModuleIdx α (fun a i => i.toNat < a.size) where abbrev ConstMap := SMap Name ConstantInfo -/-- - A compacted region holds multiple Lean objects in a contiguous memory region, which can be read/written to/from disk. - Objects inside the region do not have reference counters and cannot be freed individually. The contents of .olean - files are compacted regions. -/ -@[expose] def CompactedRegion := USize - -@[extern "lean_compacted_region_is_memory_mapped"] -opaque CompactedRegion.isMemoryMapped : CompactedRegion → Bool - -/-- Size in bytes. -/ -@[extern "lean_compacted_region_size"] -opaque CompactedRegion.size : CompactedRegion → USize - -/-- Free a compacted region and its contents. No live references to the contents may exist at the time of invocation. -/ -@[extern "lean_compacted_region_free"] -unsafe opaque CompactedRegion.free : CompactedRegion → IO Unit - /-- Opaque persistent environment extension entry. -/ opaque EnvExtensionEntrySpec : NonemptyType.{0} @[expose] def EnvExtensionEntry : Type := EnvExtensionEntrySpec.type @@ -1752,24 +1736,31 @@ duplicated. Thus the data cannot be loaded with individual `readModuleData` call passing (a prefix of) the file names to `readModuleDataParts`. `mod` is used to determine an arbitrary but deterministic base address for `mmap`. -/ -@[extern "lean_save_module_data_parts"] -opaque saveModuleDataParts (mod : @& Name) (parts : @& Array (System.FilePath × ModuleData)) : IO Unit +def saveModuleDataParts (mod : Name) (parts : Array (System.FilePath × ModuleData)) : IO Unit := do + let mut cs : Option Compactor := none + for h : i in [:parts.size] do + let (fname, data) := parts[i] + cs := some (← unsafe CompactedRegion.save fname mod data #[] cs) /-- Loads the module data from the given file names. The files must be (a prefix of) the result of a `saveModuleDataParts` call. -/ -@[extern "lean_read_module_data_parts"] -opaque readModuleDataParts (fnames : @& Array System.FilePath) : IO (Array (ModuleData × CompactedRegion)) +def readModuleDataParts (fnames : Array System.FilePath) : + IO (Array (ModuleData × CompactedRegion)) := do + let mut depRegions : Array CompactedRegion := #[] + let mut result : Array (ModuleData × CompactedRegion) := #[] + for fname in fnames do + let part ← unsafe CompactedRegion.read (α := ModuleData) fname depRegions + result := result.push part + depRegions := depRegions.push part.2 + return result -def saveModuleData (fname : System.FilePath) (mod : Name) (data : ModuleData) : IO Unit := - saveModuleDataParts mod #[(fname, data)] +def saveModuleData (fname : System.FilePath) (mod : Name) (data : ModuleData) : IO Unit := do + let _ ← unsafe CompactedRegion.save fname mod data #[] none -def readModuleData (fname : @& System.FilePath) : IO (ModuleData × CompactedRegion) := do - let parts ← readModuleDataParts #[fname] - assert! parts.size == 1 - let some part := parts[0]? | unreachable! - return part +def readModuleData (fname : @& System.FilePath) : IO (ModuleData × CompactedRegion) := + unsafe CompactedRegion.read fname #[] /-- Free compacted regions of imports. No live references to imported objects may exist at the time of invocation; in diff --git a/src/library/module.cpp b/src/library/module.cpp index 1bab884712..4af3f15026 100644 --- a/src/library/module.cpp +++ b/src/library/module.cpp @@ -24,6 +24,7 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich #include "runtime/compact.h" #include "runtime/buffer.h" #include "runtime/array_ref.h" +#include "runtime/option_ref.h" #include "util/io.h" #include "util/name_map.h" #include "library/module.h" @@ -42,6 +43,13 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich #include #endif +// `MAP_FIXED_NOREPLACE` turns out to improve mmap hits in the snapshot use case but is exclusive to +// Linux 4.17+ and glibc 2.28+; as this is not a core use case and older kernels will silently +// ignore the flag, we simply define it conditionally. +#if defined(__linux__) && !defined(MAP_FIXED_NOREPLACE) +#define MAP_FIXED_NOREPLACE 0x100000 +#endif + #if defined(__has_feature) #if __has_feature(address_sanitizer) #include @@ -125,275 +133,276 @@ struct olean_header { // make sure we don't have any padding bytes, which also ensures `data` is properly aligned static_assert(sizeof(olean_header) == 5 + 1 + 1 + 33 + 40 + sizeof(size_t), "olean_header must be packed"); -extern "C" LEAN_EXPORT object * lean_save_module_data_parts(b_obj_arg mod, b_obj_arg oparts, object *) { + +// Compactor external object: wraps a live `object_compactor` for incremental compaction. +// Keeping the full compactor alive preserves both the `obj_table` (pointer-identity dedup) +// and the `max_sharing_table` (structural dedup) across sequential `lean_compacted_region_save` +// calls so that objects shared between parts are emitted exactly once. + +static lean_external_class * g_compactor_class = nullptr; + +static void compactor_finalizer(void * data) { + delete static_cast(data); +} + +static void compactor_foreach(void *, b_lean_obj_arg) { + // no Lean object references to trace +} + +static void ensure_compactor_class() { + if (!g_compactor_class) { + g_compactor_class = lean_register_external_class(compactor_finalizer, compactor_foreach); + } +} + +static lean_object * mk_compactor(void * base_addr, std::vector dep_regions) { + ensure_compactor_class(); + return lean_alloc_external(g_compactor_class, + new object_compactor(base_addr, std::move(dep_regions))); +} + +static object_compactor * to_compactor(lean_object * o) { + return static_cast(lean_get_external_data(o)); +} + +// Extract `compacted_region *` pointers from an `Array CompactedRegion`. +static std::vector extract_dep_regions(b_obj_arg odep_regions) { + std::vector result; + size_t n = lean_array_size(odep_regions); + result.reserve(n); + for (size_t i = 0; i < n; i++) { + result.push_back(reinterpret_cast( + lean_unbox_usize(lean_array_get_core(odep_regions, i)))); + } + return result; +} + +extern "C" LEAN_EXPORT object * lean_compacted_region_save(b_obj_arg ofname, b_obj_arg mod, b_obj_arg odata, + b_obj_arg odep_regions, obj_arg oprev, object *) { + // `mmap` addresses must be page-aligned. The default (non-huge) page size on x86-64 is 4KB; + // `MapViewOfFileEx` addresses must be aligned to the "memory allocation granularity" (64KB). + const size_t ALIGN = 1LL<<16; + + option_ref prev(oprev); + object_ref cs_obj; + if (prev) { + cs_obj = prev.get_val(); + } else { + // Derive a base address that is uniformly distributed but deterministic, and should most + // likely work for `mmap` on all interesting platforms. + // NOTE: an overlapping/non-compatible base address does not prevent the module from being + // imported, merely from using `mmap` for that. + // Start with a hash of the module name. While our string hash is a dubious 32-bit + // algorithm, the mixing of multiple `Name` parts seems to result in a nicely distributed + // 64-bit output. + size_t base_addr = name(mod, true).hash(); + // x86-64 user space is currently limited to the lower 47 bits + // (https://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details). + // On Linux at least, the stack grows down from ~0x7fff... followed by shared libraries, + // so reserve a bit of space for them (0x7fff...-0x7f00... = 1TB). + base_addr = base_addr % 0x7f0000000000; + base_addr = base_addr & ~(ALIGN - 1); + std::vector dep_regions = extract_dep_regions(odep_regions); + cs_obj = object_ref(mk_compactor(reinterpret_cast(base_addr), std::move(dep_regions))); + } + + object_compactor & compactor = *to_compactor(cs_obj.raw()); + + char const * olean_fn = lean_string_cstr(ofname); + // We first write to a temp file and then move it to the correct path (possibly deleting an + // older file) so that we neither expose partially-written files nor modify possibly + // memory-mapped files. #ifdef LEAN_WINDOWS uint32_t pid = GetCurrentProcessId(); #else uint32_t pid = getpid(); #endif - // Derive a base address that is uniformly distributed by deterministic, and should most likely - // work for `mmap` on all interesting platforms - // NOTE: an overlapping/non-compatible base address does not prevent the module from being imported, - // merely from using `mmap` for that + std::string olean_tmp_fn = std::string(olean_fn) + ".tmp." + std::to_string(pid); - // Let's start with a hash of the module name. Note that while our string hash is a dubious 32-bit - // algorithm, the mixing of multiple `Name` parts seems to result in a nicely distributed 64-bit - // output - size_t base_addr = name(mod, true).hash(); - // x86-64 user space is currently limited to the lower 47 bits - // https://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details - // On Linux at least, the stack grows down from ~0x7fff... followed by shared libraries, so reserve - // a bit of space for them (0x7fff...-0x7f00... = 1TB) - base_addr = base_addr % 0x7f0000000000; - // `mmap` addresses must be page-aligned. The default (non-huge) page size on x86-64 is 4KB. - // `MapViewOfFileEx` addresses must be aligned to the "memory allocation granularity", which is 64KB. - const size_t ALIGN = 1LL<<16; - base_addr = base_addr & ~(ALIGN - 1); + try { + std::ofstream out(olean_tmp_fn, std::ios_base::binary); - object_compactor compactor(reinterpret_cast(base_addr)); - - array_ref> parts(oparts, true); - std::vector tmp_fnames; - for (auto const & part : parts) { - std::string olean_fn = part.fst().to_std_string(); - try { - // we first write to a temp file and then move it to the correct path (possibly deleting an older file) - // so that we neither expose partially-written files nor modify possibly memory-mapped files - std::string olean_tmp_fn = olean_fn + ".tmp." + std::to_string(pid); - tmp_fnames.push_back(olean_tmp_fn); - - std::ofstream out(olean_tmp_fn, std::ios_base::binary); - - if (compactor.size() % ALIGN != 0) { - compactor.alloc(ALIGN - (compactor.size() % ALIGN)); - } - size_t file_offset = compactor.size(); - - compactor.alloc(sizeof(olean_header)); - olean_header header = {}; - // see/sync with file format description above - header.base_addr = base_addr + file_offset; - strncpy(header.lean_version, get_short_version_string().c_str(), sizeof(header.lean_version)); - strncpy(header.githash, LEAN_GITHASH, sizeof(header.githash)); - out.write(reinterpret_cast(&header), sizeof(header)); - - compactor(part.snd().raw()); - - if (out.fail()) { - throw exception((sstream() << "failed to create file '" << olean_fn << "'").str()); - } - out.write(static_cast(compactor.data()) + file_offset + sizeof(olean_header), compactor.size() - file_offset - sizeof(olean_header)); - out.close(); - } catch (exception & ex) { - return io_result_mk_error((sstream() << "failed to write '" << olean_fn << "': " << ex.what()).str()); + if (compactor.size() % ALIGN != 0) { + compactor.alloc(ALIGN - (compactor.size() % ALIGN)); } + size_t file_offset = compactor.size(); + + // Reserve space in the compactor buffer for the header so subsequent objects' + // `base_addr`-relative offsets land in the right spot. The header is written directly + // to `out` below; these reserved bytes are never read back. + compactor.alloc(sizeof(olean_header)); + olean_header header = {}; + header.base_addr = reinterpret_cast(compactor.base_addr()) + file_offset; + strncpy(header.lean_version, get_short_version_string().c_str(), sizeof(header.lean_version)); + strncpy(header.githash, LEAN_GITHASH, sizeof(header.githash)); + out.write(reinterpret_cast(&header), sizeof(header)); + + compactor(odata); + + if (out.fail()) { + throw exception((sstream() << "failed to create file '" << olean_fn << "'").str()); + } + out.write(static_cast(compactor.data()) + file_offset + sizeof(olean_header), + compactor.size() - file_offset - sizeof(olean_header)); + out.close(); + } catch (exception & ex) { + std::remove(olean_tmp_fn.c_str()); + return io_result_mk_error((sstream() << "failed to write '" << olean_fn << "': " << ex.what()).str()); } - for (unsigned i = 0; i < parts.size(); i++) { - std::string olean_fn = parts[i].fst().to_std_string(); - while (std::rename(tmp_fnames[i].c_str(), olean_fn.c_str()) != 0) { + while (std::rename(olean_tmp_fn.c_str(), olean_fn) != 0) { #ifdef LEAN_WINDOWS - if (errno == EEXIST) { - // Memory-mapped files can be deleted starting with Windows 10 using "POSIX semantics" - HANDLE h_olean_fn = CreateFile(olean_fn.c_str(), GENERIC_READ | DELETE, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - if (h_olean_fn == INVALID_HANDLE_VALUE) { - return io_result_mk_error((sstream() << "failed to open '" << olean_fn << "': " << GetLastError()).str()); - } - - FILE_DISPOSITION_INFO_EX fdi = { FILE_DISPOSITION_FLAG_DELETE | FILE_DISPOSITION_FLAG_POSIX_SEMANTICS }; - if (SetFileInformationByHandle(h_olean_fn, static_cast(21) /* FileDispositionInfoEx */, &fdi, sizeof(fdi)) != 0) { - lean_always_assert(CloseHandle(h_olean_fn)); - continue; - } else { - return io_result_mk_error((sstream() << "failed to delete '" << olean_fn << "': " << GetLastError()).str()); - } + if (errno == EEXIST) { + // Memory-mapped files can be deleted starting with Windows 10 using "POSIX semantics". + HANDLE h_olean_fn = CreateFile(olean_fn, GENERIC_READ | DELETE, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (h_olean_fn == INVALID_HANDLE_VALUE) { + return io_result_mk_error((sstream() << "failed to open '" << olean_fn << "': " << GetLastError()).str()); + } + FILE_DISPOSITION_INFO_EX fdi = { FILE_DISPOSITION_FLAG_DELETE | FILE_DISPOSITION_FLAG_POSIX_SEMANTICS }; + if (SetFileInformationByHandle(h_olean_fn, static_cast(21) /* FileDispositionInfoEx */, &fdi, sizeof(fdi)) != 0) { + lean_always_assert(CloseHandle(h_olean_fn)); + continue; + } else { + return io_result_mk_error((sstream() << "failed to delete '" << olean_fn << "': " << GetLastError()).str()); } -#endif - return io_result_mk_error((sstream() << "failed to write '" << olean_fn << "': " << errno << " " << strerror(errno)).str()); } +#endif + return io_result_mk_error((sstream() << "failed to write '" << olean_fn << "': " << errno << " " << strerror(errno)).str()); } - return io_result_mk_ok(box(0)); + + return io_result_mk_ok(cs_obj.steal()); } -struct module_file { - std::string m_fname; - file_descriptor m_fd; +// Implements `Lean.CompactedRegion.read`. Loads a compacted region from disk. `odep_regions` +// carries `CompactedRegion`s whose address ranges must be known to resolve cross-region pointers +// in this file. Returns `(α × CompactedRegion)`, where `α` is the type the Lean caller asks the +// root to be interpreted as — the C side does no type checking and the caller is responsible for +// using a type compatible with what was saved (see `CompactedRegion.read`). +extern "C" LEAN_EXPORT object * lean_compacted_region_read(b_obj_arg ofname, b_obj_arg odep_regions, object *) { + std::string olean_fn(lean_string_cstr(ofname)); + std::vector dep_regions = extract_dep_regions(odep_regions); + try { #ifdef LEAN_WINDOWS - HANDLE m_handle; // store the original Windows for mmap -#endif - char * m_base_addr; - size_t m_size; - char * m_buffer; - std::function m_free_data; -}; - -extern "C" LEAN_EXPORT object * lean_read_module_data_parts(b_obj_arg ofnames, object *) { - array_ref fnames(ofnames, true); - - // first read in all headers - std::vector files; - for (auto const & fname : fnames) { - std::string olean_fn = fname.to_std_string(); - try { -#ifdef LEAN_WINDOWS - // Use CreateFile with proper sharing flags, then convert to POSIX fd for shared code - // `FILE_SHARE_DELETE` is necessary to allow the file to (be marked to) be deleted while in use - HANDLE h_file = CreateFile(olean_fn.c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - if (h_file == INVALID_HANDLE_VALUE) { - return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "': " << GetLastError()).str()); - } - int raw_fd = _open_osfhandle((intptr_t)h_file, _O_RDONLY); - if (raw_fd == -1) { - CloseHandle(h_file); - return io_result_mk_error((sstream() << "failed to convert handle to fd for '" << olean_fn << "'").str()); - } - file_descriptor fd(raw_fd); + HANDLE h_file = CreateFile(olean_fn.c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (h_file == INVALID_HANDLE_VALUE) { + return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "': " << GetLastError()).str()); + } + int raw_fd = _open_osfhandle((intptr_t)h_file, _O_RDONLY); + if (raw_fd == -1) { + CloseHandle(h_file); + return io_result_mk_error((sstream() << "failed to convert handle to fd for '" << olean_fn << "'").str()); + } + file_descriptor fd(raw_fd); #else - file_descriptor fd(open(olean_fn.c_str(), O_RDONLY)); - if (!fd) { - return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "': " << strerror(errno)).str()); - } + file_descriptor fd(open(olean_fn.c_str(), O_RDONLY)); + if (!fd) { + return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "': " << strerror(errno)).str()); + } #endif - /* Get file size */ - struct stat st; - if (fstat(fd.get(), &st) == -1) { - return io_result_mk_error((sstream() << "failed to stat file '" << olean_fn << "': " << strerror(errno)).str()); - } - size_t size = st.st_size; + struct stat st; + if (fstat(fd.get(), &st) == -1) { + return io_result_mk_error((sstream() << "failed to stat file '" << olean_fn << "': " << strerror(errno)).str()); + } + size_t size = st.st_size; - olean_header default_header = {}; - olean_header header; - ssize_t read_size = readn(fd.get(), &header, sizeof(header)); - if (read_size < 0) { - return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "': " << strerror(errno)).str()); - } - if (read_size != sizeof(header) - || memcmp(header.marker, default_header.marker, sizeof(header.marker)) != 0) { - return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str()); - } - lseek(fd.get(), 0, SEEK_SET); - if (header.version != default_header.version || header.flags != default_header.flags + olean_header default_header = {}; + olean_header header; + ssize_t read_size = readn(fd.get(), &header, sizeof(header)); + if (read_size < 0) { + return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "': " << strerror(errno)).str()); + } + if (read_size != sizeof(header) + || memcmp(header.marker, default_header.marker, sizeof(header.marker)) != 0) { + return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str()); + } + if (header.version != default_header.version || header.flags != default_header.flags #ifdef LEAN_CHECK_OLEAN_VERSION - || strncmp(header.githash, LEAN_GITHASH, sizeof(header.githash)) != 0 + || strncmp(header.githash, LEAN_GITHASH, sizeof(header.githash)) != 0 #endif - ) { - return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', incompatible header").str()); - } - char * base_addr = reinterpret_cast(header.base_addr); -#ifdef LEAN_WINDOWS - files.push_back({olean_fn, std::move(fd), h_file, base_addr, size, nullptr, nullptr}); -#else - files.push_back({olean_fn, std::move(fd), base_addr, size, nullptr, nullptr}); -#endif - } catch (exception & ex) { - return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str()); + ) { + return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', incompatible header").str()); } - } + char * base_addr = reinterpret_cast(header.base_addr); -#ifndef LEAN_MMAP - bool is_mmap = false; -#else - // now try mmapping *all* files - bool is_mmap = true; - for (auto & file : files) { - std::string const & olean_fn = file.m_fname; - char * base_addr = file.m_base_addr; - try { + char * buffer = nullptr; + bool is_mmap = false; + std::function free_data; + + // Map file COW-writable. The fallback walk in `compacted_region::read()` writes + // fixed pointers back into the region's memory when any dep region didn't land at its + // saved `base_addr`, so the mapping must be writable. Unwritten pages remain shared + // with the page cache under `MAP_PRIVATE` regardless of protection, so `PROT_WRITE` + // is free when no fixup happens. +#ifdef LEAN_MMAP #ifdef LEAN_WINDOWS - // Use the stored handle that was created with proper sharing flags - HANDLE h_olean_fn = file.m_handle; - HANDLE h_map = CreateFileMapping(h_olean_fn, NULL, PAGE_READONLY, 0, 0, NULL); - if (h_map == NULL) { - return io_result_mk_error((sstream() << "failed to map '" << olean_fn << "': " << GetLastError()).str()); - } - char * buffer = static_cast(MapViewOfFileEx(h_map, FILE_MAP_READ, 0, 0, 0, base_addr)); + HANDLE h_map = CreateFileMapping(h_file, NULL, PAGE_WRITECOPY, 0, 0, NULL); + if (h_map != NULL) { + // `FILE_MAP_COPY` already implies read+copy-on-write; OR'ing with `FILE_MAP_READ` + // silently degrades the mapping to read-only and turns the fix-up walk's pointer + // writes into access violations. + buffer = static_cast(MapViewOfFileEx(h_map, FILE_MAP_COPY, 0, 0, 0, base_addr)); lean_always_assert(CloseHandle(h_map)); - // NOTE: no need to close `h_olean_fn` as it's owned by `file.m_fd` - if (!buffer) { - is_mmap = false; - break; + if (buffer && buffer == base_addr) { + is_mmap = true; + free_data = [=]() { lean_always_assert(UnmapViewOfFile(base_addr)); }; + } else if (buffer) { + lean_always_assert(UnmapViewOfFile(buffer)); + buffer = nullptr; } - file.m_free_data = [=]() { - lean_always_assert(UnmapViewOfFile(base_addr)); - }; + } #else - int fd = file.m_fd.get(); - // NOTE: `file.m_fd` does NOT need to outlive `buffer` after this call - char * buffer = static_cast(mmap(base_addr, file.m_size, PROT_READ, MAP_PRIVATE, fd, 0)); - if (buffer == MAP_FAILED) { - is_mmap = false; - break; - } - size_t size = file.m_size; - file.m_free_data = [=]() { - lean_always_assert(munmap(buffer, size) == 0); - }; + // On Linux 4.17+ kernels, `MAP_FIXED_NOREPLACE` makes the kernel atomically reject the + // mapping if `base_addr` is already taken; older kernels silently ignore the bit. macOS + // doesn't expose the flag at all (see top-of-file fallback). The post-check + cleanup + // `munmap` handles every "did not land at `base_addr`" case uniformly. + int mmap_flags = MAP_PRIVATE +#ifdef MAP_FIXED_NOREPLACE + | MAP_FIXED_NOREPLACE #endif - if (buffer == base_addr) { - file.m_buffer = buffer; - } else { - is_mmap = false; - break; - } - } catch (exception & ex) { - return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str()); + ; + buffer = static_cast(mmap(base_addr, size, PROT_READ | PROT_WRITE, + mmap_flags, fd.get(), 0)); + if (buffer != MAP_FAILED && buffer == base_addr) { + is_mmap = true; + free_data = [=]() { lean_always_assert(munmap(buffer, size) == 0); }; + } else { + if (buffer != MAP_FAILED) munmap(buffer, size); + buffer = nullptr; } - } +#endif #endif - // if *any* file failed to mmap, read all of them into a single big allocation so that offsets - // between them are unchanged - if (!is_mmap && !files.empty()) { - for (auto & file : files) { - if (file.m_free_data) { - file.m_free_data(); - file.m_free_data = {}; + if (!buffer) { + buffer = static_cast(malloc(size)); + lseek(fd.get(), 0, SEEK_SET); + ssize_t r = readn(fd.get(), buffer, size); + if (r < 0 || r != static_cast(size)) { + free(buffer); + return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "'").str()); } + char * buf = buffer; + size_t sz = size; + free_data = [=]() { free_sized(buf, sz); }; } - size_t big_size = files[files.size()-1].m_base_addr + files[files.size()-1].m_size - files[0].m_base_addr; - char * big_buffer = static_cast(malloc(big_size)); - if (!big_buffer) { - return io_result_mk_error(decode_io_error(ENOMEM, nullptr)); - } - for (auto & file : files) { - std::string const & olean_fn = file.m_fname; - try { - file.m_buffer = big_buffer + (file.m_base_addr - files[0].m_base_addr); - ssize_t read_size = readn(file.m_fd.get(), file.m_buffer, file.m_size); - if (read_size < 0) { - return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "': " << strerror(errno)).str()); - } - if (read_size != static_cast(file.m_size)) { - return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "': unexpected EOF").str()); - } - } catch (exception & ex) { - return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str()); - } - } - files[0].m_free_data = [=]() { - free_sized(big_buffer, big_size); - }; - } - - std::vector res; - for (auto & file : files) { - compacted_region * region = - new compacted_region(file.m_size - sizeof(olean_header), file.m_buffer + sizeof(olean_header), static_cast(file.m_base_addr) + sizeof(olean_header), is_mmap, file.m_free_data); + compacted_region * region = new compacted_region( + size - sizeof(olean_header), buffer + sizeof(olean_header), + base_addr + sizeof(olean_header), is_mmap, free_data, + std::move(dep_regions)); #if defined(__has_feature) #if __has_feature(address_sanitizer) - // do not report as leak __lsan_ignore_object(region); #endif #endif object * mod = region->read(); - object * mod_region = alloc_cnstr(0, 2, 0); - cnstr_set(mod_region, 0, mod); - cnstr_set(mod_region, 1, box_size_t(reinterpret_cast(region))); - - res.push_back(object_ref(mod_region)); + object * pair = alloc_cnstr(0, 2, 0); + cnstr_set(pair, 0, mod); + cnstr_set(pair, 1, box_size_t(reinterpret_cast(region))); + return io_result_mk_ok(pair); + } catch (exception & ex) { + return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str()); } - return io_result_mk_ok(to_array(res)); } + } diff --git a/src/runtime/compact.cpp b/src/runtime/compact.cpp index f1cff26271..07f09d53c0 100644 --- a/src/runtime/compact.cpp +++ b/src/runtime/compact.cpp @@ -57,12 +57,25 @@ struct object_compactor::max_sharing_table { } }; -object_compactor::object_compactor(void * base_addr): +object_compactor::object_compactor(void * base_addr, std::vector dep_regions): m_max_sharing_table(new max_sharing_table(this)), + m_dep_regions(std::move(dep_regions)), m_base_addr(base_addr), m_begin(malloc(LEAN_COMPACTOR_INIT_SZ)), m_end(m_begin), m_capacity(static_cast(m_begin) + LEAN_COMPACTOR_INIT_SZ) { + // Sort dep regions by `begin` address for binary search in `to_offset`. + // Extract keys into a flat array first to avoid pointer-chasing TLB misses through + // thousands of scattered `compacted_region` heap objects during sort comparisons. + size_t n = m_dep_regions.size(); + std::vector> keyed(n); + for (size_t i = 0; i < n; i++) + keyed[i] = {m_dep_regions[i]->begin(), m_dep_regions[i]}; + std::sort(keyed.begin(), keyed.end(), + [](std::pair const & a, + std::pair const & b) { return a.first < b.first; }); + for (size_t i = 0; i < n; i++) + m_dep_regions[i] = keyed[i].second; } object_compactor::~object_compactor() { @@ -118,12 +131,30 @@ object_offset object_compactor::to_offset(object * o) { return o; } else { auto it = m_obj_table.find(o); - if (it == m_obj_table.end()) { - m_todo.push_back(o); - return g_null_offset; - } else { + if (it != m_obj_table.end()) { return it->second; } + // Only check dep regions for non-heap objects + if (!m_dep_regions.empty() && !lean_has_rc(o)) { + // Binary search dep regions (sorted by `begin`) + char * addr = reinterpret_cast(o); + std::vector::iterator upper = std::upper_bound( + m_dep_regions.begin(), m_dep_regions.end(), addr, + [](char * a, compacted_region * r) { return a < static_cast(r->begin()); }); + if (upper != m_dep_regions.begin()) { + compacted_region * region = *(upper - 1); + char * region_end = static_cast(region->begin()) + region->size(); + if (addr < region_end) { + // Object is in this dep region; compute its `base_addr`-relative pointer + object_offset off = reinterpret_cast( + reinterpret_cast(region->base_addr()) + (addr - static_cast(region->begin()))); + m_obj_table.insert(std::make_pair(o, off)); + return off; + } + } + } + m_todo.push_back(o); + return g_null_offset; } } @@ -384,14 +415,40 @@ void object_compactor::operator()(object * o) { *root = to_offset(o); } -compacted_region::compacted_region(size_t sz, void * data, void * base_addr, bool is_mmap, std::function free_data): +compacted_region::compacted_region(size_t sz, void * data, void * base_addr, bool is_mmap, std::function free_data, + std::vector dep_regions): m_size(sz), m_base_addr(base_addr), m_is_mmap(is_mmap), m_free_data(free_data), m_begin(data), m_next(data), - m_end(static_cast(data)+sz) { + m_end(static_cast(data)+sz), + m_dep_regions(std::move(dep_regions)) { + // Sort dep regions by `base_addr` for binary search in `fix_object_ptr` + std::sort(m_dep_regions.begin(), m_dep_regions.end(), + [](compacted_region * a, compacted_region * b) { return a->base_addr() < b->base_addr(); }); + // Reject overlapping saved address ranges: `fix_object_ptr` resolves cross-region pointers + // by binary-searching `base_addr`s, and would silently translate via the wrong region if + // two deps overlap (or if a dep overlaps our own range). This should not happen with regular + // .olean use as we use only use `read`'s `prev` instead of `dep_regions` there. + for (size_t i = 1; i < m_dep_regions.size(); i++) { + compacted_region * prev = m_dep_regions[i - 1]; + compacted_region * curr = m_dep_regions[i]; + if (reinterpret_cast(prev->base_addr()) + prev->size() + > reinterpret_cast(curr->base_addr())) { + throw exception("compacted_region: dep regions have overlapping `base_addr` ranges"); + } + } + size_t self_base = reinterpret_cast(m_base_addr); + size_t self_end = self_base + m_size; + for (compacted_region * dep : m_dep_regions) { + size_t dep_base = reinterpret_cast(dep->base_addr()); + size_t dep_end = dep_base + dep->size(); + if (self_base < dep_end && dep_base < self_end) { + throw exception("compacted_region: own region overlaps a dep region's `base_addr` range"); + } + } } compacted_region::~compacted_region() { @@ -402,7 +459,25 @@ compacted_region::~compacted_region() { inline object * compacted_region::fix_object_ptr(object * o) { if (lean_is_scalar(o)) return o; - return reinterpret_cast(static_cast(m_begin) - reinterpret_cast(m_base_addr) + reinterpret_cast(o)); + size_t addr = reinterpret_cast(o); + size_t self_base = reinterpret_cast(m_base_addr); + // Check own region first (most common case) + if (addr >= self_base && addr < self_base + m_size) { + return reinterpret_cast(static_cast(m_begin) + (addr - self_base)); + } + // Binary search dep regions (sorted by `base_addr`) + char * addr_ptr = reinterpret_cast(addr); + std::vector::iterator upper = std::upper_bound( + m_dep_regions.begin(), m_dep_regions.end(), addr_ptr, + [](char * a, compacted_region * r) { return a < static_cast(r->base_addr()); }); + if (upper != m_dep_regions.begin()) { + compacted_region * dep = *(upper - 1); + size_t dep_base = reinterpret_cast(dep->base_addr()); + if (addr < dep_base + dep->size()) { + return reinterpret_cast(static_cast(dep->begin()) + (addr - dep_base)); + } + } + lean_unreachable(); } inline void compacted_region::move(size_t d) { @@ -475,11 +550,19 @@ object * compacted_region::read() { object * root = fix_object_ptr(*static_cast(m_next)); move(sizeof(object_offset)); if (m_begin == m_base_addr) { - // no relocations needed - m_end = m_next; - return root; + // Own-region pointers are already correct (this region landed at its saved address). + // But if any dep region missed its `base_addr`, cross-region pointers in this region + // still hold the dep's saved address and need fixup, so fall through to the walk in + // that case. + bool needs_dep_reloc = false; + for (compacted_region * dep : m_dep_regions) { + if (dep->begin() != dep->base_addr()) { needs_dep_reloc = true; break; } + } + if (!needs_dep_reloc) { + m_end = m_next; + return root; + } } - lean_assert(!m_is_mmap); while (m_next < m_end) { object * curr = reinterpret_cast(m_next); diff --git a/src/runtime/compact.h b/src/runtime/compact.h index 33e3b8d597..8281d4beba 100644 --- a/src/runtime/compact.h +++ b/src/runtime/compact.h @@ -6,6 +6,7 @@ Author: Leonardo de Moura */ #pragma once #include +#include #include #include "runtime/object.h" #include "util/alloc.h" @@ -13,6 +14,8 @@ Author: Leonardo de Moura namespace lean { typedef lean_object * object_offset; +class compacted_region; + class LEAN_EXPORT object_compactor { struct max_sharing_table; friend struct max_sharing_hash; @@ -21,6 +24,8 @@ class LEAN_EXPORT object_compactor { std::unique_ptr m_max_sharing_table; std::vector m_todo; std::vector m_tmp; + // Dependency regions sorted by `begin` address for binary search in `to_offset` + std::vector m_dep_regions; // On-disk base address used for `mmap`ing compacted regions without relocations // References within the compacted region are rewritten by subtracting `m_begin` and adding `m_base_addr` // In the simplest case `base_addr == nullptr`, we get region-relative pointers @@ -44,7 +49,7 @@ class LEAN_EXPORT object_compactor { bool insert_ref(object * o); void insert_mpz(object * o); public: - object_compactor(void * base_addr = nullptr); + object_compactor(void * base_addr = nullptr, std::vector dep_regions = {}); object_compactor(object_compactor const &) = delete; object_compactor(object_compactor &&) = delete; ~object_compactor(); @@ -55,6 +60,8 @@ public: void const * data() const { return m_begin; } // Allocate `sz` bytes of zeroed memory. void * alloc(size_t sz); + void const * base_addr() const { return m_base_addr; } + std::vector const & dep_regions() const { return m_dep_regions; } }; class LEAN_EXPORT compacted_region { @@ -66,6 +73,8 @@ class LEAN_EXPORT compacted_region { void * m_begin; void * m_next; void * m_end; + // Dependency regions for cross-region pointer fixup + std::vector m_dep_regions; void move(size_t d); void move(object * o); object * fix_object_ptr(object * o); @@ -79,7 +88,8 @@ class LEAN_EXPORT compacted_region { public: /* Creates a compacted object region using the given region in memory. This object takes ownership of the region. */ - compacted_region(size_t sz, void * data, void * base_addr, bool is_mmap, std::function free_data); + compacted_region(size_t sz, void * data, void * base_addr, bool is_mmap, std::function free_data, + std::vector dep_regions = {}); /* Creates a compacted object region using the object_compactor current state. It creates a copy of the compacted region generated by the object compactor. */ explicit compacted_region(object_compactor const & c); @@ -91,5 +101,7 @@ public: object * read(); bool is_memory_mapped() const { return m_is_mmap; } size_t size() const { return m_size; } + void * base_addr() const { return m_base_addr; } + void * begin() const { return m_begin; } }; } diff --git a/tests/compile/.gitignore b/tests/compile/.gitignore new file mode 100644 index 0000000000..7cb809e725 --- /dev/null +++ b/tests/compile/.gitignore @@ -0,0 +1,5 @@ +# Tests creating temporary output files should prefix them with `_tmp_` so they're all +# covered by this one pattern. The tests delete them on success; this entry catches stragglers +# from failed runs. +/_tmp_* + diff --git a/tests/compile/compactor_chain.lean b/tests/compile/compactor_chain.lean new file mode 100644 index 0000000000..49894c0eda --- /dev/null +++ b/tests/compile/compactor_chain.lean @@ -0,0 +1,47 @@ +import Lean.CompactedRegion + +open Lean + +/-! +Regression test for `CompactedRegion.save` / `CompactedRegion.read` chained-`Compactor` +object sharing. + +Saves three parts where parts 1 and 2 both reference the same `Array Nat`. The chained +`Compactor` should emit that array's bytes exactly once (in part 1), with part 2 holding only +a cross-region reference. We verify both halves: + +* **dedup:** part 2's file size should be substantially smaller than part 1's, since part 2 + doesn't re-serialize the shared array. +* **round-trip:** loading via `CompactedRegion.read` with the prior regions as `depRegions` + reproduces the original arrays, exercising `fix_object_ptr`'s dep-region lookup. +-/ +unsafe def main : IO UInt32 := do + let part0 : System.FilePath := "./_tmp_compactor_chain_0.olean" + let part1 : System.FilePath := "./_tmp_compactor_chain_1.olean" + let part2 : System.FilePath := "./_tmp_compactor_chain_2.olean" + + let shared : Array Nat := Array.range 256 + let cs ← CompactedRegion.save part0 `Test (0 : Nat) #[] none + let cs ← CompactedRegion.save part1 `Test shared #[] (some cs) + let _ ← CompactedRegion.save part2 `Test shared #[] (some cs) + + let s1 := (← IO.FS.readBinFile part1).size + let s2 := (← IO.FS.readBinFile part2).size + -- Part 2 should be much smaller than part 1: the chained compactor sees the shared array + -- was already emitted to part 1 and writes only a cross-region pointer. A factor of 4 is + -- generous — in practice part 2 is dominated by the olean header + a single root pointer. + unless 4 * s2 < s1 do + throw <| IO.userError s!"dedup did not take effect: part1={s1}, part2={s2}" + + let (_, r0) ← CompactedRegion.read (α := Nat) part0 #[] + let (a1, r1) ← CompactedRegion.read (α := Array Nat) part1 #[r0] + let (a2, _ ) ← CompactedRegion.read (α := Array Nat) part2 #[r0, r1] + unless a1 = shared do + throw <| IO.userError "part 1 round-trip mismatch" + unless a2 = shared do + throw <| IO.userError "part 2 round-trip mismatch" + + IO.FS.removeFile part0 + IO.FS.removeFile part1 + IO.FS.removeFile part2 + return 0 diff --git a/tests/compile/compactor_chain.lean.no_compile b/tests/compile/compactor_chain.lean.no_compile new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/compile/dep_regions.lean b/tests/compile/dep_regions.lean new file mode 100644 index 0000000000..ab7a1b5582 --- /dev/null +++ b/tests/compile/dep_regions.lean @@ -0,0 +1,42 @@ +import Lean.CompactedRegion + +open Lean + +/-! +Regression test for the `depRegions` argument to `CompactedRegion.save` and +`CompactedRegion.read`. + +Saves a standalone olean (`part0`) holding some data, loads it back, then saves a second +olean (`part1`) that *references* the loaded data via the `depRegions` argument. The +compactor should recognize the dep-region object and emit a cross-region pointer rather than +re-serializing it; the reader, given the same dep region, should resolve that pointer via +`fix_object_ptr`'s dep-region lookup. +-/ +unsafe def main : IO UInt32 := do + let part0 : System.FilePath := "./_tmp_dep_regions_0.olean" + let part1 : System.FilePath := "./_tmp_dep_regions_1.olean" + + -- File 0: standalone payload. + let payload0 : Array Nat := Array.range 256 + let _ ← CompactedRegion.save part0 `A payload0 #[] none + + let (loaded0, r0) ← CompactedRegion.read (α := Array Nat) part0 #[] + unless loaded0 = payload0 do + throw <| IO.userError "file 0 round-trip mismatch" + + -- File 1: payload references `loaded0` (an object from `r0`'s region) and adds its own data. + let extra : Array Nat := Array.range 32 + let payload1 : Array Nat × Array Nat := (loaded0, extra) + let _ ← CompactedRegion.save part1 `B payload1 #[r0] none + + -- Loading `part1` without supplying `r0` would either segfault or hit `lean_unreachable` + -- when `fix_object_ptr` tries to resolve the cross-region pointer; we supply it. + let (loaded1, _r1) ← CompactedRegion.read (α := Array Nat × Array Nat) part1 #[r0] + unless loaded1.1 = payload0 do + throw <| IO.userError "cross-region reference did not round-trip" + unless loaded1.2 = extra do + throw <| IO.userError "own-region data did not round-trip" + + IO.FS.removeFile part0 + IO.FS.removeFile part1 + return 0 diff --git a/tests/compile/dep_regions.lean.no_compile b/tests/compile/dep_regions.lean.no_compile new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/compile/dep_regions_miss.lean b/tests/compile/dep_regions_miss.lean new file mode 100644 index 0000000000..d922c9abff --- /dev/null +++ b/tests/compile/dep_regions_miss.lean @@ -0,0 +1,62 @@ +import Lean.CompactedRegion + +open Lean + +/-! +Regression test for the dep-aware fast-path skip in `compacted_region::read()`. + +Saves two distinct payloads to two different files with the same `mod` `Name`, so both files +derive the same saved `base_addr`. Loading the first mmaps successfully at that address; +loading the second finds the address occupied and falls back to `malloc`+`read` — +`m_begin ≠ m_base_addr`. We then save a third file whose data references the malloc'd +payload via `depRegions := #[r_B]`, and load it back. + +When the dependent file (which itself mmaps at a different `base_addr`) reaches +`compacted_region::read()`, the fast path must check **all** dep regions for +`m_begin == m_base_addr` and fall through to the full pointer-fixup walk if any didn't land +at their saved address. If the fast path only checks `m_begin == m_base_addr` for the +current region (master's behavior before this fix), the cross-region pointer still holds +the dep's saved address — which in this process happens to be mapped, but mapped to a +*different file's data* (the first load's). The test catches that by asserting the loaded +data matches `payload_b`, not `payload_a`. +-/ +unsafe def main : IO UInt32 := do + let part_a : System.FilePath := "./_tmp_dep_regions_miss_a.olean" + let part_b : System.FilePath := "./_tmp_dep_regions_miss_b.olean" + let part_c : System.FilePath := "./_tmp_dep_regions_miss_c.olean" + + let payload_a : Array Nat := (Array.range 256).map (· + 1000) + let payload_b : Array Nat := (Array.range 256).map (· + 9000) + let _ ← CompactedRegion.save part_a `Same payload_a #[] none + let _ ← CompactedRegion.save part_b `Same payload_b #[] none + + -- First load: mmap succeeds at the saved `base_addr` derived from `Same`. + let (_, _r_a) ← CompactedRegion.read (α := Array Nat) part_a #[] + -- Second load: same `mod` → same saved `base_addr`, but it's already occupied by `_r_a`, + -- so mmap fails and the reader falls back to malloc. `m_begin ≠ m_base_addr`. + let (loaded_b, r_b) ← CompactedRegion.read (α := Array Nat) part_b #[] + unless loaded_b = payload_b do + throw <| IO.userError "malloc-fallback load did not round-trip identically" + + -- Save part_c referencing the malloc-loaded `loaded_b`. The compactor recognizes + -- objects in `r_b`'s range and emits a cross-region pointer using `r_b`'s saved + -- `base_addr` (which equals `r_a`'s saved `base_addr`). + let extra : Array Nat := Array.range 32 + let payload_c : Array Nat × Array Nat := (loaded_b, extra) + let _ ← CompactedRegion.save part_c `Other payload_c #[r_b] none + + -- Load part_c. Its own region mmaps at a different `base_addr` (mod `Other`), so the + -- own-region check passes — but `r_b` has `m_begin ≠ m_base_addr`, so the fast path must + -- fall through to the full walk to translate the cross-region pointer to `r_b`'s + -- malloc'd memory. If the walk is skipped, the unfixed pointer instead reads into + -- `_r_a`'s mmap (which holds `payload_a`, not `payload_b`). + let (loaded_c, _r_c) ← CompactedRegion.read (α := Array Nat × Array Nat) part_c #[r_b] + unless loaded_c.1 = payload_b do + throw <| IO.userError "fast path skipped fixup: cross-region pointer resolved to wrong dep" + unless loaded_c.2 = extra do + throw <| IO.userError "own-region data didn't round-trip" + + IO.FS.removeFile part_a + IO.FS.removeFile part_b + IO.FS.removeFile part_c + return 0 diff --git a/tests/compile/dep_regions_miss.lean.no_compile b/tests/compile/dep_regions_miss.lean.no_compile new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/compile/run_bench.sh b/tests/compile/run_bench.sh index f2d46fecf4..692d742a80 100644 --- a/tests/compile/run_bench.sh +++ b/tests/compile/run_bench.sh @@ -7,7 +7,7 @@ elif [[ -f "$1.no_compile" ]]; then DO_COMPILE= else DO_COMPILE=1 fi -rm -f "$1.measurements.jsonl" +: > "$1.measurements.jsonl" # always create (possibly empty) so `combine.py` finds it if [[ -n $DO_COMPILE ]]; then run_before "$1"