From a41fb49e2523b4f9ddd6d03563c4f1d0508bef03 Mon Sep 17 00:00:00 2001 From: Mac Malone Date: Tue, 18 Feb 2025 18:03:52 -0500 Subject: [PATCH] feat: smarter plugin loading (#7090) This PR strips `lib` prefixes and `_shared` suffixes from plugin names. It also moves most of the dynlib processing code to Lean to make such preprocessing more standard. --- src/Lean/LoadDynlib.lean | 73 +++++++++- src/library/CMakeLists.txt | 2 +- src/library/dynlib.cpp | 135 ++++++++++++++++++ .../load_dynlib.h => library/dynlib.h} | 1 + src/library/init_module.cpp | 2 + src/runtime/CMakeLists.txt | 2 +- src/runtime/load_dynlib.cpp | 119 --------------- src/util/shell.cpp | 2 +- tests/pkg/user_plugin/test.sh | 5 +- 9 files changed, 212 insertions(+), 129 deletions(-) create mode 100644 src/library/dynlib.cpp rename src/{runtime/load_dynlib.h => library/dynlib.h} (88%) delete mode 100644 src/runtime/load_dynlib.cpp diff --git a/src/Lean/LoadDynlib.lean b/src/Lean/LoadDynlib.lean index ee189e3d17..b6589eafce 100644 --- a/src/Lean/LoadDynlib.lean +++ b/src/Lean/LoadDynlib.lean @@ -8,6 +8,45 @@ import Init.System.IO namespace Lean +private opaque DynlibImpl : NonemptyType.{0} +/-- A dynamic library handle. -/ +def Dynlib := DynlibImpl.type +instance : Nonempty Dynlib := DynlibImpl.property + +private opaque Dynlib.SymbolImpl (dynlib : Dynlib) : NonemptyType.{0} +/-- A reference to a symbol within a dynamic library. -/ +def Dynlib.Symbol (dynlib : Dynlib) := SymbolImpl dynlib |>.type +instance : Nonempty (Dynlib.Symbol dynlib) := Dynlib.SymbolImpl dynlib |>.property + +/-- +Dynamically loads a shared library. + +The path may also be used perform a system-dependent search for library. +To avoid this, use an absolute path (e.g., from `IO.FS.realPath`). +-/ +@[extern "lean_dynlib_load"] +opaque Dynlib.load (path : @& System.FilePath) : IO Dynlib + +/-- Returns the symbol of the dynamic library with the specified name (if any). -/ +@[extern "lean_dynlib_get"] +opaque Dynlib.get? (dynlib : @& Dynlib) (sym : @& String) : Option dynlib.Symbol + +/-- +Runs a module initializer function. +The symbol should have the signature `(builtin : Bool) → IO Unit` +(e.g., `initialize_Foo(uint8_t builtin, obj_arg)`). + +This function is unsafe because there is no guarantee the symbol has the +expected signature. An invalid symbol can thus produce undefined behavior. +Furthermore, if the initializer introduces pointers (e.g., function closures) +from the dynamic library into the global state, future garbage collection of +the library will produce undefined behavior. In such cases, garbage collection +of the dynamic library can be prevented via `Runtime.markPersistent` or +`Runtime.forget`. +-/ +@[extern "lean_dynlib_symbol_run_as_init"] +unsafe opaque Dynlib.Symbol.runAsInit {dynlib : @& Dynlib} (sym : @& dynlib.Symbol) : IO Unit + /-- Dynamically loads a shared library so that its symbols can be used by the Lean interpreter (e.g., for interpreting `@[extern]` declarations). @@ -18,8 +57,12 @@ symbols shared with a previously loaded library (including itself) will error. If multiple libraries share common symbols, those symbols should be linked and loaded as separate libraries. -/ -@[extern "lean_load_dynlib"] -opaque loadDynlib (path : @& System.FilePath) : IO Unit +@[export lean_load_dynlib] +def loadDynlib (path : @& System.FilePath) : IO Unit := do + let dynlib ← Dynlib.load path + -- Lean never unloads libraries. + -- Safety: There are no concurrent accesses to `dynlib` at this point. + let _ ← unsafe Runtime.markPersistent dynlib /-- Loads a Lean plugin and runs its initializers. @@ -40,5 +83,27 @@ symbols shared with a previously loaded plugin (including itself) will error. If multiple plugins share common symbols (e.g., imports), those symbols should be linked and loaded separately. -/ -@[extern "lean_load_plugin"] -opaque loadPlugin (path : @& System.FilePath) : IO Unit +@[export lean_load_plugin] +def loadPlugin (path : System.FilePath) : IO Unit := do + -- We never want to look up plugins using the system library search + let path ← IO.FS.realPath path + let some name := path.fileStem + | throw <| IO.userError s!"error, plugin has invalid file name '{path}'" + let dynlib ← Dynlib.load path + -- Lean libraries can be prefixed with `lib` or suffixed with `_shared` + -- under some configurations. We strip these from the initializer symbol. + let name := name.stripPrefix "lib" |>.stripSuffix "_shared" + let name := s!"initialize_{name}" + let some sym := dynlib.get? name + | throw <| IO.userError s!"error loading plugin, initializer not found '{name}'" + -- Lean never unloads plugins (once initialized). + -- Safety: There are no concurrent accesses to `dynlib` at this point. + let _ ← unsafe Runtime.markPersistent dynlib + /- + Safety: + * As `dynlib` is marked persistent, there is no danger of garbage collection. + * There is still no guarantee that `sym` has the proper signature, but this + is about as safe as it can be. The initializer naming convention helps + avoid accidentally mistaking non-plugins as plugins. + -/ + unsafe sym.runAsInit diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index 30c2c8924a..2bd6cca20a 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -1,6 +1,6 @@ add_library(library OBJECT expr_lt.cpp bin_app.cpp constants.cpp max_sharing.cpp - module.cpp replace_visitor.cpp num.cpp + module.cpp dynlib.cpp replace_visitor.cpp num.cpp class.cpp util.cpp print.cpp annotation.cpp reducible.cpp init_module.cpp projection.cpp diff --git a/src/library/dynlib.cpp b/src/library/dynlib.cpp new file mode 100644 index 0000000000..643c453a9a --- /dev/null +++ b/src/library/dynlib.cpp @@ -0,0 +1,135 @@ +/* +Copyright (c) 2021 Microsoft Corporation. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. + +Author: Leonardo de Moura, Mac Malone +*/ +#include "util/io.h" +#include "runtime/io.h" +#include "runtime/object.h" +#include "runtime/sstream.h" +#include "runtime/exception.h" +#include "library/dynlib.h" + +#ifdef LEAN_WINDOWS +#include +#else +#include +#endif + +namespace lean { + +static lean_external_class * g_dynlib_external_class = nullptr; +static lean_external_class * g_dynlib_symbol_external_class = nullptr; + +static void dynlib_finalizer(void * h) { + // There is no sensible way to handle errors here. + // The same decision was made in Rust's libloading. +#ifdef LEAN_WINDOWS + FreeLibrary(static_cast(h)); +#else + dlclose(h); +#endif +} + +static void noop_foreach(void * /* val */, b_obj_arg /* fn */) { +} + +static void noop_finalizer(void * h) { +} + +void initialize_dynlib() { + g_dynlib_external_class = lean_register_external_class(dynlib_finalizer, noop_foreach); + g_dynlib_symbol_external_class = lean_register_external_class(noop_finalizer, noop_foreach); +} + +#ifdef LEAN_WINDOWS +static inline obj_res wrap_dynlib(HMODULE h) { + return alloc_external(g_dynlib_external_class, h); +} +static inline HMODULE dynlib_handle(b_obj_arg dynlib) { + return static_cast(lean_get_external_data(dynlib)); +} +#else +static inline obj_res wrap_dynlib(void * h) { + return alloc_external(g_dynlib_external_class, h); +} +static inline void * dynlib_handle(b_obj_arg dynlib) { + return lean_get_external_data(dynlib); +} +#endif + +static inline obj_res wrap_symbol(void * sym) { + return alloc_external(g_dynlib_symbol_external_class, sym); +} +static inline void * symbol_ptr(b_obj_arg sym) { + return lean_get_external_data(sym); +} + +/* Dynlib.load : System.FilePath -> IO Dynlib */ +extern "C" LEAN_EXPORT obj_res lean_dynlib_load(b_obj_arg path, obj_arg) { +#ifdef LEAN_WINDOWS + HMODULE h = LoadLibrary(string_cstr(path)); + if (!h) { + return io_result_mk_error((sstream() + << "error loading library " << string_cstr(path) << ": " << GetLastError()).str()); + } + return io_result_mk_ok(wrap_dynlib(h)); +#else + // Both dynlibs and plugins are loaded with RTLD_GLOBAL. + // This ensures the interpreter has access to plugin definitions that are also + // imported (e.g., an environment extension defined with builtin_initialize). + // In either case, loading the same symbol twice (and thus e.g. running initializers + // manipulating global `IO.Ref`s twice) should be avoided; the common module + // should instead be factored out into a separate shared library + void *h = dlopen(string_cstr(path), RTLD_LAZY | RTLD_GLOBAL); + if (!h) { + return io_result_mk_error((sstream() + << "error loading library, " << dlerror()).str()); + } + return io_result_mk_ok(wrap_dynlib(h)); +#endif +} + +/* Dynlib.get? : (dynlib : Dynlib) -> String -> dynlib.Symbol */ +extern "C" LEAN_EXPORT obj_res lean_dynlib_get(b_obj_arg dynlib, b_obj_arg name) { +#ifdef LEAN_WINDOWS + auto sym = reinterpret_cast(GetProcAddress(dynlib_handle(dynlib), string_cstr(name))); + if (sym) { + return mk_option_some(wrap_symbol(sym)); + } else { + return mk_option_none(); + } +#else + // The address of a valid Linux symbol can be NULL. + // Thus, this is the recommended way to validate a symbol. + dlerror(); + void * sym = dlsym(dynlib_handle(dynlib), string_cstr(name)); + if (dlerror()) { + return mk_option_none(); + } else { + return mk_option_some(wrap_symbol(sym)); + } +#endif +} + +/* Dynlib.Symbol.runAsInit : {Dynlib} -> Symbol -> IO Unit */ +extern "C" LEAN_EXPORT obj_res lean_dynlib_symbol_run_as_init(b_obj_arg /* dynlib */, b_obj_arg sym, obj_arg) { + auto init_fn = reinterpret_cast(symbol_ptr(sym)); + return init_fn(1 /* builtin */, io_mk_world()); +} + +/* Lean.loadDynlib : System.FilePath -> IO Unit */ +extern "C" obj_res lean_load_dynlib(obj_arg path, obj_arg); + +void load_dynlib(std::string path) { + consume_io_result(lean_load_dynlib(mk_string(path), io_mk_world())); +} + +/* Lean.loadPlugin : System.FilePath -> IO Unit */ +extern "C" obj_res lean_load_plugin(obj_arg path, obj_arg); + +void load_plugin(std::string path) { + consume_io_result(lean_load_plugin(mk_string(path), io_mk_world())); +} +} diff --git a/src/runtime/load_dynlib.h b/src/library/dynlib.h similarity index 88% rename from src/runtime/load_dynlib.h rename to src/library/dynlib.h index e329e01ee7..4ed99bfb77 100644 --- a/src/runtime/load_dynlib.h +++ b/src/library/dynlib.h @@ -8,6 +8,7 @@ Author: Mac Malone #include namespace lean { +LEAN_EXPORT void initialize_dynlib(); LEAN_EXPORT void load_dynlib(std::string path); LEAN_EXPORT void load_plugin(std::string path); } diff --git a/src/library/init_module.cpp b/src/library/init_module.cpp index 7f4baba1ab..14a26103b1 100644 --- a/src/library/init_module.cpp +++ b/src/library/init_module.cpp @@ -14,6 +14,7 @@ Author: Leonardo de Moura #include "library/profiling.h" #include "library/time_task.h" #include "library/formatter.h" +#include "library/dynlib.h" namespace lean { void initialize_library_core_module() { @@ -35,6 +36,7 @@ void initialize_library_module() { initialize_class(); initialize_library_util(); initialize_time_task(); + initialize_dynlib(); } void finalize_library_module() { diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index fc1e41f416..6fab1878da 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -1,6 +1,6 @@ set(RUNTIME_OBJS debug.cpp thread.cpp mpz.cpp utf8.cpp object.cpp apply.cpp exception.cpp interrupt.cpp memory.cpp -stackinfo.cpp compact.cpp init_module.cpp load_dynlib.cpp io.cpp hash.cpp +stackinfo.cpp compact.cpp init_module.cpp io.cpp hash.cpp platform.cpp alloc.cpp allocprof.cpp sharecommon.cpp stack_overflow.cpp process.cpp object_ref.cpp mpn.cpp mutex.cpp libuv.cpp uv/net_addr.cpp uv/event_loop.cpp uv/timer.cpp) diff --git a/src/runtime/load_dynlib.cpp b/src/runtime/load_dynlib.cpp deleted file mode 100644 index 72faaa44ef..0000000000 --- a/src/runtime/load_dynlib.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* -Copyright (c) 2021 Microsoft Corporation. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. - -Author: Leonardo de Moura, Mac Malone -*/ -#include "util/io.h" -#include "runtime/io.h" -#include "runtime/object.h" -#include "runtime/sstream.h" -#include "runtime/exception.h" -#include "runtime/load_dynlib.h" - -#ifdef LEAN_WINDOWS -#include -#else -#include -#endif - -namespace lean { -void load_dynlib(std::string path) { -#ifdef LEAN_WINDOWS - HMODULE h = LoadLibrary(path.c_str()); - if (!h) { - throw exception(sstream() << "error loading library " << path << ": " << GetLastError()); - } -#else - void *handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_GLOBAL); - if (!handle) { - throw exception(sstream() << "error loading library, " << dlerror()); - } -#endif - // NOTE: we never unload libraries -} - -/* loadDynlib : System.FilePath -> IO Unit */ -extern "C" LEAN_EXPORT obj_res lean_load_dynlib(b_obj_arg path, obj_arg) { - try { - load_dynlib(string_cstr(path)); - return io_result_mk_ok(box(0)); - } catch (exception & ex) { - return io_result_mk_error(ex.what()); - } -} - -/* loadPlugin : System.FilePath -> IO Unit */ -extern "C" LEAN_EXPORT obj_res lean_load_plugin(b_obj_arg path, obj_arg) { - // we never want to look up plugins using the system library search - std::string rpath; -#if defined(LEAN_EMSCRIPTEN) - rpath = string_to_std(path); - auto sep = rpath.rfind('/'); -#elif defined(LEAN_WINDOWS) - constexpr unsigned BufferSize = 8192; - char buffer[BufferSize]; - DWORD retval = GetFullPathName(string_cstr(path), BufferSize, buffer, nullptr); - if (retval == 0 || retval > BufferSize) { - rpath = string_to_std(path); - } else { - rpath = std::string(buffer); - } - auto sep = rpath.rfind('\\'); -#else - char buffer[PATH_MAX]; - char * tmp = realpath(string_cstr(path), buffer); - if (tmp) { - rpath = std::string(tmp); - } else { - inc(path); - return io_result_mk_error(lean_mk_io_error_no_file_or_directory(path, ENOENT, mk_string(""))); - } - auto sep = rpath.rfind('/'); -#endif - if (sep == std::string::npos) { - sep = 0; - } else { - sep++; - } - auto dot = rpath.rfind("."); - if (dot == std::string::npos) { - dot = rpath.size(); - } - std::string pkg = rpath.substr(sep, dot - sep); - std::string sym = "initialize_" + pkg; - void * init; -#ifdef LEAN_WINDOWS - HMODULE h = LoadLibrary(rpath.c_str()); - if (!h) { - return io_result_mk_error((sstream() - << "error loading plugin " << rpath << ": " << GetLastError()).str()); - } - init = reinterpret_cast(GetProcAddress(h, sym.c_str())); -#else - // Like lean_load_dynlib, the library is loaded with RTLD_GLOBAL. - // This ensures the interpreter has access to plugin definitions that are also - // imported (e.g., an environment extension defined with builtin_initialize). - // In either case, loading the same symbol twice (and thus e.g. running initializers - // manipulating global `IO.Ref`s twice) should be avoided; the common module - // should instead be factored out into a separate shared library - void *handle = dlopen(rpath.c_str(), RTLD_LAZY | RTLD_GLOBAL); - if (!handle) { - return io_result_mk_error((sstream() - << "error loading plugin, " << dlerror()).str()); - } - init = dlsym(handle, sym.c_str()); -#endif - if (!init) { - return io_result_mk_error((sstream() - << "error, plugin " << rpath << " does not seem to contain a module '" << pkg << "'").str()); - } - auto init_fn = reinterpret_cast(init); - return init_fn(1 /* builtin */, io_mk_world()); - // NOTE: we never unload plugins -} - -void load_plugin(std::string path) { - consume_io_result(lean_load_plugin(mk_string(path), io_mk_world())); -} -} diff --git a/src/util/shell.cpp b/src/util/shell.cpp index e678ff8b06..1d2a872c34 100644 --- a/src/util/shell.cpp +++ b/src/util/shell.cpp @@ -19,7 +19,6 @@ Author: Leonardo de Moura #include "runtime/thread.h" #include "runtime/debug.h" #include "runtime/sstream.h" -#include "runtime/load_dynlib.h" #include "runtime/array_ref.h" #include "runtime/object_ref.h" #include "runtime/utf8.h" @@ -31,6 +30,7 @@ Author: Leonardo de Moura #include "library/elab_environment.h" #include "kernel/kernel_exception.h" #include "kernel/trace.h" +#include "library/dynlib.h" #include "library/formatter.h" #include "library/module.h" #include "library/time_task.h" diff --git a/tests/pkg/user_plugin/test.sh b/tests/pkg/user_plugin/test.sh index 1d97beb077..37ca83c895 100755 --- a/tests/pkg/user_plugin/test.sh +++ b/tests/pkg/user_plugin/test.sh @@ -28,12 +28,11 @@ check_plugin () { ls $LIB_DIR exit 1 } - mv $shlib $LIB_DIR/$plugin.$SHLIB_EXT } check_plugin UserPlugin check_plugin UserEnvPlugin -PLUGIN=$LIB_DIR/UserPlugin.$SHLIB_EXT -ENV_PLUGIN=$LIB_DIR/UserEnvPlugin.$SHLIB_EXT +PLUGIN=$LIB_DIR/${LIB_PREFIX}UserPlugin.$SHLIB_EXT +ENV_PLUGIN=$LIB_DIR/${LIB_PREFIX}UserEnvPlugin.$SHLIB_EXT # Expected test output EXPECTED_OUT="Ran builtin initializer"