feat: smarter plugin loading (#7090)

This PR strips `lib` prefixes and `_shared` suffixes from plugin names.
It also moves most of the dynlib processing code to Lean to make such
preprocessing more standard.
This commit is contained in:
Mac Malone 2025-02-18 18:03:52 -05:00 committed by GitHub
parent dfce31e2a2
commit a41fb49e25
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 212 additions and 129 deletions

View file

@ -8,6 +8,45 @@ import Init.System.IO
namespace Lean
private opaque DynlibImpl : NonemptyType.{0}
/-- A dynamic library handle. -/
def Dynlib := DynlibImpl.type
instance : Nonempty Dynlib := DynlibImpl.property
private opaque Dynlib.SymbolImpl (dynlib : Dynlib) : NonemptyType.{0}
/-- A reference to a symbol within a dynamic library. -/
def Dynlib.Symbol (dynlib : Dynlib) := SymbolImpl dynlib |>.type
instance : Nonempty (Dynlib.Symbol dynlib) := Dynlib.SymbolImpl dynlib |>.property
/--
Dynamically loads a shared library.
The path may also be used perform a system-dependent search for library.
To avoid this, use an absolute path (e.g., from `IO.FS.realPath`).
-/
@[extern "lean_dynlib_load"]
opaque Dynlib.load (path : @& System.FilePath) : IO Dynlib
/-- Returns the symbol of the dynamic library with the specified name (if any). -/
@[extern "lean_dynlib_get"]
opaque Dynlib.get? (dynlib : @& Dynlib) (sym : @& String) : Option dynlib.Symbol
/--
Runs a module initializer function.
The symbol should have the signature `(builtin : Bool) → IO Unit`
(e.g., `initialize_Foo(uint8_t builtin, obj_arg)`).
This function is unsafe because there is no guarantee the symbol has the
expected signature. An invalid symbol can thus produce undefined behavior.
Furthermore, if the initializer introduces pointers (e.g., function closures)
from the dynamic library into the global state, future garbage collection of
the library will produce undefined behavior. In such cases, garbage collection
of the dynamic library can be prevented via `Runtime.markPersistent` or
`Runtime.forget`.
-/
@[extern "lean_dynlib_symbol_run_as_init"]
unsafe opaque Dynlib.Symbol.runAsInit {dynlib : @& Dynlib} (sym : @& dynlib.Symbol) : IO Unit
/--
Dynamically loads a shared library so that its symbols can be used by
the Lean interpreter (e.g., for interpreting `@[extern]` declarations).
@ -18,8 +57,12 @@ symbols shared with a previously loaded library (including itself) will error.
If multiple libraries share common symbols, those symbols should be linked
and loaded as separate libraries.
-/
@[extern "lean_load_dynlib"]
opaque loadDynlib (path : @& System.FilePath) : IO Unit
@[export lean_load_dynlib]
def loadDynlib (path : @& System.FilePath) : IO Unit := do
let dynlib ← Dynlib.load path
-- Lean never unloads libraries.
-- Safety: There are no concurrent accesses to `dynlib` at this point.
let _ ← unsafe Runtime.markPersistent dynlib
/--
Loads a Lean plugin and runs its initializers.
@ -40,5 +83,27 @@ symbols shared with a previously loaded plugin (including itself) will error.
If multiple plugins share common symbols (e.g., imports), those symbols
should be linked and loaded separately.
-/
@[extern "lean_load_plugin"]
opaque loadPlugin (path : @& System.FilePath) : IO Unit
@[export lean_load_plugin]
def loadPlugin (path : System.FilePath) : IO Unit := do
-- We never want to look up plugins using the system library search
let path ← IO.FS.realPath path
let some name := path.fileStem
| throw <| IO.userError s!"error, plugin has invalid file name '{path}'"
let dynlib ← Dynlib.load path
-- Lean libraries can be prefixed with `lib` or suffixed with `_shared`
-- under some configurations. We strip these from the initializer symbol.
let name := name.stripPrefix "lib" |>.stripSuffix "_shared"
let name := s!"initialize_{name}"
let some sym := dynlib.get? name
| throw <| IO.userError s!"error loading plugin, initializer not found '{name}'"
-- Lean never unloads plugins (once initialized).
-- Safety: There are no concurrent accesses to `dynlib` at this point.
let _ ← unsafe Runtime.markPersistent dynlib
/-
Safety:
* As `dynlib` is marked persistent, there is no danger of garbage collection.
* There is still no guarantee that `sym` has the proper signature, but this
is about as safe as it can be. The initializer naming convention helps
avoid accidentally mistaking non-plugins as plugins.
-/
unsafe sym.runAsInit

View file

@ -1,6 +1,6 @@
add_library(library OBJECT expr_lt.cpp
bin_app.cpp constants.cpp max_sharing.cpp
module.cpp replace_visitor.cpp num.cpp
module.cpp dynlib.cpp replace_visitor.cpp num.cpp
class.cpp util.cpp print.cpp annotation.cpp
reducible.cpp init_module.cpp
projection.cpp

135
src/library/dynlib.cpp Normal file
View file

@ -0,0 +1,135 @@
/*
Copyright (c) 2021 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura, Mac Malone
*/
#include "util/io.h"
#include "runtime/io.h"
#include "runtime/object.h"
#include "runtime/sstream.h"
#include "runtime/exception.h"
#include "library/dynlib.h"
#ifdef LEAN_WINDOWS
#include <windows.h>
#else
#include <dlfcn.h>
#endif
namespace lean {
static lean_external_class * g_dynlib_external_class = nullptr;
static lean_external_class * g_dynlib_symbol_external_class = nullptr;
static void dynlib_finalizer(void * h) {
// There is no sensible way to handle errors here.
// The same decision was made in Rust's libloading.
#ifdef LEAN_WINDOWS
FreeLibrary(static_cast<HMODULE>(h));
#else
dlclose(h);
#endif
}
static void noop_foreach(void * /* val */, b_obj_arg /* fn */) {
}
static void noop_finalizer(void * h) {
}
void initialize_dynlib() {
g_dynlib_external_class = lean_register_external_class(dynlib_finalizer, noop_foreach);
g_dynlib_symbol_external_class = lean_register_external_class(noop_finalizer, noop_foreach);
}
#ifdef LEAN_WINDOWS
static inline obj_res wrap_dynlib(HMODULE h) {
return alloc_external(g_dynlib_external_class, h);
}
static inline HMODULE dynlib_handle(b_obj_arg dynlib) {
return static_cast<HMODULE>(lean_get_external_data(dynlib));
}
#else
static inline obj_res wrap_dynlib(void * h) {
return alloc_external(g_dynlib_external_class, h);
}
static inline void * dynlib_handle(b_obj_arg dynlib) {
return lean_get_external_data(dynlib);
}
#endif
static inline obj_res wrap_symbol(void * sym) {
return alloc_external(g_dynlib_symbol_external_class, sym);
}
static inline void * symbol_ptr(b_obj_arg sym) {
return lean_get_external_data(sym);
}
/* Dynlib.load : System.FilePath -> IO Dynlib */
extern "C" LEAN_EXPORT obj_res lean_dynlib_load(b_obj_arg path, obj_arg) {
#ifdef LEAN_WINDOWS
HMODULE h = LoadLibrary(string_cstr(path));
if (!h) {
return io_result_mk_error((sstream()
<< "error loading library " << string_cstr(path) << ": " << GetLastError()).str());
}
return io_result_mk_ok(wrap_dynlib(h));
#else
// Both dynlibs and plugins are loaded with RTLD_GLOBAL.
// This ensures the interpreter has access to plugin definitions that are also
// imported (e.g., an environment extension defined with builtin_initialize).
// In either case, loading the same symbol twice (and thus e.g. running initializers
// manipulating global `IO.Ref`s twice) should be avoided; the common module
// should instead be factored out into a separate shared library
void *h = dlopen(string_cstr(path), RTLD_LAZY | RTLD_GLOBAL);
if (!h) {
return io_result_mk_error((sstream()
<< "error loading library, " << dlerror()).str());
}
return io_result_mk_ok(wrap_dynlib(h));
#endif
}
/* Dynlib.get? : (dynlib : Dynlib) -> String -> dynlib.Symbol */
extern "C" LEAN_EXPORT obj_res lean_dynlib_get(b_obj_arg dynlib, b_obj_arg name) {
#ifdef LEAN_WINDOWS
auto sym = reinterpret_cast<void *>(GetProcAddress(dynlib_handle(dynlib), string_cstr(name)));
if (sym) {
return mk_option_some(wrap_symbol(sym));
} else {
return mk_option_none();
}
#else
// The address of a valid Linux symbol can be NULL.
// Thus, this is the recommended way to validate a symbol.
dlerror();
void * sym = dlsym(dynlib_handle(dynlib), string_cstr(name));
if (dlerror()) {
return mk_option_none();
} else {
return mk_option_some(wrap_symbol(sym));
}
#endif
}
/* Dynlib.Symbol.runAsInit : {Dynlib} -> Symbol -> IO Unit */
extern "C" LEAN_EXPORT obj_res lean_dynlib_symbol_run_as_init(b_obj_arg /* dynlib */, b_obj_arg sym, obj_arg) {
auto init_fn = reinterpret_cast<object *(*)(uint8_t, object *)>(symbol_ptr(sym));
return init_fn(1 /* builtin */, io_mk_world());
}
/* Lean.loadDynlib : System.FilePath -> IO Unit */
extern "C" obj_res lean_load_dynlib(obj_arg path, obj_arg);
void load_dynlib(std::string path) {
consume_io_result(lean_load_dynlib(mk_string(path), io_mk_world()));
}
/* Lean.loadPlugin : System.FilePath -> IO Unit */
extern "C" obj_res lean_load_plugin(obj_arg path, obj_arg);
void load_plugin(std::string path) {
consume_io_result(lean_load_plugin(mk_string(path), io_mk_world()));
}
}

View file

@ -8,6 +8,7 @@ Author: Mac Malone
#include <string>
namespace lean {
LEAN_EXPORT void initialize_dynlib();
LEAN_EXPORT void load_dynlib(std::string path);
LEAN_EXPORT void load_plugin(std::string path);
}

View file

@ -14,6 +14,7 @@ Author: Leonardo de Moura
#include "library/profiling.h"
#include "library/time_task.h"
#include "library/formatter.h"
#include "library/dynlib.h"
namespace lean {
void initialize_library_core_module() {
@ -35,6 +36,7 @@ void initialize_library_module() {
initialize_class();
initialize_library_util();
initialize_time_task();
initialize_dynlib();
}
void finalize_library_module() {

View file

@ -1,6 +1,6 @@
set(RUNTIME_OBJS debug.cpp thread.cpp mpz.cpp utf8.cpp
object.cpp apply.cpp exception.cpp interrupt.cpp memory.cpp
stackinfo.cpp compact.cpp init_module.cpp load_dynlib.cpp io.cpp hash.cpp
stackinfo.cpp compact.cpp init_module.cpp io.cpp hash.cpp
platform.cpp alloc.cpp allocprof.cpp sharecommon.cpp stack_overflow.cpp
process.cpp object_ref.cpp mpn.cpp mutex.cpp libuv.cpp uv/net_addr.cpp uv/event_loop.cpp
uv/timer.cpp)

View file

@ -1,119 +0,0 @@
/*
Copyright (c) 2021 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura, Mac Malone
*/
#include "util/io.h"
#include "runtime/io.h"
#include "runtime/object.h"
#include "runtime/sstream.h"
#include "runtime/exception.h"
#include "runtime/load_dynlib.h"
#ifdef LEAN_WINDOWS
#include <windows.h>
#else
#include <dlfcn.h>
#endif
namespace lean {
void load_dynlib(std::string path) {
#ifdef LEAN_WINDOWS
HMODULE h = LoadLibrary(path.c_str());
if (!h) {
throw exception(sstream() << "error loading library " << path << ": " << GetLastError());
}
#else
void *handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_GLOBAL);
if (!handle) {
throw exception(sstream() << "error loading library, " << dlerror());
}
#endif
// NOTE: we never unload libraries
}
/* loadDynlib : System.FilePath -> IO Unit */
extern "C" LEAN_EXPORT obj_res lean_load_dynlib(b_obj_arg path, obj_arg) {
try {
load_dynlib(string_cstr(path));
return io_result_mk_ok(box(0));
} catch (exception & ex) {
return io_result_mk_error(ex.what());
}
}
/* loadPlugin : System.FilePath -> IO Unit */
extern "C" LEAN_EXPORT obj_res lean_load_plugin(b_obj_arg path, obj_arg) {
// we never want to look up plugins using the system library search
std::string rpath;
#if defined(LEAN_EMSCRIPTEN)
rpath = string_to_std(path);
auto sep = rpath.rfind('/');
#elif defined(LEAN_WINDOWS)
constexpr unsigned BufferSize = 8192;
char buffer[BufferSize];
DWORD retval = GetFullPathName(string_cstr(path), BufferSize, buffer, nullptr);
if (retval == 0 || retval > BufferSize) {
rpath = string_to_std(path);
} else {
rpath = std::string(buffer);
}
auto sep = rpath.rfind('\\');
#else
char buffer[PATH_MAX];
char * tmp = realpath(string_cstr(path), buffer);
if (tmp) {
rpath = std::string(tmp);
} else {
inc(path);
return io_result_mk_error(lean_mk_io_error_no_file_or_directory(path, ENOENT, mk_string("")));
}
auto sep = rpath.rfind('/');
#endif
if (sep == std::string::npos) {
sep = 0;
} else {
sep++;
}
auto dot = rpath.rfind(".");
if (dot == std::string::npos) {
dot = rpath.size();
}
std::string pkg = rpath.substr(sep, dot - sep);
std::string sym = "initialize_" + pkg;
void * init;
#ifdef LEAN_WINDOWS
HMODULE h = LoadLibrary(rpath.c_str());
if (!h) {
return io_result_mk_error((sstream()
<< "error loading plugin " << rpath << ": " << GetLastError()).str());
}
init = reinterpret_cast<void *>(GetProcAddress(h, sym.c_str()));
#else
// Like lean_load_dynlib, the library is loaded with RTLD_GLOBAL.
// This ensures the interpreter has access to plugin definitions that are also
// imported (e.g., an environment extension defined with builtin_initialize).
// In either case, loading the same symbol twice (and thus e.g. running initializers
// manipulating global `IO.Ref`s twice) should be avoided; the common module
// should instead be factored out into a separate shared library
void *handle = dlopen(rpath.c_str(), RTLD_LAZY | RTLD_GLOBAL);
if (!handle) {
return io_result_mk_error((sstream()
<< "error loading plugin, " << dlerror()).str());
}
init = dlsym(handle, sym.c_str());
#endif
if (!init) {
return io_result_mk_error((sstream()
<< "error, plugin " << rpath << " does not seem to contain a module '" << pkg << "'").str());
}
auto init_fn = reinterpret_cast<object *(*)(uint8_t, object *)>(init);
return init_fn(1 /* builtin */, io_mk_world());
// NOTE: we never unload plugins
}
void load_plugin(std::string path) {
consume_io_result(lean_load_plugin(mk_string(path), io_mk_world()));
}
}

View file

@ -19,7 +19,6 @@ Author: Leonardo de Moura
#include "runtime/thread.h"
#include "runtime/debug.h"
#include "runtime/sstream.h"
#include "runtime/load_dynlib.h"
#include "runtime/array_ref.h"
#include "runtime/object_ref.h"
#include "runtime/utf8.h"
@ -31,6 +30,7 @@ Author: Leonardo de Moura
#include "library/elab_environment.h"
#include "kernel/kernel_exception.h"
#include "kernel/trace.h"
#include "library/dynlib.h"
#include "library/formatter.h"
#include "library/module.h"
#include "library/time_task.h"

View file

@ -28,12 +28,11 @@ check_plugin () {
ls $LIB_DIR
exit 1
}
mv $shlib $LIB_DIR/$plugin.$SHLIB_EXT
}
check_plugin UserPlugin
check_plugin UserEnvPlugin
PLUGIN=$LIB_DIR/UserPlugin.$SHLIB_EXT
ENV_PLUGIN=$LIB_DIR/UserEnvPlugin.$SHLIB_EXT
PLUGIN=$LIB_DIR/${LIB_PREFIX}UserPlugin.$SHLIB_EXT
ENV_PLUGIN=$LIB_DIR/${LIB_PREFIX}UserEnvPlugin.$SHLIB_EXT
# Expected test output
EXPECTED_OUT="Ran builtin initializer"