From aa24c15e61417a6711faa6c5ab5ca53bb6ef7edf Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Fri, 16 Feb 2018 14:37:11 -0800 Subject: [PATCH] feat(library/vm): basic performance counters @kha I have added a few performance counters. I collect their values at each snapshot. Right now, I am printing only the values in the last snapshot, but if we want we can even display their progress over time. Right now, I track the following information - number of allocated closures - number of allocated constructors/objects - number of allocated big numbers --- src/library/eval_helper.cpp | 3 +++ src/library/vm/vm.cpp | 32 ++++++++++++++++++++++++++------ src/library/vm/vm.h | 18 ++++++++++++++++++ 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/library/eval_helper.cpp b/src/library/eval_helper.cpp index 3c484149ed..58bf2c614f 100644 --- a/src/library/eval_helper.cpp +++ b/src/library/eval_helper.cpp @@ -25,6 +25,9 @@ eval_helper::eval_helper(environment const & env, options const & opts, name con } vm_obj eval_helper::invoke_fn() { + /* We use `scope_vm_state` to set thread local g_vm_state which is used + to collect performance numbers when profiling. */ + scope_vm_state scope(m_vms); unsigned arity = m_vms.get_decl(m_fn)->get_arity(); if (arity > m_args.size()) { throw exception(sstream() << "cannot evaluate function: " << m_args.size() diff --git a/src/library/vm/vm.cpp b/src/library/vm/vm.cpp index 969c1054d1..eff1db19ea 100644 --- a/src/library/vm/vm.cpp +++ b/src/library/vm/vm.cpp @@ -36,6 +36,9 @@ Author: Leonardo de Moura #endif namespace lean { +/* Reference to the VM that is currently running. */ +LEAN_THREAD_VALUE(vm_state *, g_vm_state, nullptr); + void vm_obj_cell::dec_ref(vm_obj & o, buffer & todelete) { if (LEAN_VM_IS_PTR(o.m_data)) { vm_obj_cell * c = o.steal_ptr(); @@ -72,6 +75,9 @@ void vm_composite::dealloc(buffer & todelete) { } vm_obj mk_vm_constructor(unsigned cidx, unsigned sz, vm_obj const * data) { + if (g_vm_state && g_vm_state->profiling()) { + g_vm_state->inc_constructor_allocs(); + } return mk_vm_composite(vm_obj_kind::Constructor, cidx, sz, data); } @@ -130,6 +136,9 @@ vm_obj update_vm_pair(vm_obj const & o, vm_obj const & v_1, vm_obj const & v_2) } vm_obj mk_vm_closure(unsigned fn_idx, unsigned sz, vm_obj const * data) { + if (g_vm_state && g_vm_state->profiling()) { + g_vm_state->inc_closure_allocs(); + } return mk_vm_composite(vm_obj_kind::Closure, fn_idx, sz, data); } @@ -155,6 +164,9 @@ vm_obj mk_vm_closure(unsigned cidx, vm_obj const & o1, vm_obj const & o2, vm_obj vm_mpz::vm_mpz(mpz const & v): vm_obj_cell(vm_obj_kind::MPZ), m_value(v) { + if (g_vm_state && g_vm_state->profiling()) { + g_vm_state->inc_mpz_allocs(); + } } vm_obj mk_vm_simple(unsigned v) { @@ -1448,9 +1460,6 @@ void vm_state::debugger_init() { m_debugger_state_ptr.reset(new debugger_state(m_env)); } -/* Reference to the VM that is currently running. */ -LEAN_THREAD_VALUE(vm_state *, g_vm_state, nullptr); - scope_vm_state::scope_vm_state(vm_state & s): m_prev(g_vm_state) { g_vm_state = &s; @@ -3414,7 +3423,8 @@ vm_state::profiler::profiler(vm_state & s, options const & opts): auto curr = chrono::steady_clock::now(); m_snapshots.push_back(snapshot_core()); snapshot_core & s = m_snapshots.back(); - s.m_duration = chrono::duration_cast(curr - start); + s.m_perf_counters = m_state.get_perf_counters(); + s.m_duration = chrono::duration_cast(curr - start); for (frame const & fr : m_state.m_call_stack) { if (fr.m_curr_fn_idx != g_null_fn_idx && (s.m_stack.empty() || s.m_stack.back().first != fr.m_curr_fn_idx)) { @@ -3456,8 +3466,9 @@ auto vm_state::profiler::get_snapshots() -> snapshots { std::unordered_map cum_times; for (snapshot_core const & s : m_snapshots) { snapshot new_s; - new_s.m_duration = s.m_duration; - r.m_total_time += s.m_duration; + new_s.m_duration = s.m_duration; + new_s.m_perf_counters = s.m_perf_counters; + r.m_total_time += s.m_duration; auto & new_stack = new_s.m_stack; std::unordered_set decl_already_seen_in_this_stack; for (auto const & p : s.m_stack) { @@ -3506,6 +3517,15 @@ static bool equal_fns(vm_state::profiler::snapshot const & s1, vm_state::profile #endif void vm_state::profiler::snapshots::display(std::ostream & out) const { + if (!m_snapshots.empty()) { + performance_counters const & c = m_snapshots.back().m_perf_counters; + if (c.m_num_constructor_allocs > 0) + out << "num. allocated objects: " << c.m_num_constructor_allocs << "\n"; + if (c.m_num_closure_allocs > 0) + out << "num. allocated closures: " << c.m_num_closure_allocs << "\n"; + if (c.m_num_mpz_allocs > 0) + out << "num. allocated big nums: " << c.m_num_mpz_allocs << "\n"; + } for (auto & cum_time : m_cum_times) { out << std::setw(5) << cum_time.second.count() << "ms " << std::setw(5) << std::fixed << std::setprecision(1) diff --git a/src/library/vm/vm.h b/src/library/vm/vm.h index 901710438a..56bdaf7735 100644 --- a/src/library/vm/vm.h +++ b/src/library/vm/vm.h @@ -599,6 +599,12 @@ public: /** \brief Virtual machine for executing VM bytecode. */ class vm_state { + struct performance_counters { + size_t m_num_constructor_allocs{0}; + size_t m_num_closure_allocs{0}; + size_t m_num_mpz_allocs{0}; + }; +public: typedef std::vector decl_vector; typedef std::vector> cache_vector; typedef unsigned_map decl_map; @@ -641,6 +647,8 @@ class vm_state { debugger_state_ptr m_debugger_state_ptr; bool m_was_updated{false}; /* set to true if update_env is invoked */ + performance_counters m_perf_counters; + void debugger_init(); void debugger_step(); void push_local_info(unsigned idx, vm_local_info const & info); @@ -673,6 +681,8 @@ public: void update_env(environment const & env); bool env_was_updated() const { return m_was_updated; } + bool profiling() const { return m_profiling; } + /* Auxiliary object for temporarily resetting env_was_updated flag. */ class reset_env_was_updated_flag { vm_state & m_state; @@ -801,6 +811,7 @@ public: struct snapshot_core { chrono::milliseconds m_duration; std::vector> m_stack; + performance_counters m_perf_counters; }; vm_state & m_state; atomic m_stop; @@ -815,6 +826,7 @@ public: struct snapshot { chrono::milliseconds m_duration; std::vector> m_stack; + performance_counters m_perf_counters; }; struct snapshots { @@ -827,6 +839,12 @@ public: bool enabled() const { return m_thread_ptr.get() != nullptr; } snapshots get_snapshots(); }; + + /* performance counters */ + void inc_constructor_allocs() { m_perf_counters.m_num_constructor_allocs++; } + void inc_closure_allocs() { m_perf_counters.m_num_closure_allocs++; } + void inc_mpz_allocs() { m_perf_counters.m_num_mpz_allocs++; } + performance_counters const & get_perf_counters() const { return m_perf_counters; } }; /** \brief Helper class for setting thread local vm_state object */