From bda8c5983ba41274cca6144f4a1f29b23eaa44a7 Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Tue, 1 Aug 2017 14:03:18 +0100 Subject: [PATCH 1/3] epee: add nanosecond timer and pause/restart profiling macros Nanosecond timer precision won't work on Windows, but we don't care since I'm using that just for profiling incremental code paths, but a Windows coder is welcome to add it if there's a way. --- contrib/epee/include/misc_os_dependent.h | 15 ++++++++++----- contrib/epee/include/profile_tools.h | 7 +++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/contrib/epee/include/misc_os_dependent.h b/contrib/epee/include/misc_os_dependent.h index 806d3e83..69ded09e 100644 --- a/contrib/epee/include/misc_os_dependent.h +++ b/contrib/epee/include/misc_os_dependent.h @@ -48,17 +48,17 @@ namespace epee namespace misc_utils { - inline uint64_t get_tick_count() + inline uint64_t get_ns_count() { #if defined(_MSC_VER) - return ::GetTickCount64(); + return ::GetTickCount64() * 1000000; #elif defined(WIN32) static LARGE_INTEGER pcfreq = {0}; LARGE_INTEGER ticks; if (!pcfreq.QuadPart) QueryPerformanceFrequency(&pcfreq); QueryPerformanceCounter(&ticks); - ticks.QuadPart *= 1000; /* we want msec */ + ticks.QuadPart *= 1000000000; /* we want nsec */ return ticks.QuadPart / pcfreq.QuadPart; #elif defined(__MACH__) clock_serv_t cclock; @@ -68,16 +68,21 @@ namespace misc_utils clock_get_time(cclock, &mts); mach_port_deallocate(mach_task_self(), cclock); - return (mts.tv_sec * 1000) + (mts.tv_nsec/1000000); + return (mts.tv_sec * 1000000000) + (mts.tv_nsec); #else struct timespec ts; if(clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { return 0; } - return (ts.tv_sec * 1000) + (ts.tv_nsec/1000000); + return (ts.tv_sec * 1000000000) + (ts.tv_nsec); #endif } + inline uint64_t get_tick_count() + { + return get_ns_count() / 1000000; + } + inline int call_sys_cmd(const std::string& cmd) { diff --git a/contrib/epee/include/profile_tools.h b/contrib/epee/include/profile_tools.h index d3b1e4db..f285fe48 100644 --- a/contrib/epee/include/profile_tools.h +++ b/contrib/epee/include/profile_tools.h @@ -57,8 +57,15 @@ namespace epee #define TIME_MEASURE_START(var_name) uint64_t var_name = epee::misc_utils::get_tick_count(); +#define TIME_MEASURE_PAUSE(var_name) var_name = epee::misc_utils::get_tick_count() - var_name; +#define TIME_MEASURE_RESTART(var_name) var_name = epee::misc_utils::get_tick_count() - var_name; #define TIME_MEASURE_FINISH(var_name) var_name = epee::misc_utils::get_tick_count() - var_name; +#define TIME_MEASURE_NS_START(var_name) uint64_t var_name = epee::misc_utils::get_ns_count(); +#define TIME_MEASURE_NS_PAUSE(var_name) var_name = epee::misc_utils::get_ns_count() - var_name; +#define TIME_MEASURE_NS_RESTART(var_name) var_name = epee::misc_utils::get_ns_count() - var_name; +#define TIME_MEASURE_NS_FINISH(var_name) var_name = epee::misc_utils::get_ns_count() - var_name; + namespace profile_tools { struct local_call_account From 19d7f568ce839c5dc5581159234d1a9bd1d78ebc Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Tue, 1 Aug 2017 14:05:20 +0100 Subject: [PATCH 2/3] perf_timer: allow profiling more granular than millisecond --- src/common/perf_timer.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/common/perf_timer.h b/src/common/perf_timer.h index 56662ff2..bc8e0580 100644 --- a/src/common/perf_timer.h +++ b/src/common/perf_timer.h @@ -46,9 +46,9 @@ extern __thread std::vector *performance_timers; class PerformanceTimer { public: - PerformanceTimer(const std::string &s, el::Level l = el::Level::Debug): name(s), level(l), started(false) + PerformanceTimer(const std::string &s, uint64_t unit, el::Level l = el::Level::Debug): name(s), unit(unit), level(l), started(false) { - ticks = epee::misc_utils::get_tick_count(); + ticks = epee::misc_utils::get_ns_count(); if (!performance_timers) { MLOG(level, "PERF ----------"); @@ -69,9 +69,9 @@ public: ~PerformanceTimer() { performance_timers->pop_back(); - ticks = epee::misc_utils::get_tick_count() - ticks; + ticks = epee::misc_utils::get_ns_count() - ticks; char s[12]; - snprintf(s, sizeof(s), "%8llu ", (unsigned long long)ticks); + snprintf(s, sizeof(s), "%8llu ", (unsigned long long)ticks / (1000000000 / unit)); MLOG(level, "PERF " << s << std::string(performance_timers->size() * 2, ' ') << " " << name); if (performance_timers->empty()) { @@ -82,6 +82,7 @@ public: private: std::string name; + uint64_t unit; el::Level level; uint64_t ticks; bool started; @@ -89,7 +90,9 @@ private: void set_performance_timer_log_level(el::Level level); -#define PERF_TIMER(name) tools::PerformanceTimer pt_##name(#name, tools::performance_timer_log_level) -#define PERF_TIMER_L(name, l) tools::PerformanceTimer pt_##name(#name, l) +#define PERF_TIMER_UNIT(name, unit) tools::PerformanceTimer pt_##name(#name, unit, tools::performance_timer_log_level) +#define PERF_TIMER_UNIT_L(name, unit, l) tools::PerformanceTimer pt_##name(#name, unit, l) +#define PERF_TIMER(name) PERF_TIMER_UNIT(name, 1000) +#define PERF_TIMER_L(name, l) PERF_TIMER_UNIT_L(name, 1000, l) } From 5d4ef719b9763ce9c150b93d61e541f5bbc1bbb9 Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Tue, 1 Aug 2017 14:06:12 +0100 Subject: [PATCH 3/3] core: speed up output index unique set calculation A sort+uniq step was done for every tx in a 200 block chunk, causing a lot of repeated scanning as the size of the offset map got larger with every added tx. We now do the step only once at the end of the loop. Doing it this way potentially uses more memory, but testing shows that it's currently only about 2% more. --- src/cryptonote_core/blockchain.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cryptonote_core/blockchain.cpp b/src/cryptonote_core/blockchain.cpp index 745608b9..2420adc5 100644 --- a/src/cryptonote_core/blockchain.cpp +++ b/src/cryptonote_core/blockchain.cpp @@ -3866,17 +3866,17 @@ bool Blockchain::prepare_handle_incoming_blocks(const std::list> transactions(amounts.size());