From 0b586ec25b2450ec9ab79c846cf7ea53d77fd6d7 Mon Sep 17 00:00:00 2001 From: Alessandro Vetere Date: Mon, 29 Jun 2026 18:10:27 +0200 Subject: [PATCH] MDEV-40129 Retry transient trylock failures in lock-release fast paths The trylock attempts on per-cell lock_sys_t::hash_latch (try_acquire()) and on per-table dict_table_t::lock_mutex_trylock() inside lock_release_try(), lock_release_on_prepare_try() and lock_rec_unlock_unmodified() now use a bounded spin loop (up to LOCK_RELEASE_TRY_SPIN_BUDGET CAS attempts, with MY_RELAX_CPU() between them) instead of a single CAS attempt. These paths hold trx->mutex while attempting the trylock, which is the reverse of the standard order used by lock_rec_convert_impl_to_expl(). Blocking acquisition is therefore unsafe, hence the trylock pattern. However, a single failed CAS marks the entire pass of lock_release_try() as unsuccessful, and after 5 such failed passes lock_release() falls back to exclusive lock_sys.wr_lock() for the whole transaction. That global wr_lock then blocks every concurrent lock_sys.rd_lock() acquirer in lock_rec_lock() and lock_table(), producing a server-wide convoy under heavy concurrency. The bounded spin (no syscall, no blocking) gives a transient latch holder time to release without weakening the deadlock-avoidance guarantee that motivated the trylock pattern. The extra trx->mutex hold time is bounded by LOCK_RELEASE_TRY_SPIN_BUDGET times the pause cost. This is a first, still to be fine-tuned implementation. Only the lock_release_try() path has been positively tested; the lock_release_on_prepare_try() path is not yet covered. --- storage/innobase/include/dict0mem.h | 4 ++ storage/innobase/include/lock0lock.h | 4 ++ storage/innobase/lock/lock0lock.cc | 59 +++++++++++++++++++++++++--- 3 files changed, 61 insertions(+), 6 deletions(-) diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 6569de9564647..3d79b41d18e84 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -2157,6 +2157,10 @@ struct dict_table_t { void lock_mutex_lock() { lock_latch.wr_lock(ut_d(SRW_LOCK_CALL)); } /** Try to acquire exclusive lock_latch */ bool lock_mutex_trylock() { return lock_latch.wr_lock_try(); } + /** Try to acquire exclusive lock_latch with a bounded spin (no syscall, + no blocking), for the lock-release fast paths that hold trx->mutex. + @return whether lock_latch was acquired */ + bool lock_mutex_trylock_spin() noexcept; /** Release exclusive lock_latch */ void lock_mutex_unlock() { lock_latch.wr_unlock(); } /** Acquire shared lock_latch */ diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 9c192f2a58644..06cdcf8d21470 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -663,6 +663,10 @@ class lock_sys_t /** @return whether this latch is possibly held by any thread */ bool is_locked() const noexcept { return lock.is_locked(); } #endif + /** Try to acquire the latch with a bounded spin (no syscall, no + blocking), for the lock-release fast paths that hold trx->mutex. + @return whether the latch was acquired */ + bool try_acquire_spin() noexcept; }; public: diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 62f6fd58a1b6b..657367df23a99 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -48,6 +48,7 @@ Created 5/7/1996 Heikki Tuuri #include "que0que.h" #include "scope.h" #include "buf0buf.h" +#include "my_cpu.h" #include #include @@ -4529,6 +4530,52 @@ lock_rec_unlock( g.cell(), first_lock, heap_no); } +/** Spin budget for lock_release_try(), lock_release_on_prepare_try() and +lock_rec_unlock_unmodified() trylock attempts on per-cell and per-table +latches. + +Those paths hold trx->mutex while attempting the trylock (the reverse of +the standard order used by lock_rec_convert_impl_to_expl()), so blocking +acquisition would risk deadlock. A single CAS attempt fails too readily +under contention: any one failure across the trx's record/table locks +marks the whole try-pass as unsuccessful, and after 5 such passes +lock_release() escalates to lock_sys.wr_lock() for the entire trx, which +then blocks every concurrent lock_sys.rd_lock() acquirer in lock_rec_lock() +and lock_table(). + +A bounded spin (CAS, PAUSE between attempts, no syscall, no blocking) lets a +transient holder release without changing the deadlock-avoidance guarantee. The +upper bound on extra trx->mutex hold time is LOCK_RELEASE_TRY_SPIN_BUDGET +* pause-cost (tens to ~100ns per iteration, microarchitecture-dependent). */ +static constexpr unsigned LOCK_RELEASE_TRY_SPIN_BUDGET= 16; +static_assert(LOCK_RELEASE_TRY_SPIN_BUDGET, "the !--spin loops would underflow"); + +ATTRIBUTE_NOINLINE +bool lock_sys_t::hash_latch::try_acquire_spin() noexcept +{ + for (unsigned spin= LOCK_RELEASE_TRY_SPIN_BUDGET;;) + { + if (try_acquire()) + return true; + if (!--spin) + return false; + MY_RELAX_CPU(); + } +} + +ATTRIBUTE_NOINLINE +bool dict_table_t::lock_mutex_trylock_spin() noexcept +{ + for (unsigned spin= LOCK_RELEASE_TRY_SPIN_BUDGET;;) + { + if (lock_mutex_trylock()) + return true; + if (!--spin) + return false; + MY_RELAX_CPU(); + } +} + /** Release the explicit locks of a committing transaction, and release possible other transactions waiting because of these locks. @return whether the operation succeeded */ @@ -4573,7 +4620,7 @@ static bool lock_release_try(trx_t *trx) auto &lock_hash= lock_sys.hash_get(lock->type_mode); auto cell= lock_hash.cell_get(lock->un_member.rec_lock.page_id.fold()); auto latch= lock_sys_t::hash_table::latch(cell); - if (!latch->try_acquire()) + if (!latch->try_acquire_spin()) all_released= false; else { @@ -4588,7 +4635,7 @@ static bool lock_release_try(trx_t *trx) ut_ad(table->id >= DICT_HDR_FIRST_ID || (lock->mode() != LOCK_IX && lock->mode() != LOCK_X) || trx->dict_operation || trx->was_dict_operation); - if (!table->lock_mutex_trylock()) + if (!table->lock_mutex_trylock_spin()) all_released= false; else { @@ -4831,7 +4878,7 @@ bool lock_rec_unlock_unmodified(buf_block_t *block, hash_cell_t *cell, computed cell address invalid */ cell= lock_sys.rec_hash.cell_get( lock->un_member.rec_lock.page_id.fold()); - if (!lock_sys_t::hash_table::latch(cell)->try_acquire()) + if (!lock_sys_t::hash_table::latch(cell)->try_acquire_spin()) return false; } if (lock->trx != impl_trx) @@ -4889,7 +4936,7 @@ static bool lock_release_on_prepare_try(trx_t *trx, bool unlock_unmodified) const auto fold = lock->un_member.rec_lock.page_id.fold(); auto cell= lock_sys.rec_hash.cell_get(fold); auto latch= lock_sys_t::hash_table::latch(cell); - if (latch->try_acquire()) + if (latch->try_acquire_spin()) { if (!rec_granted_exclusive_not_gap) { @@ -4926,7 +4973,7 @@ static bool lock_release_on_prepare_try(trx_t *trx, bool unlock_unmodified) computed cell address invalid */ cell= lock_sys.rec_hash.cell_get(fold); latch= lock_sys_t::hash_table::latch(cell); - if (latch->try_acquire()) + if (latch->try_acquire_spin()) { if (!lock_rec_unlock_unmodified(block, cell, lock, offsets)) @@ -4962,7 +5009,7 @@ static bool lock_release_on_prepare_try(trx_t *trx, bool unlock_unmodified) switch (lock->mode()) { case LOCK_IS: case LOCK_S: - if (table->lock_mutex_trylock()) + if (table->lock_mutex_trylock_spin()) { lock_table_dequeue(lock, false); table->lock_mutex_unlock();