Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions storage/innobase/include/dict0mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -2157,6 +2157,10 @@ struct dict_table_t {
void lock_mutex_lock() { lock_latch.wr_lock(ut_d(SRW_LOCK_CALL)); }
/** Try to acquire exclusive lock_latch */
bool lock_mutex_trylock() { return lock_latch.wr_lock_try(); }
/** Try to acquire exclusive lock_latch with a bounded spin (no syscall,
no blocking), for the lock-release fast paths that hold trx->mutex.
@return whether lock_latch was acquired */
bool lock_mutex_trylock_spin() noexcept;
/** Release exclusive lock_latch */
void lock_mutex_unlock() { lock_latch.wr_unlock(); }
/** Acquire shared lock_latch */
Expand Down
4 changes: 4 additions & 0 deletions storage/innobase/include/lock0lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,10 @@ class lock_sys_t
/** @return whether this latch is possibly held by any thread */
bool is_locked() const noexcept { return lock.is_locked(); }
#endif
/** Try to acquire the latch with a bounded spin (no syscall, no
blocking), for the lock-release fast paths that hold trx->mutex.
@return whether the latch was acquired */
bool try_acquire_spin() noexcept;
};

public:
Expand Down
137 changes: 123 additions & 14 deletions storage/innobase/lock/lock0lock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Created 5/7/1996 Heikki Tuuri
#include "que0que.h"
#include "scope.h"
#include "buf0buf.h"
#include "my_cpu.h"
#include <debug_sync.h>
#include <mysql/service_thd_mdl.h>

Expand Down Expand Up @@ -4529,10 +4530,73 @@ lock_rec_unlock(
g.cell(), first_lock, heap_no);
}

/** Spin budget for lock_release_try(), lock_release_on_prepare_try() and
lock_rec_unlock_unmodified() trylock attempts on per-cell and per-table
latches.

Those paths hold trx->mutex while attempting the trylock (the reverse of
the standard order used by lock_rec_convert_impl_to_expl()), so blocking
acquisition would risk deadlock. A single CAS attempt fails too readily
under contention: any one failure across the trx's record/table locks
marks the whole try-pass as unsuccessful, and after 5 such passes
lock_release() escalates to lock_sys.wr_lock() for the entire trx, which
then blocks every concurrent lock_sys.rd_lock() acquirer in lock_rec_lock()
and lock_table().
Comment thread
Copilot marked this conversation as resolved.

A bounded spin (CAS, PAUSE between attempts, no syscall, no blocking) lets a
transient holder release without changing the deadlock-avoidance guarantee. The
upper bound on extra trx->mutex hold time is LOCK_RELEASE_TRY_SPIN_BUDGET
* pause-cost (tens to ~100ns per iteration, microarchitecture-dependent). */
static constexpr unsigned LOCK_RELEASE_TRY_SPIN_BUDGET= 16;
static_assert(LOCK_RELEASE_TRY_SPIN_BUDGET, "the !--spin loops would underflow");

ATTRIBUTE_NOINLINE
bool lock_sys_t::hash_latch::try_acquire_spin() noexcept
{
for (unsigned spin= LOCK_RELEASE_TRY_SPIN_BUDGET;;)
{
if (try_acquire())
return true;
if (!--spin)
return false;
MY_RELAX_CPU();
}
Comment thread
iMineLink marked this conversation as resolved.
}

ATTRIBUTE_NOINLINE
bool dict_table_t::lock_mutex_trylock_spin() noexcept
{
for (unsigned spin= LOCK_RELEASE_TRY_SPIN_BUDGET;;)
{
if (lock_mutex_trylock())
return true;
if (!--spin)
return false;
MY_RELAX_CPU();
}
}

/** Escalate the optimistic lock-release loop to the exclusive lock_sys.latch
once no-progress passes outpace progress passes by this much: a pass that
frees no lock increments the counter, a pass that frees one decrements it
(floored at 0). A fully stuck transaction still escalates after 5 passes, as
the previous fixed cap did. */
static constexpr unsigned LOCK_RELEASE_MAX_STALLS= 5;

/** Hard ceiling on optimistic release passes, independent of the stall
counter: bounds the loop when concurrent activity keeps adding to the lock set
(implicit-to-explicit conversion during XA PREPARE, or lock_rec_move()
relocation) so that progress never converges. Set well above
LOCK_RELEASE_MAX_STALLS so it only caps sustained churn. */
static constexpr unsigned LOCK_RELEASE_MAX_PASSES= 100;

/** Release the explicit locks of a committing transaction,
and release possible other transactions waiting because of these locks.
@param trx a committing transaction
@param made_progress set to whether this pass freed a lock, computed under
trx->mutex (so the caller needs no unlatched read)
@return whether the operation succeeded */
static bool lock_release_try(trx_t *trx)
static bool lock_release_try(trx_t *trx, bool *made_progress)
{
/* At this point, trx->lock.trx_locks cannot be modified by other
threads, because our transaction has been committed.
Expand All @@ -4546,6 +4610,7 @@ static bool lock_release_try(trx_t *trx)
DBUG_ASSERT(!trx->is_referenced());

bool all_released= true;
*made_progress= false;
restart:
ulint count= 1000;
/* We will not attempt hardware lock elision (memory transaction)
Expand Down Expand Up @@ -4573,12 +4638,13 @@ static bool lock_release_try(trx_t *trx)
auto &lock_hash= lock_sys.hash_get(lock->type_mode);
auto cell= lock_hash.cell_get(lock->un_member.rec_lock.page_id.fold());
auto latch= lock_sys_t::hash_table::latch(cell);
if (!latch->try_acquire())
if (!latch->try_acquire_spin())
all_released= false;
else
{
lock_rec_dequeue_from_page(lock, false);
latch->release();
*made_progress= true;
}
}
else
Expand All @@ -4588,12 +4654,13 @@ static bool lock_release_try(trx_t *trx)
ut_ad(table->id >= DICT_HDR_FIRST_ID ||
(lock->mode() != LOCK_IX && lock->mode() != LOCK_X) ||
trx->dict_operation || trx->was_dict_operation);
if (!table->lock_mutex_trylock())
if (!table->lock_mutex_trylock_spin())
all_released= false;
else
{
lock_table_dequeue(lock, false);
table->lock_mutex_unlock();
*made_progress= true;
}
}

Expand Down Expand Up @@ -4623,9 +4690,25 @@ void lock_release(trx_t *trx)
#endif
ulint count;

for (count= 5; count--; )
if (lock_release_try(trx))
/* Optimistic release under the shared latch. lock_release_try() reports
whether it freed a lock, computed under trx->mutex: we must not read
trx_locks here, as a concurrent lock_rec_move() can change it between
passes. Escalate on LOCK_RELEASE_MAX_STALLS net stalls or
LOCK_RELEASE_MAX_PASSES total passes. */
unsigned stalls= 0;
for (count= 0; count < LOCK_RELEASE_MAX_PASSES; count++)
{
bool progressed;
if (lock_release_try(trx, &progressed))
goto released;
if (progressed)
{
if (stalls)
stalls--;
}
else if (++stalls == LOCK_RELEASE_MAX_STALLS)
break;
}

/* Fall back to acquiring lock_sys.latch in exclusive mode */
restart:
Expand Down Expand Up @@ -4751,13 +4834,16 @@ static void lock_rec_unlock(hash_cell_t &cell, lock_t *lock, ulint heap_no)
@param cell lock_sys.rec_hash cell of lock
@param lock record lock
@param offsets storage for rec_get_offsets()
@param made_progress (CELL only) set to true when any record lock bit is
actually cleared; the GLOBAL instantiation never reads it
@tparam latch_type how the caller of the function latched lock_sys,
GLOBAL or CELL
@return true if the cell was latched successfully or if latch_type is GLOBAL,
false otherwise */
template <lock_sys_latch_type latch_type>
bool lock_rec_unlock_unmodified(buf_block_t *block, hash_cell_t *cell,
lock_t *lock, rec_offs *offsets)
lock_t *lock, rec_offs *offsets,
bool *made_progress= nullptr)
{
DEBUG_SYNC_C("lock_rec_unlock_unmodified_start");
lock_sys.assert_locked(*lock);
Expand Down Expand Up @@ -4786,6 +4872,8 @@ bool lock_rec_unlock_unmodified(buf_block_t *block, hash_cell_t *cell,
continue;
unlock_rec:
lock_rec_unlock(*cell, lock, i);
if constexpr (latch_type == CELL)
*made_progress= true;
}
else
{
Expand Down Expand Up @@ -4831,7 +4919,7 @@ bool lock_rec_unlock_unmodified(buf_block_t *block, hash_cell_t *cell,
computed cell address invalid */
cell= lock_sys.rec_hash.cell_get(
lock->un_member.rec_lock.page_id.fold());
if (!lock_sys_t::hash_table::latch(cell)->try_acquire())
if (!lock_sys_t::hash_table::latch(cell)->try_acquire_spin())
return false;
}
if (lock->trx != impl_trx)
Expand All @@ -4847,7 +4935,8 @@ and wake up possible other transactions waiting because of these locks.
@param trx transaction in XA PREPARE state
@param unlock_unmodified must unmodified by the trx records be unlocked or not
@return whether all locks were released */
static bool lock_release_on_prepare_try(trx_t *trx, bool unlock_unmodified)
static bool lock_release_on_prepare_try(trx_t *trx, bool unlock_unmodified,
bool *made_progress)
{
/* At this point, trx->lock.trx_locks can still be modified by other
threads to convert implicit exclusive locks into explicit ones.
Expand All @@ -4858,6 +4947,10 @@ static bool lock_release_on_prepare_try(trx_t *trx, bool unlock_unmodified)
DBUG_ASSERT(trx->state == TRX_STATE_PREPARED);

bool all_released= true;
/* Set at every site that frees a lock, including the unlock_unmodified path:
lock_rec_unlock_unmodified() reports through made_progress whether it actually
cleared any bit, since it returns true even when it frees nothing. */
*made_progress= false;
mtr_t mtr{trx};
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs *offsets= offsets_;
Expand Down Expand Up @@ -4889,17 +4982,19 @@ static bool lock_release_on_prepare_try(trx_t *trx, bool unlock_unmodified)
const auto fold = lock->un_member.rec_lock.page_id.fold();
auto cell= lock_sys.rec_hash.cell_get(fold);
auto latch= lock_sys_t::hash_table::latch(cell);
if (latch->try_acquire())
if (latch->try_acquire_spin())
{
if (!rec_granted_exclusive_not_gap)
{
lock_rec_dequeue_from_page(lock, false);
latch->release();
*made_progress= true;
}
else if (supremum_bit)
{
lock_rec_unlock(*cell, lock, PAGE_HEAP_NO_SUPREMUM);
latch->release();
*made_progress= true;
}
else if (unlock_unmodified)
{
Expand All @@ -4926,10 +5021,10 @@ static bool lock_release_on_prepare_try(trx_t *trx, bool unlock_unmodified)
computed cell address invalid */
cell= lock_sys.rec_hash.cell_get(fold);
latch= lock_sys_t::hash_table::latch(cell);
if (latch->try_acquire())
if (latch->try_acquire_spin())
{
if (!lock_rec_unlock_unmodified<CELL>(block, cell, lock,
offsets))
offsets, made_progress))
all_released= false;
else
latch->release();
Comment thread
iMineLink marked this conversation as resolved.
Expand Down Expand Up @@ -4962,10 +5057,11 @@ static bool lock_release_on_prepare_try(trx_t *trx, bool unlock_unmodified)
switch (lock->mode()) {
case LOCK_IS:
case LOCK_S:
if (table->lock_mutex_trylock())
if (table->lock_mutex_trylock_spin())
{
lock_table_dequeue(lock, false);
table->lock_mutex_unlock();
*made_progress= true;
}
else
all_released= false;
Expand Down Expand Up @@ -5031,9 +5127,22 @@ void lock_release_on_prepare(trx_t *trx)
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF("skip_lock_release_on_prepare_try", goto skip_try;);
#endif
for (ulint count= 5; count--; )
if (lock_release_on_prepare_try(trx, unlock_unmodified))
/* Same optimistic-release loop as lock_release() (see there). A prepared
transaction is still active, so trx_locks may grow between passes, making
LOCK_RELEASE_MAX_PASSES the effective bound. */
for (unsigned count= 0, stalls= 0; count < LOCK_RELEASE_MAX_PASSES; count++)
{
bool progressed;
if (lock_release_on_prepare_try(trx, unlock_unmodified, &progressed))
return;
if (progressed)
{
if (stalls)
stalls--;
}
else if (++stalls == LOCK_RELEASE_MAX_STALLS)
break;
}
#ifdef ENABLED_DEBUG_SYNC
skip_try:
#endif
Expand Down