Skip to content

Commit

Permalink
MDEV-29293 Refactor MDL BF abort logic
Browse files Browse the repository at this point in the history
In this commit, the BF abort logic is refactored to eliminate
the need to call back to server from InnoDB wsrep_abort_transaction().
This is to simplify the MDL BF abort codepath so that the direction
is always only server -> InnoDB. The main change is in wsrep_abort_thd(),
which first does the BF abort through wsrep-lib, and then continues to
ha_abort_transaction() if the wsrep-lib BF abort was successful and no
background rollback was started for victim.

KILL command handling for wsrep threads was changed so that the
thread to be killed is flagged first with wsrep_abort_by_kill to
avoid calling storage engine ha_kill_query() from awake_no_mutex().
KILL for wsrep thread is executed by awake_no_mutex() combined
with call to ha_abort_transaction().

The locking order for wsrep BF aborts is now:

  lock_sys.mutex
  trx.mutex
  LOCK_thd_kill
  LOCK_thd_data

With this, BF aborts which originate from InnoDB lock manager
don't have to release lock_sys.mutex and trx.mutex in between,
which reduces chance for race conditions.

In order to make the above locking order work for
wsrep_abort_transaction(), the function records InnoDB transaction
ID and releases victim LOCK_thd_kill and LOCK_thd_data. InnoDB
lock_sys.mutex is then grabbed and the victim InnoDB trx object
is looked up by InnoDB transaction ID.

Locking/unlocking LOCK_thd_kill and LOCK_thd_data was reorganized
so that it happens in the visible scope as much as possible.

Remove LOCK_thd_kill from wsrep_thd_LOCK/UNLOCK to allow more
fine grained locking for SR BF abort which may require locking
of victim LOCK_thd_kill. Added explicit call for
wsrep_thd_kill_LOCK/UNLOCK where appropriate.

Wsrep-lib was updated to version which allows external
locking for BF abort calls.

Changes to MTR tests:
- Disable galera_bf_abort_group_commit. This test is going to
  be removed (MDEV-30855).
- Make galera_var_retry_autocommit result more readable by echoing
  cases and expectations into result. Only one expected result for
  reap to verify that server returns expected status for query.
- Record galera_gcache_recover_manytrx as result file was incomplete.
  Trivial change.
- Make galera_create_table_as_select more deterministic:
  Wait until CTAS execution has reached MDL wait for multi-master
  conflict case. Expected error from multi-master conflict is
  ER_QUERY_INTERRUPTED. This is because CTAS does not yet have open
  wsrep transaction when it is waiting for MDL, query gets interrupted
  instead of BF aborted. This should be addressed in separate task.
- Test galera_bf_abort_registering to check that registering trx gets
  BF aborted through MDL.

Co-authored-by: Jan Lindström <[email protected]>
  • Loading branch information
temeo and janlindstrom committed Apr 14, 2023
1 parent 0b23494 commit 50ac97f
Show file tree
Hide file tree
Showing 19 changed files with 280 additions and 199 deletions.
1 change: 1 addition & 0 deletions mysql-test/suite/galera/disabled.def
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ galera_bf_abort_shutdown : MDEV-29918 Assertion failure on galera_bf_abort_shutd
galera_wan : [ERROR] WSREP: /home/buildbot/buildbot/build/gcs/src/gcs_state_msg.cpp:gcs_state_msg_get_quorum():947: Failed to establish quorum.
galera_var_ignore_apply_errors : 28: "Server did not transition to READY state"
MDEV-27713 : test is using get_lock(), which is now rejected in cluster
galera_bf_abort_group_commit : PR to remove the test exists
20 changes: 20 additions & 0 deletions mysql-test/suite/galera/r/galera_bf_abort_registering.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
connection node_1;
SET SESSION wsrep_retry_autocommit = 0;
SET SESSION DEBUG_SYNC = "wsrep_after_trans_register_ha SIGNAL reached WAIT_FOR continue";
INSERT INTO t1 VALUES (1);
connect node_1_ctrl, 127.0.0.1, root, , test, $NODE_MYPORT_1;
SET DEBUG_SYNC = "now WAIT_FOR reached";
SET GLOBAL DEBUG_DBUG = "+d,sync.wsrep_abort_transaction_read_only";
connection node_2;
ALTER TABLE t1 ADD COLUMN f2 INT;
connection node_1_ctrl;
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_abort_transaction_read_only_reached";
SET DEBUG_SYNC = "now SIGNAL continue";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_abort_transaction_read_only";
connection node_1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SET DEBUG_SYNC = "RESET";
DROP TABLE t1;
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
LOCK TABLE t2 WRITE;
connection node_1;
CREATE TABLE t1 AS SELECT * FROM t2;;
connection node_1a;
connection node_2;
SELECT COUNT(*) = 5 FROM t2;
COUNT(*) = 5
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,3 @@ connection node_1;
call mtr.add_suppression("Error in Log_event::read_log_event():.*");
CALL mtr.add_suppression("conflict state 7 after post commit");
CALL mtr.add_suppression("Skipped GCache ring buffer recovery");
connection node_2;
call mtr.add_suppression("Error in Log_event::read_log_event():.*");
CALL mtr.add_suppression("Skipped GCache ring buffer recovery");
23 changes: 14 additions & 9 deletions mysql-test/suite/galera/r/galera_var_retry_autocommit.result
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
connection node_2;
connection node_1;
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
# With wsrep_retry_autocommit = 0, error is certain
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
SET SESSION wsrep_retry_autocommit = 0;
Expand All @@ -12,11 +13,12 @@ connection node_2;
TRUNCATE TABLE t1;
connection node_1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SELECT COUNT(*) FROM t1;
COUNT(*)
SELECT COUNT(*) AS expect_0 FROM t1;
expect_0
0
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
# With wsrep_retry_autocommit = 1, success against one TRUNCATE
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
SET SESSION wsrep_retry_autocommit = 1;
Expand All @@ -27,11 +29,12 @@ SET DEBUG_SYNC = 'now WAIT_FOR before_cert';
connection node_2;
TRUNCATE TABLE t1;
connection node_1;
SELECT COUNT(*) FROM t1;
COUNT(*)
0
SELECT COUNT(*) AS expect_1 FROM t1;
expect_1
1
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
# With wsrep_retry_autcommit = 1, failure against multiple TRUNCATEs
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
SET SESSION wsrep_retry_autocommit = 1;
Expand All @@ -51,22 +54,24 @@ SET DEBUG_SYNC = 'now SIGNAL wsrep_retry_autocommit_continue WAIT_FOR before_cer
connection node_2;
TRUNCATE TABLE t1;
connection node_1a;
SELECT COUNT(*) FROM t1;
COUNT(*)
SELECT COUNT(*) AS expect_0 FROM t1;
expect_0
0
connection node_1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SET DEBUG_SYNC = 'RESET';
SET GLOBAL debug_dbug = NULL;
DROP TABLE t1;
# With wsrep_retry_autocommit = 64, success against 64 TRUNCATEs
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
SET SESSION wsrep_retry_autocommit = 64;
SET GLOBAL debug_dbug = '+d,sync.wsrep_retry_autocommit';
SET DEBUG_SYNC = 'wsrep_before_certification SIGNAL before_cert WAIT_FOR continue EXECUTE 64';
INSERT INTO t1 VALUES (5);
connection node_1;
SELECT COUNT(*) FROM t1;
COUNT(*)
SELECT COUNT(*) AS expect_1 FROM t1;
expect_1
1
SET DEBUG_SYNC = 'RESET';
SET GLOBAL debug_dbug = NULL;
Expand Down
45 changes: 45 additions & 0 deletions mysql-test/suite/galera/t/galera_bf_abort_registering.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#
# Test that BF abort through MDL aborts InnoDB transaction which
# is registering.
#

--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug_sync.inc

CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;

--let $debug_dbug_orig = `SELECT VARIABLE_VALUE FROM information_schema.global_variables WHERE VARIABLE_NAME = 'debug_dbug'`

--connection node_1
SET SESSION wsrep_retry_autocommit = 0;
# Stop execution in trans_register_ha()
SET SESSION DEBUG_SYNC = "wsrep_after_trans_register_ha SIGNAL reached WAIT_FOR continue";
--send INSERT INTO t1 VALUES (1)

--connect node_1_ctrl, 127.0.0.1, root, , test, $NODE_MYPORT_1
SET DEBUG_SYNC = "now WAIT_FOR reached";

# Stop APPLIER execution in wsrep_abort_transaction_read_only after
# finding that victim is read only.
SET GLOBAL DEBUG_DBUG = "+d,sync.wsrep_abort_transaction_read_only";

--connection node_2
ALTER TABLE t1 ADD COLUMN f2 INT;

--connection node_1_ctrl
# Wait for BF thread to arrive in sync point
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_abort_transaction_read_only_reached";
# Let the local INSERT continue
SET DEBUG_SYNC = "now SIGNAL continue";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_abort_transaction_read_only";
--connection node_1
--error ER_LOCK_DEADLOCK
--reap

SET DEBUG_SYNC = "RESET";
--disable_query_log
--eval SET GLOBAL DEBUG_DBUG = "$debug_dbug_orig"
--enable_query_log

DROP TABLE t1;
6 changes: 5 additions & 1 deletion mysql-test/suite/galera/t/galera_create_table_as_select.test
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ LOCK TABLE t2 WRITE;
--connection node_1
--send CREATE TABLE t1 AS SELECT * FROM t2;

--connection node_1a
--let $wait_condition = SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE STATE LIKE 'Waiting for table metadata lock%'
--source include/wait_condition.inc

--connection node_2
SELECT COUNT(*) = 5 FROM t2;
CREATE TABLE t1 AS SELECT * FROM t2;
Expand All @@ -121,7 +125,7 @@ CREATE TABLE t1 AS SELECT * FROM t2;
UNLOCK TABLES;

--connection node_1
--error ER_TABLE_EXISTS_ERROR,ER_LOCK_DEADLOCK
--error ER_TABLE_EXISTS_ERROR,ER_QUERY_INTERRUPTED
--reap

DROP TABLE t1, t2;
Expand Down
32 changes: 9 additions & 23 deletions mysql-test/suite/galera/t/galera_var_retry_autocommit.test
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@

--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1

#
# With wsrep_retry_autocommit = 0, error is certain
#

--echo # With wsrep_retry_autocommit = 0, error is certain
--connection node_1
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;

Expand All @@ -30,16 +27,13 @@ TRUNCATE TABLE t1;
--connection node_1
--error ER_LOCK_DEADLOCK
--reap
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) AS expect_0 FROM t1;

SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;


#
# With wsrep_retry_autocommit = 1, success against one TRUNCATE
#

--echo # With wsrep_retry_autocommit = 1, success against one TRUNCATE
--connection node_1
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;

Expand All @@ -54,18 +48,14 @@ SET DEBUG_SYNC = 'now WAIT_FOR before_cert';
TRUNCATE TABLE t1;

--connection node_1
--error 0,ER_LOCK_DEADLOCK
--reap
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) AS expect_1 FROM t1;

SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;


#
# With wsrep_retry_autcommit = 1, failure against multiple TRUNCATEs
#

--echo # With wsrep_retry_autcommit = 1, failure against multiple TRUNCATEs
--connection node_1
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;

Expand All @@ -90,21 +80,18 @@ SET DEBUG_SYNC = 'now SIGNAL wsrep_retry_autocommit_continue WAIT_FOR before_cer
TRUNCATE TABLE t1;

--connection node_1a
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) AS expect_0 FROM t1;

--connection node_1
--error 0,ER_LOCK_DEADLOCK
--error ER_LOCK_DEADLOCK
--reap

SET DEBUG_SYNC = 'RESET';
SET GLOBAL debug_dbug = NULL;
DROP TABLE t1;


#
# With wsrep_retry_autocommit = 64, success against 64 TRUNCATEs
#

--echo # With wsrep_retry_autocommit = 64, success against 64 TRUNCATEs
--connection node_1
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;

Expand Down Expand Up @@ -136,9 +123,8 @@ while ($count)
--enable_query_log

--connection node_1
--error 0,ER_LOCK_DEADLOCK
--reap
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) AS expect_1 FROM t1;

SET DEBUG_SYNC = 'RESET';
SET GLOBAL debug_dbug = NULL;
Expand Down
12 changes: 10 additions & 2 deletions sql/handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1362,6 +1362,9 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht_arg, ulonglong trxid)
DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid");
//gtid_set_performance_schema_values(thd);
}
#ifdef WITH_WSREP
DEBUG_SYNC(thd, "wsrep_after_trans_register_ha");
#endif /* WITH_WSREP */
DBUG_VOID_RETURN;
}

Expand Down Expand Up @@ -7589,6 +7592,9 @@ Compare_keys handler::compare_key_parts(const Field &old_field,
concurrent accesses. And it's an overkill to take LOCK_plugin and
iterate the whole installed_htons[] array every time.
@note Object victim_thd is not guaranteed to exist after this
function returns.
@param bf_thd brute force THD asking for the abort
@param victim_thd victim THD to be aborted
Expand All @@ -7602,7 +7608,8 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
if (!WSREP(bf_thd) &&
!(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU &&
wsrep_thd_is_toi(bf_thd))) {
wsrep_thd_UNLOCK(victim_thd);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
DBUG_RETURN(0);
}

Expand All @@ -7613,8 +7620,9 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
}
else
{
wsrep_thd_UNLOCK(victim_thd);
WSREP_WARN("Cannot abort InnoDB transaction");
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
}

DBUG_RETURN(0);
Expand Down
16 changes: 1 addition & 15 deletions sql/service_wsrep.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,12 @@ extern "C" my_bool wsrep_on(const THD *thd)

extern "C" void wsrep_thd_LOCK(const THD *thd)
{
mysql_mutex_lock(&thd->LOCK_thd_kill);
mysql_mutex_lock(&thd->LOCK_thd_data);
}

extern "C" void wsrep_thd_UNLOCK(const THD *thd)
{
mysql_mutex_unlock(&thd->LOCK_thd_data);
mysql_mutex_unlock(&thd->LOCK_thd_kill);
}

extern "C" void wsrep_thd_kill_LOCK(const THD *thd)
Expand Down Expand Up @@ -252,24 +250,16 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
{
WSREP_DEBUG("victim is killed already by %llu, skipping awake",
victim_thd->wsrep_aborter);
wsrep_thd_UNLOCK(victim_thd);
return false;
}

victim_thd->wsrep_aborter= bf_thd->thread_id;
/* for KILL command, use the chosen kill signal,
BF aborting uses KILL_QUERY_HARD
*/
if (wsrep_thd_is_local(bf_thd) && bf_thd->lex->sql_command == SQLCOM_KILL)
victim_thd->awake_no_mutex(bf_thd->lex->kill_signal);
else
victim_thd->awake_no_mutex(KILL_QUERY_HARD);
victim_thd->awake_no_mutex(KILL_QUERY_HARD);
}
else
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake for %llu",
thd_get_thread_id(victim_thd));

wsrep_thd_UNLOCK(victim_thd);
return ret;
}

Expand Down Expand Up @@ -406,10 +396,6 @@ extern "C" bool wsrep_thd_set_wsrep_aborter(THD *bf_thd, THD *victim_thd)
return true;
}
victim_thd->wsrep_aborter= bf_thd->thread_id;
if (wsrep_thd_is_local(bf_thd) && bf_thd->lex->sql_command == SQLCOM_KILL)
{
victim_thd->wsrep_abort_by_kill= bf_thd->lex->kill_signal;
}
WSREP_DEBUG("wsrep_thd_set_wsrep_aborter setting wsrep_aborter %u",
victim_thd->wsrep_aborter);
return false;
Expand Down
4 changes: 3 additions & 1 deletion sql/sql_class.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1915,7 +1915,9 @@ void THD::awake_no_mutex(killed_state state_to_set)
}

/* Interrupt target waiting inside a storage engine. */
if (state_to_set != NOT_KILLED && !wsrep_is_bf_aborted(this))
if (state_to_set != NOT_KILLED &&
IF_WSREP(!wsrep_is_bf_aborted(this) && wsrep_abort_by_kill == NOT_KILLED,
true))
ha_kill_query(this, thd_kill_level(this));

abort_current_cond_wait(false);
Expand Down
Loading

0 comments on commit 50ac97f

Please sign in to comment.