From 50ac97f2ee64b4e3f2422f841be693ea04b04881 Mon Sep 17 00:00:00 2001 From: Teemu Ollakka Date: Wed, 12 Apr 2023 08:40:38 +0300 Subject: [PATCH] MDEV-29293 Refactor MDL BF abort logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this commit, the BF abort logic is refactored to eliminate the need to call back to server from InnoDB wsrep_abort_transaction(). This is to simplify the MDL BF abort codepath so that the direction is always only server -> InnoDB. The main change is in wsrep_abort_thd(), which first does the BF abort through wsrep-lib, and then continues to ha_abort_transaction() if the wsrep-lib BF abort was successful and no background rollback was started for victim. KILL command handling for wsrep threads was changed so that the thread to be killed is flagged first with wsrep_abort_by_kill to avoid calling storage engine ha_kill_query() from awake_no_mutex(). KILL for wsrep thread is executed by awake_no_mutex() combined with call to ha_abort_transaction(). The locking order for wsrep BF aborts is now: lock_sys.mutex trx.mutex LOCK_thd_kill LOCK_thd_data With this, BF aborts which originate from InnoDB lock manager don't have to release lock_sys.mutex and trx.mutex in between, which reduces chance for race conditions. In order to make the above locking order work for wsrep_abort_transaction(), the function records InnoDB transaction ID and releases victim LOCK_thd_kill and LOCK_thd_data. InnoDB lock_sys.mutex is then grabbed and the victim InnoDB trx object is looked up by InnoDB transaction ID. Locking/unlocking LOCK_thd_kill and LOCK_thd_data was reorganized so that it happens in the visible scope as much as possible. Remove LOCK_thd_kill from wsrep_thd_LOCK/UNLOCK to allow more fine grained locking for SR BF abort which may require locking of victim LOCK_thd_kill. Added explicit call for wsrep_thd_kill_LOCK/UNLOCK where appropriate. Wsrep-lib was updated to version which allows external locking for BF abort calls. Changes to MTR tests: - Disable galera_bf_abort_group_commit. This test is going to be removed (MDEV-30855). - Make galera_var_retry_autocommit result more readable by echoing cases and expectations into result. Only one expected result for reap to verify that server returns expected status for query. - Record galera_gcache_recover_manytrx as result file was incomplete. Trivial change. - Make galera_create_table_as_select more deterministic: Wait until CTAS execution has reached MDL wait for multi-master conflict case. Expected error from multi-master conflict is ER_QUERY_INTERRUPTED. This is because CTAS does not yet have open wsrep transaction when it is waiting for MDL, query gets interrupted instead of BF aborted. This should be addressed in separate task. - Test galera_bf_abort_registering to check that registering trx gets BF aborted through MDL. Co-authored-by: Jan Lindström --- mysql-test/suite/galera/disabled.def | 1 + .../r/galera_bf_abort_registering.result | 20 ++++ .../r/galera_create_table_as_select.result | 1 + .../r/galera_gcache_recover_manytrx.result | 3 - .../r/galera_var_retry_autocommit.result | 23 ++-- .../galera/t/galera_bf_abort_registering.test | 45 ++++++++ .../t/galera_create_table_as_select.test | 6 +- .../galera/t/galera_var_retry_autocommit.test | 32 ++---- sql/handler.cc | 12 ++- sql/service_wsrep.cc | 16 +-- sql/sql_class.cc | 4 +- sql/sql_parse.cc | 53 +++++---- sql/wsrep_mysqld.cc | 38 ++++--- sql/wsrep_server_service.cc | 13 +-- sql/wsrep_server_service.h | 2 +- sql/wsrep_thd.cc | 100 +++++++++-------- sql/wsrep_thd.h | 7 ++ storage/innobase/handler/ha_innodb.cc | 101 ++++++++++-------- wsrep-lib | 2 +- 19 files changed, 280 insertions(+), 199 deletions(-) create mode 100644 mysql-test/suite/galera/r/galera_bf_abort_registering.result create mode 100644 mysql-test/suite/galera/t/galera_bf_abort_registering.test diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def index a19619abe4c5a..babb58544d497 100644 --- a/mysql-test/suite/galera/disabled.def +++ b/mysql-test/suite/galera/disabled.def @@ -23,3 +23,4 @@ galera_bf_abort_shutdown : MDEV-29918 Assertion failure on galera_bf_abort_shutd galera_wan : [ERROR] WSREP: /home/buildbot/buildbot/build/gcs/src/gcs_state_msg.cpp:gcs_state_msg_get_quorum():947: Failed to establish quorum. galera_var_ignore_apply_errors : 28: "Server did not transition to READY state" MDEV-27713 : test is using get_lock(), which is now rejected in cluster +galera_bf_abort_group_commit : PR to remove the test exists diff --git a/mysql-test/suite/galera/r/galera_bf_abort_registering.result b/mysql-test/suite/galera/r/galera_bf_abort_registering.result new file mode 100644 index 0000000000000..9c9eee9b5cdf1 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_bf_abort_registering.result @@ -0,0 +1,20 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; +connection node_1; +SET SESSION wsrep_retry_autocommit = 0; +SET SESSION DEBUG_SYNC = "wsrep_after_trans_register_ha SIGNAL reached WAIT_FOR continue"; +INSERT INTO t1 VALUES (1); +connect node_1_ctrl, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET DEBUG_SYNC = "now WAIT_FOR reached"; +SET GLOBAL DEBUG_DBUG = "+d,sync.wsrep_abort_transaction_read_only"; +connection node_2; +ALTER TABLE t1 ADD COLUMN f2 INT; +connection node_1_ctrl; +SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_abort_transaction_read_only_reached"; +SET DEBUG_SYNC = "now SIGNAL continue"; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_abort_transaction_read_only"; +connection node_1; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +SET DEBUG_SYNC = "RESET"; +DROP TABLE t1; diff --git a/mysql-test/suite/galera/r/galera_create_table_as_select.result b/mysql-test/suite/galera/r/galera_create_table_as_select.result index 6f65ee99f0a18..beda5f30fe239 100644 --- a/mysql-test/suite/galera/r/galera_create_table_as_select.result +++ b/mysql-test/suite/galera/r/galera_create_table_as_select.result @@ -82,6 +82,7 @@ connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; LOCK TABLE t2 WRITE; connection node_1; CREATE TABLE t1 AS SELECT * FROM t2;; +connection node_1a; connection node_2; SELECT COUNT(*) = 5 FROM t2; COUNT(*) = 5 diff --git a/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result b/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result index 8495bfde2f9ae..9b1e8105c1c33 100644 --- a/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result +++ b/mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result @@ -134,6 +134,3 @@ connection node_1; call mtr.add_suppression("Error in Log_event::read_log_event():.*"); CALL mtr.add_suppression("conflict state 7 after post commit"); CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); -connection node_2; -call mtr.add_suppression("Error in Log_event::read_log_event():.*"); -CALL mtr.add_suppression("Skipped GCache ring buffer recovery"); diff --git a/mysql-test/suite/galera/r/galera_var_retry_autocommit.result b/mysql-test/suite/galera/r/galera_var_retry_autocommit.result index 56c2c995402b1..328a5bf763434 100644 --- a/mysql-test/suite/galera/r/galera_var_retry_autocommit.result +++ b/mysql-test/suite/galera/r/galera_var_retry_autocommit.result @@ -1,6 +1,7 @@ connection node_2; connection node_1; connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +# With wsrep_retry_autocommit = 0, error is certain connection node_1; CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; SET SESSION wsrep_retry_autocommit = 0; @@ -12,11 +13,12 @@ connection node_2; TRUNCATE TABLE t1; connection node_1; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction -SELECT COUNT(*) FROM t1; -COUNT(*) +SELECT COUNT(*) AS expect_0 FROM t1; +expect_0 0 SET DEBUG_SYNC = 'RESET'; DROP TABLE t1; +# With wsrep_retry_autocommit = 1, success against one TRUNCATE connection node_1; CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; SET SESSION wsrep_retry_autocommit = 1; @@ -27,11 +29,12 @@ SET DEBUG_SYNC = 'now WAIT_FOR before_cert'; connection node_2; TRUNCATE TABLE t1; connection node_1; -SELECT COUNT(*) FROM t1; -COUNT(*) -0 +SELECT COUNT(*) AS expect_1 FROM t1; +expect_1 +1 SET DEBUG_SYNC = 'RESET'; DROP TABLE t1; +# With wsrep_retry_autcommit = 1, failure against multiple TRUNCATEs connection node_1; CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; SET SESSION wsrep_retry_autocommit = 1; @@ -51,13 +54,15 @@ SET DEBUG_SYNC = 'now SIGNAL wsrep_retry_autocommit_continue WAIT_FOR before_cer connection node_2; TRUNCATE TABLE t1; connection node_1a; -SELECT COUNT(*) FROM t1; -COUNT(*) +SELECT COUNT(*) AS expect_0 FROM t1; +expect_0 0 connection node_1; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction SET DEBUG_SYNC = 'RESET'; SET GLOBAL debug_dbug = NULL; DROP TABLE t1; +# With wsrep_retry_autocommit = 64, success against 64 TRUNCATEs connection node_1; CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; SET SESSION wsrep_retry_autocommit = 64; @@ -65,8 +70,8 @@ SET GLOBAL debug_dbug = '+d,sync.wsrep_retry_autocommit'; SET DEBUG_SYNC = 'wsrep_before_certification SIGNAL before_cert WAIT_FOR continue EXECUTE 64'; INSERT INTO t1 VALUES (5); connection node_1; -SELECT COUNT(*) FROM t1; -COUNT(*) +SELECT COUNT(*) AS expect_1 FROM t1; +expect_1 1 SET DEBUG_SYNC = 'RESET'; SET GLOBAL debug_dbug = NULL; diff --git a/mysql-test/suite/galera/t/galera_bf_abort_registering.test b/mysql-test/suite/galera/t/galera_bf_abort_registering.test new file mode 100644 index 0000000000000..dce6899f3c240 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_abort_registering.test @@ -0,0 +1,45 @@ +# +# Test that BF abort through MDL aborts InnoDB transaction which +# is registering. +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug_sync.inc + +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; + +--let $debug_dbug_orig = `SELECT VARIABLE_VALUE FROM information_schema.global_variables WHERE VARIABLE_NAME = 'debug_dbug'` + +--connection node_1 +SET SESSION wsrep_retry_autocommit = 0; +# Stop execution in trans_register_ha() +SET SESSION DEBUG_SYNC = "wsrep_after_trans_register_ha SIGNAL reached WAIT_FOR continue"; +--send INSERT INTO t1 VALUES (1) + +--connect node_1_ctrl, 127.0.0.1, root, , test, $NODE_MYPORT_1 +SET DEBUG_SYNC = "now WAIT_FOR reached"; + +# Stop APPLIER execution in wsrep_abort_transaction_read_only after +# finding that victim is read only. +SET GLOBAL DEBUG_DBUG = "+d,sync.wsrep_abort_transaction_read_only"; + +--connection node_2 +ALTER TABLE t1 ADD COLUMN f2 INT; + +--connection node_1_ctrl +# Wait for BF thread to arrive in sync point +SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_abort_transaction_read_only_reached"; +# Let the local INSERT continue +SET DEBUG_SYNC = "now SIGNAL continue"; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_abort_transaction_read_only"; +--connection node_1 +--error ER_LOCK_DEADLOCK +--reap + +SET DEBUG_SYNC = "RESET"; +--disable_query_log +--eval SET GLOBAL DEBUG_DBUG = "$debug_dbug_orig" +--enable_query_log + +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_create_table_as_select.test b/mysql-test/suite/galera/t/galera_create_table_as_select.test index a6c1f65728059..cfee63e5e2769 100644 --- a/mysql-test/suite/galera/t/galera_create_table_as_select.test +++ b/mysql-test/suite/galera/t/galera_create_table_as_select.test @@ -113,6 +113,10 @@ LOCK TABLE t2 WRITE; --connection node_1 --send CREATE TABLE t1 AS SELECT * FROM t2; +--connection node_1a +--let $wait_condition = SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE STATE LIKE 'Waiting for table metadata lock%' +--source include/wait_condition.inc + --connection node_2 SELECT COUNT(*) = 5 FROM t2; CREATE TABLE t1 AS SELECT * FROM t2; @@ -121,7 +125,7 @@ CREATE TABLE t1 AS SELECT * FROM t2; UNLOCK TABLES; --connection node_1 ---error ER_TABLE_EXISTS_ERROR,ER_LOCK_DEADLOCK +--error ER_TABLE_EXISTS_ERROR,ER_QUERY_INTERRUPTED --reap DROP TABLE t1, t2; diff --git a/mysql-test/suite/galera/t/galera_var_retry_autocommit.test b/mysql-test/suite/galera/t/galera_var_retry_autocommit.test index bd10e448e06eb..30a1abab98993 100644 --- a/mysql-test/suite/galera/t/galera_var_retry_autocommit.test +++ b/mysql-test/suite/galera/t/galera_var_retry_autocommit.test @@ -10,10 +10,7 @@ --connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 -# -# With wsrep_retry_autocommit = 0, error is certain -# - +--echo # With wsrep_retry_autocommit = 0, error is certain --connection node_1 CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; @@ -30,16 +27,13 @@ TRUNCATE TABLE t1; --connection node_1 --error ER_LOCK_DEADLOCK --reap -SELECT COUNT(*) FROM t1; +SELECT COUNT(*) AS expect_0 FROM t1; SET DEBUG_SYNC = 'RESET'; DROP TABLE t1; -# -# With wsrep_retry_autocommit = 1, success against one TRUNCATE -# - +--echo # With wsrep_retry_autocommit = 1, success against one TRUNCATE --connection node_1 CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; @@ -54,18 +48,14 @@ SET DEBUG_SYNC = 'now WAIT_FOR before_cert'; TRUNCATE TABLE t1; --connection node_1 ---error 0,ER_LOCK_DEADLOCK --reap -SELECT COUNT(*) FROM t1; +SELECT COUNT(*) AS expect_1 FROM t1; SET DEBUG_SYNC = 'RESET'; DROP TABLE t1; -# -# With wsrep_retry_autcommit = 1, failure against multiple TRUNCATEs -# - +--echo # With wsrep_retry_autcommit = 1, failure against multiple TRUNCATEs --connection node_1 CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; @@ -90,10 +80,10 @@ SET DEBUG_SYNC = 'now SIGNAL wsrep_retry_autocommit_continue WAIT_FOR before_cer TRUNCATE TABLE t1; --connection node_1a -SELECT COUNT(*) FROM t1; +SELECT COUNT(*) AS expect_0 FROM t1; --connection node_1 ---error 0,ER_LOCK_DEADLOCK +--error ER_LOCK_DEADLOCK --reap SET DEBUG_SYNC = 'RESET'; @@ -101,10 +91,7 @@ SET GLOBAL debug_dbug = NULL; DROP TABLE t1; -# -# With wsrep_retry_autocommit = 64, success against 64 TRUNCATEs -# - +--echo # With wsrep_retry_autocommit = 64, success against 64 TRUNCATEs --connection node_1 CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; @@ -136,9 +123,8 @@ while ($count) --enable_query_log --connection node_1 ---error 0,ER_LOCK_DEADLOCK --reap -SELECT COUNT(*) FROM t1; +SELECT COUNT(*) AS expect_1 FROM t1; SET DEBUG_SYNC = 'RESET'; SET GLOBAL debug_dbug = NULL; diff --git a/sql/handler.cc b/sql/handler.cc index a20b793e387b8..248bc18ca94df 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1362,6 +1362,9 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht_arg, ulonglong trxid) DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid"); //gtid_set_performance_schema_values(thd); } +#ifdef WITH_WSREP + DEBUG_SYNC(thd, "wsrep_after_trans_register_ha"); +#endif /* WITH_WSREP */ DBUG_VOID_RETURN; } @@ -7589,6 +7592,9 @@ Compare_keys handler::compare_key_parts(const Field &old_field, concurrent accesses. And it's an overkill to take LOCK_plugin and iterate the whole installed_htons[] array every time. + @note Object victim_thd is not guaranteed to exist after this + function returns. + @param bf_thd brute force THD asking for the abort @param victim_thd victim THD to be aborted @@ -7602,7 +7608,8 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) if (!WSREP(bf_thd) && !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU && wsrep_thd_is_toi(bf_thd))) { - wsrep_thd_UNLOCK(victim_thd); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); DBUG_RETURN(0); } @@ -7613,8 +7620,9 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) } else { - wsrep_thd_UNLOCK(victim_thd); WSREP_WARN("Cannot abort InnoDB transaction"); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); } DBUG_RETURN(0); diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index ac544c065e752..7ca7a65be0cca 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -29,14 +29,12 @@ extern "C" my_bool wsrep_on(const THD *thd) extern "C" void wsrep_thd_LOCK(const THD *thd) { - mysql_mutex_lock(&thd->LOCK_thd_kill); mysql_mutex_lock(&thd->LOCK_thd_data); } extern "C" void wsrep_thd_UNLOCK(const THD *thd) { mysql_mutex_unlock(&thd->LOCK_thd_data); - mysql_mutex_unlock(&thd->LOCK_thd_kill); } extern "C" void wsrep_thd_kill_LOCK(const THD *thd) @@ -252,24 +250,16 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, { WSREP_DEBUG("victim is killed already by %llu, skipping awake", victim_thd->wsrep_aborter); - wsrep_thd_UNLOCK(victim_thd); return false; } victim_thd->wsrep_aborter= bf_thd->thread_id; - /* for KILL command, use the chosen kill signal, - BF aborting uses KILL_QUERY_HARD - */ - if (wsrep_thd_is_local(bf_thd) && bf_thd->lex->sql_command == SQLCOM_KILL) - victim_thd->awake_no_mutex(bf_thd->lex->kill_signal); - else - victim_thd->awake_no_mutex(KILL_QUERY_HARD); + victim_thd->awake_no_mutex(KILL_QUERY_HARD); } else WSREP_DEBUG("wsrep_thd_bf_abort skipped awake for %llu", thd_get_thread_id(victim_thd)); - wsrep_thd_UNLOCK(victim_thd); return ret; } @@ -406,10 +396,6 @@ extern "C" bool wsrep_thd_set_wsrep_aborter(THD *bf_thd, THD *victim_thd) return true; } victim_thd->wsrep_aborter= bf_thd->thread_id; - if (wsrep_thd_is_local(bf_thd) && bf_thd->lex->sql_command == SQLCOM_KILL) - { - victim_thd->wsrep_abort_by_kill= bf_thd->lex->kill_signal; - } WSREP_DEBUG("wsrep_thd_set_wsrep_aborter setting wsrep_aborter %u", victim_thd->wsrep_aborter); return false; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 89998ff057711..ddb507f992687 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1915,7 +1915,9 @@ void THD::awake_no_mutex(killed_state state_to_set) } /* Interrupt target waiting inside a storage engine. */ - if (state_to_set != NOT_KILLED && !wsrep_is_bf_aborted(this)) + if (state_to_set != NOT_KILLED && + IF_WSREP(!wsrep_is_bf_aborted(this) && wsrep_abort_by_kill == NOT_KILLED, + true)) ha_kill_query(this, thd_kill_level(this)); abort_current_cond_wait(false); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 217572fa3b6a0..b7744baaafe0e 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1247,7 +1247,7 @@ bool do_command(THD *thd) /* Check if we can continue without closing the connection */ /* The error must be set. */ - DBUG_ASSERT(thd->is_error() || thd->killed != NOT_KILLED); + DBUG_ASSERT(thd->is_error()); thd->protocol->end_statement(); /* Mark the statement completed. */ @@ -1318,11 +1318,8 @@ bool do_command(THD *thd) command != COM_STMT_EXECUTE && command != COM_QUIT) { - if (thd->wsrep_abort_by_kill == NOT_KILLED) - { - WSREP_DEBUG("Deadlock error for: %s", thd->query()); - my_error(ER_LOCK_DEADLOCK, MYF(0)); - } + my_error(ER_LOCK_DEADLOCK, MYF(0)); + WSREP_DEBUG("Deadlock error for: %s", thd->query()); thd->reset_killed(); thd->mysys_var->abort = 0; thd->wsrep_retry_counter = 0; @@ -1394,16 +1391,13 @@ bool do_command(THD *thd) if (thd->wsrep_delayed_BF_abort) { - if (thd->wsrep_abort_by_kill == NOT_KILLED) - { my_error(ER_LOCK_DEADLOCK, MYF(0)); - } - WSREP_DEBUG("Deadlock error for PS query: %s", thd->query()); - thd->reset_killed(); - thd->mysys_var->abort = 0; - thd->wsrep_retry_counter = 0; + WSREP_DEBUG("Deadlock error for PS query: %s", thd->query()); + thd->reset_killed(); + thd->mysys_var->abort = 0; + thd->wsrep_retry_counter = 0; - thd->wsrep_delayed_BF_abort= false; + thd->wsrep_delayed_BF_abort= false; } #endif /* WITH_WSREP */ DBUG_RETURN(return_value); @@ -7838,7 +7832,6 @@ static void wsrep_prepare_for_autocommit_retry(THD* thd, Parser_state* parser_state) { thd->clear_error(); - thd->reset_killed(); close_thread_tables(thd); thd->wsrep_retry_counter++; // grow wsrep_copy_query(thd); @@ -7916,12 +7909,8 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, (thd->get_stmt_da()->is_error()) ? thd->get_stmt_da()->sql_errno() : 0); - if (thd->wsrep_abort_by_kill == NOT_KILLED) - { - WSREP_DEBUG("Deadlock error for: %s", thd->query()); - wsrep_override_error(thd, ER_LOCK_DEADLOCK); - } thd->reset_kill_query(); + wsrep_override_error(thd, ER_LOCK_DEADLOCK); } #ifdef ENABLED_DEBUG_SYNC @@ -7935,6 +7924,7 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, thd_is_connection_alive(thd)) { thd->reset_for_next_command(); + thd->reset_kill_query(); if (is_autocommit && thd->lex->sql_command != SQLCOM_SELECT && thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit) @@ -7965,11 +7955,7 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, thd->wsrep_retry_counter, thd->variables.wsrep_retry_autocommit, wsrep_thd_query(thd)); - if (thd->wsrep_abort_by_kill == NOT_KILLED) - { - WSREP_DEBUG("Deadlock error for: %s", thd->query()); - my_error(ER_LOCK_DEADLOCK, MYF(0)); - } + my_error(ER_LOCK_DEADLOCK, MYF(0)); thd->reset_kill_query(); thd->wsrep_retry_counter= 0; // reset } @@ -9274,7 +9260,6 @@ kill_one_thread(THD *thd, my_thread_id id, killed_state kill_signal, killed_type if (!tmp) DBUG_RETURN(error); DEBUG_SYNC(thd, "found_killee"); - if (tmp->get_command() != COM_DAEMON) { /* @@ -9299,6 +9284,7 @@ kill_one_thread(THD *thd, my_thread_id id, killed_state kill_signal, killed_type */ mysql_mutex_lock(&tmp->LOCK_thd_data); // Lock from concurrent usage + #ifdef WITH_WSREP if (((thd->security_ctx->master_access & PRIV_KILL_OTHER_USER_PROCESS) || thd->security_ctx->user_matches(tmp->security_ctx)) && @@ -9325,10 +9311,18 @@ kill_one_thread(THD *thd, my_thread_id id, killed_state kill_signal, killed_type id, tmp->wsrep_aborter, kill_signal); #ifdef WITH_WSREP - if (WSREP(thd)) + if (WSREP(tmp)) { - wsrep_abort_thd(thd, tmp, 1); - /* tmp is unlocked in wsrep_abort_thd call */ + /* + Mark killed thd with kill_signal so that awake_no_mutex does + not dive into storage engine. We use ha_abort_transaction() + to do the storage engine part for wsrep THDs. + */ + tmp->wsrep_abort_by_kill= kill_signal; + tmp->awake_no_mutex(kill_signal); + /* ha_abort_transaction() releases tmp->LOCK_thd_kill, so tmp + is not safe to access anymore. */ + ha_abort_transaction(thd, tmp, 1); DBUG_RETURN(0); } else @@ -9340,6 +9334,7 @@ kill_one_thread(THD *thd, my_thread_id id, killed_state kill_signal, killed_type else error= (type == KILL_TYPE_QUERY ? ER_KILL_QUERY_DENIED_ERROR : ER_KILL_DENIED_ERROR); + mysql_mutex_unlock(&tmp->LOCK_thd_data); } mysql_mutex_unlock(&tmp->LOCK_thd_kill); diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index a649052043838..bf28f7fd39a6b 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -2670,8 +2670,15 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, /* Here we will call wsrep_abort_transaction so we should hold THD::LOCK_thd_data to protect victim from concurrent usage - and THD::LOCK_thd_kill to protect from disconnect or delete. */ - wsrep_thd_LOCK(granted_thd); + and THD::LOCK_thd_kill to protect from disconnect or delete. + + Note that all calls to wsrep_abort_thd() and ha_abort_transaction() + unlock LOCK_thd_kill for granted_thd, so granted_thd must not be + accessed after any of those calls. Moreover all other if branches + must release those locks. + */ + mysql_mutex_lock(&granted_thd->LOCK_thd_kill); + mysql_mutex_lock(&granted_thd->LOCK_thd_data); if (wsrep_thd_is_toi(granted_thd) || wsrep_thd_is_applying(granted_thd)) @@ -2680,22 +2687,22 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, { WSREP_DEBUG("BF thread waiting for SR in aborting state"); ticket->wsrep_report(wsrep_debug); - wsrep_thd_UNLOCK(granted_thd); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); } else if (wsrep_thd_is_SR(granted_thd) && !wsrep_thd_is_SR(request_thd)) { WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR", schema, schema_len, request_thd, granted_thd); wsrep_abort_thd(request_thd, granted_thd, 1); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len, request_thd, granted_thd); ticket->wsrep_report(true); - wsrep_thd_UNLOCK(granted_thd); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); unireg_abort(1); } } @@ -2704,7 +2711,8 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, { WSREP_DEBUG("BF thread waiting for FLUSH"); ticket->wsrep_report(wsrep_debug); - wsrep_thd_UNLOCK(granted_thd); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); } else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) { @@ -2712,8 +2720,6 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, wsrep_thd_transaction_state_str(granted_thd)); ticket->wsrep_report(wsrep_debug); wsrep_abort_thd(request_thd, granted_thd, 1); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { @@ -2723,8 +2729,6 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, if (granted_thd->wsrep_trx().active()) { wsrep_abort_thd(request_thd, granted_thd, true); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { @@ -2734,15 +2738,16 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, */ if (wsrep_thd_is_BF(request_thd, FALSE)) { + granted_thd->awake_no_mutex(KILL_QUERY_HARD); ha_abort_transaction(request_thd, granted_thd, TRUE); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); - mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); } else { WSREP_MDL_LOG(INFO, "MDL unknown BF-BF conflict", schema, schema_len, request_thd, granted_thd); ticket->wsrep_report(true); + mysql_mutex_unlock(&granted_thd->LOCK_thd_data); + mysql_mutex_unlock(&granted_thd->LOCK_thd_kill); unireg_abort(1); } } @@ -2758,6 +2763,7 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, static bool abort_replicated(THD *thd) { bool ret_code= false; + wsrep_thd_kill_LOCK(thd); wsrep_thd_LOCK(thd); if (thd->wsrep_trx().state() == wsrep::transaction::s_committing) { @@ -2767,8 +2773,12 @@ static bool abort_replicated(THD *thd) ret_code= true; } else + { + /* wsrep_abort_thd() above releases LOCK_thd_data and LOCK_thd_kill, so + must do it here too. */ wsrep_thd_UNLOCK(thd); - + wsrep_thd_kill_UNLOCK(thd); + } return ret_code; } diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc index d8472fc1eb3b0..d651ba4860ac7 100644 --- a/sql/wsrep_server_service.cc +++ b/sql/wsrep_server_service.cc @@ -143,20 +143,11 @@ void Wsrep_server_service::release_high_priority_service(wsrep::high_priority_se wsrep_delete_threadvars(); } -bool Wsrep_server_service::background_rollback(wsrep::client_state& client_state) +void Wsrep_server_service::background_rollback(wsrep::client_state& client_state) { Wsrep_client_state& cs= static_cast(client_state); - - if (wsrep_thd_is_local(cs.thd()) && - cs.thd()->wsrep_abort_by_kill >= KILL_CONNECTION) - { - WSREP_DEBUG("Skipping rollbacker for KILL command"); - cs.thd()->wsrep_abort_by_kill= NOT_KILLED; - return true; - } - + mysql_mutex_assert_owner(&cs.thd()->LOCK_thd_data); wsrep_fire_rollbacker(cs.thd()); - return false; } void Wsrep_server_service::bootstrap() diff --git a/sql/wsrep_server_service.h b/sql/wsrep_server_service.h index d298023406907..168e98206e367 100644 --- a/sql/wsrep_server_service.h +++ b/sql/wsrep_server_service.h @@ -46,7 +46,7 @@ class Wsrep_server_service : public wsrep::server_service void release_high_priority_service(wsrep::high_priority_service*); - bool background_rollback(wsrep::client_state&); + void background_rollback(wsrep::client_state&); void bootstrap(); void log_message(enum wsrep::log::level, const char*); diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index 49a8f144dfed7..18d03fbbf4a01 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -307,6 +307,52 @@ void wsrep_fire_rollbacker(THD *thd) } } +static bool wsrep_bf_abort_low(THD *bf_thd, THD *victim_thd) +{ + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.wsrep_bf_abort", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_bf_abort_reached " + "WAIT_FOR signal.wsrep_bf_abort"; + DBUG_ASSERT(!debug_sync_set_action(bf_thd, + STRING_WITH_LEN(act))); + };); +#endif + + wsrep::seqno bf_seqno(bf_thd->wsrep_trx().ws_meta().seqno()); + bool ret; + + { + /* Adopt the lock, it is being held by the caller. */ + Wsrep_mutex wsm{&victim_thd->LOCK_thd_data}; + wsrep::unique_lock lock{wsm, std::adopt_lock}; + + if (wsrep_thd_is_toi(bf_thd)) + { + ret= victim_thd->wsrep_cs().total_order_bf_abort(lock, bf_seqno); + } + else + { + DBUG_ASSERT(WSREP(victim_thd) ? victim_thd->wsrep_trx().active() : 1); + ret= victim_thd->wsrep_cs().bf_abort(lock, bf_seqno); + } + if (ret) + { + wsrep_bf_aborts_counter++; + } + lock.release(); /* No unlock at the end of the scope. */ + } + + /* Sanity check for wsrep-lib calls to return with LOCK_thd_data held. */ + mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); + + return ret; +} + void wsrep_abort_thd(THD *bf_thd, THD *victim_thd, @@ -324,11 +370,14 @@ void wsrep_abort_thd(THD *bf_thd, || ((WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) && wsrep_thd_is_toi(bf_thd)) || bf_thd->lex->sql_command == SQLCOM_KILL) - && !wsrep_thd_is_aborting(victim_thd)) + && !wsrep_thd_is_aborting(victim_thd) && + wsrep_bf_abort_low(bf_thd, victim_thd) && + !victim_thd->wsrep_cs().is_rollbacker_active()) { - WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", - (long long)bf_thd->real_id, (long long)victim_thd->real_id); - ha_abort_transaction(bf_thd, victim_thd, signal); + WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", + (long long)bf_thd->real_id, (long long)victim_thd->real_id); + victim_thd->awake_no_mutex(KILL_QUERY_HARD); + ha_abort_transaction(bf_thd, victim_thd, signal); } else { @@ -339,7 +388,8 @@ void wsrep_abort_thd(THD *bf_thd, bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU, wsrep_thd_is_toi(bf_thd), wsrep_thd_is_aborting(victim_thd)); - wsrep_thd_UNLOCK(victim_thd); + mysql_mutex_unlock(&victim_thd->LOCK_thd_data); + mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); } DBUG_VOID_RETURN; @@ -351,19 +401,6 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) WSREP_LOG_THD(victim_thd, "victim before"); mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); - mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); - -#ifdef ENABLED_DEBUG_SYNC - DBUG_EXECUTE_IF("sync.wsrep_bf_abort", - { - const char act[]= - "now " - "SIGNAL sync.wsrep_bf_abort_reached " - "WAIT_FOR signal.wsrep_bf_abort"; - DBUG_ASSERT(!debug_sync_set_action(bf_thd, - STRING_WITH_LEN(act))); - };); -#endif if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active()) { @@ -386,32 +423,7 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) mysql_mutex_lock(&victim_thd->LOCK_thd_data); } - bool ret; - wsrep::seqno bf_seqno(bf_thd->wsrep_trx().ws_meta().seqno()); - - if (wsrep_thd_is_toi(bf_thd)) - { - /* Here we enter wsrep-lib were LOCK_thd_data will be acquired, - thus we need to release it. However, we can still hold - LOCK_thd_kill to protect from disconnect or delete. */ - mysql_mutex_unlock(&victim_thd->LOCK_thd_data); - ret= victim_thd->wsrep_cs().total_order_bf_abort(bf_seqno); - mysql_mutex_lock(&victim_thd->LOCK_thd_data); - } - else - { - /* Test: mysql-wsrep-features#165. Here we enter wsrep-lib - were LOCK_thd_data will be acquired and later LOCK_thd_kill - thus we need to release them. */ - wsrep_thd_UNLOCK(victim_thd); - ret= victim_thd->wsrep_cs().bf_abort(bf_seqno); - wsrep_thd_LOCK(victim_thd); - } - if (ret) - { - wsrep_bf_aborts_counter++; - } - return ret; + return wsrep_bf_abort_low(bf_thd, victim_thd); } int wsrep_create_threadvars() diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h index de6920541bfac..7d25d7d29b46e 100644 --- a/sql/wsrep_thd.h +++ b/sql/wsrep_thd.h @@ -88,6 +88,13 @@ bool wsrep_create_appliers(long threads, bool mutex_protected=false); void wsrep_create_rollbacker(); bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd); +/* + Abort transaction for victim_thd. This function is called from + MDL BF abort codepath. + + @note This thread unlocks victim_thd->LOCK_thd_kill, so accessing + victim_thd after the function returns is not safe anymore. +*/ void wsrep_abort_thd(THD *bf_thd, THD *victim_thd, my_bool signal) __attribute__((nonnull(1,2))); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index b7c4536df0ba0..fa9656108b86f 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -18755,12 +18755,13 @@ wsrep_kill_victim( my_bool signal) { DBUG_ENTER("wsrep_kill_victim"); + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(victim_trx)); /* Mark transaction as a victim for Galera abort */ if (wsrep_thd_set_wsrep_aborter(bf_thd, thd)) { WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set"); - wsrep_thd_UNLOCK(thd); DBUG_VOID_RETURN; } @@ -18769,18 +18770,16 @@ wsrep_kill_victim( lock_t* wait_lock= victim_trx->lock.wait_lock; if (wait_lock) { + victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; DBUG_ASSERT(victim_trx->is_wsrep()); WSREP_DEBUG("victim has wait flag: %lu", thd_get_thread_id(thd)); - victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; lock_cancel_waiting_and_release(wait_lock); } } else { - wsrep_thd_LOCK(thd); victim_trx->lock.was_chosen_as_wsrep_victim= false; wsrep_thd_set_wsrep_aborter(NULL, thd); - wsrep_thd_UNLOCK(thd); WSREP_DEBUG("wsrep_thd_bf_abort has failed, victim %lu will survive", thd_get_thread_id(thd)); @@ -18835,17 +18834,8 @@ wsrep_innobase_kill_one_trx( DBUG_VOID_RETURN; } - /* Here we need to lock THD::LOCK_thd_data to protect from - concurrent usage or disconnect or delete. */ - - /* but, mark first the inndb transaction as a victim for Galera abort, - this will prevent potential deadlock, if e.g. KILL command has locked - the victim THD and would call for innobase_kill_query where additional - innodb side locks will be needed - */ - victim_trx->lock.was_chosen_as_wsrep_victim= true; - DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock"); + wsrep_thd_kill_LOCK(thd); wsrep_thd_LOCK(thd); DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock"); @@ -18878,6 +18868,8 @@ wsrep_innobase_kill_one_trx( wsrep_thd_query(thd)); wsrep_kill_victim(bf_thd, thd, victim_trx, signal); + wsrep_thd_UNLOCK(thd); + wsrep_thd_kill_UNLOCK(thd); DBUG_VOID_RETURN; } @@ -18898,41 +18890,60 @@ wsrep_abort_transaction( THD *victim_thd, my_bool signal) { - /* Note that victim thd is protected with - THD::LOCK_thd_data and THD::LOCK_thd_kill here. */ + /* Unlock LOCK_thd_kill and LOCK_thd_data temporarily to grab mutexes + in the right order: + lock_sys.mutex + trx.mutex + LOCK_thd_kill + LOCK_thd_data*/ trx_t* victim_trx= thd_to_trx(victim_thd); - trx_t* bf_trx= thd_to_trx(bf_thd); - WSREP_DEBUG("wsrep_abort_transaction: BF:" - " thread %ld client_state %s client_mode %s" - " trans_state %s query %s trx " TRX_ID_FMT, - thd_get_thread_id(bf_thd), - wsrep_thd_client_state_str(bf_thd), - wsrep_thd_client_mode_str(bf_thd), - wsrep_thd_transaction_state_str(bf_thd), - wsrep_thd_query(bf_thd), - bf_trx ? bf_trx->id : 0); - - WSREP_DEBUG("wsrep_abort_transaction: victim:" - " thread %ld client_state %s client_mode %s" - " trans_state %s query %s trx " TRX_ID_FMT, - thd_get_thread_id(victim_thd), - wsrep_thd_client_state_str(victim_thd), - wsrep_thd_client_mode_str(victim_thd), - wsrep_thd_transaction_state_str(victim_thd), - wsrep_thd_query(victim_thd), - victim_trx ? victim_trx->id : 0); - if (victim_trx) + trx_id_t victim_trx_id= victim_trx ? victim_trx->id : 0; + wsrep_thd_UNLOCK(victim_thd); + wsrep_thd_kill_UNLOCK(victim_thd); + /* After this point must use find_thread_by_id() is victim_thd + is needed again. */ + + /* Victim didn't have active RW transaction. Note that tere is a possible + race when the victim transaction is just starting write operation + as is still read only. This however will be resolved eventually since + all the possible blocking transactions are also BF aborted, + and the victim will find that it was BF aborted on server level after + the write operation in InnoDB completes. */ + if (!victim_trx_id) { - wsrep_thd_UNLOCK(victim_thd); - lock_mutex_enter(); - trx_mutex_enter(victim_trx); - wsrep_thd_LOCK(victim_thd); - - wsrep_kill_victim(bf_thd, victim_thd, victim_trx, signal); - +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF( + "sync.wsrep_abort_transaction_read_only", + {const char act[]= + "now " + "SIGNAL sync.wsrep_abort_transaction_read_only_reached " + "WAIT_FOR signal.wsrep_abort_transaction_read_only"; + DBUG_ASSERT(!debug_sync_set_action(bf_thd, STRING_WITH_LEN(act))); + };); +#endif /* ENABLED_DEBUG_SYNC*/ + return; + } + lock_mutex_enter(); + + /* Check if victim trx still exists. */ + /* Note based on comment on trx0sys.h only ACTIVE or PREPARED trx + objects may participate in hash. However, transaction may get committed + before this method returns. */ + if(!(victim_trx= trx_sys.find(nullptr, victim_trx_id, true))) { + WSREP_DEBUG("wsrep_abort_transaction: Victim trx does not exist anymore"); lock_mutex_exit(); - trx_mutex_exit(victim_trx); + return; + } + trx_mutex_enter(victim_trx); + + if (victim_trx->state == TRX_STATE_ACTIVE && victim_trx->lock.wait_lock) { + victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; + lock_cancel_waiting_and_release(victim_trx->lock.wait_lock); } + + trx_mutex_exit(victim_trx); + victim_trx->release_reference(); + lock_mutex_exit(); } static diff --git a/wsrep-lib b/wsrep-lib index f0e24625f45cb..f8a20ff00f285 160000 --- a/wsrep-lib +++ b/wsrep-lib @@ -1 +1 @@ -Subproject commit f0e24625f45cb1b43a55dac231e2e40b928a60d0 +Subproject commit f8a20ff00f28583b1d257cc148d8ab705dd274ce