diff --git a/mysql-test/suite/rpl/r/auto_relay_log_recovery.result b/mysql-test/suite/rpl/r/auto_relay_log_recovery.result new file mode 100644 index 0000000000000..a5cbcdb010d9f --- /dev/null +++ b/mysql-test/suite/rpl/r/auto_relay_log_recovery.result @@ -0,0 +1,69 @@ +include/master-slave.inc +[connection master] +# Case 0: No event groups and no trailing incomplete event +connection slave; +# Case 1: No event groups but trailing incomplete event +include/stop_slave_sql.inc +SET @@GLOBAL.debug_dbug= '+d,fail_rotate_mid_write'; +connection master; +FLUSH BINARY LOGS; +connection slave; +include/wait_for_slave_io_error.inc [errno=1595] +include/rpl_restart_server.inc [server_number=2] +include/show_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +slave-relay-bin.000002 # Rotate # # master-bin.000001;pos=POS +slave-relay-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION +slave-relay-bin.000002 # Gtid_list # # [] +slave-relay-bin.000002 # Binlog_checkpoint # # master-bin.000001 +# Case 2: Complete event group but trailing incomplete event +include/start_slave.inc +include/stop_slave_sql.inc +SET @@GLOBAL.debug_dbug= '+d,fail_rotate_mid_write'; +connection master; +CREATE TABLE t (a TEXT) ENGINE=innodb; +FLUSH BINARY LOGS; +connection slave; +include/wait_for_slave_io_error.inc [errno=1595] +include/rpl_restart_server.inc [server_number=2] +include/show_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +slave-relay-bin.000002 # Gtid # # GTID #-#-# +slave-relay-bin.000002 # Query # # use `test`; CREATE TABLE t (a TEXT) ENGINE=innodb +# Case 3: Trailing incomplete event group AND trailing incomplete event +SET @@max_relay_log_size=4096; +include/start_slave.inc +include/stop_slave_sql.inc +SET @@GLOBAL.debug_dbug= '+d,fail_rotate_mid_write'; +connection master; +BEGIN; +INSERT INTO t VALUES (REPEAT('3', 4096)); +INSERT INTO t VALUES ('3'); +COMMIT; +connection slave; +include/wait_for_slave_io_error.inc [errno=1595] +include/rpl_restart_server.inc [server_number=2] +include/show_events.inc +# Case 4: Trailing incomplete event group but complete event +include/start_slave.inc +include/stop_slave_sql.inc +SET @@GLOBAL.debug_dbug= '+d,kill_slave_io_before_commit'; +connection master; +INSERT INTO t VALUES ('4'); +connection slave; +include/wait_for_slave_io_error.inc [errno=1595] +include/rpl_restart_server.inc [server_number=2] +include/show_events.inc +# Double-check that all INSERTs have applied +include/start_slave.inc +SELECT LEFT(a, 2) FROM t; +LEFT(a, 2) +33 +3 +4 +# Case NaN: Cleanup +connection master; +DROP TABLE t; +CALL mtr.add_suppression('Could not use '); +CALL mtr.add_suppression('Slave I/O: Relay log write failure: '); +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/auto_relay_log_recovery.test b/mysql-test/suite/rpl/t/auto_relay_log_recovery.test new file mode 100644 index 0000000000000..52c733c359e0f --- /dev/null +++ b/mysql-test/suite/rpl/t/auto_relay_log_recovery.test @@ -0,0 +1,109 @@ +# MDEV-38907: Relay Logs can now automatically remove +# an incomplete event or event group left by a crash. +# +# Each case must first sync the slave up +# so it does not inject failures at the previous case's unsynchronized section. +# Due to START SLAVE restarting relay logs in GTID mode (MDEV-4698), +# it must also show truncation results directly via +# SHOW RELAYLOG EVENTS before the next case's START SLAVE and sync. +# (Due to specifying `$binlog_start`, the listing may be empty; +# this is expected when truncating a trailing incomplete event group.) + +--source include/have_debug.inc +--source include/have_innodb.inc +# format-agnostic, but easier to test rotate crashes with the row format +--source include/have_binlog_format_row.inc +--source include/master-slave.inc + +--echo # Case 0: No event groups and no trailing incomplete event +--sync_slave_with_master + + +--echo # Case 1: No event groups but trailing incomplete event +--source include/stop_slave_sql.inc +--let $binlog_file= `SELECT Relay_Log_File FROM information_schema.slave_status` + +SET @@GLOBAL.debug_dbug= '+d,fail_rotate_mid_write'; +--connection master + FLUSH BINARY LOGS; + --save_master_pos +--connection slave +--let $slave_io_errno= 1595 +--source include/wait_for_slave_io_error.inc + +--let $rpl_server_number= 2 +--source include/rpl_restart_server.inc +--source include/show_relaylog_events.inc + + +--echo # Case 2: Complete event group but trailing incomplete event +--source include/start_slave.inc +--sync_with_master +--source include/stop_slave_sql.inc +--let $binlog_file= `SELECT Relay_Log_File FROM information_schema.slave_status` +--let $binlog_start= `SELECT Relay_Log_Pos FROM information_schema.slave_status` + +SET @@GLOBAL.debug_dbug= '+d,fail_rotate_mid_write'; +--connection master + CREATE TABLE t (a TEXT) ENGINE=innodb; + FLUSH BINARY LOGS; + --save_master_pos +--connection slave +--source include/wait_for_slave_io_error.inc + +--source include/rpl_restart_server.inc +--source include/show_relaylog_events.inc + + +--echo # Case 3: Trailing incomplete event group AND trailing incomplete event +SET @@max_relay_log_size=4096; +--source include/start_slave.inc +--sync_with_master +--source include/stop_slave_sql.inc +--let $binlog_file= `SELECT Relay_Log_File FROM information_schema.slave_status` +--let $binlog_start= `SELECT Relay_Log_Pos FROM information_schema.slave_status` + +SET @@GLOBAL.debug_dbug= '+d,fail_rotate_mid_write'; +--connection master + BEGIN; + INSERT INTO t VALUES (REPEAT('3', 4096)); # slave's `@@max_relay_log_size` + INSERT INTO t VALUES ('3'); + COMMIT; + --save_master_pos +--connection slave +--source include/wait_for_slave_io_error.inc + +--source include/rpl_restart_server.inc +--source include/show_relaylog_events.inc + + +--echo # Case 4: Trailing incomplete event group but complete event +--source include/start_slave.inc +--sync_with_master +--source include/stop_slave_sql.inc +--let $binlog_file= `SELECT Relay_Log_File FROM information_schema.slave_status` +--let $binlog_start= `SELECT Relay_Log_Pos FROM information_schema.slave_status` + +SET @@GLOBAL.debug_dbug= '+d,kill_slave_io_before_commit'; +--connection master + INSERT INTO t VALUES ('4'); + --save_master_pos +--connection slave +--source include/wait_for_slave_io_error.inc + +--source include/rpl_restart_server.inc +--source include/show_relaylog_events.inc + + +--echo # Double-check that all INSERTs have applied +--source include/start_slave.inc +--sync_with_master +SELECT LEFT(a, 2) FROM t; + +--echo # Case NaN: Cleanup +--connection master + DROP TABLE t; + CALL mtr.add_suppression('Could not use '); + CALL mtr.add_suppression('Slave I/O: Relay log write failure: '); + +--source include/rpl_end.inc diff --git a/sql/log.cc b/sql/log.cc index 45160bc8cdd14..dd3ac04074ac8 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -4622,6 +4622,203 @@ Event_log::write_description_event(enum_binlog_checksum_alg checksum_alg, } +bool do_relaylog_recovery(MYSQL_BIN_LOG *relay_log, const char *log_name) +{ + MEM_ROOT memroot; + my_off_t group_pos; ///< Start offset of the event group or non-group event + /// @note not initialized when @ref truncate_entry is `nullptr` + my_off_t truncate_pos; + if (my_b_filelength(relay_log->get_index_file()) == 0) + // No recovery required for a fresh new log with no files + return false; + + int err= true; + Format_description_log_event fdev(BINLOG_VERSION); + DBUG_ASSERT(fdev.is_valid()); + rpl_gtid truncate_gtid= {0, 0, 0}; + /* + We want to truncate an incompletely written latest group, but we don't + know if this group began in a previous file, so start pretending it did. + */ + enum {UNCERTAIN, OUTSIDE, IN_TRANSACTION, IN_STANDALONE} + group_state= UNCERTAIN; + init_alloc_root(PSI_INSTRUMENT_ME, &memroot, + FN_REFLEN+sizeof(binlog_file_entry), 0, MYF(0)); + binlog_file_entry + *truncate_entry= nullptr, *list= relay_log->get_list(&memroot); + if (!list) + { + sql_print_error( + "Out of memory while loading the index file for relay log %sQ", log_name); + goto end; + } + + for (; list; list= list->next) + { + IO_CACHE log; + const char *errmsg; + Log_event *event; + char log_file_name[FN_REFLEN]; + if (normalize_binlog_name(log_file_name, list->name.str, true)) + { + sql_print_error( + "Could not normalize the name for relay log file %sQ", log_name); + goto end; + } + File file= open_binlog(&log, log_file_name, &errmsg); + if ((file) < 0) + { + sql_print_error("%s", errmsg); + goto end; + } + group_pos= 0; + while (( + /* + A slave will verify the checksum in the SQL thread; + the relay log does not care about checksums until then. + */ + event= Log_event::read_log_event(&log, &err, &fdev, false, false) + ) && event->is_valid()) + { + + switch (event->get_type_code()) { + case FORMAT_DESCRIPTION_EVENT: + { + auto format_description_event= + static_cast(event); + /*FIXME: + Shouldn't this section, as well as TC_LOG_BINLOG::recover(), + replace the `fdev` passed from an outer scope with + what's read from the log, since they may not match? + */ + fdev.used_checksum_alg= format_description_event->used_checksum_alg; + /* + In practice, the presence of this legacy field either: + * marks the very first Format Description of a log. + This means there are no prior events, let alone an open group. + * (with Log_event::is_artificial_event() set) + tells the SQL thread to disregard the current group - + because the other end of the connection has appearently crashed, + or have changed after reconnecting mid-group. + */ + if (!format_description_event->created) + break; + } + [[fallthrough]]; + case XID_EVENT: + case XA_PREPARE_LOG_EVENT: + group_state= OUTSIDE; + break; + case START_ENCRYPTION_EVENT: + if (fdev.start_decryption( + static_cast(event))) + { + delete event; + err= LOG_READ_DECRYPT; + errmsg= "Prepare decryption failure"; + goto end_while; + } + break; + case GTID_EVENT: + { + auto gtid_event= static_cast(event); + truncate_gtid= + {gtid_event->domain_id, gtid_event->server_id, gtid_event->seq_no}; + group_state= (gtid_event->flags2 & Gtid_log_event::FL_STANDALONE) ? + IN_STANDALONE : IN_TRANSACTION; + } + break; + case QUERY_EVENT: + { + auto query_event= static_cast(event); + if (query_event->is_begin()) + { + group_state= IN_TRANSACTION; + break; + } + if (query_event->is_commit() || query_event->is_rollback()) + { + group_state= OUTSIDE; + break; + } + } + [[fallthrough]]; + default: +#ifdef HAVE_REPLICATION + switch (group_state) { + case IN_STANDALONE: + if (!event->is_part_of_group()) + group_state= OUTSIDE; + case IN_TRANSACTION: + break; + default: + if (event->is_part_of_group()) + group_state= IN_STANDALONE; + } +#endif + } + delete event; + + // Increment @ref group_pos only when not inside a group + switch (group_state) { + case OUTSIDE: + truncate_gtid= {0, 0, 0}; + [[fallthrough]]; + case UNCERTAIN: + group_pos= my_b_tell(&log); + default:; + } + } + end_while: + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + + switch (err) { + case LOG_READ_IO: + case LOG_READ_MEM: + case LOG_READ_DECRYPT: + sql_print_error("Error from Log_event::read_log_event(): '%s'", errmsg); + goto end; + default: // EOF or incomprehensible event (which effectively is also EOF) + switch (group_state) { + default: + if (!err) + break; // No corrupted event to truncate + /* + Shelf this position: + Truncate from here even if the whole log is ungrouped events + */ + [[fallthrough]]; + // In a group, unclosed at the end + case IN_TRANSACTION: + case IN_STANDALONE: + truncate_pos= group_pos; + truncate_entry= list; + } + } + if (group_state != UNCERTAIN) + break; // Truncation (if necessary) has been located earlier. + // Keep checking for a group + fdev.reset_crypto(); + } + + if (truncate_entry && (err= relay_log->truncate_and_remove_binlogs( + truncate_entry->name.str, truncate_pos, &truncate_gtid))) + { + sql_print_error("Failed to truncate the relay log to file:%s pos:%llu.", + truncate_entry->name.str, truncate_pos); + goto end; + } +end: + free_root(&memroot, MYF(0)); + if (err) + sql_print_error("Crash detection and recovery failed. " + "Either correct the problem (if it's, for example, out of memory error) " + "and restart, or delete (or rename) the relay log %sQ " + "or restart the slave with @@GLOBAL.relay_log_recovery=1", log_name); + return err; +} + /** Open a (new) binlog file. @@ -4643,7 +4840,8 @@ bool MYSQL_BIN_LOG::open(const char *log_name, ulong max_size_arg, bool null_created_arg, bool need_mutex, - bool commit_by_rotate) + bool commit_by_rotate, + bool do_recovery) { xid_count_per_binlog *new_xid_list_entry= NULL, *b; DBUG_ENTER("MYSQL_BIN_LOG::open"); @@ -4651,21 +4849,19 @@ bool MYSQL_BIN_LOG::open(const char *log_name, DBUG_ASSERT(is_relay_log || !opt_binlog_engine_hton); mysql_mutex_assert_owner(&LOCK_log); - if (!is_relay_log) + if (!binlog_state_recover_done) { - if (!binlog_state_recover_done) - { - binlog_state_recover_done= true; - if (do_binlog_recovery(opt_bin_logname, false)) - DBUG_RETURN(1); - } - - if ((!binlog_background_thread_started && - !binlog_background_thread_stop) && - start_binlog_background_thread()) + binlog_state_recover_done= true; + if (do_recovery && is_relay_log ? do_relaylog_recovery(this, log_name) : + do_binlog_recovery(opt_bin_logname, false)) DBUG_RETURN(1); } + if (!is_relay_log && + !binlog_background_thread_started && !binlog_background_thread_stop && + start_binlog_background_thread()) + DBUG_RETURN(1); + /* We need to calculate new log file name for purge to delete old */ if (init_and_set_log_file_name(log_name, new_name, next_log_number, LOG_BIN, io_cache_type_arg)) @@ -11685,8 +11881,9 @@ MYSQL_BIN_LOG::close_engine() */ void MYSQL_BIN_LOG::clear_inuse_flag_when_closing(File file) { - my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET; - uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F + constexpr my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET; + //FIXME: use mysql_file_pread() to load the existing flags + uchar flags= is_relay_log ? LOG_EVENT_RELAY_LOG_F : 0; mysql_file_pwrite(file, &flags, 1, offset, MYF(0)); } diff --git a/sql/log.h b/sql/log.h index 01607d48706c9..15f62e029b9c4 100644 --- a/sql/log.h +++ b/sql/log.h @@ -1049,7 +1049,8 @@ class MYSQL_BIN_LOG: public TC_LOG, public Event_log ulong max_size, bool null_created, bool need_mutex, - bool commit_by_rotate = false); + bool commit_by_rotate= false, + bool do_recovery= true); bool open_engine(handlerton *hton, ulong max_size, const char *dir); bool open_index_file(const char *index_file_name_arg, const char *log_name, bool need_mutex); @@ -1163,6 +1164,19 @@ class MYSQL_BIN_LOG: public TC_LOG, public Event_log void close_engine(); void clear_inuse_flag_when_closing(File file); + // Exposed from `sql_repl.cc` + /** + Read all logs and return as a list + @param memroot Use this for mem_root calls + @param reverse If set filenames returned in latest first order + (reverse order than in the index file) + @param already_locked If set, index file is already locked. + @return nullptr error + otherwise pointer to list + @note index_file is always unlocked at return + */ + binlog_file_entry *get_list( + MEM_ROOT *memroot, bool reverse= true, bool already_locked= false); // iterating through the log index file int find_log_pos(LOG_INFO* linfo, const char* log_name, bool need_mutex); diff --git a/sql/log_event.cc b/sql/log_event.cc index c814b65066cdd..d1f477267a5ed 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -886,13 +886,14 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, int *out_error, const char *error= 0; Log_event *res= 0; - *out_error= 0; - switch (read_log_event(file, &event, fdle, BINLOG_CHECKSUM_ALG_OFF, - max_allowed_packet)) + *out_error= read_log_event(file, &event, fdle, BINLOG_CHECKSUM_ALG_OFF, + max_allowed_packet); + switch (*out_error) { case 0: break; case LOG_READ_EOF: // no error here; we are at the file's end + *out_error= 0; goto err; case LOG_READ_BOGUS: error= "Event invalid"; @@ -926,6 +927,8 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, int *out_error, if ((res= read_log_event((uchar*) event.ptr(), event.length(), &error, fdle, crc_check, false))) res->register_temp_buf((uchar*) event.release(), true); + else + *out_error= 1; err: if (unlikely(error)) @@ -943,9 +946,7 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, int *out_error, enough to stop the SQL thread now ; as we are skipping the current event, going on with reading and successfully executing other events can only corrupt the slave's databases. So stop. - */ - *out_error= 1; - /* + Clear any error that might have been set in the IO_CACHE from a read error, while we are still holding the relay log mutex (if reading from the hot log). Otherwise the error might interfere unpredictably with diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index a3426f93a2e9b..b85277a12d236 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -2754,6 +2754,7 @@ bool Rotate_log_event::write(Log_event_writer *writer) int8store(buf + R_POS_OFFSET, pos); return (write_header(writer, ROTATE_HEADER_LEN + ident_len) || write_data(writer, buf, ROTATE_HEADER_LEN) || + DBUG_IF("fail_rotate_mid_write") || write_data(writer, new_log_ident, (uint) ident_len) || write_footer(writer)); } diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index e03357acbb12f..b20925c52f0af 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -241,8 +241,13 @@ a file name for --relay-log-index option", opt_relaylog_index_name); */ mysql_mutex_lock(log_lock); if (relay_log.open_index_file(buf_relaylog_index_name, ln, TRUE) || - relay_log.open(ln, 0, 0, SEQ_READ_APPEND, - (ulong)max_relay_log_size, 1, TRUE)) + relay_log.open(ln, nullptr, 0, SEQ_READ_APPEND, + static_cast(max_relay_log_size), true, true, false, + /* + Not needed for `@@relay_log_recovery`, + and not safe for non-GTID mode until MDEV-39051 + */ + !is_relay_log_recovery && mi->master_use_gtid)) { mysql_mutex_unlock(log_lock); mysql_mutex_unlock(&data_lock); @@ -1122,9 +1127,14 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset, DBUG_RETURN(1); } mysql_mutex_lock(rli->relay_log.get_log_lock()); - if (rli->relay_log.open(ln, 0, 0, SEQ_READ_APPEND, - (ulong)(rli->max_relay_log_size ? rli->max_relay_log_size : - max_binlog_size), 1, TRUE)) + if (rli->relay_log.open(ln, nullptr, 0, SEQ_READ_APPEND, + rli->max_relay_log_size ? static_cast(rli->max_relay_log_size) : + max_binlog_size, true, true, false, + /* + Not needed for `@@relay_log_recovery`, + and not safe for non-GTID mode until MDEV-39051 + */ + !rli->is_relay_log_recovery && master_use_gtid)) { sql_print_error("Unable to purge relay log files. Failed to open relay " "log file:%s.", rli->relay_log.get_log_fname()); diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index c4c3e7d338b39..9c47c0b997a87 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1175,9 +1175,6 @@ static binlog_file_entry * get_binlog_list(MEM_ROOT *memroot, bool reverse= true, bool already_locked= false) { - IO_CACHE *index_file; - char *fname, *buff, *end_pos; - binlog_file_entry *current_list= NULL, *current_link= NULL, *e; DBUG_ENTER("get_binlog_list"); if (opt_binlog_engine_hton) @@ -1194,9 +1191,17 @@ get_binlog_list(MEM_ROOT *memroot, bool reverse= true, my_error(ER_NO_BINARY_LOGGING, MYF(0)); DBUG_RETURN(NULL); } + DBUG_RETURN(mysql_bin_log.get_list(memroot, reverse, already_locked)); +} + +binlog_file_entry *MYSQL_BIN_LOG::get_list + (MEM_ROOT *memroot, bool reverse, bool already_locked) +{ + char *fname, *buff, *end_pos; + binlog_file_entry *current_list= nullptr, *current_link= nullptr, *e; if (!already_locked) - mysql_bin_log.lock_index(); - index_file=mysql_bin_log.get_index_file(); + lock_index(); + IO_CACHE *index_file= get_index_file(); reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0); if (!(buff= (char*) alloc_root(memroot, @@ -1209,7 +1214,7 @@ get_binlog_list(MEM_ROOT *memroot, bool reverse= true, goto err; } buff[index_file->end_of_file]= 0; // For strchr - mysql_bin_log.unlock_index(); + unlock_index(); /* The file ends with EOF or empty line */ for (fname= buff; @@ -1218,7 +1223,7 @@ get_binlog_list(MEM_ROOT *memroot, bool reverse= true, { end_pos[0]= '\0'; // remove the newline if (!(e= (binlog_file_entry *) alloc_root(memroot, sizeof(*e)))) - DBUG_RETURN(NULL); + return nullptr; if (reverse) { e->next= current_list; @@ -1236,11 +1241,11 @@ get_binlog_list(MEM_ROOT *memroot, bool reverse= true, e->name.str= fname; e->name.length= (size_t) (end_pos - fname); } - DBUG_RETURN(current_list); + return current_list; err: - mysql_bin_log.unlock_index(); - DBUG_RETURN(0); + unlock_index(); + return nullptr; }