Hi, Aleksey! Like last time, it's a review of the diff bc04ded2353 22158b3c05a, so not only commit 38a888da0f1.
The main thing below - I didn't understand a couple of changes and asked for clarifications. On May 30, Aleksey Midenkov wrote: > revision-id: 38a888da0f1 (mariadb-10.5.2-653-g38a888da0f1) > parent(s): bc04ded2353 > author: Aleksey Midenkov <[email protected]> > committer: Aleksey Midenkov <[email protected]> > timestamp: 2021-04-22 23:35:06 +0300 > message: > > MDEV-17554 Auto-create new partition for system versioned tables with history > partitioned by INTERVAL/LIMIT > diff --git a/mysql-test/suite/versioning/r/delete_history.result > b/mysql-test/suite/versioning/r/delete_history.result > --- a/mysql-test/suite/versioning/r/delete_history.result > +++ b/mysql-test/suite/versioning/r/delete_history.result > @@ -154,3 +152,21 @@ select * from t1; > a > 1 > drop table t1; > +# > +# MDEV-17554 Auto-create new partition for system versioned tables with > history partitioned by INTERVAL/LIMIT > +# > +# Don't auto-create new partition on DELETE HISTORY: > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t (a int) with system versioning > +partition by system_time interval 1 hour auto; > +set timestamp= unix_timestamp('2000-01-01 10:00:00'); perhaps, make it only 02:00:00 to avoid any chance that a partition won't be created because of max-auto-create limit. it's possible we put the limit at 10, but 2 is very unlikely. or does max-auto-create limit always result in a warning? > +delete history from t; > +set timestamp= default; > +show create table t; > +Table Create Table > +t CREATE TABLE `t` ( > + `a` int(11) DEFAULT NULL > +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 > 00:00:00' AUTO > +PARTITIONS 2 > +drop table t; > diff --git a/mysql-test/suite/versioning/r/partition.result > b/mysql-test/suite/versioning/r/partition.result > --- a/mysql-test/suite/versioning/r/partition.result > +++ b/mysql-test/suite/versioning/r/partition.result > @@ -1236,27 +1270,826 @@ t1 CREATE TABLE `t1` ( > PARTITION `p5` HISTORY ENGINE = DEFAULT_ENGINE, > PARTITION `pn` CURRENT ENGINE = DEFAULT_ENGINE) > alter table t1 add partition partitions 8; > +drop table t1; > +# > +# End of 10.5 tests > +# > +# > +# MDEV-17554 Auto-create new partition for system versioned tables with > history partitioned by INTERVAL/LIMIT > +# > +create or replace table t1 (x int) with system versioning > +partition by system_time limit 1 auto; > show create table t1; > Table Create Table > t1 CREATE TABLE `t1` ( > `x` int(11) DEFAULT NULL > ) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > - PARTITION BY SYSTEM_TIME LIMIT 1 > -(PARTITION `p2` HISTORY ENGINE = DEFAULT_ENGINE, > - PARTITION `p8` HISTORY ENGINE = DEFAULT_ENGINE, > + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO > +PARTITIONS 2 > +insert into t1 values (1); > +create or replace table t2 (y int); > +insert into t2 values (2); > +insert into t1 select * from t2; > +insert into t2 select * from t1; > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `x` int(11) DEFAULT NULL > +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO > +PARTITIONS 2 > +# Too many partitions error > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 1 hour auto; > +set timestamp= unix_timestamp('2001-01-01 00:01:00'); > +update t1 set x= x + 1; > +ERROR HY000: Too many partitions (including subpartitions) were defined > +# Partition overflow error and manual fix seems to be a wrong comment? no manual fix here. > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 1 hour; > +insert into t1 values (333); > +set timestamp= unix_timestamp('2000-01-01 01:00:00'); > +update t1 set x= x + 1; > +Warnings: > +Warning 4114 Versioned table `test`.`t1`: last HISTORY partition > (`p0`) is out of INTERVAL, need more HISTORY partitions > +drop table t1; > +# Partition overflow error and manual fix > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 1 hour; > +insert into t1 values (440); > +set timestamp= unix_timestamp('2000-01-01 00:10:00'); > +update t1 set x= x + 1; > +# Check how pruning boundaries work > +explain partitions select * from t1 for system_time as of '2000-01-01 > 00:59:58'; > +id select_type table partitions type possible_keys key > key_len ref rows Extra > +1 SIMPLE t1 p0,pn # NULL NULL NULL NULL # > # > +explain partitions select * from t1 for system_time as of '2000-01-01 > 00:59:59'; > +id select_type table partitions type possible_keys key > key_len ref rows Extra > +1 SIMPLE t1 pn # NULL NULL NULL NULL # > # Why the first select looks in both partitions and the second one - only in pn? > +explain partitions select * from t1 for system_time as of '2000-01-01 > 01:00:00'; > +id select_type table partitions type possible_keys key > key_len ref rows Extra > +1 SIMPLE t1 pn # NULL NULL NULL NULL # > # > +select * from t1 for system_time as of '2000-01-01 00:09:59'; > +x > +440 > +set timestamp= unix_timestamp('2000-01-01 02:00:00'); > +update t1 set x= x + 1; > +Warnings: > +Warning 4114 Versioned table `test`.`t1`: last HISTORY partition > (`p0`) is out of INTERVAL, need more HISTORY partitions > +select * from t1 for system_time as of '2000-01-01 01:00:00'; > +x > +select * from t1 partition (p0) order by x; > +x > +440 > +441 > +alter table t1 add partition partitions 3; > +select * from t1 for system_time as of '2000-01-01 01:00:00'; > +x > +441 > +select * from t1 partition (p0) order by x; > +x > +440 would be good to have EXPLAINs after the overflow and after the manual fix. to show how ALTER changes what partition will be pruned > +drop table t1; > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 3600 second > +starts '2000-01-01 00:00:00' auto partitions 3; > +affected rows: 0 > +insert into t1 values (1); > +affected rows: 1 > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `x` int(11) DEFAULT NULL > +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME INTERVAL 3600 SECOND STARTS TIMESTAMP'2000-01-01 > 00:00:00' AUTO > +PARTITIONS 3 > ... > diff --git a/mysql-test/suite/versioning/r/rpl_mix.result > b/mysql-test/suite/versioning/r/rpl_mix.result > --- a/mysql-test/suite/versioning/r/rpl_mix.result > +++ b/mysql-test/suite/versioning/r/rpl_mix.result > @@ -8,4 +8,59 @@ DELETE HISTORY FROM t1; > connection slave; > connection master; > drop table t1; > +# > +# MDEV-25347 DML events for auto-partitioned tables are written into binary > log twice > +# > +flush binary logs; > +create table t1 (a int) with system versioning > +partition by system_time limit 1 auto; > +insert into t1 values (1); > +update t1 set a= a + 1; > +update t1 set a= a + 1; better do a+2 the second time. otherwise it's a bit confusing, the bug says "written twice" and you indeed have update a=a+1 written twice :) with clearly different statements the test will prove that every event is written once, and not one is skipped and the other is duplicated > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `a` int(11) DEFAULT NULL > +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO > +PARTITIONS 3 > +select * from t1; > +a > +3 > +include/show_binlog_events.inc > +Log_name Pos Event_type Server_id End_log_pos Info > +master-bin.000002 # Binlog_checkpoint # # > master-bin.000002 > +master-bin.000002 # Gtid # # GTID #-#-# > +master-bin.000002 # Query # # use `test`; create > table t1 (a int) with system versioning > +partition by system_time limit 1 auto > +master-bin.000002 # Gtid # # BEGIN GTID #-#-# > +master-bin.000002 # Annotate_rows # # insert into t1 > values (1) > +master-bin.000002 # Table_map # # table_id: # > (test.t1) > +master-bin.000002 # Write_rows_v1 # # table_id: # > flags: STMT_END_F > +master-bin.000002 # Query # # COMMIT > +master-bin.000002 # Gtid # # BEGIN GTID #-#-# > +master-bin.000002 # Annotate_rows # # update t1 set > a= a + 1 > +master-bin.000002 # Table_map # # table_id: # > (test.t1) > +master-bin.000002 # Update_rows_v1 # # table_id: # > +master-bin.000002 # Write_rows_v1 # # table_id: # > flags: STMT_END_F > +master-bin.000002 # Query # # COMMIT > +master-bin.000002 # Gtid # # BEGIN GTID #-#-# > +master-bin.000002 # Annotate_rows # # update t1 set > a= a + 1 > +master-bin.000002 # Table_map # # table_id: # > (test.t1) > +master-bin.000002 # Update_rows_v1 # # table_id: # > +master-bin.000002 # Write_rows_v1 # # table_id: # > flags: STMT_END_F > +master-bin.000002 # Query # # COMMIT > +connection slave; > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `a` int(11) DEFAULT NULL > +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO > +PARTITIONS 3 > +select * from t1; > +a > +3 > +connection master; > +drop table t1; > include/rpl_end.inc > diff --git a/mysql-test/suite/versioning/r/rpl_row.result > b/mysql-test/suite/versioning/r/rpl_row.result > --- a/mysql-test/suite/versioning/r/rpl_row.result > +++ b/mysql-test/suite/versioning/r/rpl_row.result > @@ -11,4 +11,59 @@ connection slave; > connection master; > drop table t1; > set binlog_row_image= @old_row_image; > +# > +# MDEV-25347 DML events for auto-partitioned tables are written into binary > log twice > +# > +flush binary logs; > +create table t1 (a int) with system versioning > +partition by system_time limit 1 auto; > +insert into t1 values (1); > +update t1 set a= a + 1; > +update t1 set a= a + 1; same. as a general rule it's helpful to use slightly different statement, to be able to map them to binlog events uniquely. a+2, or 1+a, or whatever. > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `a` int(11) DEFAULT NULL > +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO > +PARTITIONS 3 > +select * from t1; > +a > +3 > +include/show_binlog_events.inc > +Log_name Pos Event_type Server_id End_log_pos Info > +master-bin.000002 # Binlog_checkpoint # # > master-bin.000002 > +master-bin.000002 # Gtid # # GTID #-#-# > +master-bin.000002 # Query # # use `test`; create > table t1 (a int) with system versioning > +partition by system_time limit 1 auto > +master-bin.000002 # Gtid # # BEGIN GTID #-#-# > +master-bin.000002 # Annotate_rows # # insert into t1 > values (1) > +master-bin.000002 # Table_map # # table_id: # > (test.t1) > +master-bin.000002 # Write_rows_v1 # # table_id: # > flags: STMT_END_F > +master-bin.000002 # Query # # COMMIT > +master-bin.000002 # Gtid # # BEGIN GTID #-#-# > +master-bin.000002 # Annotate_rows # # update t1 set > a= a + 1 > +master-bin.000002 # Table_map # # table_id: # > (test.t1) > +master-bin.000002 # Update_rows_v1 # # table_id: # > +master-bin.000002 # Write_rows_v1 # # table_id: # > flags: STMT_END_F > +master-bin.000002 # Query # # COMMIT > +master-bin.000002 # Gtid # # BEGIN GTID #-#-# > +master-bin.000002 # Annotate_rows # # update t1 set > a= a + 1 > +master-bin.000002 # Table_map # # table_id: # > (test.t1) > +master-bin.000002 # Update_rows_v1 # # table_id: # > +master-bin.000002 # Write_rows_v1 # # table_id: # > flags: STMT_END_F > +master-bin.000002 # Query # # COMMIT > +connection slave; > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `a` int(11) DEFAULT NULL > +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO > +PARTITIONS 3 > +select * from t1; > +a > +3 > +connection master; > +drop table t1; > include/rpl_end.inc > diff --git a/mysql-test/suite/versioning/t/partition.test > b/mysql-test/suite/versioning/t/partition.test > --- a/mysql-test/suite/versioning/t/partition.test > +++ b/mysql-test/suite/versioning/t/partition.test > @@ -1068,13 +1078,456 @@ alter table t1 add partition partitions 2; > --replace_result $default_engine DEFAULT_ENGINE > show create table t1; > alter table t1 add partition partitions 8; > +drop table t1; > + > +--echo # > +--echo # End of 10.5 tests > +--echo # > + > +--echo # > +--echo # MDEV-17554 Auto-create new partition for system versioned tables > with history partitioned by INTERVAL/LIMIT > +--echo # > +create or replace table t1 (x int) with system versioning > +partition by system_time limit 1 auto; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + in the previous version of this patch you had clarifying comments here, like --echo # INSERT, INSERT .. SELECT don't auto-create partitions or --echo # Number of partitions goes from 3 to 5 I kind of miss them now, they were helpful > +insert into t1 values (1); > + > +create or replace table t2 (y int); > +insert into t2 values (2); > + > +insert into t1 select * from t2; > +insert into t2 select * from t1; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > ... > diff --git a/sql/ha_partition.h b/sql/ha_partition.h > --- a/sql/ha_partition.h > +++ b/sql/ha_partition.h > @@ -1607,7 +1607,7 @@ class ha_partition :public handler > for (; part_id < part_id_end; ++part_id) > { > handler *file= m_file[part_id]; > - DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id)); > + bitmap_set_bit(&(m_part_info->read_partitions), part_id); would be good to have a comment before the method that it's only used in vers_set_hist_part(), if (vers_info->limit). because if it's a generic method to return the number of rows in a partition (incl. subpartitions) then it's totally not clear why it modifies read_partitions bitmap. or may be you'd want to rename the method to make it look more specific for LIMIT partitions in vers_set_hist_part() ? > file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_OPEN); > part_recs+= file->stats.records; > } > diff --git a/sql/handler.cc b/sql/handler.cc > --- a/sql/handler.cc > +++ b/sql/handler.cc > @@ -1574,6 +1574,8 @@ int ha_commit_trans(THD *thd, bool all) > DBUG_ASSERT(thd->transaction->stmt.ha_list == NULL || > trans == &thd->transaction->stmt); > > + DBUG_ASSERT(!thd->in_sub_stmt); > if (thd->in_sub_stmt) > { > DBUG_ASSERT(0); This is a bit redundant, don't you think? One DBUG_ASSERT would be enough. (personally, I'd prefer the one you added, not DBUG_ASSERT(0)) > diff --git a/sql/lock.cc b/sql/lock.cc > --- a/sql/lock.cc > +++ b/sql/lock.cc > @@ -662,16 +662,28 @@ MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK > *b) > DBUG_PRINT("enter", ("a->lock_count: %u b->lock_count: %u", > a->lock_count, b->lock_count)); > > - if (!(sql_lock= (MYSQL_LOCK*) > - my_malloc(key_memory_MYSQL_LOCK, sizeof(*sql_lock) + > - sizeof(THR_LOCK_DATA*)*((a->lock_count+b->lock_count)*2) + > - sizeof(TABLE*)*(a->table_count+b->table_count),MYF(MY_WME)))) > - DBUG_RETURN(0); // Fatal error > + const size_t lock_size= sizeof(*sql_lock) + > + sizeof(THR_LOCK_DATA *) * ((a->lock_count + b->lock_count) * 2) + > + sizeof(TABLE *) * (a->table_count + b->table_count); > + if (thd) > + { > + sql_lock= (MYSQL_LOCK *) thd->alloc(lock_size); > + if (!sql_lock) > + DBUG_RETURN(0); > + sql_lock->flags= GET_LOCK_ON_THD; why? the commit comment for MDEV-23639 doesn't mention it. generally, I think it's a good idea and, likely, should be used more than just in recover_from_failed_open(). But why right now and in this commit? Does this commit (22158b3c) rely on locks being on THD memroot? How so? > + } > + else > + { > + sql_lock= (MYSQL_LOCK *) > + my_malloc(key_memory_MYSQL_LOCK, lock_size, MYF(MY_WME)); > + if (!sql_lock) > + DBUG_RETURN(0); > + sql_lock->flags= 0; > + } > sql_lock->lock_count=a->lock_count+b->lock_count; > sql_lock->table_count=a->table_count+b->table_count; > sql_lock->locks=(THR_LOCK_DATA**) (sql_lock+1); > sql_lock->table=(TABLE**) (sql_lock->locks+sql_lock->lock_count*2); > - sql_lock->flags= 0; > memcpy(sql_lock->locks,a->locks,a->lock_count*sizeof(*a->locks)); > memcpy(sql_lock->locks+a->lock_count,b->locks, > b->lock_count*sizeof(*b->locks)); > @@ -705,8 +717,10 @@ MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b) > a->lock_count, b->lock_count); > > /* Delete old, not needed locks */ > - my_free(a); > - my_free(b); > + if (!(a->flags & GET_LOCK_ON_THD)) > + my_free(a); > + if (!(b->flags & GET_LOCK_ON_THD)) > + my_free(b); this even looks like a bug, no check that old locks weren't on THD memroot. (probably impossible in this particular case, but still rather fragile) thanks! > DBUG_RETURN(sql_lock); > } > > diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt > --- a/sql/share/errmsg-utf8.txt > +++ b/sql/share/errmsg-utf8.txt > @@ -7985,3 +7985,5 @@ ER_JSON_TABLE_MULTIPLE_MATCHES > eng "Can't store multiple matches of the path in the column '%s' of > JSON_TABLE '%s'." > ER_WITH_TIES_NEEDS_ORDER > eng "FETCH ... WITH TIES requires ORDER BY clause to be present" > +ER_VERS_HIST_PART_FAILED > + eng "Versioned table %`s.%`s: adding HISTORY partition failed" Can you come up with a test for this error? > diff --git a/sql/sql_base.cc b/sql/sql_base.cc > --- a/sql/sql_base.cc > +++ b/sql/sql_base.cc > @@ -931,7 +934,7 @@ void close_thread_table(THD *thd, TABLE **table_ptr) > DBUG_PRINT("tcache", ("table: '%s'.'%s' %p", table->s->db.str, > table->s->table_name.str, table)); > DBUG_ASSERT(!table->file->keyread_enabled()); > - DBUG_ASSERT(!table->file || table->file->inited == handler::NONE); > + DBUG_ASSERT(table->file->inited == handler::NONE); indeed > > /* > The metadata lock must be released after giving back > @@ -1625,6 +1625,125 @@ bool is_locked_view(THD *thd, TABLE_LIST *t) > } > > > +#ifdef WITH_PARTITION_STORAGE_ENGINE > +/** > + Switch part_info->hist_part and request partition creation if needed. > + > + @retval true Error or partition creation was requested. > + @retval false No error > +*/ > +bool TABLE::vers_switch_partition(THD *thd, TABLE_LIST *table_list, > + Open_table_context *ot_ctx) > +{ > + if (!part_info || part_info->part_type != VERSIONING_PARTITION || > + table_list->vers_conditions.delete_history || > + thd->stmt_arena->is_stmt_prepare() || > + table_list->lock_type < TL_WRITE_ALLOW_WRITE || > + table_list->mdl_request.type < MDL_SHARED_WRITE || > + table_list->mdl_request.type == MDL_EXCLUSIVE) > + { > + return false; > + } > + > + switch (thd->lex->sql_command) > + { > + case SQLCOM_INSERT: > + if (thd->lex->duplicates != DUP_UPDATE) > + return false; > + break; > + case SQLCOM_LOAD: > + if (thd->lex->duplicates != DUP_REPLACE) > + return false; > + break; > + case SQLCOM_LOCK_TABLES: > + case SQLCOM_DELETE: > + case SQLCOM_UPDATE: > + case SQLCOM_REPLACE: > + case SQLCOM_REPLACE_SELECT: > + case SQLCOM_DELETE_MULTI: > + case SQLCOM_UPDATE_MULTI: > + break; > + default: > + /* > + TODO: make row events set thd->lex->sql_command appropriately. > + > + Sergei Golubchik: f.ex. currently row events increment > + thd->status_var.com_stat[] each event for its own SQLCOM_xxx, it > won't be > + needed if they'll just set thd->lex->sql_command. > + */ > + if (thd->rgi_slave && thd->rgi_slave->current_event && > + thd->lex->sql_command == SQLCOM_END) > + { > + switch (thd->rgi_slave->current_event->get_type_code()) > + { > + case UPDATE_ROWS_EVENT: > + case UPDATE_ROWS_EVENT_V1: > + case DELETE_ROWS_EVENT: > + case DELETE_ROWS_EVENT_V1: > + break; > + default:; > + return false; > + } > + } > + break; > + } > + > + TABLE *table= this; > + > + /* > + NOTE: The semantics of vers_set_hist_part() is double: even when we twofold > + don't need auto-create, we need to update part_info->hist_part. > + */ > + uint *create_count= table_list->vers_skip_create ? > + NULL : &ot_ctx->vers_create_count; > + table_list->vers_skip_create= true; > + if (table->part_info->vers_set_hist_part(thd, create_count)) > + { > + MYSQL_UNBIND_TABLE(table->file); > + tc_release_table(table); > + return true; > + } > + if (ot_ctx->vers_create_count) > + { > + Open_table_context::enum_open_table_action action; > + TABLE_LIST *table_arg; > + mysql_mutex_lock(&table->s->LOCK_share); > + if (!table->s->vers_skip_auto_create) > + { > + table->s->vers_skip_auto_create= true; > + action= Open_table_context::OT_ADD_HISTORY_PARTITION; > + table_arg= table_list; > + } > + else > + { > + /* > + NOTE: this may repeat multiple times until creating thread acquires > + MDL_EXCLUSIVE. Since auto-creation is rare operation this is > acceptable. > + We could suspend this thread on cond-var but we must first exit > + MDL_SHARED_WRITE first and we cannot store cond-var into > TABLE_SHARE > + because it is already released and there is no guarantee that it > will > + be same instance if we acquire it again. > + */ > + table_list->vers_skip_create= false; > + ot_ctx->vers_create_count= 0; > + action= Open_table_context::OT_REOPEN_TABLES; > + table_arg= NULL; > + } I'm afraid I don't understand. All this business with vers_skip_create and vers_skip_auto_create, it wasn't in the previous version of the patch. So, I believe it was a fix for one of the MDEV bugs reported and fixed meanwhile. What MDEV was it? Could you explain what was the issue and what was the fix, please? > + mysql_mutex_unlock(&table->s->LOCK_share); > + if (!thd->locked_tables_mode) > + { > + MYSQL_UNBIND_TABLE(table->file); > + tc_release_table(table); > + } > + ot_ctx->request_backoff_action(action, table_arg); > + return true; > + } > + > + return false; > +} > +#endif /* WITH_PARTITION_STORAGE_ENGINE */ > + > + > /** > Open a base table. > > @@ -2572,11 +2699,13 @@ void Locked_tables_list::mark_table_for_reopen(THD > *thd, TABLE *table) > table_list; table_list= table_list->next_global) > { > if (table_list->table->s == share) > + { > table_list->table->internal_set_needs_reopen(true); > + some_table_marked_for_reopen= 1; > + } > } > /* This is needed in the case where lock tables where not used */ > table->internal_set_needs_reopen(true); why ^^^ this doesn't count as "some_table_marked_for_reopen" ? > - some_table_marked_for_reopen= 1; > } > > > @@ -3172,13 +3304,49 @@ Open_table_context::recover_from_failed_open() > break; > case OT_DISCOVER: > case OT_REPAIR: > - if ((result= lock_table_names(m_thd, m_thd->lex->create_info, > - m_failed_table, NULL, > - get_timeout(), 0))) > + case OT_ADD_HISTORY_PARTITION: > + if (!m_thd->locked_tables_mode) > + result= lock_table_names(m_thd, m_thd->lex->create_info, > m_failed_table, > + NULL, get_timeout(), 0); > + else > + { > + DBUG_ASSERT(!result); > + DBUG_ASSERT(m_action == OT_ADD_HISTORY_PARTITION); It seems you've solved recover_from_failed_open under LOCK TABLES, so it could be possible to make OT_DISCOVER and OT_REPAIR to work there too. Is that right? It's just a question, I don't imply it should be part of this MDEV. > + } > + /* > + We are now under MDL_EXCLUSIVE mode. Other threads have no table > share > + acquired: they are blocked either at open_table_get_mdl_lock() in > + open_table() or at lock_table_names() here. > + */ > + if (result) > + { > + if (m_action == OT_ADD_HISTORY_PARTITION) > + { > + TABLE_SHARE *share= tdc_acquire_share(m_thd, m_failed_table, > + GTS_TABLE, NULL); > + if (share) > + { > + share->vers_skip_auto_create= false; > + tdc_release_share(share); > + } > + if (m_thd->get_stmt_da()->sql_errno() == ER_LOCK_WAIT_TIMEOUT) > + { > + // MDEV-23642 Locking timeout caused by auto-creation affects > original DML > + m_thd->clear_error(); > + vers_create_count= 0; > + result= false; > + } > + } > break; > + } > > - tdc_remove_table(m_thd, m_failed_table->db.str, > - m_failed_table->table_name.str); > + /* > + We don't need to remove share under OT_ADD_HISTORY_PARTITION. > + Moreover fast_alter_partition_table() works with TABLE instance. > + */ > + if (m_action != OT_ADD_HISTORY_PARTITION) > + tdc_remove_table(m_thd, m_failed_table->db.str, > + m_failed_table->table_name.str); > > switch (m_action) > { > @@ -3206,6 +3374,70 @@ Open_table_context::recover_from_failed_open() > case OT_REPAIR: > result= auto_repair_table(m_thd, m_failed_table); > break; > + case OT_ADD_HISTORY_PARTITION: > + { > + result= false; > + TABLE *table= open_ltable(m_thd, m_failed_table, TL_WRITE, > + MYSQL_OPEN_HAS_MDL_LOCK | > MYSQL_OPEN_IGNORE_LOGGING_FORMAT); > + if (table == NULL) > + { > + m_thd->clear_error(); > + break; > + } > + > + DBUG_ASSERT(vers_create_count); > + result= vers_create_partitions(m_thd, m_failed_table, > vers_create_count); > + vers_create_count= 0; > + if (!m_thd->transaction->stmt.is_empty()) > + trans_commit_stmt(m_thd); > + DBUG_ASSERT(!result || > + !m_thd->locked_tables_mode || > + m_thd->lock->lock_count); > + if (result) > + break; > + if (!m_thd->locked_tables_mode) > + { > + /* > + alter_partition_lock_handling() does mysql_lock_remove() but > + does not clear thd->lock completely. > + */ > + DBUG_ASSERT(m_thd->lock->lock_count == 0); > + if (!(m_thd->lock->flags & GET_LOCK_ON_THD)) > + my_free(m_thd->lock); > + m_thd->lock= NULL; > + } > + else if (m_thd->locked_tables_mode == LTM_PRELOCKED) > + { > + MYSQL_LOCK *lock; > + MYSQL_LOCK *merged_lock; > + > + /* > + In LTM_LOCK_TABLES table was reopened via locked_tables_list, > + but not in prelocked environment where we have to reopen > + the table manually. > + */ > + Open_table_context ot_ctx(m_thd, MYSQL_OPEN_REOPEN); > + if (open_table(m_thd, m_failed_table, &ot_ctx)) > + { > + result= true; > + break; > + } > + TABLE *table= m_failed_table->table; > + table->reginfo.lock_type= m_thd->update_lock_default; > + m_thd->in_lock_tables= 1; > + lock= mysql_lock_tables(m_thd, &table, 1, > + MYSQL_OPEN_REOPEN | > MYSQL_LOCK_USE_MALLOC); > + m_thd->in_lock_tables= 0; > + if (lock == NULL || > + !(merged_lock= mysql_lock_merge(m_thd->lock, lock, m_thd))) > + { > + result= true; > + break; > + } > + m_thd->lock= merged_lock; What MDEV does it fix? I wonder why other cases in recover_from_failed_open doesn't do that > + } > + break; > + } > case OT_BACKOFF_AND_RETRY: > case OT_REOPEN_TABLES: > case OT_NO_ACTION: > @@ -4370,6 +4611,8 @@ bool open_tables(THD *thd, const DDL_options_st > &options, > { > if (ot_ctx.can_recover_from_failed_open()) > { > + // FIXME: is this really used? > + DBUG_ASSERT(0); I take it, you weren't able to get this assert to fire? > close_tables_for_reopen(thd, start, > ot_ctx.start_of_statement_svp()); > if (ot_ctx.recover_from_failed_open()) Regards, Sergei VP of MariaDB Server Engineering and [email protected] _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : [email protected] Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp

