未验证 提交 afcf30be 编写于 作者: D dreamedcheng 提交者: GitHub

Fix overflow of shmCommittedGxactArray on standby (#11071)

Previously, the standby replayed the DTX info of a checkpoint XLOG record
in function XLogProcessCheckpointRecord. However, in certain cases
this causes an anomaly: a DTX may have flushed its FORGET COMMITTED XLOG record to
disk but not yet changed its own state to DTX_STATE_INSERTED_FORGET_COMMITTED.
If the checkpoint process is calculating DTX info at this very moment, it
will include the DTX in its XLOG record. When the standby then replays this
checkpoint XLOG record from the master, it adds an already forgotten GID to
shmCommittedGxactArray again, which may overflow shmCommittedGxactArray.

Since DTX info stored in checkpoint XLOG record has been populated earlier in
ReadCheckpointRecord(), there is no need to call XLogProcessCheckpointRecord()
again during recovery.
Co-authored-by: wuchengwen <wcw190496@alibaba-inc.com>
Co-authored-by: Denis Smirnov <sd@arenadata.io>
上级 71043c5b
......@@ -7337,22 +7337,6 @@ StartupXLOG(void)
*/
AdvanceNextFullTransactionIdPastXid(record->xl_xid);
/*
* See if this record is a checkpoint, if yes then uncover it to
* find distributed committed Xacts.
* No need to unpack checkpoint in crash recovery mode
*/
uint8 xlogRecInfo = record->xl_info & ~XLR_INFO_MASK;
if (IsStandbyMode() &&
record->xl_rmid == RM_XLOG_ID &&
(xlogRecInfo == XLOG_CHECKPOINT_SHUTDOWN
|| xlogRecInfo == XLOG_CHECKPOINT_ONLINE))
{
XLogProcessCheckpointRecord(xlogreader);
memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
}
/*
* Before replaying this record, check if this record causes
* the current timeline to change. The record is already
......
......@@ -25,6 +25,7 @@
#include "storage/proc.h"
#include "storage/shmem.h"
#include "storage/procarray.h"
#include "utils/faultinjector.h"
#include "access/xact.h"
#include "cdb/cdbgang.h"
......@@ -458,17 +459,36 @@ redoDistributedCommitRecord(TMGXACT_LOG *gxact_log)
if (i == *shmNumCommittedGxacts)
{
#ifdef FAULT_INJECTOR
if (SIMPLE_FAULT_INJECTOR("standby_gxacts_overflow") == FaultInjectorTypeSkip)
{
max_tm_gxacts = 1;
elog(LOG, "Committed gid array length: %d", *shmNumCommittedGxacts);
}
#endif
/*
* Transaction not found, this is the first log of this transaction.
*/
if (*shmNumCommittedGxacts >= max_tm_gxacts)
{
StringInfoData gxact_array;
initStringInfo(&gxact_array);
for (int j = 0; j < *shmNumCommittedGxacts; j++)
{
appendStringInfo(&gxact_array, "shmCommittedGxactArray[%d]: %s\n",
j, shmCommittedGxactArray[j].gid);
}
ereport(FATAL,
(errmsg("the limit of %d distributed transactions has been reached",
max_tm_gxacts),
(errmsg("the limit of %d distributed transactions has been reached "\
"while adding gid = %s. Committed gid array length: %d, dump:\n%s",
max_tm_gxacts, gxact_log->gid, *shmNumCommittedGxacts, gxact_array.data),
errdetail("It should not happen. Temporarily increase "
"max_connections (need postmaster reboot) on "
"the postgres (master or standby) to work "
"around this issue and then report a bug")));
}
shmCommittedGxactArray[(*shmNumCommittedGxacts)++] = *gxact_log;
elog((Debug_print_full_dtm ? LOG : DEBUG5),
......
-- Test that replay on the standby does not overflow shmCommittedGxactArray.
-- This case covers the following scenario: a DTX has flushed its
-- FORGET COMMITTED XLOG record to disk but has not yet changed its own
-- state to DTX_STATE_INSERTED_FORGET_COMMITTED. If the checkpoint process
-- is calculating DTX info at this very moment, it will include the DTX
-- in its XLOG record.
-- wait_for_standby_replay(retries): poll pg_stat_replication until
-- flush_lsn equals replay_lsn.
--   retries: maximum number of 0.1 second sleeps between polls.
--   Returns true once flush_lsn = replay_lsn, and false when
--   pg_stat_replication has no row or the retries are exhausted.
create or replace function wait_for_standby_replay (retries int) returns bool as
$$
declare
	i int; /* in func */
	result bool; /* in func */
begin
	i := 0; /* in func */
	-- Wait until the mirror/standby has replayed up to flush location
	loop
		SELECT flush_lsn = replay_lsn INTO result from pg_stat_replication; /* in func */
		-- No row in pg_stat_replication: there is nothing to wait for
		if not found then
			return false; /* in func */
		end if; /* in func */
		-- A NULL comparison result is not true, so polling simply continues
		if result then
			return true; /* in func */
		end if; /* in func */
		if i >= retries then
			return false; /* in func */
		end if; /* in func */
		perform pg_sleep(0.1); /* in func */
		-- Discard the statistics snapshot so the next poll reads fresh values
		perform pg_stat_clear_snapshot(); /* in func */
		i := i + 1; /* in func */
	end loop; /* in func */
end; /* in func */
$$ language plpgsql;
CREATE
select wait_for_standby_replay(1200);
wait_for_standby_replay
-------------------------
t
(1 row)
create table test_dtx_standby_tbl(c1 int);
CREATE
-- We have just created a checkpoint. The next automatic checkpoint
-- will be triggered only after 5 minutes or after CheckPointSegments
-- wal segments. Neither of that can happen until this test calls
-- explicit checkpoint.
checkpoint;
CHECKPOINT
1: select gp_inject_fault_infinite('dtm_before_insert_forget_comitted', 'suspend', dbid) from gp_segment_configuration where content = -1 and role = 'p';
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
1: select gp_inject_fault_infinite('checkpoint_after_redo_calculated', 'suspend', dbid) from gp_segment_configuration where content = -1 and role = 'p';
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
1&: select gp_wait_until_triggered_fault('dtm_before_insert_forget_comitted', 1, dbid) from gp_segment_configuration where content = -1 and role = 'p'; <waiting ...>
-- Record the transaction into the checkpoint XLOG and make sure the commit forget XLOG is written
-- before the checkpoint XLOG.
2&: insert into test_dtx_standby_tbl select generate_series(1,10); <waiting ...>
1<: <... completed>
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
1&: select gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, dbid) from gp_segment_configuration where content = -1 and role = 'p'; <waiting ...>
3&: checkpoint; <waiting ...>
1<: <... completed>
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
1: select gp_inject_fault_infinite('dtm_before_insert_forget_comitted', 'reset', dbid) from gp_segment_configuration where content = -1 and role = 'p';
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
2<: <... completed>
INSERT 10
1: select gp_inject_fault_infinite('checkpoint_after_redo_calculated', 'reset', dbid) from gp_segment_configuration where content = -1 and role = 'p';
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
3<: <... completed>
CHECKPOINT
-- now the array may have 1 unforgotten gid, and we set max_tm_gxacts to 1
1: select gp_inject_fault_infinite('standby_gxacts_overflow', 'skip', dbid) from gp_segment_configuration where content = -1 and role = 'm';
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
-- this DTX might overflow gxacts array
2: insert into test_dtx_standby_tbl select generate_series(11,20);
INSERT 10
-- Wait for the standby to replay all XLOG
select wait_for_standby_replay(1200);
wait_for_standby_replay
-------------------------
t
(1 row)
select gp_inject_fault_infinite('standby_gxacts_overflow', 'reset', dbid) from gp_segment_configuration where content = -1 and role = 'm';
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
drop table test_dtx_standby_tbl;
DROP
drop function wait_for_standby_replay(int);
DROP
......@@ -243,3 +243,6 @@ test: distributed_transactions
# Test for tablespace
test: concurrent_drop_truncate_tablespace
# Test for distributed commit array overflow during replay on standby
test: standby_replay_dtx_info
-- Test that replay on the standby does not overflow shmCommittedGxactArray.
-- This case covers the following scenario: a DTX has flushed its
-- FORGET COMMITTED XLOG record to disk but has not yet changed its own
-- state to DTX_STATE_INSERTED_FORGET_COMMITTED. If the checkpoint process
-- is calculating DTX info at this very moment, it will include the DTX
-- in its XLOG record.
-- wait_for_standby_replay(retries): poll pg_stat_replication until
-- flush_lsn equals replay_lsn.
--   retries: maximum number of 0.1 second sleeps between polls.
--   Returns true once flush_lsn = replay_lsn, and false when
--   pg_stat_replication has no row or the retries are exhausted.
create or replace function wait_for_standby_replay (retries int) returns bool as
$$
declare
	i int; /* in func */
	result bool; /* in func */
begin
	i := 0; /* in func */
	-- Wait until the mirror/standby has replayed up to flush location
	loop
		SELECT flush_lsn = replay_lsn INTO result from pg_stat_replication; /* in func */
		-- No row in pg_stat_replication: there is nothing to wait for
		if not found then
			return false; /* in func */
		end if; /* in func */
		-- A NULL comparison result is not true, so polling simply continues
		if result then
			return true; /* in func */
		end if; /* in func */
		if i >= retries then
			return false; /* in func */
		end if; /* in func */
		perform pg_sleep(0.1); /* in func */
		-- Discard the statistics snapshot so the next poll reads fresh values
		perform pg_stat_clear_snapshot(); /* in func */
		i := i + 1; /* in func */
	end loop; /* in func */
end; /* in func */
$$ language plpgsql;
-- Start only after the standby has replayed everything flushed so far.
select wait_for_standby_replay(1200);
create table test_dtx_standby_tbl(c1 int);
-- We have just created a checkpoint. The next automatic checkpoint
-- will be triggered only after 5 minutes or after CheckPointSegments
-- wal segments. Neither of that can happen until this test calls
-- explicit checkpoint.
checkpoint;
-- Session 1 arms two suspend faults on the content = -1, role = 'p' instance:
-- one at the point named dtm_before_insert_forget_comitted and one at the
-- point named checkpoint_after_redo_calculated.
1: select gp_inject_fault_infinite('dtm_before_insert_forget_comitted', 'suspend', dbid) from gp_segment_configuration where content = -1 and role = 'p';
1: select gp_inject_fault_infinite('checkpoint_after_redo_calculated', 'suspend', dbid) from gp_segment_configuration where content = -1 and role = 'p';
-- Wait in the background until the first fault has been triggered once
-- (presumably by the insert issued from session 2 below).
1&: select gp_wait_until_triggered_fault('dtm_before_insert_forget_comitted', 1, dbid) from gp_segment_configuration where content = -1 and role = 'p';
-- Record the transaction into the checkpoint XLOG and make sure the commit forget XLOG is written
-- before the checkpoint XLOG.
2&: insert into test_dtx_standby_tbl select generate_series(1,10);
1<:
-- Start a checkpoint in session 3 while the insert is still suspended, and
-- wait until the checkpoint fault has been triggered once.
1&: select gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, dbid) from gp_segment_configuration where content = -1 and role = 'p';
3&: checkpoint;
1<:
-- Release the suspended insert first, then the suspended checkpoint.
1: select gp_inject_fault_infinite('dtm_before_insert_forget_comitted', 'reset', dbid) from gp_segment_configuration where content = -1 and role = 'p';
2<:
1: select gp_inject_fault_infinite('checkpoint_after_redo_calculated', 'reset', dbid) from gp_segment_configuration where content = -1 and role = 'p';
3<:
-- now the array may have 1 unforgotten gid, and we set max_tm_gxacts to 1
1: select gp_inject_fault_infinite('standby_gxacts_overflow', 'skip', dbid) from gp_segment_configuration where content = -1 and role = 'm';
-- this DTX might overflow gxacts array
2: insert into test_dtx_standby_tbl select generate_series(11,20);
-- Wait for the standby to replay all XLOG
select wait_for_standby_replay(1200);
-- Clean up: disarm the remaining fault and drop the objects created by this test.
select gp_inject_fault_infinite('standby_gxacts_overflow', 'reset', dbid) from gp_segment_configuration where content = -1 and role = 'm';
drop table test_dtx_standby_tbl;
drop function wait_for_standby_replay(int);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册