未验证 提交 7b0f1758 编写于 作者: H Hubert Zhang 提交者: GitHub

[Backport 6X] Using gp_add_segment to register mirror in catalog

When introducing a new mirror, we need two steps:
1. start mirror segment
2. update gp_segment_configuration catalog

Previously, gp_add_segment_mirror() was called to update
the catalog, but the dbid was chosen by get_availableDbId(), which
is not guaranteed to match the dbid recorded in internal.auto.conf.
Reported in issue #9837.
Reviewed-by: NPaul Guo <pguo@pivotal.io>
Reviewed-by: NBhuvnesh Chaudhary <bhuvnesh2703@gmail.com>

cherry-pick from commit: f7965d and 1ee999
上级 1d37e0ff
......@@ -126,11 +126,6 @@ class GpConfigurationProviderUsingGpdbCatalog(GpConfigurationProvider) :
for seg in update.mirror_to_add:
mirror_map[ seg.getSegmentContentId() ] = seg
# reset dbId of new mirror segments to -1
# before invoking the operations which will assign them new ids
for seg in update.mirror_to_add:
seg.setSegmentDbId(-1)
# remove mirror segments (e.g. for gpexpand rollback)
for seg in update.mirror_to_remove:
self.__updateSystemConfigRemoveMirror(conn, seg, textForConfigTable)
......@@ -298,17 +293,20 @@ class GpConfigurationProviderUsingGpdbCatalog(GpConfigurationProvider) :
def __callSegmentAddMirror(self, conn, gpArray, seg):
"""
Call gp_add_segment_mirror() to add the mirror.
Return the new segment's dbid.
Similar to __callSegmentAdd, ideally we should call gp_add_segment_mirror() to add the mirror.
But chicken-egg problem also exists in mirror case. If we use gp_add_segment_mirror(),
new dbid will be chosen by `get_availableDbId()`, which cannot ensure to be same as dbid
in internal.auto.conf(see issue-9837). Refer to __callSegmentAdd for details.
"""
logger.debug('callSegmentAddMirror %s' % repr(seg))
sql = "SELECT gp_add_segment_mirror(%s::int2, %s, %s, %s, %s)" \
sql = "SELECT gp_add_segment(%s::int2, %s::int2, 'm', 'm', 'n', 'd', %s, %s, %s, %s)" \
% (
self.__toSqlIntValue(seg.getSegmentDbId()),
self.__toSqlIntValue(seg.getSegmentContentId()),
self.__toSqlIntValue(seg.getSegmentPort()),
self.__toSqlTextValue(seg.getSegmentHostName()),
self.__toSqlTextValue(seg.getSegmentAddress()),
self.__toSqlIntValue(seg.getSegmentPort()),
self.__toSqlTextValue(seg.getSegmentDataDirectory()),
)
......
-- Test gprecoverseg from config file uses the correct dbid.
--
-- In github issue 9837 dbid in gp_segment_configuration is not
-- consistent with dbid in file internal.auto.conf.
-- This is caused by gprecoverseg fetching the smallest dbid in
-- gp_segment_configuration that is not occupied by others when
-- adding a new mirror. When the dbids in gp_segment_configuration are
-- not continuous, the inconsistency will occur.
include: helpers/server_helpers.sql;
CREATE
--
-- generate_recover_config_file:
-- generate config file used by recoverseg -i
--
create or replace function generate_recover_config_file(datadir text, port text) returns void as $$ import io import os myhost = os.uname()[1] inplaceConfig = myhost + '|' + port + '|' + datadir configStr = inplaceConfig + ' ' + inplaceConfig f = open("/tmp/recover_config_file", "w") f.write(configStr) f.close() $$ language plpythonu;
CREATE
SELECT dbid, role, preferred_role, content, mode, status FROM gp_segment_configuration order by dbid;
dbid | role | preferred_role | content | mode | status
------+------+----------------+---------+------+--------
1 | p | p | -1 | n | u
2 | p | p | 0 | s | u
3 | p | p | 1 | s | u
4 | p | p | 2 | s | u
5 | m | m | 0 | s | u
6 | m | m | 1 | s | u
7 | m | m | 2 | s | u
8 | m | m | -1 | s | u
(8 rows)
-- stop a primary in order to trigger a mirror promotion
select pg_ctl((select datadir from gp_segment_configuration c where c.role='p' and c.content=1), 'stop');
pg_ctl
--------
OK
(1 row)
-- trigger failover
select gp_request_fts_probe_scan();
gp_request_fts_probe_scan
---------------------------
t
(1 row)
-- wait for content 1 (earlier mirror, now primary) to finish the promotion
1U: select 1;
?column?
----------
1
(1 row)
-- Quit this utility mode session, as need to start fresh one below
1Uq: ... <quitting>
-- make the dbid in gp_segment_configuration not continuous
-- dbid=2 corresponds to content 0 and role p, change it to dbid=9
set allow_system_table_mods to true;
SET
update gp_segment_configuration set dbid=9 where content=0 and role='p';
UPDATE 1
-- trigger failover
select gp_request_fts_probe_scan();
gp_request_fts_probe_scan
---------------------------
t
(1 row)
-- wait for content 0 (earlier mirror, now primary) to finish the promotion
0U: select 1;
?column?
----------
1
(1 row)
-- Quit this utility mode session, as need to start fresh one below
0Uq: ... <quitting>
-- generate recover config file
select generate_recover_config_file( (select datadir from gp_segment_configuration c where c.role='m' and c.content=1), (select port from gp_segment_configuration c where c.role='m' and c.content=1)::text);
generate_recover_config_file
------------------------------
(1 row)
-- recover from config file, only seg with content=1 will be recovered
!\retcode gprecoverseg -a -i /tmp/recover_config_file;
-- start_ignore
-- end_ignore
(exited with code 0)
-- after gprecoverseg -i, the down segment should be up
-- in mirror mode
select status from gp_segment_configuration where role='m' and content=1;
status
--------
u
(1 row)
-- recover should reuse the old dbid and not occupy dbid=2
select dbid from gp_segment_configuration where dbid=2;
dbid
------
(0 rows)
update gp_segment_configuration set dbid=2 where dbid=9;
UPDATE 1
set allow_system_table_mods to false;
SET
-- we manually changed dbid from 2 to 9, which caused the
-- corresponding segment to go down as well, so perform a full
-- recovery here
!\retcode gprecoverseg -a;
-- start_ignore
-- end_ignore
(exited with code 0)
-- rebalance the cluster
!\retcode gprecoverseg -ar;
-- start_ignore
-- end_ignore
(exited with code 0)
-- recheck gp_segment_configuration after rebalance
SELECT dbid, role, preferred_role, content, mode, status FROM gp_segment_configuration order by dbid;
dbid | role | preferred_role | content | mode | status
------+------+----------------+---------+------+--------
1 | p | p | -1 | n | u
2 | p | p | 0 | s | u
3 | p | p | 1 | s | u
4 | p | p | 2 | s | u
5 | m | m | 0 | s | u
6 | m | m | 1 | s | u
7 | m | m | 2 | s | u
8 | m | m | -1 | s | u
(8 rows)
-- remove the config file
!\retcode rm /tmp/recover_config_file
......@@ -186,6 +186,7 @@ test: segwalrep/die_commit_pending_replication
test: fts_errors
test: segwalrep/commit_blocking
test: segwalrep/fts_unblock_primary
test: segwalrep/recoverseg_from_file
test: segwalrep/mirror_promotion
test: segwalrep/cancel_commit_pending_replication
test: segwalrep/twophase_tolerance_with_mirror_promotion
......
-- Test that gprecoverseg from a config file uses the correct dbid.
--
-- In github issue 9837 the dbid in gp_segment_configuration is not
-- consistent with the dbid in the file internal.auto.conf.
-- This is caused by gprecoverseg fetching the smallest dbid in
-- gp_segment_configuration that is not occupied by others when
-- adding a new mirror. When the dbids in gp_segment_configuration are
-- not continuous, the inconsistency will occur.
include: helpers/server_helpers.sql;
--
-- generate_recover_config_file:
-- Generate the config file consumed by "gprecoverseg -i".
-- datadir/port identify the failed segment; the same host|port|datadir
-- triple is repeated for the recovery target, i.e. a single in-place
-- recovery line of the form
--   <host>|<port>|<datadir> <host>|<port>|<datadir>
-- is written to /tmp/recover_config_file.
--
create or replace function generate_recover_config_file(datadir text, port text)
returns void as $$
import io
import os
myhost = os.uname()[1]
inplaceConfig = myhost + '|' + port + '|' + datadir
configStr = inplaceConfig + ' ' + inplaceConfig
f = open("/tmp/recover_config_file", "w")
f.write(configStr)
f.close()
$$ language plpythonu;
-- Record the initial cluster layout so it can be compared after rebalance.
SELECT dbid, role, preferred_role, content, mode, status FROM gp_segment_configuration order by dbid;
-- stop a primary in order to trigger a mirror promotion
select pg_ctl((select datadir from gp_segment_configuration c
where c.role='p' and c.content=1), 'stop');
-- trigger failover
select gp_request_fts_probe_scan();
-- wait for content 1 (earlier mirror, now primary) to finish the promotion
1U: select 1;
-- Quit this utility mode session, as need to start fresh one below
1Uq:
-- make the dbid in gp_segment_configuration not continuous
-- dbid=2 corresponds to content 0 and role p, change it to dbid=9
-- (requires allow_system_table_mods to edit the catalog directly)
set allow_system_table_mods to true;
update gp_segment_configuration set dbid=9 where content=0 and role='p';
-- trigger failover
select gp_request_fts_probe_scan();
-- wait for content 0 (earlier mirror, now primary) to finish the promotion
0U: select 1;
-- Quit this utility mode session, as need to start fresh one below
0Uq:
-- generate recover config file for the down mirror of content 1
select generate_recover_config_file(
(select datadir from gp_segment_configuration c where c.role='m' and c.content=1),
(select port from gp_segment_configuration c where c.role='m' and c.content=1)::text);
-- recover from config file, only seg with content=1 will be recovered
!\retcode gprecoverseg -a -i /tmp/recover_config_file;
-- after gprecoverseg -i, the down segment should be up
-- in mirror mode
select status from gp_segment_configuration
where role='m' and content=1;
-- recover should reuse the old dbid and not occupy dbid=2
-- (expect zero rows: dbid=2 must remain free, proving the fix for issue 9837)
select dbid from gp_segment_configuration where dbid=2;
update gp_segment_configuration set dbid=2 where dbid=9;
set allow_system_table_mods to false;
-- we manually changed dbid from 2 to 9, which caused the
-- corresponding segment to go down as well, so perform a full
-- recovery here
!\retcode gprecoverseg -a;
-- rebalance the cluster
!\retcode gprecoverseg -ar;
-- recheck gp_segment_configuration after rebalance
SELECT dbid, role, preferred_role, content, mode, status FROM gp_segment_configuration order by dbid;
-- remove the config file
!\retcode rm /tmp/recover_config_file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册