recoverseg_from_file.out 4.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
-- Test gprecoverseg from config file uses the correct dbid.
--
-- In GitHub issue 9837, the dbid in gp_segment_configuration is not
-- consistent with the dbid in the file internal.auto.conf.
-- This is caused by gprecoverseg fetching the smallest dbid in
-- gp_segment_configuration that is not occupied by others when
-- adding a new mirror. When the dbids in gp_segment_configuration are
-- not contiguous, the inconsistency will occur.

include: helpers/server_helpers.sql;
CREATE

--
-- generate_recover_config_file:
--   generate config file used by recoverseg -i
--
create or replace function generate_recover_config_file(datadir text, port text)
returns void as $$
    # Write the input file that the test later passes to `gprecoverseg -i`.
    #
    # Parameters:
    #   datadir - data directory to place in the config entry
    #   port    - port to place in the config entry (already text)
    #
    # The file /tmp/recover_config_file is (re)created with a single line:
    # two identical 'host|port|datadir' entries separated by one space,
    # where host is this machine's node name.
    # NOTE(review): repeating the same entry presumably requests recovery
    # to the same location -- confirm against the gprecoverseg -i format.
    import os

    myhost = os.uname()[1]
    inplaceConfig = myhost + '|' + port + '|' + datadir
    configStr = inplaceConfig + ' ' + inplaceConfig

    # Use a context manager so the file is closed even if the write fails.
    with open("/tmp/recover_config_file", "w") as f:
        f.write(configStr)
$$ language plpythonu;
CREATE

SELECT dbid, role, preferred_role, content, mode, status FROM gp_segment_configuration order by dbid;
 dbid | role | preferred_role | content | mode | status 
------+------+----------------+---------+------+--------
 1    | p    | p              | -1      | n    | u      
 2    | p    | p              | 0       | s    | u      
 3    | p    | p              | 1       | s    | u      
 4    | p    | p              | 2       | s    | u      
 5    | m    | m              | 0       | s    | u      
 6    | m    | m              | 1       | s    | u      
 7    | m    | m              | 2       | s    | u      
 8    | m    | m              | -1      | s    | u      
(8 rows)
-- stop a primary in order to trigger a mirror promotion
select pg_ctl((select datadir from gp_segment_configuration c where c.role='p' and c.content=1), 'stop');
 pg_ctl 
--------
 OK     
(1 row)

-- trigger failover
select gp_request_fts_probe_scan();
 gp_request_fts_probe_scan 
---------------------------
 t                         
(1 row)

46 47
-- wait for content 1 (earlier mirror, now primary) to finish the promotion
1U: select 1;
48 49 50 51 52
 ?column? 
----------
 1        
(1 row)
-- Quit this utility mode session, as we need to start a fresh one below
53
1Uq: ... <quitting>
54 55

-- make the dbid in gp_segment_configuration not continuous
56
-- dbid=2 corresponds to content id = 0
57 58 59 60 61
set allow_system_table_mods to true;
SET
update gp_segment_configuration set dbid=9 where dbid=2;
UPDATE 1

62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
-- trigger failover
select gp_request_fts_probe_scan();
 gp_request_fts_probe_scan 
---------------------------
 t                         
(1 row)

-- wait for content 0 (earlier mirror, now primary) to finish the promotion
0U: select 1;
 ?column? 
----------
 1        
(1 row)
-- Quit this utility mode session, as we need to start a fresh one below
0Uq: ... <quitting>

78 79 80 81 82 83 84
-- generate recover config file
select generate_recover_config_file( (select datadir from gp_segment_configuration c where c.role='m' and c.content=1), (select port from gp_segment_configuration c where c.role='m' and c.content=1)::text);
 generate_recover_config_file 
------------------------------
                              
(1 row)

85
-- recover from config file, only seg with content=1 will be recovered
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
!\retcode gprecoverseg -a -i /tmp/recover_config_file;
-- start_ignore
-- end_ignore
(exited with code 0)

-- after gprecoverseg -i, the down segment should be up
-- in mirror mode
select status from gp_segment_configuration where role='m' and content=1;
 status 
--------
 u      
(1 row)

-- recover should reuse the old dbid and not occupy dbid=2
select dbid from gp_segment_configuration where dbid=2;
 dbid 
------
(0 rows)

update gp_segment_configuration set dbid=2 where dbid=9;
UPDATE 1
set allow_system_table_mods to false;
SET

-- we manually changed the dbid from 2 to 9, which caused the
-- corresponding segment to go down as well, so run a full
-- recovery here
!\retcode gprecoverseg -aF;
-- start_ignore
-- end_ignore
(exited with code 0)

-- rebalance the cluster
!\retcode gprecoverseg -ar;
-- start_ignore
-- end_ignore
(exited with code 0)

-- remove the config file
!\retcode rm /tmp/recover_config_file