diff --git a/gpMgmt/bin/gpload.py b/gpMgmt/bin/gpload.py index ef60e7c41676cf7570657590af80bf3d9baf7557..009bc2c2e0696bb830e79004e52e056fbed872f4 100755 --- a/gpMgmt/bin/gpload.py +++ b/gpMgmt/bin/gpload.py @@ -112,6 +112,7 @@ valid_tokens = { "quote": {'parse_children': True, 'parent': "input"}, "encoding": {'parse_children': True, 'parent': "input"}, "force_not_null": {'parse_children': False, 'parent': "input"}, + "fill_missing_fields": {'parse_children': False, 'parent': "input"}, "error_limit": {'parse_children': True, 'parent': "input"}, "error_percent": {'parse_children': True, 'parent': "input"}, "error_table": {'parse_children': True, 'parent': "input"}, @@ -2365,6 +2366,10 @@ class gpload: else: from_cols = self.from_columns + if formatType == 'csv' or formatType == 'text': + if self.getconfig('gpload:input:fill_missing_fields', bool, False): + self.formatOpts += 'fill missing fields' + # If the 'reuse tables' option was specified we now try to find an # already existing external table in the catalog which will match # the one that we need to use. It must have identical attributes, diff --git a/gpMgmt/bin/gpload_test/gpload2/TEST.py b/gpMgmt/bin/gpload_test/gpload2/TEST.py index e874f3b109d33adf92f0e2ef1516c0920720f4d0..b5db42e5c9fa71097b2562ce49134d1e6f36c226 100755 --- a/gpMgmt/bin/gpload_test/gpload2/TEST.py +++ b/gpMgmt/bin/gpload_test/gpload2/TEST.py @@ -98,7 +98,7 @@ d = mkpath('config') if not os.path.exists(d): os.mkdir(d) -def write_config_file(mode='insert', reuse_flag='',columns_flag='0',mapping='0',portNum='8081',database='reuse_gptest',host='localhost',formatOpts='text',file='data/external_file_01.txt',table='texttable',format='text',delimiter="'|'",escape='',quote='',truncate='False',log_errors=None, error_limit='0',error_table=None,externalSchema=None,staging_table=None,fast_match='false', encoding=None, preload=True): +def write_config_file(mode='insert', reuse_flag='',columns_flag='0',mapping='0',portNum='8081',database='reuse_gptest',host='localhost',formatOpts='text',file='data/external_file_01.txt',table='texttable',format='text',delimiter="'|'",escape='',quote='',truncate='False',log_errors=None, error_limit='0',error_table=None,externalSchema=None,staging_table=None,fast_match='false', encoding=None, preload=True, fill=False): f = open(mkpath('config/config_file'),'w') f.write("VERSION: 1.0.0.1") @@ -147,6 +147,8 @@ def write_config_file(mode='insert', reuse_flag='',columns_flag='0',mapping='0', f.write("\n - ESCAPE: "+escape) if quote: f.write("\n - QUOTE: "+quote) + if fill: + f.write("\n - FILL_MISSING_FIELDS: true") f.write("\n OUTPUT:") f.write("\n - TABLE: "+table) if mode: @@ -443,7 +445,7 @@ class GPLoad_FormatOpts_TestCase(unittest.TestCase): def test_00_gpload_formatOpts_setup(self): "0 gpload setup" - for num in range(1,39): + for num in range(1,40): f = open(mkpath('query%d.sql' % num),'w') f.write("\! gpload -f "+mkpath('config/config_file')+ " -d reuse_gptest\n"+"\! gpload -f "+mkpath('config/config_file')+ " -d reuse_gptest\n") f.close() @@ -753,6 +755,14 @@ class GPLoad_FormatOpts_TestCase(unittest.TestCase): write_config_file(mode='insert',reuse_flag='true',fast_match='false',file='data_file.txt',error_table="err_table",error_limit='1000',preload=False) self.doTest(38) + def test_39_gpload_fill_missing_fields(self): + "39 gpload fill missing fields" + file = mkpath('setup.sql') + runfile(file) + copy_data('external_file_04.txt','data_file.txt') + write_config_file(mode='insert',reuse_flag='false',fast_match='false',file='data_file.txt',table='texttable1', error_limit='1000', fill=True) + self.doTest(39) + if __name__ == '__main__': suite = unittest.TestLoader().loadTestsFromTestCase(GPLoad_FormatOpts_TestCase) runner = unittest.TextTestRunner(verbosity=2) diff --git a/gpMgmt/bin/gpload_test/gpload2/query39.ans b/gpMgmt/bin/gpload_test/gpload2/query39.ans new file mode 100644 index 0000000000000000000000000000000000000000..e6a4d427fec93cc315cc3545855c41f024f41df8 --- /dev/null +++ b/gpMgmt/bin/gpload_test/gpload2/query39.ans @@ -0,0 +1,16 @@ +2020-06-02 14:55:41|INFO|gpload session started 2020-06-02 14:55:41 +2020-06-02 14:55:46|INFO|setting schema 'public' for table 'texttable1' +2020-06-02 14:55:46|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file.txt" -t 30 +2020-06-02 14:55:46|INFO|running time: 5.27 seconds +2020-06-02 14:55:51|INFO|rows Inserted = 16 +2020-06-02 14:55:51|INFO|rows Updated = 0 +2020-06-02 14:55:51|INFO|data formatting errors = 0 +2020-06-02 14:55:51|INFO|gpload succeeded +2020-06-02 14:55:51|INFO|gpload session started 2020-06-02 14:55:51 +2020-06-02 14:55:56|INFO|setting schema 'public' for table 'texttable1' +2020-06-02 14:55:56|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file.txt" -t 30 +2020-06-02 14:55:56|INFO|running time: 5.09 seconds +2020-06-02 14:56:01|INFO|rows Inserted = 16 +2020-06-02 14:56:01|INFO|rows Updated = 0 +2020-06-02 14:56:01|INFO|data formatting errors = 0 +2020-06-02 14:56:01|INFO|gpload succeeded diff --git a/gpMgmt/bin/gpload_test/gpload2/setup.ans b/gpMgmt/bin/gpload_test/gpload2/setup.ans index 87506216312f7e29a36717ed4abe9a87b3bbfd6c..c66045578264a535a79ae85ec3e71fc2752d10c3 100644 --- a/gpMgmt/bin/gpload_test/gpload2/setup.ans +++ b/gpMgmt/bin/gpload_test/gpload2/setup.ans @@ -13,11 +13,19 @@ DROP TABLE DROP TABLE IF EXISTS csvtable; NOTICE: table "csvtable" does not exist, skipping DROP TABLE +DROP TABLE IF EXISTS texttable1; +NOTICE: table "texttable1" does not exist, skipping +DROP TABLE CREATE TABLE texttable ( s1 text, s2 text, s3 text, dt timestamp, n1 smallint, n2 integer, n3 bigint, n4 decimal, n5 numeric, n6 real, n7 double precision) DISTRIBUTED BY (n1); CREATE TABLE +CREATE TABLE texttable1 ( + s1 text, s2 text, s3 text, dt timestamp, + n1 smallint, n2 integer, n3 bigint, n4 decimal, + n5 numeric, n6 real, n7 double precision, n8 int) DISTRIBUTED BY (n1); +CREATE TABLE CREATE TABLE csvtable ( year int, make text, model text, decription text, price decimal) DISTRIBUTED BY (year); diff --git a/gpMgmt/bin/gpload_test/gpload2/setup.sql b/gpMgmt/bin/gpload_test/gpload2/setup.sql index b5a8608ce28a8970d91131f99b0e35e545d3c9f3..9c9ca86899348b98a436a67c6c85a0acc64e89b6 100644 --- a/gpMgmt/bin/gpload_test/gpload2/setup.sql +++ b/gpMgmt/bin/gpload_test/gpload2/setup.sql @@ -9,10 +9,15 @@ CREATE SCHEMA test; DROP EXTERNAL TABLE IF EXISTS temp_gpload_staging_table; DROP TABLE IF EXISTS texttable; DROP TABLE IF EXISTS csvtable; +DROP TABLE IF EXISTS texttable1; CREATE TABLE texttable ( s1 text, s2 text, s3 text, dt timestamp, n1 smallint, n2 integer, n3 bigint, n4 decimal, n5 numeric, n6 real, n7 double precision) DISTRIBUTED BY (n1); +CREATE TABLE texttable1 ( + s1 text, s2 text, s3 text, dt timestamp, + n1 smallint, n2 integer, n3 bigint, n4 decimal, + n5 numeric, n6 real, n7 double precision, n8 int) DISTRIBUTED BY (n1); CREATE TABLE csvtable ( year int, make text, model text, decription text, price decimal) DISTRIBUTED BY (year);