From 87fef901cd5191e415a61ade9b6eab3121fed05a Mon Sep 17 00:00:00 2001 From: Wen Lin Date: Thu, 4 Jun 2020 11:51:28 +0800 Subject: [PATCH] Add "FILL_MISSING_FIELDS" option for gpload. --- gpMgmt/bin/gpload.py | 5 +++++ gpMgmt/bin/gpload_test/gpload2/TEST.py | 14 ++++++++++++-- gpMgmt/bin/gpload_test/gpload2/query39.ans | 16 ++++++++++++++++ gpMgmt/bin/gpload_test/gpload2/setup.ans | 8 ++++++++ gpMgmt/bin/gpload_test/gpload2/setup.sql | 5 +++++ 5 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 gpMgmt/bin/gpload_test/gpload2/query39.ans diff --git a/gpMgmt/bin/gpload.py b/gpMgmt/bin/gpload.py index ef60e7c416..009bc2c2e0 100755 --- a/gpMgmt/bin/gpload.py +++ b/gpMgmt/bin/gpload.py @@ -112,6 +112,7 @@ valid_tokens = { "quote": {'parse_children': True, 'parent': "input"}, "encoding": {'parse_children': True, 'parent': "input"}, "force_not_null": {'parse_children': False, 'parent': "input"}, + "fill_missing_fields": {'parse_children': False, 'parent': "input"}, "error_limit": {'parse_children': True, 'parent': "input"}, "error_percent": {'parse_children': True, 'parent': "input"}, "error_table": {'parse_children': True, 'parent': "input"}, @@ -2365,6 +2366,10 @@ class gpload: else: from_cols = self.from_columns + if formatType == 'csv' or formatType == 'text': + if self.getconfig('gpload:input:fill_missing_fields', bool, False): + self.formatOpts += 'fill missing fields' + # If the 'reuse tables' option was specified we now try to find an # already existing external table in the catalog which will match # the one that we need to use. It must have identical attributes, diff --git a/gpMgmt/bin/gpload_test/gpload2/TEST.py b/gpMgmt/bin/gpload_test/gpload2/TEST.py index e874f3b109..b5db42e5c9 100755 --- a/gpMgmt/bin/gpload_test/gpload2/TEST.py +++ b/gpMgmt/bin/gpload_test/gpload2/TEST.py @@ -98,7 +98,7 @@ d = mkpath('config') if not os.path.exists(d): os.mkdir(d) -def write_config_file(mode='insert', reuse_flag='',columns_flag='0',mapping='0',portNum='8081',database='reuse_gptest',host='localhost',formatOpts='text',file='data/external_file_01.txt',table='texttable',format='text',delimiter="'|'",escape='',quote='',truncate='False',log_errors=None, error_limit='0',error_table=None,externalSchema=None,staging_table=None,fast_match='false', encoding=None, preload=True): +def write_config_file(mode='insert', reuse_flag='',columns_flag='0',mapping='0',portNum='8081',database='reuse_gptest',host='localhost',formatOpts='text',file='data/external_file_01.txt',table='texttable',format='text',delimiter="'|'",escape='',quote='',truncate='False',log_errors=None, error_limit='0',error_table=None,externalSchema=None,staging_table=None,fast_match='false', encoding=None, preload=True, fill=False): f = open(mkpath('config/config_file'),'w') f.write("VERSION: 1.0.0.1") @@ -147,6 +147,8 @@ def write_config_file(mode='insert', reuse_flag='',columns_flag='0',mapping='0', f.write("\n - ESCAPE: "+escape) if quote: f.write("\n - QUOTE: "+quote) + if fill: + f.write("\n - FILL_MISSING_FIELDS: true") f.write("\n OUTPUT:") f.write("\n - TABLE: "+table) if mode: @@ -443,7 +445,7 @@ class GPLoad_FormatOpts_TestCase(unittest.TestCase): def test_00_gpload_formatOpts_setup(self): "0 gpload setup" - for num in range(1,39): + for num in range(1,40): f = open(mkpath('query%d.sql' % num),'w') f.write("\! gpload -f "+mkpath('config/config_file')+ " -d reuse_gptest\n"+"\! gpload -f "+mkpath('config/config_file')+ " -d reuse_gptest\n") f.close() @@ -753,6 +755,14 @@ class GPLoad_FormatOpts_TestCase(unittest.TestCase): write_config_file(mode='insert',reuse_flag='true',fast_match='false',file='data_file.txt',error_table="err_table",error_limit='1000',preload=False) self.doTest(38) + def test_39_gpload_fill_missing_fields(self): + "39 gpload fill missing fields" + file = mkpath('setup.sql') + runfile(file) + copy_data('external_file_04.txt','data_file.txt') + write_config_file(mode='insert',reuse_flag='false',fast_match='false',file='data_file.txt',table='texttable1', error_limit='1000', fill=True) + self.doTest(39) + if __name__ == '__main__': suite = unittest.TestLoader().loadTestsFromTestCase(GPLoad_FormatOpts_TestCase) runner = unittest.TextTestRunner(verbosity=2) diff --git a/gpMgmt/bin/gpload_test/gpload2/query39.ans b/gpMgmt/bin/gpload_test/gpload2/query39.ans new file mode 100644 index 0000000000..e6a4d427fe --- /dev/null +++ b/gpMgmt/bin/gpload_test/gpload2/query39.ans @@ -0,0 +1,16 @@ +2020-06-02 14:55:41|INFO|gpload session started 2020-06-02 14:55:41 +2020-06-02 14:55:46|INFO|setting schema 'public' for table 'texttable1' +2020-06-02 14:55:46|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file.txt" -t 30 +2020-06-02 14:55:46|INFO|running time: 5.27 seconds +2020-06-02 14:55:51|INFO|rows Inserted = 16 +2020-06-02 14:55:51|INFO|rows Updated = 0 +2020-06-02 14:55:51|INFO|data formatting errors = 0 +2020-06-02 14:55:51|INFO|gpload succeeded +2020-06-02 14:55:51|INFO|gpload session started 2020-06-02 14:55:51 +2020-06-02 14:55:56|INFO|setting schema 'public' for table 'texttable1' +2020-06-02 14:55:56|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file.txt" -t 30 +2020-06-02 14:55:56|INFO|running time: 5.09 seconds +2020-06-02 14:56:01|INFO|rows Inserted = 16 +2020-06-02 14:56:01|INFO|rows Updated = 0 +2020-06-02 14:56:01|INFO|data formatting errors = 0 +2020-06-02 14:56:01|INFO|gpload succeeded diff --git a/gpMgmt/bin/gpload_test/gpload2/setup.ans b/gpMgmt/bin/gpload_test/gpload2/setup.ans index 8750621631..c660455782 100644 --- a/gpMgmt/bin/gpload_test/gpload2/setup.ans +++ b/gpMgmt/bin/gpload_test/gpload2/setup.ans @@ -13,11 +13,19 @@ DROP TABLE DROP TABLE IF EXISTS csvtable; NOTICE: table "csvtable" does not exist, skipping DROP TABLE +DROP TABLE IF EXISTS texttable1; +NOTICE: table "texttable1" does not exist, skipping +DROP TABLE CREATE TABLE texttable ( s1 text, s2 text, s3 text, dt timestamp, n1 smallint, n2 integer, n3 bigint, n4 decimal, n5 numeric, n6 real, n7 double precision) DISTRIBUTED BY (n1); CREATE TABLE +CREATE TABLE texttable1 ( + s1 text, s2 text, s3 text, dt timestamp, + n1 smallint, n2 integer, n3 bigint, n4 decimal, + n5 numeric, n6 real, n7 double precision, n8 int) DISTRIBUTED BY (n1); +CREATE TABLE CREATE TABLE csvtable ( year int, make text, model text, decription text, price decimal) DISTRIBUTED BY (year); diff --git a/gpMgmt/bin/gpload_test/gpload2/setup.sql b/gpMgmt/bin/gpload_test/gpload2/setup.sql index b5a8608ce2..9c9ca86899 100644 --- a/gpMgmt/bin/gpload_test/gpload2/setup.sql +++ b/gpMgmt/bin/gpload_test/gpload2/setup.sql @@ -9,10 +9,15 @@ CREATE SCHEMA test; DROP EXTERNAL TABLE IF EXISTS temp_gpload_staging_table; DROP TABLE IF EXISTS texttable; DROP TABLE IF EXISTS csvtable; +DROP TABLE IF EXISTS texttable1; CREATE TABLE texttable ( s1 text, s2 text, s3 text, dt timestamp, n1 smallint, n2 integer, n3 bigint, n4 decimal, n5 numeric, n6 real, n7 double precision) DISTRIBUTED BY (n1); +CREATE TABLE texttable1 ( + s1 text, s2 text, s3 text, dt timestamp, + n1 smallint, n2 integer, n3 bigint, n4 decimal, + n5 numeric, n6 real, n7 double precision, n8 int) DISTRIBUTED BY (n1); CREATE TABLE csvtable ( year int, make text, model text, decription text, price decimal) DISTRIBUTED BY (year); -- GitLab