未验证 提交 968499f6 编写于 作者: X xiaoxiao 提交者: GitHub

Fix 6X gpload failure in merge mode when column names contain capital letters (#10783)

* Fix gpload issues with multi-level partition tables and special characters in column names

Fix the match-column condition to resolve a primary key conflict when using gpload
merge mode to import data into a multi-level partition table.
Fix the failure that occurs when column names contain special characters or capital letters.

* Add double quotes around column names when creating the staging table
and omit the distribution key

* Fix gpload failure in merge mode when column names contain capital letters
Co-authored-by: NXiaoxiaoHe <hxiaoxiao@vmware.com>
上级 3219b792
......@@ -2518,9 +2518,9 @@ class gpload:
# MPP-14667 - self.reuse_tables should change one, and only one, aspect of how we build the following table,
# and that is, whether it's a temp table or not. In other words, is_temp_table = '' iff self.reuse_tables == True.
sql = 'CREATE %sTABLE %s ' % (is_temp_table, self.staging_table_name)
cols = map(lambda a:'%s %s' % (a[0], a[1]), target_columns)
cols = map(lambda a:'"%s" %s' % (a[0], a[1]), target_columns)
sql += "(%s)" % ','.join(cols)
sql += " DISTRIBUTED BY (%s)" % ', '.join(distcols)
#sql += " DISTRIBUTED BY (%s)" % ', '.join(distcols)
self.log(self.LOG, sql)
if not self.options.D:
......
......@@ -98,7 +98,7 @@ d = mkpath('config')
if not os.path.exists(d):
os.mkdir(d)
def write_config_file(mode='insert', reuse_flag='',columns_flag='0',mapping='0',portNum='8081',database='reuse_gptest',host='localhost',formatOpts='text',file='data/external_file_01.txt',table='texttable',format='text',delimiter="'|'",escape='',quote='',truncate='False',log_errors=None, error_limit='0',error_table=None,externalSchema=None,staging_table=None,fast_match='false', encoding=None, preload=True, fill=False, config='config/config_file'):
def write_config_file(mode='insert', reuse_flag='',columns_flag='0',mapping='0',portNum='8081',database='reuse_gptest',host='localhost',formatOpts='text',file='data/external_file_01.txt',table='texttable',format='text',delimiter="'|'",escape='',quote='',truncate='False',log_errors=None, error_limit='0',error_table=None,externalSchema=None,staging_table=None,fast_match='false', encoding=None, preload=True, fill=False, config='config/config_file', match_columns='true', update_columns='n2'):
f = open(mkpath(config),'w')
f.write("VERSION: 1.0.0.1")
......@@ -135,7 +135,6 @@ def write_config_file(mode='insert', reuse_flag='',columns_flag='0',mapping='0',
f.write("\n - COLUMNS:")
f.write("\n - 'Field1': bigint")
f.write("\n - 'Field#2': text")
if format:
f.write("\n - FORMAT: "+format)
if log_errors:
......@@ -164,11 +163,16 @@ def write_config_file(mode='insert', reuse_flag='',columns_flag='0',mapping='0',
if mode == 'merge':
f.write("\n - MODE: "+'merge')
f.write("\n - UPDATE_COLUMNS:")
f.write("\n - n2")
f.write("\n - MATCH_COLUMNS:")
f.write("\n - n1")
f.write("\n - s1")
f.write("\n - s2")
f.write("\n - "+update_columns)
if match_columns=='true':
f.write("\n - MATCH_COLUMNS:")
f.write("\n - n1")
f.write("\n - s1")
f.write("\n - s2")
if match_columns=='2':
f.write("\n - MATCH_COLUMNS:")
f.write("\n - '\"Field1\"'")
f.write("\n - '\"Field#2\"'")
if mapping=='1':
f.write("\n - MAPPING:")
f.write("\n s1: s_s1")
......@@ -800,9 +804,13 @@ class GPLoad_FormatOpts_TestCase(unittest.TestCase):
runfile(file)
copy_data('external_file_15.txt','data_file.txt')
write_config_file(mode='insert',reuse_flag='true',fast_match='false', file='data_file.txt',table='testSpecialChar',columns_flag='2', delimiter=";")
copy_data('external_file_16.txt','data_file2.txt')
write_config_file(update_columns='\'"Field#2"\'',config='config/config_file2', mode='merge',reuse_flag='true',fast_match='false', file='data_file2.txt',table='testSpecialChar',columns_flag='2', delimiter=";",match_columns='2')
f = open(mkpath('query41.sql'),'a')
f.write("\! gpload -f "+mkpath('config/config_file2')+ " -d reuse_gptest\n")
f.close()
self.doTest(41)
if __name__ == '__main__':
suite = unittest.TestLoader().loadTestsFromTestCase(GPLoad_FormatOpts_TestCase)
runner = unittest.TextTestRunner(verbosity=2)
......
1;Line 1
2;2nd line
3;test
4;
5;Vide
6;Field 2
8;new line
9;Line 10
\ No newline at end of file
2020-08-20 16:13:06|INFO|gpload session started 2020-08-20 16:13:06
2020-08-20 16:13:06|INFO|setting schema 'public' for table 'testspecialchar'
2020-08-20 16:13:06|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file.txt" -t 30
2020-08-20 16:13:06|INFO|did not find an external table to reuse. creating ext_gpload_reusable_f9d2769a_e2bc_11ea_9270_00505698707d
2020-08-20 16:13:06|INFO|running time: 0.08 seconds
2020-08-20 16:13:06|INFO|rows Inserted = 8
2020-08-20 16:13:06|INFO|rows Updated = 0
2020-08-20 16:13:06|INFO|data formatting errors = 0
2020-08-20 16:13:06|INFO|gpload succeeded
2020-08-20 16:13:06|INFO|gpload session started 2020-08-20 16:13:06
2020-08-20 16:13:06|INFO|setting schema 'public' for table 'testspecialchar'
2020-08-20 16:13:06|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file.txt" -t 30
2020-08-20 16:13:06|INFO|reusing external table ext_gpload_reusable_f9d2769a_e2bc_11ea_9270_00505698707d
2020-08-20 16:13:06|INFO|running time: 0.07 seconds
2020-08-20 16:13:06|INFO|rows Inserted = 8
2020-08-20 16:13:06|INFO|rows Updated = 0
2020-08-20 16:13:06|INFO|data formatting errors = 0
2020-08-20 16:13:06|INFO|gpload succeeded
2020-09-07 20:06:20|INFO|gpload session started 2020-09-07 20:06:20
2020-09-07 20:06:20|INFO|setting schema 'public' for table 'testspecialchar'
2020-09-07 20:06:20|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file.txt" -t 30
2020-09-07 20:06:20|INFO|did not find an external table to reuse. creating ext_gpload_reusable_8a7fd82e_f102_11ea_916e_00505698707d
2020-09-07 20:06:20|INFO|running time: 0.11 seconds
2020-09-07 20:06:20|INFO|rows Inserted = 8
2020-09-07 20:06:20|INFO|rows Updated = 0
2020-09-07 20:06:20|INFO|data formatting errors = 0
2020-09-07 20:06:20|INFO|gpload succeeded
2020-09-07 20:06:20|INFO|gpload session started 2020-09-07 20:06:20
2020-09-07 20:06:20|INFO|setting schema 'public' for table 'testspecialchar'
2020-09-07 20:06:20|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file.txt" -t 30
2020-09-07 20:06:20|INFO|reusing external table ext_gpload_reusable_8a7fd82e_f102_11ea_916e_00505698707d
2020-09-07 20:06:20|INFO|running time: 0.10 seconds
2020-09-07 20:06:20|INFO|rows Inserted = 8
2020-09-07 20:06:20|INFO|rows Updated = 0
2020-09-07 20:06:20|INFO|data formatting errors = 0
2020-09-07 20:06:20|INFO|gpload succeeded
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'Field1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
2020-09-07 20:06:21|INFO|gpload session started 2020-09-07 20:06:21
2020-09-07 20:06:21|INFO|setting schema 'public' for table 'testspecialchar'
2020-09-07 20:06:21|INFO|started gpfdist -p 8081 -P 8082 -f "/home/gpadmin/workspace/gpdb/gpMgmt/bin/gpload_test/gpload2/data_file2.txt" -t 30
2020-09-07 20:06:21|INFO|did not find a staging table to reuse. creating staging_gpload_reusable_a1101b5024707ea34f55e778f329e548
2020-09-07 20:06:21|INFO|did not find an external table to reuse. creating ext_gpload_reusable_8aee4250_f102_11ea_a4d9_00505698707d
2020-09-07 20:06:21|INFO|running time: 0.14 seconds
2020-09-07 20:06:21|INFO|rows Inserted = 2
2020-09-07 20:06:21|INFO|rows Updated = 12
2020-09-07 20:06:21|INFO|data formatting errors = 0
2020-09-07 20:06:21|INFO|gpload succeeded
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册