//---------------------------------------------------------------------------
//  Greenplum Database
//	Copyright (C) 2011 EMC Corp.
//
//	@filename:
//		CTranslatorQueryToDXL.cpp
//
//	@doc:
//		Implementation of the methods used to translate a query into DXL tree.
//		All translator methods allocate memory in the provided memory pool, and
//		the caller is responsible for freeing it
//
//	@test:
//
//---------------------------------------------------------------------------

#include "postgres.h"

#include "access/sysattr.h"
#include "nodes/plannodes.h"
#include "nodes/parsenodes.h"
#include "nodes/makefuncs.h"
#include "optimizer/walkers.h"

#include "gpos/base.h"
#include "gpos/common/CAutoTimer.h"

#include "gpopt/base/CUtils.h"
#include "gpopt/mdcache/CMDAccessor.h"
#include "gpopt/translate/CCTEListEntry.h"
#include "gpopt/translate/CQueryMutators.h"
#include "gpopt/translate/CTranslatorUtils.h"
#include "gpopt/translate/CTranslatorQueryToDXL.h"
#include "gpopt/translate/CTranslatorDXLToPlStmt.h"
#include "gpopt/translate/CTranslatorRelcacheToDXL.h"

#include "naucrates/exception.h"

#include "naucrates/dxl/CDXLUtils.h"
#include "naucrates/dxl/operators/dxlops.h"
#include "naucrates/dxl/operators/CDXLScalarBooleanTest.h"
#include "naucrates/dxl/operators/CDXLDatumInt8.h"
#include "naucrates/dxl/xml/dxltokens.h"

#include "naucrates/md/IMDScalarOp.h"
#include "naucrates/md/IMDAggregate.h"
#include "naucrates/md/IMDTypeBool.h"
#include "naucrates/md/IMDTypeInt8.h"
#include "naucrates/md/CMDIdGPDBCtas.h"

#include "naucrates/traceflags/traceflags.h"

#include "gpopt/gpdbwrappers.h"

using namespace gpdxl;
using namespace gpos;
using namespace gpopt;
using namespace gpnaucrates;
using namespace gpmd;

// Boolean switches defined outside this file. The translator consults them
// to decide whether a statement is translated or rejected as an unsupported
// feature (CTAS, DML, DML on relations with triggers / constraints).
extern bool optimizer_enable_ctas;
extern bool optimizer_enable_dml;
extern bool optimizer_enable_dml_triggers;
extern bool optimizer_enable_dml_constraints;
// NOTE(review): not referenced in the part of the file reviewed here
extern bool optimizer_enable_multiple_distinct_aggs;

// OIDs of variants of LEAD window function
J
Jesse Zhang 已提交
68 69 70 71 72 73 74 75 76 77
static const OID lead_func_oids[] = {
	7011, 7074, 7075, 7310, 7312, 7314, 7316, 7318, 7320, 7322, 7324, 7326,
	7328, 7330, 7332, 7334, 7336, 7338, 7340, 7342, 7344, 7346, 7348, 7350,
	7352, 7354, 7356, 7358, 7360, 7362, 7364, 7366, 7368, 7370, 7372, 7374,
	7376, 7378, 7380, 7382, 7384, 7386, 7388, 7390, 7392, 7394, 7396, 7398,
	7400, 7402, 7404, 7406, 7408, 7410, 7412, 7414, 7416, 7418, 7420, 7422,
	7424, 7426, 7428, 7430, 7432, 7434, 7436, 7438, 7440, 7442, 7444, 7446,
	7448, 7450, 7452, 7454, 7456, 7458, 7460, 7462, 7464, 7466, 7468, 7470,
	7472, 7474, 7476, 7478, 7480, 7482, 7484, 7486, 7488, 7214, 7215, 7216,
	7220, 7222, 7224, 7244, 7246, 7248, 7260, 7262, 7264};
78 79

// OIDs of variants of LAG window function
J
Jesse Zhang 已提交
80 81 82 83 84 85 86 87 88 89
static const OID lag_func_oids[] = {
	7675, 7491, 7493, 7495, 7497, 7499, 7501, 7503, 7505, 7507, 7509, 7511,
	7513, 7515, 7517, 7519, 7521, 7523, 7525, 7527, 7529, 7531, 7533, 7535,
	7537, 7539, 7541, 7543, 7545, 7547, 7549, 7551, 7553, 7555, 7557, 7559,
	7561, 7563, 7565, 7567, 7569, 7571, 7573, 7575, 7577, 7579, 7581, 7583,
	7585, 7587, 7589, 7591, 7593, 7595, 7597, 7599, 7601, 7603, 7605, 7607,
	7609, 7611, 7613, 7615, 7617, 7619, 7621, 7623, 7625, 7627, 7629, 7631,
	7633, 7635, 7637, 7639, 7641, 7643, 7645, 7647, 7649, 7651, 7653, 7655,
	7657, 7659, 7661, 7663, 7665, 7667, 7669, 7671, 7673, 7211, 7212, 7213,
	7226, 7228, 7230, 7250, 7252, 7254, 7266, 7268, 7270};
90 91 92 93 94 95 96 97 98

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::CTranslatorQueryToDXL
//
//	@doc:
//		Ctor
//
//---------------------------------------------------------------------------
J
Jesse Zhang 已提交
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
CTranslatorQueryToDXL::CTranslatorQueryToDXL(
	CMemoryPool *mp, CMDAccessor *md_accessor, CIdGenerator *m_colid_counter,
	CIdGenerator *cte_id_counter, CMappingVarColId *var_colid_mapping,
	Query *query, ULONG query_level, BOOL is_top_query_dml,
	HMUlCTEListEntry *query_level_to_cte_map)
	: m_mp(mp),
	  m_sysid(IMDId::EmdidGPDB, GPMD_GPDB_SYSID),
	  m_md_accessor(md_accessor),
	  m_colid_counter(m_colid_counter),
	  m_cte_id_counter(cte_id_counter),
	  m_var_to_colid_map(var_colid_mapping),
	  m_query_level(query_level),
	  m_has_distributed_tables(false),
	  m_is_top_query_dml(is_top_query_dml),
	  m_is_ctas_query(false),
	  m_query_level_to_cte_map(NULL),
	  m_dxl_query_output_cols(NULL),
	  m_dxl_cte_producers(NULL),
	  m_cteid_at_current_query_level_map(NULL)
118
{
119 120
	GPOS_ASSERT(NULL != query);
	CheckSupportedCmdType(query);
J
Jesse Zhang 已提交
121

122 123 124
	m_query_level_to_cte_map = GPOS_NEW(m_mp) HMUlCTEListEntry(m_mp);
	m_dxl_cte_producers = GPOS_NEW(m_mp) CDXLNodeArray(m_mp);
	m_cteid_at_current_query_level_map = GPOS_NEW(m_mp) UlongBoolHashMap(m_mp);
J
Jesse Zhang 已提交
125

126
	if (NULL != query_level_to_cte_map)
127
	{
128
		HMIterUlCTEListEntry cte_list_hashmap_iter(query_level_to_cte_map);
129

130
		while (cte_list_hashmap_iter.Advance())
131
		{
132
			ULONG cte_query_level = *(cte_list_hashmap_iter.Key());
133

J
Jesse Zhang 已提交
134 135
			CCTEListEntry *cte_list_entry =
				const_cast<CCTEListEntry *>(cte_list_hashmap_iter.Value());
136

137
			// CTE's that have been defined before the m_query_level
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
			// should only be inserted into the hash map
			// For example:
			// WITH ab as (SELECT a as a, b as b from foo)
			// SELECT *
			// FROM
			// 	(WITH aEq10 as (SELECT b from ab ab1 where ab1.a = 10)
			//  	SELECT *
			//  	FROM (WITH aEq20 as (SELECT b from ab ab2 where ab2.a = 20)
			// 		      SELECT * FROM aEq10 WHERE b > (SELECT min(b) from aEq20)
			// 		      ) dtInner
			// 	) dtOuter
			// When translating the from expression containing "aEq10" in the derived table "dtInner"
			// we have already seen three CTE namely: "ab", "aEq10" and "aEq20". BUT when we expand aEq10
			// in the dt1, we should only have access of CTE's defined prior to its level namely "ab".

153
			if (cte_query_level < query_level && NULL != cte_list_entry)
154
			{
155
				cte_list_entry->AddRef();
156
#ifdef GPOS_DEBUG
157
				BOOL is_res =
158
#endif
J
Jesse Zhang 已提交
159 160
					m_query_level_to_cte_map->Insert(
						GPOS_NEW(mp) ULONG(cte_query_level), cte_list_entry);
161
				GPOS_ASSERT(is_res);
162 163 164 165 166
			}
		}
	}

	// check if the query has any unsupported node types
167
	CheckUnsupportedNodeTypes(query);
168 169

	// check if the query has SIRV functions in the targetlist without a FROM clause
170
	CheckSirvFuncsWithoutFromClause(query);
171 172

	// first normalize the query
J
Jesse Zhang 已提交
173 174
	m_query =
		CQueryMutators::NormalizeQuery(m_mp, m_md_accessor, query, query_level);
175

176
	if (NULL != m_query->cteList)
177
	{
178
		ConstructCTEProducerList(m_query->cteList, query_level);
179 180
	}

J
Jesse Zhang 已提交
181 182 183 184
	m_scalar_translator = GPOS_NEW(m_mp) CTranslatorScalarToDXL(
		m_mp, m_md_accessor, m_colid_counter, m_cte_id_counter, m_query_level,
		true, /* m_fQuery */
		m_query_level_to_cte_map, m_dxl_cte_producers);
185 186 187 188
}

//---------------------------------------------------------------------------
//	@function:
189
//		CTranslatorQueryToDXL::QueryToDXLInstance
190 191 192 193 194 195
//
//	@doc:
//		Factory function
//
//---------------------------------------------------------------------------
CTranslatorQueryToDXL *
J
Jesse Zhang 已提交
196 197 198 199
CTranslatorQueryToDXL::QueryToDXLInstance(
	CMemoryPool *mp, CMDAccessor *md_accessor, CIdGenerator *m_colid_counter,
	CIdGenerator *cte_id_counter, CMappingVarColId *var_colid_mapping,
	Query *query, ULONG query_level, HMUlCTEListEntry *query_level_to_cte_map)
200
{
J
Jesse Zhang 已提交
201 202 203 204 205
	return GPOS_NEW(mp)
		CTranslatorQueryToDXL(mp, md_accessor, m_colid_counter, cte_id_counter,
							  var_colid_mapping, query, query_level,
							  false,  // is_top_query_dml
							  query_level_to_cte_map);
206 207 208 209 210 211 212 213 214 215 216 217
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::~CTranslatorQueryToDXL
//
//	@doc:
//		Dtor. Deletes the scalar translator and the var-to-colid mapping
//		that was handed to the constructor, frees the normalized query and
//		releases the reference-counted members.
//
//---------------------------------------------------------------------------
CTranslatorQueryToDXL::~CTranslatorQueryToDXL()
{
	GPOS_DELETE(m_scalar_translator);
	// the mapping passed to the ctor is deleted here
	GPOS_DELETE(m_var_to_colid_map);
	// m_query is the normalized query produced in the ctor
	gpdb::GPDBFree(m_query);
	m_query_level_to_cte_map->Release();
	m_dxl_cte_producers->Release();
	m_cteid_at_current_query_level_map->Release();
	// stays NULL until a query has been translated
	CRefCount::SafeRelease(m_dxl_query_output_cols);
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::CheckUnsupportedNodeTypes
//
//	@doc:
//		Check for unsupported node types, and throws an exception when found
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
236
CTranslatorQueryToDXL::CheckUnsupportedNodeTypes(Query *query)
237
{
J
Jesse Zhang 已提交
238
	static const SUnsupportedFeature unsupported_features[] = {
239 240 241 242 243 244 245 246 247 248
		{T_RowExpr, GPOS_WSZ_LIT("ROW EXPRESSION")},
		{T_RowCompareExpr, GPOS_WSZ_LIT("ROW COMPARE")},
		{T_FieldSelect, GPOS_WSZ_LIT("FIELDSELECT")},
		{T_FieldStore, GPOS_WSZ_LIT("FIELDSTORE")},
		{T_CoerceToDomainValue, GPOS_WSZ_LIT("COERCETODOMAINVALUE")},
		{T_GroupId, GPOS_WSZ_LIT("GROUPID")},
		{T_PercentileExpr, GPOS_WSZ_LIT("PERCENTILE")},
		{T_CurrentOfExpr, GPOS_WSZ_LIT("CURRENT OF")},
	};

249 250
	List *unsupported_list = NIL;
	for (ULONG ul = 0; ul < GPOS_ARRAY_SIZE(unsupported_features); ul++)
251
	{
J
Jesse Zhang 已提交
252 253
		unsupported_list = gpdb::LAppendInt(unsupported_list,
											unsupported_features[ul].node_tag);
254 255
	}

256 257
	INT unsupported_node = gpdb::FindNodes((Node *) query, unsupported_list);
	gpdb::GPDBFree(unsupported_list);
258

259
	if (0 <= unsupported_node)
260
	{
J
Jesse Zhang 已提交
261 262
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   unsupported_features[unsupported_node].m_feature_name);
263 264 265 266 267 268 269 270 271 272 273 274 275
	}
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::CheckSirvFuncsWithoutFromClause
//
//	@doc:
//		Check for SIRV functions in the target list without a FROM clause, and
//		throw an exception when found
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
276
CTranslatorQueryToDXL::CheckSirvFuncsWithoutFromClause(Query *query)
277 278
{
	// if there is a FROM clause or if target list is empty, look no further
J
Jesse Zhang 已提交
279 280 281
	if ((NULL != query->jointree &&
		 0 < gpdb::ListLength(query->jointree->fromlist)) ||
		NIL == query->targetList)
282 283 284 285 286
	{
		return;
	}

	// see if we have SIRV functions in the target list
287
	if (HasSirvFunctions((Node *) query->targetList))
288
	{
J
Jesse Zhang 已提交
289 290
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("SIRV functions"));
291 292 293 294 295
	}
}

//---------------------------------------------------------------------------
//	@function:
296
//		CTranslatorQueryToDXL::HasSirvFunctions
297 298 299 300 301 302
//
//	@doc:
//		Check for SIRV functions in the tree rooted at the given node
//
//---------------------------------------------------------------------------
BOOL
J
Jesse Zhang 已提交
303
CTranslatorQueryToDXL::HasSirvFunctions(Node *node) const
304
{
305
	GPOS_ASSERT(NULL != node);
306

J
Jesse Zhang 已提交
307 308
	List *function_list = gpdb::ExtractNodesExpression(
		node, T_FuncExpr, true /*descendIntoSubqueries*/);
309
	ListCell *lc = NULL;
310

311
	BOOL has_sirv = false;
J
Jesse Zhang 已提交
312
	ForEach(lc, function_list)
313
	{
314
		FuncExpr *func_expr = (FuncExpr *) lfirst(lc);
J
Jesse Zhang 已提交
315 316
		if (CTranslatorUtils::IsSirvFunc(m_mp, m_md_accessor,
										 func_expr->funcid))
317
		{
318
			has_sirv = true;
319 320 321
			break;
		}
	}
322
	gpdb::ListFree(function_list);
323

324
	return has_sirv;
325 326 327 328 329 330 331 332 333 334 335
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::CheckSupportedCmdType
//
//	@doc:
//		Check for supported command types, throws an exception when command
//		type not yet supported
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
336
CTranslatorQueryToDXL::CheckSupportedCmdType(Query *query)
337
{
338
	if (NULL != query->utilityStmt)
339
	{
J
Jesse Zhang 已提交
340 341
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("UTILITY command"));
342 343
	}

344 345
	if (CMD_SELECT == query->commandType)
	{
J
Jesse Zhang 已提交
346 347
		if (!optimizer_enable_ctas &&
			(NULL != query->intoClause || query->isCopy))
348
		{
J
Jesse Zhang 已提交
349 350 351 352
			GPOS_RAISE(
				gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				GPOS_WSZ_LIT(
					"CTAS. Set optimizer_enable_ctas to on to enable CTAS with GPORCA"));
353
		}
354 355
		if (query->isCopy)
		{
J
Jesse Zhang 已提交
356 357 358 359
			GPOS_RAISE(
				gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				GPOS_WSZ_LIT(
					"COPY. Copy select statement to file on segment is not supported with GPORCA"));
360
		}
J
Jesse Zhang 已提交
361

362 363 364 365
		// supported: regular select or CTAS when it is enabled
		return;
	}

J
Jesse Zhang 已提交
366 367
	static const SCmdNameElem unsupported_commands[] = {
		{CMD_UTILITY, GPOS_WSZ_LIT("UTILITY command")}};
368

369 370
	const ULONG length = GPOS_ARRAY_SIZE(unsupported_commands);
	for (ULONG ul = 0; ul < length; ul++)
371
	{
372 373
		SCmdNameElem mapelem = unsupported_commands[ul];
		if (mapelem.m_cmd_type == query->commandType)
374
		{
J
Jesse Zhang 已提交
375 376
			GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
					   mapelem.m_cmd_name);
377 378 379 380 381 382
		}
	}
}

//---------------------------------------------------------------------------
//	@function:
383
//		CTranslatorQueryToDXL::GetQueryOutputCols
384 385 386 387 388
//
//	@doc:
//		Return the list of query output columns
//
//---------------------------------------------------------------------------
389 390
CDXLNodeArray *
CTranslatorQueryToDXL::GetQueryOutputCols() const
391
{
392
	return m_dxl_query_output_cols;
393 394 395 396
}

//---------------------------------------------------------------------------
//	@function:
397
//		CTranslatorQueryToDXL::GetCTEs
398 399 400 401 402
//
//	@doc:
//		Return the list of CTEs
//
//---------------------------------------------------------------------------
403 404
CDXLNodeArray *
CTranslatorQueryToDXL::GetCTEs() const
405
{
406
	return m_dxl_cte_producers;
407 408 409 410
}

//---------------------------------------------------------------------------
//	@function:
411
//		CTranslatorQueryToDXL::TranslateSelectQueryToDXL
412 413 414 415 416 417 418
//
//	@doc:
//		Translates a Query into a DXL tree. The function allocates memory in
//		the translator memory pool, and caller is responsible for freeing it.
//
//---------------------------------------------------------------------------
CDXLNode *
419
CTranslatorQueryToDXL::TranslateSelectQueryToDXL()
420
{
421 422 423 424 425
	// The parsed query contains an RTE for the view, which is maintained all the way through planned statement.
	// This entries is annotated as requiring SELECT permissions for the current user.
	// In Orca, we only keep range table entries for the base tables in the planned statement, but not for the view itself.
	// Since permissions are only checked during ExecutorStart, we lose track of the permissions required for the view and the select goes through successfully.
	// We therefore need to check permissions before we go into optimization for all RTEs, including the ones not explicitly referred in the query, e.g. views.
426 427 428
	CTranslatorUtils::CheckRTEPermissions(m_query->rtable);

	CDXLNode *child_dxlnode = NULL;
J
Jesse Zhang 已提交
429 430 431 432
	IntToUlongMap *sort_group_attno_to_colid_mapping =
		GPOS_NEW(m_mp) IntToUlongMap(m_mp);
	IntToUlongMap *output_attno_to_colid_mapping =
		GPOS_NEW(m_mp) IntToUlongMap(m_mp);
433 434

	// construct CTEAnchor operators for the CTEs defined at the top level
435 436
	CDXLNode *dxl_cte_anchor_top = NULL;
	CDXLNode *dxl_cte_anchor_bottom = NULL;
J
Jesse Zhang 已提交
437 438 439 440 441 442 443 444
	ConstructCTEAnchors(m_dxl_cte_producers, &dxl_cte_anchor_top,
						&dxl_cte_anchor_bottom);
	GPOS_ASSERT_IMP(
		m_dxl_cte_producers == NULL || 0 < m_dxl_cte_producers->Size(),
		NULL != dxl_cte_anchor_top && NULL != dxl_cte_anchor_bottom);

	GPOS_ASSERT_IMP(NULL != m_query->setOperations,
					0 == gpdb::ListLength(m_query->windowClause));
445
	if (NULL != m_query->setOperations)
446
	{
447
		List *target_list = m_query->targetList;
448
		// translate set operations
J
Jesse Zhang 已提交
449 450
		child_dxlnode = TranslateSetOpToDXL(m_query->setOperations, target_list,
											output_attno_to_colid_mapping);
451

J
Jesse Zhang 已提交
452 453 454 455
		CDXLLogicalSetOp *dxlop =
			CDXLLogicalSetOp::Cast(child_dxlnode->GetOperator());
		const CDXLColDescrArray *dxl_col_descr_array =
			dxlop->GetDXLColumnDescrArray();
456 457
		ListCell *lc = NULL;
		ULONG resno = 1;
J
Jesse Zhang 已提交
458
		ForEach(lc, target_list)
459
		{
460 461
			TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
			if (0 < target_entry->ressortgroupref)
462
			{
463
				ULONG colid = ((*dxl_col_descr_array)[resno - 1])->Id();
J
Jesse Zhang 已提交
464 465
				AddSortingGroupingColumn(
					target_entry, sort_group_attno_to_colid_mapping, colid);
466
			}
467
			resno++;
468 469
		}
	}
J
Jesse Zhang 已提交
470 471
	else if (0 != gpdb::ListLength(
					  m_query->windowClause))  // translate window clauses
472
	{
473 474
		CDXLNode *dxlnode = TranslateFromExprToDXL(m_query->jointree);
		GPOS_ASSERT(NULL == m_query->groupClause);
J
Jesse Zhang 已提交
475 476 477 478
		child_dxlnode = TranslateWindowToDXL(
			dxlnode, m_query->targetList, m_query->windowClause,
			m_query->sortClause, sort_group_attno_to_colid_mapping,
			output_attno_to_colid_mapping);
479 480 481
	}
	else
	{
J
Jesse Zhang 已提交
482 483 484 485
		child_dxlnode = TranslateGroupingSets(
			m_query->jointree, m_query->targetList, m_query->groupClause,
			m_query->hasAggs, sort_group_attno_to_colid_mapping,
			output_attno_to_colid_mapping);
486 487 488
	}

	// translate limit clause
J
Jesse Zhang 已提交
489 490 491
	CDXLNode *limit_dxlnode = TranslateLimitToDXLGroupBy(
		m_query->sortClause, m_query->limitCount, m_query->limitOffset,
		child_dxlnode, sort_group_attno_to_colid_mapping);
492 493


494
	if (NULL == m_query->targetList)
495
	{
496
		m_dxl_query_output_cols = GPOS_NEW(m_mp) CDXLNodeArray(m_mp);
497 498 499
	}
	else
	{
J
Jesse Zhang 已提交
500 501
		m_dxl_query_output_cols = CreateDXLOutputCols(
			m_query->targetList, output_attno_to_colid_mapping);
502 503 504
	}

	// cleanup
505
	CRefCount::SafeRelease(sort_group_attno_to_colid_mapping);
506

507
	output_attno_to_colid_mapping->Release();
J
Jesse Zhang 已提交
508

509
	// add CTE anchors if needed
510
	CDXLNode *result_dxlnode = limit_dxlnode;
J
Jesse Zhang 已提交
511

512
	if (NULL != dxl_cte_anchor_top)
513
	{
514 515 516
		GPOS_ASSERT(NULL != dxl_cte_anchor_bottom);
		dxl_cte_anchor_bottom->AddChild(result_dxlnode);
		result_dxlnode = dxl_cte_anchor_top;
517
	}
J
Jesse Zhang 已提交
518

519
	return result_dxlnode;
520 521 522 523
}

//---------------------------------------------------------------------------
//	@function:
524
//		CTranslatorQueryToDXL::TranslateSelectProjectJoinToDXL
525 526 527 528 529 530
//
//	@doc:
//		Construct a DXL SPJ tree from the given query parts
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
531 532
CTranslatorQueryToDXL::TranslateSelectProjectJoinToDXL(
	List *target_list, FromExpr *from_expr,
533
	IntToUlongMap *sort_group_attno_to_colid_mapping,
J
Jesse Zhang 已提交
534
	IntToUlongMap *output_attno_to_colid_mapping, List *group_clause)
535
{
536
	CDXLNode *join_tree_dxlnode = TranslateFromExprToDXL(from_expr);
537 538

	// translate target list entries into a logical project
J
Jesse Zhang 已提交
539 540 541
	return TranslateTargetListToDXLProject(
		target_list, join_tree_dxlnode, sort_group_attno_to_colid_mapping,
		output_attno_to_colid_mapping, group_clause);
542 543 544 545
}

//---------------------------------------------------------------------------
//	@function:
546
//		CTranslatorQueryToDXL::TranslateSelectProjectJoinForGrpSetsToDXL
547 548 549 550 551 552 553
//
//	@doc:
//		Construct a DXL SPJ tree from the given query parts, and keep variables
//		appearing in aggregates in the project list
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
554 555
CTranslatorQueryToDXL::TranslateSelectProjectJoinForGrpSetsToDXL(
	List *target_list, FromExpr *from_expr,
556
	IntToUlongMap *sort_group_attno_to_colid_mapping,
J
Jesse Zhang 已提交
557
	IntToUlongMap *output_attno_to_colid_mapping, List *group_clause)
558
{
559
	CDXLNode *join_tree_dxlnode = TranslateFromExprToDXL(from_expr);
560 561

	// translate target list entries into a logical project
J
Jesse Zhang 已提交
562 563 564 565
	return TranslateTargetListToDXLProject(
		target_list, join_tree_dxlnode, sort_group_attno_to_colid_mapping,
		output_attno_to_colid_mapping, group_clause,
		true /*is_expand_aggref_expr*/);
566 567 568 569
}

//---------------------------------------------------------------------------
//	@function:
570
//		CTranslatorQueryToDXL::TranslateQueryToDXL
571 572 573 574 575 576
//
//	@doc:
//		Main driver
//
//---------------------------------------------------------------------------
CDXLNode *
577
CTranslatorQueryToDXL::TranslateQueryToDXL()
578
{
J
Jesse Zhang 已提交
579 580
	CAutoTimer at("\n[OPT]: Query To DXL Translation Time",
				  GPOS_FTRACE(EopttracePrintOptimizationStatistics));
581

582
	switch (m_query->commandType)
583 584
	{
		case CMD_SELECT:
585
			if (NULL == m_query->intoClause && !m_query->isCopy)
586
			{
587
				return TranslateSelectQueryToDXL();
588 589
			}

590 591
			return TranslateCTASToDXL();

592
		case CMD_INSERT:
593
			return TranslateInsertQueryToDXL();
594 595

		case CMD_DELETE:
596
			return TranslateDeleteQueryToDXL();
597 598

		case CMD_UPDATE:
599
			return TranslateUpdateQueryToDXL();
600 601 602 603 604 605 606 607 608

		default:
			GPOS_ASSERT(!"Statement type not supported");
			return NULL;
	}
}

//---------------------------------------------------------------------------
//	@function:
609
//		CTranslatorQueryToDXL::TranslateInsertQueryToDXL
610 611 612 613 614 615
//
//	@doc:
//		Translate an insert stmt
//
//---------------------------------------------------------------------------
CDXLNode *
616
CTranslatorQueryToDXL::TranslateInsertQueryToDXL()
617
{
618 619
	GPOS_ASSERT(CMD_INSERT == m_query->commandType);
	GPOS_ASSERT(0 < m_query->resultRelation);
620

621 622
	if (!optimizer_enable_dml)
	{
J
Jesse Zhang 已提交
623 624
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("DML not enabled"));
625 626
	}

627
	CDXLNode *query_dxlnode = TranslateSelectQueryToDXL();
J
Jesse Zhang 已提交
628 629
	const RangeTblEntry *rte = (RangeTblEntry *) gpdb::ListNth(
		m_query->rtable, m_query->resultRelation - 1);
630

J
Jesse Zhang 已提交
631 632
	CDXLTableDescr *table_descr = CTranslatorUtils::GetTableDescr(
		m_mp, m_md_accessor, m_colid_counter, rte, &m_has_distributed_tables);
633
	const IMDRelation *md_rel = m_md_accessor->RetrieveRel(table_descr->MDId());
J
Jesse Zhang 已提交
634 635 636
	if (!optimizer_enable_dml_triggers &&
		CTranslatorUtils::RelHasTriggers(m_mp, m_md_accessor, md_rel,
										 Edxldmlinsert))
637
	{
J
Jesse Zhang 已提交
638 639
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("INSERT with triggers"));
640 641
	}

642 643
	BOOL rel_has_constraints = CTranslatorUtils::RelHasConstraints(md_rel);
	if (!optimizer_enable_dml_constraints && rel_has_constraints)
644
	{
J
Jesse Zhang 已提交
645 646
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("INSERT with constraints"));
647
	}
J
Jesse Zhang 已提交
648 649 650

	const ULONG num_table_columns =
		CTranslatorUtils::GetNumNonSystemColumns(md_rel);
651 652 653
	const ULONG target_list_length = gpdb::ListLength(m_query->targetList);
	GPOS_ASSERT(num_table_columns >= target_list_length);
	GPOS_ASSERT(target_list_length == m_dxl_query_output_cols->Size());
654

655
	CDXLNode *project_list_dxlnode = NULL;
J
Jesse Zhang 已提交
656

657
	const ULONG num_system_cols = md_rel->ColumnCount() - num_table_columns;
J
Jesse Zhang 已提交
658 659
	const ULONG num_non_dropped_cols =
		md_rel->NonDroppedColsCount() - num_system_cols;
660
	if (num_non_dropped_cols > target_list_length)
661 662
	{
		// missing target list entries
J
Jesse Zhang 已提交
663 664
		project_list_dxlnode = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
665 666
	}

667
	ULongPtrArray *source_array = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
668

669 670
	ULONG target_list_pos = 0;
	for (ULONG ul = 0; ul < num_table_columns; ul++)
671
	{
672 673
		const IMDColumn *mdcol = md_rel->GetMdCol(ul);
		GPOS_ASSERT(!mdcol->IsSystemColumn());
J
Jesse Zhang 已提交
674

675
		if (mdcol->IsDropped())
676 677 678
		{
			continue;
		}
J
Jesse Zhang 已提交
679

680
		if (target_list_pos < target_list_length)
681
		{
682
			INT attno = mdcol->AttrNum();
J
Jesse Zhang 已提交
683 684 685

			TargetEntry *target_entry = (TargetEntry *) gpdb::ListNth(
				m_query->targetList, target_list_pos);
686
			AttrNumber resno = target_entry->resno;
687

688
			if (attno == resno)
689
			{
J
Jesse Zhang 已提交
690 691 692 693 694 695
				CDXLNode *dxl_column =
					(*m_dxl_query_output_cols)[target_list_pos];
				CDXLScalarIdent *dxl_ident =
					CDXLScalarIdent::Cast(dxl_column->GetOperator());
				source_array->Append(
					GPOS_NEW(m_mp) ULONG(dxl_ident->GetDXLColRef()->Id()));
696
				target_list_pos++;
697 698 699 700 701 702
				continue;
			}
		}

		// target entry corresponding to the tables column not found, therefore
		// add a project element with null value scalar child
J
Jesse Zhang 已提交
703 704 705 706 707
		CDXLNode *project_elem_dxlnode =
			CTranslatorUtils::CreateDXLProjElemConstNULL(
				m_mp, m_md_accessor, m_colid_counter, mdcol);
		ULONG colid =
			CDXLScalarProjElem::Cast(project_elem_dxlnode->GetOperator())->Id();
708
		project_list_dxlnode->AddChild(project_elem_dxlnode);
J
Jesse Zhang 已提交
709
		source_array->Append(GPOS_NEW(m_mp) ULONG(colid));
710 711
	}

J
Jesse Zhang 已提交
712 713
	CDXLLogicalInsert *insert_dxlnode =
		GPOS_NEW(m_mp) CDXLLogicalInsert(m_mp, table_descr, source_array);
714

715
	if (NULL != project_list_dxlnode)
716
	{
717
		GPOS_ASSERT(0 < project_list_dxlnode->Arity());
J
Jesse Zhang 已提交
718 719 720

		CDXLNode *project_dxlnode = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalProject(m_mp));
721 722 723
		project_dxlnode->AddChild(project_list_dxlnode);
		project_dxlnode->AddChild(query_dxlnode);
		query_dxlnode = project_dxlnode;
724 725
	}

726
	return GPOS_NEW(m_mp) CDXLNode(m_mp, insert_dxlnode, query_dxlnode);
727 728 729 730
}

//---------------------------------------------------------------------------
//	@function:
731
//		CTranslatorQueryToDXL::TranslateCTASToDXL
732 733 734 735 736 737
//
//	@doc:
//		Translate a CTAS
//
//---------------------------------------------------------------------------
CDXLNode *
738
CTranslatorQueryToDXL::TranslateCTASToDXL()
739
{
740 741
	GPOS_ASSERT(CMD_SELECT == m_query->commandType);
	GPOS_ASSERT(NULL != m_query->intoClause);
742

743 744 745 746 747
	m_is_ctas_query = true;
	CDXLNode *query_dxlnode = TranslateSelectQueryToDXL();

	IntoClause *into_clause = m_query->intoClause;

J
Jesse Zhang 已提交
748 749 750 751 752 753
	CMDName *md_relname =
		CDXLUtils::CreateMDNameFromCharArray(m_mp, into_clause->rel->relname);

	CDXLColDescrArray *dxl_col_descr_array =
		GPOS_NEW(m_mp) CDXLColDescrArray(m_mp);

754
	const ULONG num_columns = gpdb::ListLength(m_query->targetList);
755

756
	ULongPtrArray *source_array = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
J
Jesse Zhang 已提交
757 758
	IntPtrArray *var_typmods = GPOS_NEW(m_mp) IntPtrArray(m_mp);

759 760
	List *col_names = into_clause->colNames;
	for (ULONG ul = 0; ul < num_columns; ul++)
761
	{
J
Jesse Zhang 已提交
762 763
		TargetEntry *target_entry =
			(TargetEntry *) gpdb::ListNth(m_query->targetList, ul);
764
		if (target_entry->resjunk)
765 766 767
		{
			continue;
		}
768
		AttrNumber resno = target_entry->resno;
J
Jesse Zhang 已提交
769
		int var_typmod = gpdb::ExprTypeMod((Node *) target_entry->expr);
770
		var_typmods->Append(GPOS_NEW(m_mp) INT(var_typmod));
771

772
		CDXLNode *dxl_column = (*m_dxl_query_output_cols)[ul];
J
Jesse Zhang 已提交
773 774 775 776 777
		CDXLScalarIdent *dxl_ident =
			CDXLScalarIdent::Cast(dxl_column->GetOperator());
		source_array->Append(GPOS_NEW(m_mp)
								 ULONG(dxl_ident->GetDXLColRef()->Id()));

778 779
		CMDName *md_colname = NULL;
		if (NULL != col_names && ul < gpdb::ListLength(col_names))
780
		{
781
			ColumnDef *col_def = (ColumnDef *) gpdb::ListNth(col_names, ul);
J
Jesse Zhang 已提交
782 783
			md_colname =
				CDXLUtils::CreateMDNameFromCharArray(m_mp, col_def->colname);
784 785 786
		}
		else
		{
J
Jesse Zhang 已提交
787 788
			md_colname = GPOS_NEW(m_mp)
				CMDName(m_mp, dxl_ident->GetDXLColRef()->MdName()->GetMDName());
789
		}
J
Jesse Zhang 已提交
790

791 792 793
		GPOS_ASSERT(NULL != md_colname);
		IMDId *mdid = dxl_ident->MdidType();
		mdid->AddRef();
J
Jesse Zhang 已提交
794 795 796 797
		CDXLColDescr *dxl_col_descr = GPOS_NEW(m_mp) CDXLColDescr(
			m_mp, md_colname, m_colid_counter->next_id(), resno /* attno */,
			mdid, dxl_ident->TypeModifier(), false /* is_dropped */
		);
798
		dxl_col_descr_array->Append(dxl_col_descr);
799 800
	}

J
Jesse Zhang 已提交
801 802
	IMDRelation::Ereldistrpolicy rel_distr_policy =
		IMDRelation::EreldistrRandom;
803
	ULongPtrArray *distribution_colids = NULL;
J
Jesse Zhang 已提交
804

805
	if (NULL != m_query->intoPolicy)
806
	{
J
Jesse Zhang 已提交
807 808 809
		rel_distr_policy =
			CTranslatorRelcacheToDXL::GetRelDistribution(m_query->intoPolicy);

810
		if (IMDRelation::EreldistrHash == rel_distr_policy)
811
		{
812
			distribution_colids = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
813

814
			for (ULONG ul = 0; ul < (ULONG) m_query->intoPolicy->nattrs; ul++)
815
			{
816
				AttrNumber attno = m_query->intoPolicy->attrs[ul];
817
				GPOS_ASSERT(0 < attno);
818
				distribution_colids->Append(GPOS_NEW(m_mp) ULONG(attno - 1));
819 820 821 822 823
			}
		}
	}
	else
	{
J
Jesse Zhang 已提交
824 825 826 827
		GpdbEreport(
			ERRCODE_SUCCESSFUL_COMPLETION, NOTICE,
			"Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.",
			NULL);
828
	}
J
Jesse Zhang 已提交
829

830 831
	GPOS_ASSERT(IMDRelation::EreldistrMasterOnly != rel_distr_policy);
	m_has_distributed_tables = true;
832 833 834

	// TODO: Mar 5, 2014; reserve an OID
	OID oid = 1;
835
	CMDIdGPDB *mdid = GPOS_NEW(m_mp) CMDIdGPDBCtas(oid);
J
Jesse Zhang 已提交
836

837 838
	CMDName *md_tablespace_name = NULL;
	if (NULL != into_clause->tableSpaceName)
839
	{
J
Jesse Zhang 已提交
840 841
		md_tablespace_name = CDXLUtils::CreateMDNameFromCharArray(
			m_mp, into_clause->tableSpaceName);
842
	}
J
Jesse Zhang 已提交
843

844 845
	CMDName *md_schema_name = NULL;
	if (NULL != into_clause->rel->schemaname)
846
	{
J
Jesse Zhang 已提交
847 848
		md_schema_name = CDXLUtils::CreateMDNameFromCharArray(
			m_mp, into_clause->rel->schemaname);
849
	}
850

J
Jesse Zhang 已提交
851 852 853 854 855 856 857
	CDXLCtasStorageOptions::ECtasOnCommitAction ctas_commit_action =
		(CDXLCtasStorageOptions::ECtasOnCommitAction) into_clause->onCommit;

	IMDRelation::Erelstoragetype rel_storage_type =
		IMDRelation::ErelstorageHeap;
	CDXLCtasStorageOptions::CDXLCtasOptionArray *ctas_storage_options =
		GetDXLCtasOptionArray(into_clause->options, &rel_storage_type);
858 859

	BOOL has_oids = gpdb::InterpretOidsOption(into_clause->options);
J
Jesse Zhang 已提交
860 861 862 863 864 865
	CDXLLogicalCTAS *ctas_dxlop = GPOS_NEW(m_mp) CDXLLogicalCTAS(
		m_mp, mdid, md_schema_name, md_relname, dxl_col_descr_array,
		GPOS_NEW(m_mp) CDXLCtasStorageOptions(
			md_tablespace_name, ctas_commit_action, ctas_storage_options),
		rel_distr_policy, distribution_colids, into_clause->rel->istemp,
		has_oids, rel_storage_type, source_array, var_typmods);
866

867
	return GPOS_NEW(m_mp) CDXLNode(m_mp, ctas_dxlop, query_dxlnode);
868 869 870 871
}

//---------------------------------------------------------------------------
//	@function:
872
//		CTranslatorQueryToDXL::GetDXLCtasOptionArray
873 874 875 876 877
//
//	@doc:
//		Translate CTAS storage options
//
//---------------------------------------------------------------------------
878
CDXLCtasStorageOptions::CDXLCtasOptionArray *
J
Jesse Zhang 已提交
879
CTranslatorQueryToDXL::GetDXLCtasOptionArray(
880
	List *options,
J
Jesse Zhang 已提交
881 882 883
	IMDRelation::Erelstoragetype
		*storage_type  // output parameter: storage type
)
884
{
885
	if (NULL == options)
886 887 888 889
	{
		return NULL;
	}

890
	GPOS_ASSERT(NULL != storage_type);
J
Jesse Zhang 已提交
891 892 893

	CDXLCtasStorageOptions::CDXLCtasOptionArray *ctas_storage_options =
		GPOS_NEW(m_mp) CDXLCtasStorageOptions::CDXLCtasOptionArray(m_mp);
894 895 896 897
	ListCell *lc = NULL;
	BOOL is_ao_table = false;
	BOOL is_AOCO = false;
	BOOL is_parquet = false;
J
Jesse Zhang 已提交
898

899 900 901 902
	CWStringConst str_append_only(GPOS_WSZ_LIT("appendonly"));
	CWStringConst str_orientation(GPOS_WSZ_LIT("orientation"));
	CWStringConst str_orientation_parquet(GPOS_WSZ_LIT("parquet"));
	CWStringConst str_orientation_column(GPOS_WSZ_LIT("column"));
J
Jesse Zhang 已提交
903 904

	ForEach(lc, options)
905
	{
906
		DefElem *def_elem = (DefElem *) lfirst(lc);
J
Jesse Zhang 已提交
907 908
		CWStringDynamic *name_str = CDXLUtils::CreateDynamicStringFromCharArray(
			m_mp, def_elem->defname);
909
		CWStringDynamic *value_str = NULL;
J
Jesse Zhang 已提交
910

911
		BOOL is_null_arg = (NULL == def_elem->arg);
912

913 914
		// def_elem->arg is NULL for queries of the form "create table t with (oids) as ... "
		if (is_null_arg)
915 916
		{
			// we represent null options as an empty arg string and set the IsNull flag on
917
			value_str = GPOS_NEW(m_mp) CWStringDynamic(m_mp);
918 919 920
		}
		else
		{
921
			value_str = ExtractStorageOptionStr(def_elem);
922

J
Jesse Zhang 已提交
923 924
			if (name_str->Equals(&str_append_only) &&
				value_str->Equals(CDXLTokens::GetDXLTokenStr(EdxltokenTrue)))
925
			{
926
				is_ao_table = true;
927
			}
J
Jesse Zhang 已提交
928 929 930

			if (name_str->Equals(&str_orientation) &&
				value_str->Equals(&str_orientation_column))
931
			{
932 933
				GPOS_ASSERT(!is_parquet);
				is_AOCO = true;
934 935
			}

J
Jesse Zhang 已提交
936 937
			if (name_str->Equals(&str_orientation) &&
				value_str->Equals(&str_orientation_parquet))
938
			{
939 940
				GPOS_ASSERT(!is_AOCO);
				is_parquet = true;
941 942 943
			}
		}

944 945
		NodeTag arg_type = T_Null;
		if (!is_null_arg)
946
		{
947
			arg_type = def_elem->arg->type;
948 949
		}

950
		CDXLCtasStorageOptions::CDXLCtasOption *dxl_ctas_storage_option =
J
Jesse Zhang 已提交
951 952
			GPOS_NEW(m_mp) CDXLCtasStorageOptions::CDXLCtasOption(
				arg_type, name_str, value_str, is_null_arg);
953
		ctas_storage_options->Append(dxl_ctas_storage_option);
954
	}
955
	if (is_AOCO)
956
	{
957
		*storage_type = IMDRelation::ErelstorageAppendOnlyCols;
958
	}
959
	else if (is_ao_table)
960
	{
961
		*storage_type = IMDRelation::ErelstorageAppendOnlyRows;
962
	}
963
	else if (is_parquet)
964
	{
965
		*storage_type = IMDRelation::ErelstorageAppendOnlyParquet;
966
	}
J
Jesse Zhang 已提交
967

968
	return ctas_storage_options;
969 970 971 972
}

//---------------------------------------------------------------------------
//	@function:
973
//		CTranslatorQueryToDXL::ExtractStorageOptionStr
974 975 976 977 978 979
//
//	@doc:
//		Extract value for storage option
//
//---------------------------------------------------------------------------
CWStringDynamic *
J
Jesse Zhang 已提交
980
CTranslatorQueryToDXL::ExtractStorageOptionStr(DefElem *def_elem)
981
{
982
	GPOS_ASSERT(NULL != def_elem);
983

984
	CHAR *value = gpdb::DefGetString(def_elem);
985

J
Jesse Zhang 已提交
986 987 988
	CWStringDynamic *result_str =
		CDXLUtils::CreateDynamicStringFromCharArray(m_mp, value);

989
	return result_str;
990 991 992 993 994 995 996 997 998 999 1000 1001
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::GetCtidAndSegmentId
//
//	@doc:
//		Obtains the ids of the ctid and segmentid columns for the target
//		table of a DML query
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
1002
CTranslatorQueryToDXL::GetCtidAndSegmentId(ULONG *ctid, ULONG *segment_id)
1003 1004
{
	// ctid column id
J
Jesse Zhang 已提交
1005 1006 1007 1008 1009
	IMDId *mdid = CTranslatorUtils::GetSystemColType(
		m_mp, SelfItemPointerAttributeNumber);
	*ctid = CTranslatorUtils::GetColId(m_query_level, m_query->resultRelation,
									   SelfItemPointerAttributeNumber, mdid,
									   m_var_to_colid_map);
1010
	mdid->Release();
1011 1012

	// segmentid column id
1013
	mdid = CTranslatorUtils::GetSystemColType(m_mp, GpSegmentIdAttributeNumber);
J
Jesse Zhang 已提交
1014 1015 1016
	*segment_id = CTranslatorUtils::GetColId(
		m_query_level, m_query->resultRelation, GpSegmentIdAttributeNumber,
		mdid, m_var_to_colid_map);
1017
	mdid->Release();
1018 1019 1020 1021
}

//---------------------------------------------------------------------------
//	@function:
1022
//		CTranslatorQueryToDXL::GetTupleOidColId
1023 1024 1025 1026 1027 1028 1029
//
//	@doc:
//		Obtains the id of the tuple oid column for the target table of a DML
//		update
//
//---------------------------------------------------------------------------
ULONG
1030
CTranslatorQueryToDXL::GetTupleOidColId()
1031
{
J
Jesse Zhang 已提交
1032 1033 1034 1035 1036
	IMDId *mdid =
		CTranslatorUtils::GetSystemColType(m_mp, ObjectIdAttributeNumber);
	ULONG tuple_oid_colid = CTranslatorUtils::GetColId(
		m_query_level, m_query->resultRelation, ObjectIdAttributeNumber, mdid,
		m_var_to_colid_map);
1037 1038
	mdid->Release();
	return tuple_oid_colid;
1039 1040 1041 1042
}

//---------------------------------------------------------------------------
//	@function:
1043
//		CTranslatorQueryToDXL::TranslateDeleteQueryToDXL
1044 1045 1046 1047 1048 1049
//
//	@doc:
//		Translate a delete stmt
//
//---------------------------------------------------------------------------
CDXLNode *
1050
CTranslatorQueryToDXL::TranslateDeleteQueryToDXL()
1051
{
1052 1053
	GPOS_ASSERT(CMD_DELETE == m_query->commandType);
	GPOS_ASSERT(0 < m_query->resultRelation);
1054

1055 1056
	if (!optimizer_enable_dml)
	{
J
Jesse Zhang 已提交
1057 1058
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("DML not enabled"));
1059 1060
	}

1061
	CDXLNode *query_dxlnode = TranslateSelectQueryToDXL();
J
Jesse Zhang 已提交
1062 1063
	const RangeTblEntry *rte = (RangeTblEntry *) gpdb::ListNth(
		m_query->rtable, m_query->resultRelation - 1);
1064

J
Jesse Zhang 已提交
1065 1066
	CDXLTableDescr *table_descr = CTranslatorUtils::GetTableDescr(
		m_mp, m_md_accessor, m_colid_counter, rte, &m_has_distributed_tables);
1067
	const IMDRelation *md_rel = m_md_accessor->RetrieveRel(table_descr->MDId());
J
Jesse Zhang 已提交
1068 1069 1070
	if (!optimizer_enable_dml_triggers &&
		CTranslatorUtils::RelHasTriggers(m_mp, m_md_accessor, md_rel,
										 Edxldmldelete))
1071
	{
J
Jesse Zhang 已提交
1072 1073
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("DELETE with triggers"));
1074 1075
	}

1076 1077 1078
	ULONG ctid_colid = 0;
	ULONG segid_colid = 0;
	GetCtidAndSegmentId(&ctid_colid, &segid_colid);
1079

1080
	ULongPtrArray *delete_colid_array = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
1081

1082 1083
	const ULONG num_of_non_sys_cols = md_rel->ColumnCount();
	for (ULONG ul = 0; ul < num_of_non_sys_cols; ul++)
1084
	{
1085 1086
		const IMDColumn *mdcol = md_rel->GetMdCol(ul);
		if (mdcol->IsSystemColumn() || mdcol->IsDropped())
1087 1088 1089 1090
		{
			continue;
		}

J
Jesse Zhang 已提交
1091 1092 1093
		ULONG colid = CTranslatorUtils::GetColId(
			m_query_level, m_query->resultRelation, mdcol->AttrNum(),
			mdcol->MdidType(), m_var_to_colid_map);
1094
		delete_colid_array->Append(GPOS_NEW(m_mp) ULONG(colid));
1095 1096
	}

J
Jesse Zhang 已提交
1097 1098
	CDXLLogicalDelete *delete_dxlop = GPOS_NEW(m_mp) CDXLLogicalDelete(
		m_mp, table_descr, ctid_colid, segid_colid, delete_colid_array);
1099

1100
	return GPOS_NEW(m_mp) CDXLNode(m_mp, delete_dxlop, query_dxlnode);
1101 1102 1103 1104
}

//---------------------------------------------------------------------------
//	@function:
1105
//		CTranslatorQueryToDXL::TranslateUpdateQueryToDXL
1106 1107 1108 1109 1110 1111 1112
//
//	@doc:
//		Translate an update stmt
//
//---------------------------------------------------------------------------

CDXLNode *
1113
CTranslatorQueryToDXL::TranslateUpdateQueryToDXL()
1114
{
1115 1116
	GPOS_ASSERT(CMD_UPDATE == m_query->commandType);
	GPOS_ASSERT(0 < m_query->resultRelation);
1117

1118 1119
	if (!optimizer_enable_dml)
	{
J
Jesse Zhang 已提交
1120 1121
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("DML not enabled"));
1122 1123
	}

1124
	CDXLNode *query_dxlnode = TranslateSelectQueryToDXL();
J
Jesse Zhang 已提交
1125 1126
	const RangeTblEntry *rte = (RangeTblEntry *) gpdb::ListNth(
		m_query->rtable, m_query->resultRelation - 1);
1127

J
Jesse Zhang 已提交
1128 1129
	CDXLTableDescr *table_descr = CTranslatorUtils::GetTableDescr(
		m_mp, m_md_accessor, m_colid_counter, rte, &m_has_distributed_tables);
1130
	const IMDRelation *md_rel = m_md_accessor->RetrieveRel(table_descr->MDId());
J
Jesse Zhang 已提交
1131 1132 1133
	if (!optimizer_enable_dml_triggers &&
		CTranslatorUtils::RelHasTriggers(m_mp, m_md_accessor, md_rel,
										 Edxldmlupdate))
1134
	{
J
Jesse Zhang 已提交
1135 1136
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("UPDATE with triggers"));
1137
	}
J
Jesse Zhang 已提交
1138 1139 1140

	if (!optimizer_enable_dml_constraints &&
		CTranslatorUtils::RelHasConstraints(md_rel))
1141
	{
J
Jesse Zhang 已提交
1142 1143
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("UPDATE with constraints"));
1144
	}
J
Jesse Zhang 已提交
1145

1146

1147 1148 1149
	ULONG ctid_colid = 0;
	ULONG segmentid_colid = 0;
	GetCtidAndSegmentId(&ctid_colid, &segmentid_colid);
J
Jesse Zhang 已提交
1150

1151
	ULONG tuple_oid_colid = 0;
J
Jesse Zhang 已提交
1152

1153

1154 1155
	BOOL has_oids = md_rel->HasOids();
	if (has_oids)
1156
	{
1157
		tuple_oid_colid = GetTupleOidColId();
1158 1159 1160
	}

	// get (resno -> colId) mapping of columns to be updated
1161
	IntToUlongMap *update_column_map = UpdatedColumnMapping();
1162

1163 1164 1165
	const ULONG num_of_non_sys_cols = md_rel->ColumnCount();
	ULongPtrArray *insert_colid_array = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
	ULongPtrArray *delete_colid_array = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
1166

1167
	for (ULONG ul = 0; ul < num_of_non_sys_cols; ul++)
1168
	{
1169 1170
		const IMDColumn *mdcol = md_rel->GetMdCol(ul);
		if (mdcol->IsSystemColumn() || mdcol->IsDropped())
1171 1172 1173 1174
		{
			continue;
		}

1175 1176
		INT attno = mdcol->AttrNum();
		ULONG *updated_colid = update_column_map->Find(&attno);
1177

J
Jesse Zhang 已提交
1178 1179 1180
		ULONG colid = CTranslatorUtils::GetColId(
			m_query_level, m_query->resultRelation, attno, mdcol->MdidType(),
			m_var_to_colid_map);
1181 1182 1183

		// if the column is in the query outputs then use it
		// otherwise get the column id created by the child query
1184
		if (NULL != updated_colid)
1185
		{
1186
			insert_colid_array->Append(GPOS_NEW(m_mp) ULONG(*updated_colid));
1187 1188 1189
		}
		else
		{
1190
			insert_colid_array->Append(GPOS_NEW(m_mp) ULONG(colid));
1191 1192
		}

1193
		delete_colid_array->Append(GPOS_NEW(m_mp) ULONG(colid));
1194 1195
	}

1196
	update_column_map->Release();
J
Jesse Zhang 已提交
1197 1198 1199
	CDXLLogicalUpdate *pdxlopupdate = GPOS_NEW(m_mp) CDXLLogicalUpdate(
		m_mp, table_descr, ctid_colid, segmentid_colid, delete_colid_array,
		insert_colid_array, has_oids, tuple_oid_colid);
1200

1201
	return GPOS_NEW(m_mp) CDXLNode(m_mp, pdxlopupdate, query_dxlnode);
1202 1203 1204 1205
}

//---------------------------------------------------------------------------
//	@function:
1206
//		CTranslatorQueryToDXL::UpdatedColumnMapping
1207 1208 1209 1210 1211
//
//	@doc:
// 		Return resno -> colId mapping of columns to be updated
//
//---------------------------------------------------------------------------
1212 1213
IntToUlongMap *
CTranslatorQueryToDXL::UpdatedColumnMapping()
1214
{
J
Jesse Zhang 已提交
1215 1216
	GPOS_ASSERT(gpdb::ListLength(m_query->targetList) ==
				m_dxl_query_output_cols->Size());
1217
	IntToUlongMap *update_column_map = GPOS_NEW(m_mp) IntToUlongMap(m_mp);
1218

1219
	ListCell *lc = NULL;
1220
	ULONG ul = 0;
J
Jesse Zhang 已提交
1221
	ForEach(lc, m_query->targetList)
1222
	{
1223 1224 1225 1226
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		GPOS_ASSERT(IsA(target_entry, TargetEntry));
		ULONG resno = target_entry->resno;
		GPOS_ASSERT(0 < resno);
1227

1228
		CDXLNode *dxl_column = (*m_dxl_query_output_cols)[ul];
J
Jesse Zhang 已提交
1229 1230
		CDXLScalarIdent *dxl_ident =
			CDXLScalarIdent::Cast(dxl_column->GetOperator());
1231
		ULONG colid = dxl_ident->GetDXLColRef()->Id();
1232

1233
		StoreAttnoColIdMapping(update_column_map, resno, colid);
1234 1235 1236
		ul++;
	}

1237
	return update_column_map;
1238 1239 1240 1241
}

//---------------------------------------------------------------------------
//	@function:
1242
//		CTranslatorQueryToDXL::OIDFound
1243 1244 1245 1246 1247 1248
//
//	@doc:
// 		Helper to check if OID is included in given array of OIDs
//
//---------------------------------------------------------------------------
BOOL
J
Jesse Zhang 已提交
1249
CTranslatorQueryToDXL::OIDFound(OID oid, const OID oids[], ULONG size)
1250
{
1251 1252
	BOOL found = false;
	for (ULONG ul = 0; !found && ul < size; ul++)
1253
	{
1254
		found = (oids[ul] == oid);
1255 1256
	}

1257
	return found;
1258 1259 1260 1261 1262
}


//---------------------------------------------------------------------------
//	@function:
1263
//		CTranslatorQueryToDXL::IsLeadWindowFunc
1264 1265 1266 1267 1268 1269
//
//	@doc:
// 		Check if given operator is LEAD window function
//
//---------------------------------------------------------------------------
BOOL
J
Jesse Zhang 已提交
1270
CTranslatorQueryToDXL::IsLeadWindowFunc(CDXLOperator *dxlop)
1271
{
1272 1273
	BOOL is_lead_func = false;
	if (EdxlopScalarWindowRef == dxlop->GetDXLOperator())
1274
	{
1275
		CDXLScalarWindowRef *winref_dxlop = CDXLScalarWindowRef::Cast(dxlop);
J
Jesse Zhang 已提交
1276 1277
		const CMDIdGPDB *mdid_gpdb =
			CMDIdGPDB::CastMdid(winref_dxlop->FuncMdId());
1278
		OID oid = mdid_gpdb->Oid();
J
Jesse Zhang 已提交
1279 1280
		is_lead_func =
			OIDFound(oid, lead_func_oids, GPOS_ARRAY_SIZE(lead_func_oids));
1281 1282
	}

1283
	return is_lead_func;
1284 1285 1286 1287 1288
}


//---------------------------------------------------------------------------
//	@function:
1289
//		CTranslatorQueryToDXL::IsLagWindowFunc
1290 1291 1292 1293 1294 1295
//
//	@doc:
// 		Check if given operator is LAG window function
//
//---------------------------------------------------------------------------
BOOL
J
Jesse Zhang 已提交
1296
CTranslatorQueryToDXL::IsLagWindowFunc(CDXLOperator *dxlop)
1297
{
1298 1299
	BOOL is_lag = false;
	if (EdxlopScalarWindowRef == dxlop->GetDXLOperator())
1300
	{
1301
		CDXLScalarWindowRef *winref_dxlop = CDXLScalarWindowRef::Cast(dxlop);
J
Jesse Zhang 已提交
1302 1303
		const CMDIdGPDB *mdid_gpdb =
			CMDIdGPDB::CastMdid(winref_dxlop->FuncMdId());
1304
		OID oid = mdid_gpdb->Oid();
J
Jesse Zhang 已提交
1305
		is_lag = OIDFound(oid, lag_func_oids, GPOS_ARRAY_SIZE(lag_func_oids));
1306 1307
	}

1308
	return is_lag;
1309 1310 1311 1312 1313
}


//---------------------------------------------------------------------------
//	@function:
1314
//		CTranslatorQueryToDXL::CreateWindowFramForLeadLag
1315 1316 1317 1318 1319 1320
//
//	@doc:
// 		Manufacture window frame for lead/lag functions
//
//---------------------------------------------------------------------------
CDXLWindowFrame *
J
Jesse Zhang 已提交
1321 1322
CTranslatorQueryToDXL::CreateWindowFramForLeadLag(BOOL is_lead_func,
												  CDXLNode *dxl_offset) const
1323
{
1324 1325 1326
	EdxlFrameBoundary dxl_frame_lead = EdxlfbBoundedFollowing;
	EdxlFrameBoundary dxl_frame_trail = EdxlfbBoundedFollowing;
	if (!is_lead_func)
1327
	{
1328 1329
		dxl_frame_lead = EdxlfbBoundedPreceding;
		dxl_frame_trail = EdxlfbBoundedPreceding;
1330 1331
	}

1332 1333 1334
	CDXLNode *dxl_lead_edge = NULL;
	CDXLNode *dxl_trail_edge = NULL;
	if (NULL == dxl_offset)
1335
	{
J
Jesse Zhang 已提交
1336 1337 1338 1339 1340 1341
		dxl_lead_edge = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarWindowFrameEdge(
							   m_mp, true /* fLeading */, dxl_frame_lead));
		dxl_trail_edge = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarWindowFrameEdge(
							   m_mp, false /* fLeading */, dxl_frame_trail));
1342

J
Jesse Zhang 已提交
1343 1344 1345 1346 1347 1348
		dxl_lead_edge->AddChild(
			CTranslatorUtils::CreateDXLProjElemFromInt8Const(
				m_mp, m_md_accessor, 1 /*iVal*/));
		dxl_trail_edge->AddChild(
			CTranslatorUtils::CreateDXLProjElemFromInt8Const(
				m_mp, m_md_accessor, 1 /*iVal*/));
1349 1350 1351 1352
	}
	else
	{
		// overwrite frame edge types based on specified offset type
J
Jesse Zhang 已提交
1353 1354
		if (EdxlopScalarConstValue !=
			dxl_offset->GetOperator()->GetDXLOperator())
1355
		{
1356
			if (is_lead_func)
1357
			{
1358 1359
				dxl_frame_lead = EdxlfbDelayedBoundedFollowing;
				dxl_frame_trail = EdxlfbDelayedBoundedFollowing;
1360 1361 1362
			}
			else
			{
1363 1364
				dxl_frame_lead = EdxlfbDelayedBoundedPreceding;
				dxl_frame_trail = EdxlfbDelayedBoundedPreceding;
1365 1366
			}
		}
J
Jesse Zhang 已提交
1367 1368 1369 1370 1371 1372
		dxl_lead_edge = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarWindowFrameEdge(
							   m_mp, true /* fLeading */, dxl_frame_lead));
		dxl_trail_edge = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarWindowFrameEdge(
							   m_mp, false /* fLeading */, dxl_frame_trail));
1373

1374 1375 1376 1377
		dxl_offset->AddRef();
		dxl_lead_edge->AddChild(dxl_offset);
		dxl_offset->AddRef();
		dxl_trail_edge->AddChild(dxl_offset);
1378 1379 1380
	}

	// manufacture a frame for LEAD/LAG function
J
Jesse Zhang 已提交
1381 1382 1383 1384 1385
	return GPOS_NEW(m_mp) CDXLWindowFrame(
		m_mp,
		EdxlfsRow,	   // frame specification
		EdxlfesNulls,  // frame exclusion strategy is set to exclude NULLs in GPDB
		dxl_lead_edge, dxl_trail_edge);
1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402
}


//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::UpdateLeadLagWinSpecPos
//
//	@doc:
// 		LEAD/LAG window functions need special frames to get executed correctly;
//		these frames are system-generated and cannot be specified in query text;
//		this function adds new entries to the list of window specs holding these
//		manufactured frames, and updates window spec references of LEAD/LAG
//		functions accordingly
//
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
1403 1404 1405 1406
CTranslatorQueryToDXL::UpdateLeadLagWinSpecPos(
	CDXLNode *project_list_dxlnode,			// project list holding WinRef nodes
	CDXLWindowSpecArray *window_spec_array	// original list of window spec
) const
1407
{
1408 1409
	GPOS_ASSERT(NULL != project_list_dxlnode);
	GPOS_ASSERT(NULL != window_spec_array);
1410

1411 1412
	const ULONG arity = project_list_dxlnode->Arity();
	for (ULONG ul = 0; ul < arity; ul++)
1413
	{
1414 1415 1416 1417 1418
		CDXLNode *child_dxlnode = (*(*project_list_dxlnode)[ul])[0];
		CDXLOperator *dxlop = child_dxlnode->GetOperator();
		BOOL is_lead_func = IsLeadWindowFunc(dxlop);
		BOOL is_lag = IsLagWindowFunc(dxlop);
		if (is_lead_func || is_lag)
1419
		{
J
Jesse Zhang 已提交
1420 1421 1422 1423
			CDXLScalarWindowRef *winref_dxlop =
				CDXLScalarWindowRef::Cast(dxlop);
			CDXLWindowSpec *window_spec_dxlnode =
				(*window_spec_array)[winref_dxlop->GetWindSpecPos()];
1424 1425
			CMDName *mdname = NULL;
			if (NULL != window_spec_dxlnode->MdName())
1426
			{
J
Jesse Zhang 已提交
1427 1428
				mdname = GPOS_NEW(m_mp)
					CMDName(m_mp, window_spec_dxlnode->MdName()->GetMDName());
1429 1430 1431
			}

			// find if an offset is specified
1432 1433
			CDXLNode *dxl_offset = NULL;
			if (1 < child_dxlnode->Arity())
1434
			{
1435
				dxl_offset = (*child_dxlnode)[1];
1436 1437 1438
			}

			// create LEAD/LAG frame
J
Jesse Zhang 已提交
1439 1440
			CDXLWindowFrame *window_frame =
				CreateWindowFramForLeadLag(is_lead_func, dxl_offset);
1441 1442

			// create new window spec object
1443 1444
			window_spec_dxlnode->GetPartitionByColIdArray()->AddRef();
			window_spec_dxlnode->GetSortColListDXL()->AddRef();
J
Jesse Zhang 已提交
1445 1446 1447
			CDXLWindowSpec *pdxlwsNew = GPOS_NEW(m_mp) CDXLWindowSpec(
				m_mp, window_spec_dxlnode->GetPartitionByColIdArray(), mdname,
				window_spec_dxlnode->GetSortColListDXL(), window_frame);
1448
			window_spec_array->Append(pdxlwsNew);
1449 1450

			// update win spec pos of LEAD/LAG function
1451
			winref_dxlop->SetWinSpecPos(window_spec_array->Size() - 1);
1452 1453 1454 1455 1456 1457 1458
		}
	}
}


//---------------------------------------------------------------------------
//	@function:
1459
//		CTranslatorQueryToDXL::TranslateWindowSpecToDXL
1460 1461 1462 1463 1464
//
//	@doc:
//		Translate window specs
//
//---------------------------------------------------------------------------
1465
CDXLWindowSpecArray *
J
Jesse Zhang 已提交
1466 1467 1468
CTranslatorQueryToDXL::TranslateWindowSpecToDXL(
	List *window_clause, IntToUlongMap *sort_col_attno_to_colid_mapping,
	CDXLNode *project_list_dxlnode_node)
1469
{
1470 1471 1472
	GPOS_ASSERT(NULL != window_clause);
	GPOS_ASSERT(NULL != sort_col_attno_to_colid_mapping);
	GPOS_ASSERT(NULL != project_list_dxlnode_node);
1473

J
Jesse Zhang 已提交
1474 1475
	CDXLWindowSpecArray *window_spec_array =
		GPOS_NEW(m_mp) CDXLWindowSpecArray(m_mp);
1476 1477

	// translate window specification
1478
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
1479
	ForEach(lc, window_clause)
1480
	{
1481
		WindowSpec *pwindowspec = (WindowSpec *) lfirst(lc);
J
Jesse Zhang 已提交
1482 1483
		ULongPtrArray *part_columns = TranslatePartColumns(
			pwindowspec->partition, sort_col_attno_to_colid_mapping);
1484

1485 1486 1487
		CDXLNode *sort_col_list_dxl = NULL;
		CMDName *mdname = NULL;
		CDXLWindowFrame *window_frame = NULL;
1488 1489 1490

		if (NULL != pwindowspec->name)
		{
J
Jesse Zhang 已提交
1491 1492 1493
			CWStringDynamic *alias_str =
				CDXLUtils::CreateDynamicStringFromCharArray(m_mp,
															pwindowspec->name);
1494 1495
			mdname = GPOS_NEW(m_mp) CMDName(m_mp, alias_str);
			GPOS_DELETE(alias_str);
1496 1497
		}

1498
		if (0 < gpdb::ListLength(pwindowspec->order))
1499 1500
		{
			// create a sorting col list
J
Jesse Zhang 已提交
1501 1502
			sort_col_list_dxl = GPOS_NEW(m_mp)
				CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarSortColList(m_mp));
1503

J
Jesse Zhang 已提交
1504 1505
			CDXLNodeArray *dxl_sort_cols = TranslateSortColumnsToDXL(
				pwindowspec->order, sort_col_attno_to_colid_mapping);
1506 1507
			const ULONG size = dxl_sort_cols->Size();
			for (ULONG ul = 0; ul < size; ul++)
1508
			{
1509 1510 1511
				CDXLNode *dxl_sort_clause = (*dxl_sort_cols)[ul];
				dxl_sort_clause->AddRef();
				sort_col_list_dxl->AddChild(dxl_sort_clause);
1512
			}
1513
			dxl_sort_cols->Release();
1514 1515 1516 1517
		}

		if (NULL != pwindowspec->frame)
		{
J
Jesse Zhang 已提交
1518 1519 1520
			window_frame = m_scalar_translator->GetWindowFrame(
				(Expr *) pwindowspec->frame, m_var_to_colid_map,
				project_list_dxlnode_node, &m_has_distributed_tables);
1521 1522
		}

J
Jesse Zhang 已提交
1523 1524
		CDXLWindowSpec *window_spec_dxlnode = GPOS_NEW(m_mp) CDXLWindowSpec(
			m_mp, part_columns, mdname, sort_col_list_dxl, window_frame);
1525
		window_spec_array->Append(window_spec_dxlnode);
1526 1527
	}

1528
	return window_spec_array;
1529 1530 1531 1532 1533
}


//---------------------------------------------------------------------------
//	@function:
1534
//		CTranslatorQueryToDXL::TranslateWindowToDXL
1535 1536 1537 1538 1539 1540
//
//	@doc:
//		Translate a window operator
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
1541 1542 1543 1544
CTranslatorQueryToDXL::TranslateWindowToDXL(
	CDXLNode *child_dxlnode, List *target_list, List *window_clause,
	List *sort_clause, IntToUlongMap *sort_col_attno_to_colid_mapping,
	IntToUlongMap *output_attno_to_colid_mapping)
1545
{
1546
	if (0 == gpdb::ListLength(window_clause))
1547
	{
1548
		return child_dxlnode;
1549 1550 1551
	}

	// translate target list entries
J
Jesse Zhang 已提交
1552 1553
	CDXLNode *project_list_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
1554

J
Jesse Zhang 已提交
1555 1556
	CDXLNode *new_child_project_list_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
1557 1558
	ListCell *lc = NULL;
	ULONG resno = 1;
1559

J
Jesse Zhang 已提交
1560
	// target entries that are result of flattening join alias and
1561
	// are equivalent to a defined Window specs target entry
1562 1563
	List *omitted_target_entries = NIL;
	List *resno_list = NIL;
J
Jesse Zhang 已提交
1564 1565

	ForEach(lc, target_list)
1566
	{
1567 1568 1569
		BOOL insert_sort_info = true;
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		GPOS_ASSERT(IsA(target_entry, TargetEntry));
1570 1571

		// create the DXL node holding the target list entry
J
Jesse Zhang 已提交
1572 1573 1574 1575
		CDXLNode *project_elem_dxlnode = TranslateExprToDXLProject(
			target_entry->expr, target_entry->resname);
		ULONG colid =
			CDXLScalarProjElem::Cast(project_elem_dxlnode->GetOperator())->Id();
1576

1577
		if (IsA(target_entry->expr, WindowRef))
1578
		{
J
Jesse Zhang 已提交
1579 1580
			CTranslatorUtils::CheckAggregateWindowFn(
				(Node *) target_entry->expr);
1581
		}
1582
		if (!target_entry->resjunk)
1583
		{
J
Jesse Zhang 已提交
1584 1585
			if (IsA(target_entry->expr, Var) ||
				IsA(target_entry->expr, WindowRef))
1586 1587
			{
				// add window functions and non-computed columns to the project list of the window operator
1588
				project_list_dxlnode->AddChild(project_elem_dxlnode);
1589

J
Jesse Zhang 已提交
1590 1591
				StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno,
									   colid);
1592
			}
J
Jesse Zhang 已提交
1593 1594
			else if (CTranslatorUtils::IsWindowSpec(target_entry,
													window_clause))
1595 1596 1597
			{
				// add computed column used in window specification needed in the output columns
				// to the child's project list
1598
				new_child_project_list_dxlnode->AddChild(project_elem_dxlnode);
1599 1600 1601

				// construct a scalar identifier that points to the computed column and
				// add it to the project list of the window operator
J
Jesse Zhang 已提交
1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624
				CMDName *mdname_alias = GPOS_NEW(m_mp)
					CMDName(m_mp, CDXLScalarProjElem::Cast(
									  project_elem_dxlnode->GetOperator())
									  ->GetMdNameAlias()
									  ->GetMDName());
				CDXLNode *new_project_elem_dxlnode = GPOS_NEW(m_mp)
					CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjElem(
									   m_mp, colid, mdname_alias));
				CDXLNode *project_elem_new_child_dxlnode =
					GPOS_NEW(m_mp) CDXLNode(
						m_mp,
						GPOS_NEW(m_mp) CDXLScalarIdent(
							m_mp, GPOS_NEW(m_mp) CDXLColRef(
									  m_mp,
									  GPOS_NEW(m_mp) CMDName(
										  m_mp, mdname_alias->GetMDName()),
									  colid,
									  GPOS_NEW(m_mp) CMDIdGPDB(gpdb::ExprType(
										  (Node *) target_entry->expr)),
									  gpdb::ExprTypeMod(
										  (Node *) target_entry->expr))));
				new_project_elem_dxlnode->AddChild(
					project_elem_new_child_dxlnode);
1625
				project_list_dxlnode->AddChild(new_project_elem_dxlnode);
1626

J
Jesse Zhang 已提交
1627 1628
				StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno,
									   colid);
1629 1630 1631
			}
			else
			{
1632
				insert_sort_info = false;
J
Jesse Zhang 已提交
1633 1634
				omitted_target_entries =
					gpdb::LAppend(omitted_target_entries, target_entry);
1635
				resno_list = gpdb::LAppendInt(resno_list, resno);
1636

1637
				project_elem_dxlnode->Release();
1638 1639
			}
		}
1640
		else if (IsA(target_entry->expr, WindowRef))
1641 1642
		{
			// computed columns used in the order by clause
1643
			project_list_dxlnode->AddChild(project_elem_dxlnode);
1644
		}
1645
		else if (!IsA(target_entry->expr, Var))
1646
		{
J
Jesse Zhang 已提交
1647 1648
			GPOS_ASSERT(
				CTranslatorUtils::IsWindowSpec(target_entry, window_clause));
1649
			// computed columns used in the window specification
1650
			new_child_project_list_dxlnode->AddChild(project_elem_dxlnode);
1651 1652 1653
		}
		else
		{
1654
			project_elem_dxlnode->Release();
1655 1656
		}

1657
		if (insert_sort_info)
1658
		{
J
Jesse Zhang 已提交
1659 1660
			AddSortingGroupingColumn(target_entry,
									 sort_col_attno_to_colid_mapping, colid);
1661 1662
		}

1663
		resno++;
1664 1665
	}

1666
	lc = NULL;
1667 1668

	// process target entries that are a result of flattening join alias
1669
	ListCell *lc_resno = NULL;
J
Jesse Zhang 已提交
1670
	ForBoth(lc, omitted_target_entries, lc_resno, resno_list)
1671
	{
1672 1673
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		INT resno = (INT) lfirst_int(lc_resno);
1674

J
Jesse Zhang 已提交
1675 1676 1677
		TargetEntry *te_window_spec =
			CTranslatorUtils::GetWindowSpecTargetEntry(
				(Node *) target_entry->expr, window_clause, target_list);
1678
		if (NULL != te_window_spec)
1679
		{
J
Jesse Zhang 已提交
1680 1681 1682
			const ULONG colid = CTranslatorUtils::GetColId(
				(INT) te_window_spec->ressortgroupref,
				sort_col_attno_to_colid_mapping);
1683
			StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno, colid);
J
Jesse Zhang 已提交
1684 1685
			AddSortingGroupingColumn(target_entry,
									 sort_col_attno_to_colid_mapping, colid);
1686 1687
		}
	}
1688
	if (NIL != omitted_target_entries)
1689
	{
1690
		gpdb::GPDBFree(omitted_target_entries);
1691 1692 1693
	}

	// translate window spec
J
Jesse Zhang 已提交
1694 1695 1696
	CDXLWindowSpecArray *window_spec_array =
		TranslateWindowSpecToDXL(window_clause, sort_col_attno_to_colid_mapping,
								 new_child_project_list_dxlnode);
1697

1698
	CDXLNode *new_child_dxlnode = NULL;
1699

1700
	if (0 < new_child_project_list_dxlnode->Arity())
1701 1702
	{
		// create a project list for the computed columns used in the window specification
J
Jesse Zhang 已提交
1703 1704
		new_child_dxlnode = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalProject(m_mp));
1705 1706 1707
		new_child_dxlnode->AddChild(new_child_project_list_dxlnode);
		new_child_dxlnode->AddChild(child_dxlnode);
		child_dxlnode = new_child_dxlnode;
1708 1709 1710 1711
	}
	else
	{
		// clean up
1712
		new_child_project_list_dxlnode->Release();
1713 1714
	}

J
Jesse Zhang 已提交
1715 1716
	if (!CTranslatorUtils::HasProjElem(project_list_dxlnode,
									   EdxlopScalarWindowRef))
1717
	{
1718 1719
		project_list_dxlnode->Release();
		window_spec_array->Release();
1720

1721
		return child_dxlnode;
1722 1723 1724
	}

	// update window spec positions of LEAD/LAG functions
1725
	UpdateLeadLagWinSpecPos(project_list_dxlnode, window_spec_array);
1726

J
Jesse Zhang 已提交
1727 1728
	CDXLLogicalWindow *window_dxlop =
		GPOS_NEW(m_mp) CDXLLogicalWindow(m_mp, window_spec_array);
1729
	CDXLNode *dxlnode = GPOS_NEW(m_mp) CDXLNode(m_mp, window_dxlop);
1730

1731 1732
	dxlnode->AddChild(project_list_dxlnode);
	dxlnode->AddChild(child_dxlnode);
1733

1734
	return dxlnode;
1735 1736 1737 1738
}

//---------------------------------------------------------------------------
//	@function:
1739
//		CTranslatorQueryToDXL::TranslatePartColumns
1740 1741 1742 1743 1744
//
//	@doc:
//		Translate the list of partition-by column identifiers
//
//---------------------------------------------------------------------------
1745
ULongPtrArray *
J
Jesse Zhang 已提交
1746 1747
CTranslatorQueryToDXL::TranslatePartColumns(
	List *partition_by_clause, IntToUlongMap *col_attno_colid_mapping) const
1748
{
1749
	ULongPtrArray *part_cols = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
1750

1751
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
1752
	ForEach(lc, partition_by_clause)
1753
	{
J
Jesse Zhang 已提交
1754
		Node *partition_clause = (Node *) lfirst(lc);
1755
		GPOS_ASSERT(NULL != partition_clause);
1756

1757
		GPOS_ASSERT(IsA(partition_clause, SortClause));
J
Jesse Zhang 已提交
1758
		SortClause *sort_group_clause = (SortClause *) partition_clause;
1759 1760

		// get the colid of the partition-by column
J
Jesse Zhang 已提交
1761 1762
		ULONG colid = CTranslatorUtils::GetColId(
			(INT) sort_group_clause->tleSortGroupRef, col_attno_colid_mapping);
1763

1764
		part_cols->Append(GPOS_NEW(m_mp) ULONG(colid));
1765 1766
	}

1767
	return part_cols;
1768 1769 1770 1771
}

//---------------------------------------------------------------------------
//	@function:
1772
//		CTranslatorQueryToDXL::TranslateSortColumnsToDXL
1773 1774 1775 1776 1777
//
//	@doc:
//		Translate the list of sorting columns
//
//---------------------------------------------------------------------------
1778
CDXLNodeArray *
J
Jesse Zhang 已提交
1779 1780
CTranslatorQueryToDXL::TranslateSortColumnsToDXL(
	List *sort_clause, IntToUlongMap *col_attno_colid_mapping) const
1781
{
1782
	CDXLNodeArray *dxlnodes = GPOS_NEW(m_mp) CDXLNodeArray(m_mp);
1783

1784
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
1785
	ForEach(lc, sort_clause)
1786
	{
J
Jesse Zhang 已提交
1787
		Node *node_sort_clause = (Node *) lfirst(lc);
1788
		GPOS_ASSERT(NULL != node_sort_clause);
1789

1790
		GPOS_ASSERT(IsA(node_sort_clause, SortClause));
1791

1792
		SortClause *sort_group_clause = (SortClause *) node_sort_clause;
1793 1794

		// get the colid of the sorting column
J
Jesse Zhang 已提交
1795 1796
		const ULONG colid = CTranslatorUtils::GetColId(
			(INT) sort_group_clause->tleSortGroupRef, col_attno_colid_mapping);
1797

1798
		OID oid = sort_group_clause->sortop;
1799 1800

		// get operator name
1801 1802
		CMDIdGPDB *op_mdid = GPOS_NEW(m_mp) CMDIdGPDB(oid);
		const IMDScalarOp *md_scalar_op = m_md_accessor->RetrieveScOp(op_mdid);
1803

1804 1805
		const CWStringConst *str = md_scalar_op->Mdname().GetMDName();
		GPOS_ASSERT(NULL != str);
1806

J
Jesse Zhang 已提交
1807 1808 1809 1810
		CDXLScalarSortCol *sc_sort_col_dxlop = GPOS_NEW(m_mp)
			CDXLScalarSortCol(m_mp, colid, op_mdid,
							  GPOS_NEW(m_mp) CWStringConst(str->GetBuffer()),
							  sort_group_clause->nulls_first);
1811 1812

		// create the DXL node holding the sorting col
J
Jesse Zhang 已提交
1813 1814
		CDXLNode *sort_col_dxlnode =
			GPOS_NEW(m_mp) CDXLNode(m_mp, sc_sort_col_dxlop);
1815

1816
		dxlnodes->Append(sort_col_dxlnode);
1817 1818
	}

1819
	return dxlnodes;
1820 1821 1822 1823
}

//---------------------------------------------------------------------------
//	@function:
1824
//		CTranslatorQueryToDXL::TranslateLimitToDXLGroupBy
1825 1826 1827 1828 1829 1830 1831
//
//	@doc:
//		Translate the list of sorting columns, limit offset and limit count
//		into a CDXLLogicalGroupBy node
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
1832 1833 1834
CTranslatorQueryToDXL::TranslateLimitToDXLGroupBy(
	List *sort_clause, Node *limit_count, Node *limit_offset_node,
	CDXLNode *child_dxlnode, IntToUlongMap *grpcols_to_colid_mapping)
1835
{
J
Jesse Zhang 已提交
1836 1837
	if (0 == gpdb::ListLength(sort_clause) && NULL == limit_count &&
		NULL == limit_offset_node)
1838
	{
1839
		return child_dxlnode;
1840 1841 1842 1843
	}

	// do not remove limit if it is immediately under a DML (JIRA: GPSQL-2669)
	// otherwise we may increase the storage size because there are less opportunities for compression
J
Jesse Zhang 已提交
1844 1845 1846 1847
	BOOL is_limit_top_level = (m_is_top_query_dml && 1 == m_query_level) ||
							  (m_is_ctas_query && 0 == m_query_level);
	CDXLNode *limit_dxlnode = GPOS_NEW(m_mp) CDXLNode(
		m_mp, GPOS_NEW(m_mp) CDXLLogicalLimit(m_mp, is_limit_top_level));
1848 1849

	// create a sorting col list
J
Jesse Zhang 已提交
1850 1851
	CDXLNode *sort_col_list_dxl = GPOS_NEW(m_mp)
		CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarSortColList(m_mp));
1852

J
Jesse Zhang 已提交
1853 1854
	CDXLNodeArray *dxl_sort_cols =
		TranslateSortColumnsToDXL(sort_clause, grpcols_to_colid_mapping);
1855 1856
	const ULONG size = dxl_sort_cols->Size();
	for (ULONG ul = 0; ul < size; ul++)
1857
	{
1858 1859 1860
		CDXLNode *sort_col_dxlnode = (*dxl_sort_cols)[ul];
		sort_col_dxlnode->AddRef();
		sort_col_list_dxl->AddChild(sort_col_dxlnode);
1861
	}
1862
	dxl_sort_cols->Release();
1863 1864

	// create limit count
J
Jesse Zhang 已提交
1865 1866
	CDXLNode *limit_count_dxlnode = GPOS_NEW(m_mp)
		CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarLimitCount(m_mp));
1867

1868
	if (NULL != limit_count)
1869
	{
J
Jesse Zhang 已提交
1870
		limit_count_dxlnode->AddChild(TranslateExprToDXL((Expr *) limit_count));
1871 1872 1873
	}

	// create limit offset
J
Jesse Zhang 已提交
1874 1875
	CDXLNode *limit_offset_dxlnode = GPOS_NEW(m_mp)
		CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarLimitOffset(m_mp));
1876

1877
	if (NULL != limit_offset_node)
1878
	{
J
Jesse Zhang 已提交
1879 1880
		limit_offset_dxlnode->AddChild(
			TranslateExprToDXL((Expr *) limit_offset_node));
1881 1882
	}

1883 1884 1885 1886
	limit_dxlnode->AddChild(sort_col_list_dxl);
	limit_dxlnode->AddChild(limit_count_dxlnode);
	limit_dxlnode->AddChild(limit_offset_dxlnode);
	limit_dxlnode->AddChild(child_dxlnode);
1887

1888
	return limit_dxlnode;
1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::AddSortingGroupingColumn
//
//	@doc:
//		Add sorting and grouping column into the hash map
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
1900 1901 1902
CTranslatorQueryToDXL::AddSortingGroupingColumn(
	TargetEntry *target_entry, IntToUlongMap *sort_grpref_to_colid_mapping,
	ULONG colid) const
1903
{
1904
	if (0 < target_entry->ressortgroupref)
1905
	{
1906 1907
		INT *key = GPOS_NEW(m_mp) INT(target_entry->ressortgroupref);
		ULONG *value = GPOS_NEW(m_mp) ULONG(colid);
1908 1909 1910

		// insert idx-colid mapping in the hash map
#ifdef GPOS_DEBUG
1911
		BOOL is_res =
J
Jesse Zhang 已提交
1912 1913
#endif	// GPOS_DEBUG
			sort_grpref_to_colid_mapping->Insert(key, value);
1914

1915
		GPOS_ASSERT(is_res);
1916 1917 1918 1919 1920
	}
}

//---------------------------------------------------------------------------
//	@function:
1921
//		CTranslatorQueryToDXL::CreateSimpleGroupBy
1922 1923 1924 1925 1926 1927
//
//	@doc:
//		Translate a query with grouping clause into a CDXLLogicalGroupBy node
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
1928 1929 1930
CTranslatorQueryToDXL::CreateSimpleGroupBy(
	List *target_list, List *group_clause, CBitSet *grpby_cols_bitset,
	BOOL has_aggs, BOOL has_grouping_sets, CDXLNode *child_dxlnode,
1931 1932
	IntToUlongMap *sort_grpref_to_colid_mapping,
	IntToUlongMap *child_attno_colid_mapping,
J
Jesse Zhang 已提交
1933
	IntToUlongMap *output_attno_to_colid_mapping)
1934
{
1935
	if (NULL == grpby_cols_bitset)
J
Jesse Zhang 已提交
1936
	{
1937 1938
		GPOS_ASSERT(!has_aggs);
		if (!has_grouping_sets)
1939
		{
J
Jesse Zhang 已提交
1940
			// no group by needed and not part of a grouping sets query:
1941
			// propagate child columns to output columns
1942 1943
			IntUlongHashmapIter mi(child_attno_colid_mapping);
			while (mi.Advance())
1944
			{
J
Jesse Zhang 已提交
1945
#ifdef GPOS_DEBUG
1946
				BOOL result =
J
Jesse Zhang 已提交
1947 1948 1949 1950
#endif	// GPOS_DEBUG
					output_attno_to_colid_mapping->Insert(
						GPOS_NEW(m_mp) INT(*(mi.Key())),
						GPOS_NEW(m_mp) ULONG(*(mi.Value())));
1951
				GPOS_ASSERT(result);
1952 1953 1954 1955 1956 1957 1958
			}
		}
		// else:
		// in queries with grouping sets we may generate a branch corresponding to GB grouping sets ();
		// in that case do not propagate the child columns to the output hash map, as later
		// processing may introduce NULLs for those

1959
		return child_dxlnode;
1960 1961
	}

1962
	List *dqa_list = NIL;
1963
	// construct the project list of the group-by operator
J
Jesse Zhang 已提交
1964 1965
	CDXLNode *project_list_grpby_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
1966

1967 1968
	ListCell *lc = NULL;
	ULONG num_dqa = 0;
J
Jesse Zhang 已提交
1969
	ForEach(lc, target_list)
1970
	{
1971 1972 1973 1974
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		GPOS_ASSERT(IsA(target_entry, TargetEntry));
		GPOS_ASSERT(0 < target_entry->resno);
		ULONG resno = target_entry->resno;
1975

J
Jesse Zhang 已提交
1976 1977 1978
		TargetEntry *te_equivalent =
			CTranslatorUtils::GetGroupingColumnTargetEntry(
				(Node *) target_entry->expr, group_clause, target_list);
1979

J
Jesse Zhang 已提交
1980 1981 1982 1983
		BOOL is_grouping_col =
			grpby_cols_bitset->Get(target_entry->ressortgroupref) ||
			(NULL != te_equivalent &&
			 grpby_cols_bitset->Get(te_equivalent->ressortgroupref));
1984
		ULONG colid = 0;
1985

1986
		if (is_grouping_col)
1987 1988
		{
			// find colid for grouping column
J
Jesse Zhang 已提交
1989 1990
			colid =
				CTranslatorUtils::GetColId(resno, child_attno_colid_mapping);
1991
		}
J
Jesse Zhang 已提交
1992 1993
		else if (IsA(target_entry->expr, Aggref) ||
				 IsA(target_entry->expr, PercentileExpr))
1994
		{
J
Jesse Zhang 已提交
1995 1996 1997
			if (IsA(target_entry->expr, Aggref) &&
				((Aggref *) target_entry->expr)->aggdistinct &&
				!IsDuplicateDqaArg(dqa_list, (Aggref *) target_entry->expr))
1998
			{
J
Jesse Zhang 已提交
1999 2000
				dqa_list = gpdb::LAppend(dqa_list,
										 gpdb::CopyObject(target_entry->expr));
2001
				num_dqa++;
2002 2003 2004
			}

			// create a project element for aggregate
J
Jesse Zhang 已提交
2005 2006
			CDXLNode *project_elem_dxlnode = TranslateExprToDXLProject(
				target_entry->expr, target_entry->resname);
2007
			project_list_grpby_dxlnode->AddChild(project_elem_dxlnode);
J
Jesse Zhang 已提交
2008 2009 2010 2011 2012
			colid =
				CDXLScalarProjElem::Cast(project_elem_dxlnode->GetOperator())
					->Id();
			AddSortingGroupingColumn(target_entry, sort_grpref_to_colid_mapping,
									 colid);
2013 2014
		}

J
Jesse Zhang 已提交
2015 2016
		if (is_grouping_col || IsA(target_entry->expr, Aggref) ||
			IsA(target_entry->expr, PercentileExpr))
2017 2018
		{
			// add to the list of output columns
2019
			StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno, colid);
2020
		}
J
Jesse Zhang 已提交
2021 2022
		else if (0 == grpby_cols_bitset->Size() && !has_grouping_sets &&
				 !has_aggs)
2023
		{
2024
			StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno, colid);
2025 2026 2027
		}
	}

2028
	if (1 < num_dqa && !optimizer_enable_multiple_distinct_aggs)
2029
	{
J
Jesse Zhang 已提交
2030 2031 2032 2033
		GPOS_RAISE(
			gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
			GPOS_WSZ_LIT(
				"Multiple Distinct Qualified Aggregates are disabled in the optimizer"));
2034 2035 2036
	}

	// initialize the array of grouping columns
J
Jesse Zhang 已提交
2037 2038
	ULongPtrArray *grouping_cols = CTranslatorUtils::GetGroupingColidArray(
		m_mp, grpby_cols_bitset, sort_grpref_to_colid_mapping);
2039 2040

	// clean up
2041
	if (NIL != dqa_list)
2042
	{
2043
		gpdb::ListFree(dqa_list);
2044 2045
	}

J
Jesse Zhang 已提交
2046 2047 2048
	return GPOS_NEW(m_mp)
		CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalGroupBy(m_mp, grouping_cols),
				 project_list_grpby_dxlnode, child_dxlnode);
2049 2050 2051 2052 2053
}


//---------------------------------------------------------------------------
//	@function:
2054
//		CTranslatorQueryToDXL::IsDuplicateDqaArg
2055 2056 2057 2058 2059
//
//	@doc:
//		Check if the argument of a DQA has already being used by another DQA
//---------------------------------------------------------------------------
BOOL
J
Jesse Zhang 已提交
2060
CTranslatorQueryToDXL::IsDuplicateDqaArg(List *dqa_list, Aggref *aggref)
2061
{
2062
	GPOS_ASSERT(NULL != aggref);
2063

2064
	if (NIL == dqa_list || 0 == gpdb::ListLength(dqa_list))
2065 2066 2067 2068
	{
		return false;
	}

2069
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
2070
	ForEach(lc, dqa_list)
2071
	{
2072 2073
		Node *node = (Node *) lfirst(lc);
		GPOS_ASSERT(IsA(node, Aggref));
2074

2075
		if (gpdb::Equals(aggref->args, ((Aggref *) node)->args))
2076 2077 2078 2079 2080 2081 2082 2083 2084 2085
		{
			return true;
		}
	}

	return false;
}

//---------------------------------------------------------------------------
//	@function:
2086
//		CTranslatorQueryToDXL::TranslateGroupingSets
2087 2088 2089 2090 2091 2092
//
//	@doc:
//		Translate a query with grouping sets
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
2093 2094
CTranslatorQueryToDXL::TranslateGroupingSets(
	FromExpr *from_expr, List *target_list, List *group_clause, BOOL has_aggs,
2095
	IntToUlongMap *sort_grpref_to_colid_mapping,
J
Jesse Zhang 已提交
2096
	IntToUlongMap *output_attno_to_colid_mapping)
2097
{
2098
	const ULONG num_of_cols = gpdb::ListLength(target_list) + 1;
2099

2100
	if (NULL == group_clause)
2101
	{
J
Jesse Zhang 已提交
2102 2103
		IntToUlongMap *child_attno_colid_mapping =
			GPOS_NEW(m_mp) IntToUlongMap(m_mp);
2104

J
Jesse Zhang 已提交
2105 2106 2107
		CDXLNode *select_project_join_dxlnode = TranslateSelectProjectJoinToDXL(
			target_list, from_expr, sort_grpref_to_colid_mapping,
			child_attno_colid_mapping, group_clause);
2108

2109 2110
		CBitSet *bitset = NULL;
		if (has_aggs)
J
Jesse Zhang 已提交
2111
		{
2112
			bitset = GPOS_NEW(m_mp) CBitSet(m_mp);
2113
		}
J
Jesse Zhang 已提交
2114

2115
		// in case of aggregates, construct a group by operator
J
Jesse Zhang 已提交
2116 2117 2118 2119 2120
		CDXLNode *result_dxlnode = CreateSimpleGroupBy(
			target_list, group_clause, bitset, has_aggs,
			false,	// has_grouping_sets
			select_project_join_dxlnode, sort_grpref_to_colid_mapping,
			child_attno_colid_mapping, output_attno_to_colid_mapping);
2121 2122

		// cleanup
2123 2124 2125
		child_attno_colid_mapping->Release();
		CRefCount::SafeRelease(bitset);
		return result_dxlnode;
2126 2127 2128 2129
	}

	// grouping functions refer to grouping col positions, so construct a map pos->grouping column
	// while processing the grouping clause
J
Jesse Zhang 已提交
2130 2131
	UlongToUlongMap *grpcol_index_to_colid_mapping =
		GPOS_NEW(m_mp) UlongToUlongMap(m_mp);
2132
	CBitSet *unique_grp_cols_bitset = GPOS_NEW(m_mp) CBitSet(m_mp, num_of_cols);
J
Jesse Zhang 已提交
2133 2134 2135
	CBitSetArray *bitset_array = CTranslatorUtils::GetColumnAttnosForGroupBy(
		m_mp, group_clause, num_of_cols, grpcol_index_to_colid_mapping,
		unique_grp_cols_bitset);
2136

2137
	const ULONG num_of_grouping_sets = bitset_array->Size();
2138

2139
	if (1 == num_of_grouping_sets)
2140 2141
	{
		// simple group by
J
Jesse Zhang 已提交
2142 2143 2144 2145 2146
		IntToUlongMap *child_attno_colid_mapping =
			GPOS_NEW(m_mp) IntToUlongMap(m_mp);
		CDXLNode *select_project_join_dxlnode = TranslateSelectProjectJoinToDXL(
			target_list, from_expr, sort_grpref_to_colid_mapping,
			child_attno_colid_mapping, group_clause);
2147 2148

		// translate the groupby clauses into a logical group by operator
2149
		CBitSet *bitset = (*bitset_array)[0];
2150 2151


J
Jesse Zhang 已提交
2152 2153 2154 2155 2156 2157 2158 2159 2160
		CDXLNode *groupby_dxlnode = CreateSimpleGroupBy(
			target_list, group_clause, bitset, has_aggs,
			false,	// has_grouping_sets
			select_project_join_dxlnode, sort_grpref_to_colid_mapping,
			child_attno_colid_mapping, output_attno_to_colid_mapping);

		CDXLNode *result_dxlnode = CreateDXLProjectGroupingFuncs(
			target_list, groupby_dxlnode, bitset, output_attno_to_colid_mapping,
			grpcol_index_to_colid_mapping, sort_grpref_to_colid_mapping);
2161

2162 2163 2164 2165
		child_attno_colid_mapping->Release();
		bitset_array->Release();
		unique_grp_cols_bitset->Release();
		grpcol_index_to_colid_mapping->Release();
J
Jesse Zhang 已提交
2166

2167
		return result_dxlnode;
2168
	}
J
Jesse Zhang 已提交
2169 2170 2171 2172 2173

	CDXLNode *result_dxlnode = CreateDXLUnionAllForGroupingSets(
		from_expr, target_list, group_clause, has_aggs, bitset_array,
		sort_grpref_to_colid_mapping, output_attno_to_colid_mapping,
		grpcol_index_to_colid_mapping);
2174

2175 2176
	unique_grp_cols_bitset->Release();
	grpcol_index_to_colid_mapping->Release();
J
Jesse Zhang 已提交
2177

2178
	return result_dxlnode;
2179 2180 2181 2182
}

//---------------------------------------------------------------------------
//	@function:
2183
//		CTranslatorQueryToDXL::CreateDXLUnionAllForGroupingSets
2184 2185 2186 2187 2188 2189
//
//	@doc:
//		Construct a union all for the given grouping sets
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
2190 2191 2192
CTranslatorQueryToDXL::CreateDXLUnionAllForGroupingSets(
	FromExpr *from_expr, List *target_list, List *group_clause, BOOL has_aggs,
	CBitSetArray *bitset_array, IntToUlongMap *sort_grpref_to_colid_mapping,
2193
	IntToUlongMap *output_attno_to_colid_mapping,
J
Jesse Zhang 已提交
2194 2195 2196
	UlongToUlongMap *
		grpcol_index_to_colid_mapping  // mapping pos->unique grouping columns for grouping func arguments
)
2197
{
2198 2199
	GPOS_ASSERT(NULL != bitset_array);
	GPOS_ASSERT(1 < bitset_array->Size());
2200

2201 2202 2203
	const ULONG num_of_grouping_sets = bitset_array->Size();
	CDXLNode *unionall_dxlnode = NULL;
	ULongPtrArray *colid_array_inner = NULL;
2204

2205
	const ULONG cte_id = m_cte_id_counter->next_id();
J
Jesse Zhang 已提交
2206

2207
	// construct a CTE producer on top of the SPJ query
J
Jesse Zhang 已提交
2208 2209 2210 2211 2212 2213 2214 2215
	IntToUlongMap *spj_output_attno_to_colid_mapping =
		GPOS_NEW(m_mp) IntToUlongMap(m_mp);
	IntToUlongMap *sort_groupref_to_colid_producer_mapping =
		GPOS_NEW(m_mp) IntToUlongMap(m_mp);
	CDXLNode *select_project_join_dxlnode =
		TranslateSelectProjectJoinForGrpSetsToDXL(
			target_list, from_expr, sort_groupref_to_colid_producer_mapping,
			spj_output_attno_to_colid_mapping, group_clause);
2216 2217

	// construct output colids
J
Jesse Zhang 已提交
2218 2219 2220 2221
	ULongPtrArray *op_colid_array_cte_producer =
		ExtractColIds(m_mp, spj_output_attno_to_colid_mapping);

	GPOS_ASSERT(NULL != m_dxl_cte_producers);
2222

J
Jesse Zhang 已提交
2223 2224 2225 2226
	CDXLLogicalCTEProducer *cte_prod_dxlop = GPOS_NEW(m_mp)
		CDXLLogicalCTEProducer(m_mp, cte_id, op_colid_array_cte_producer);
	CDXLNode *cte_producer_dxlnode = GPOS_NEW(m_mp)
		CDXLNode(m_mp, cte_prod_dxlop, select_project_join_dxlnode);
2227
	m_dxl_cte_producers->Append(cte_producer_dxlnode);
J
Jesse Zhang 已提交
2228 2229 2230 2231

	CMappingVarColId *var_colid_orig_mapping =
		m_var_to_colid_map->CopyMapColId(m_mp);

2232
	for (ULONG ul = 0; ul < num_of_grouping_sets; ul++)
2233
	{
2234
		CBitSet *grouping_set_bitset = (*bitset_array)[ul];
2235 2236

		// remap columns
J
Jesse Zhang 已提交
2237 2238 2239
		ULongPtrArray *colid_array_cte_consumer =
			GenerateColIds(m_mp, op_colid_array_cte_producer->Size());

2240
		// reset col mapping with new consumer columns
2241
		GPOS_DELETE(m_var_to_colid_map);
J
Jesse Zhang 已提交
2242 2243 2244 2245 2246 2247 2248 2249 2250
		m_var_to_colid_map = var_colid_orig_mapping->CopyRemapColId(
			m_mp, op_colid_array_cte_producer, colid_array_cte_consumer);

		IntToUlongMap *spj_consumer_output_attno_to_colid_mapping =
			RemapColIds(m_mp, spj_output_attno_to_colid_mapping,
						op_colid_array_cte_producer, colid_array_cte_consumer);
		IntToUlongMap *phmiulSortgrouprefColIdConsumer =
			RemapColIds(m_mp, sort_groupref_to_colid_producer_mapping,
						op_colid_array_cte_producer, colid_array_cte_consumer);
2251 2252

		// construct a CTE consumer
J
Jesse Zhang 已提交
2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264
		CDXLNode *cte_consumer_dxlnode = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalCTEConsumer(
							   m_mp, cte_id, colid_array_cte_consumer));

		IntToUlongMap *groupby_attno_to_colid_mapping =
			GPOS_NEW(m_mp) IntToUlongMap(m_mp);
		CDXLNode *groupby_dxlnode = CreateSimpleGroupBy(
			target_list, group_clause, grouping_set_bitset, has_aggs,
			true,  // has_grouping_sets
			cte_consumer_dxlnode, phmiulSortgrouprefColIdConsumer,
			spj_consumer_output_attno_to_colid_mapping,
			groupby_attno_to_colid_mapping);
2265 2266

		// add a project list for the NULL values
J
Jesse Zhang 已提交
2267 2268 2269 2270 2271 2272 2273 2274
		CDXLNode *project_dxlnode = CreateDXLProjectNullsForGroupingSets(
			target_list, groupby_dxlnode, grouping_set_bitset,
			phmiulSortgrouprefColIdConsumer, groupby_attno_to_colid_mapping,
			grpcol_index_to_colid_mapping);

		ULongPtrArray *colids_outer_array =
			CTranslatorUtils::GetOutputColIdsArray(
				m_mp, target_list, groupby_attno_to_colid_mapping);
2275
		if (NULL != unionall_dxlnode)
2276
		{
2277
			GPOS_ASSERT(NULL != colid_array_inner);
J
Jesse Zhang 已提交
2278 2279 2280 2281
			CDXLColDescrArray *dxl_col_descr_array =
				CTranslatorUtils::GetDXLColumnDescrArray(
					m_mp, target_list, colids_outer_array,
					true /* keep_res_junked */);
2282

2283
			colids_outer_array->AddRef();
2284

J
Jesse Zhang 已提交
2285 2286
			ULongPtr2dArray *input_colids =
				GPOS_NEW(m_mp) ULongPtr2dArray(m_mp);
2287 2288
			input_colids->Append(colids_outer_array);
			input_colids->Append(colid_array_inner);
2289

J
Jesse Zhang 已提交
2290 2291 2292 2293 2294
			CDXLLogicalSetOp *dxl_setop = GPOS_NEW(m_mp)
				CDXLLogicalSetOp(m_mp, EdxlsetopUnionAll, dxl_col_descr_array,
								 input_colids, false);
			unionall_dxlnode = GPOS_NEW(m_mp)
				CDXLNode(m_mp, dxl_setop, project_dxlnode, unionall_dxlnode);
2295 2296 2297
		}
		else
		{
2298
			unionall_dxlnode = project_dxlnode;
2299 2300
		}

2301
		colid_array_inner = colids_outer_array;
J
Jesse Zhang 已提交
2302

2303
		if (ul == num_of_grouping_sets - 1)
2304 2305
		{
			// add the sortgroup columns to output map of the last column
2306 2307
			ULONG te_pos = 0;
			ListCell *lc = NULL;
J
Jesse Zhang 已提交
2308
			ForEach(lc, target_list)
2309
			{
2310
				TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
2311

J
Jesse Zhang 已提交
2312 2313 2314 2315
				INT sortgroupref = INT(target_entry->ressortgroupref);
				if (0 < sortgroupref &&
					NULL !=
						phmiulSortgrouprefColIdConsumer->Find(&sortgroupref))
2316 2317
				{
					// add the mapping information for sorting columns
J
Jesse Zhang 已提交
2318 2319 2320
					AddSortingGroupingColumn(target_entry,
											 sort_grpref_to_colid_mapping,
											 *(*colid_array_inner)[te_pos]);
2321 2322
				}

2323
				te_pos++;
2324 2325 2326 2327
			}
		}

		// cleanup
2328 2329
		groupby_attno_to_colid_mapping->Release();
		spj_consumer_output_attno_to_colid_mapping->Release();
2330 2331 2332 2333
		phmiulSortgrouprefColIdConsumer->Release();
	}

	// cleanup
2334 2335 2336 2337
	spj_output_attno_to_colid_mapping->Release();
	sort_groupref_to_colid_producer_mapping->Release();
	GPOS_DELETE(var_colid_orig_mapping);
	colid_array_inner->Release();
2338 2339

	// compute output columns
J
Jesse Zhang 已提交
2340 2341
	CDXLLogicalSetOp *union_dxlop =
		CDXLLogicalSetOp::Cast(unionall_dxlnode->GetOperator());
2342

2343 2344
	ListCell *lc = NULL;
	ULONG output_col_idx = 0;
J
Jesse Zhang 已提交
2345
	ForEach(lc, target_list)
2346
	{
2347 2348 2349 2350
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		GPOS_ASSERT(IsA(target_entry, TargetEntry));
		GPOS_ASSERT(0 < target_entry->resno);
		ULONG resno = target_entry->resno;
2351 2352

		// note that all target list entries are kept in union all's output column
2353
		// this is achieved by the keep_res_junked flag in CTranslatorUtils::GetDXLColumnDescrArray
J
Jesse Zhang 已提交
2354 2355
		const CDXLColDescr *dxl_col_descr =
			union_dxlop->GetColumnDescrAt(output_col_idx);
2356 2357
		const ULONG colid = dxl_col_descr->Id();
		output_col_idx++;
2358

2359
		if (!target_entry->resjunk)
2360 2361
		{
			// add non-resjunk columns to the hash map that maintains the output columns
2362
			StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno, colid);
2363 2364 2365 2366
		}
	}

	// cleanup
2367
	bitset_array->Release();
2368 2369

	// construct a CTE anchor operator on top of the union all
J
Jesse Zhang 已提交
2370 2371 2372
	return GPOS_NEW(m_mp)
		CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalCTEAnchor(m_mp, cte_id),
				 unionall_dxlnode);
2373 2374 2375 2376
}

//---------------------------------------------------------------------------
//	@function:
2377
//		CTranslatorQueryToDXL::DXLDummyConstTableGet
2378 2379 2380 2381 2382 2383
//
//	@doc:
//		Create a dummy constant table get (CTG) with a boolean true value
//
//---------------------------------------------------------------------------
CDXLNode *
2384
CTranslatorQueryToDXL::DXLDummyConstTableGet() const
2385 2386
{
	// construct the schema of the const table
J
Jesse Zhang 已提交
2387 2388
	CDXLColDescrArray *dxl_col_descr_array =
		GPOS_NEW(m_mp) CDXLColDescrArray(m_mp);
2389

J
Jesse Zhang 已提交
2390 2391
	const CMDTypeBoolGPDB *md_type_bool = dynamic_cast<const CMDTypeBoolGPDB *>(
		m_md_accessor->PtMDType<IMDTypeBool>(m_sysid));
2392
	const CMDIdGPDB *mdid = CMDIdGPDB::CastMdid(md_type_bool->MDId());
2393 2394

	// empty column name
2395 2396
	CWStringConst str_unnamed_col(GPOS_WSZ_LIT(""));
	CMDName *mdname = GPOS_NEW(m_mp) CMDName(m_mp, &str_unnamed_col);
J
Jesse Zhang 已提交
2397 2398 2399 2400 2401
	CDXLColDescr *dxl_col_descr = GPOS_NEW(m_mp)
		CDXLColDescr(m_mp, mdname, m_colid_counter->next_id(), 1 /* attno */,
					 GPOS_NEW(m_mp) CMDIdGPDB(mdid->Oid()),
					 default_type_modifier, false /* is_dropped */
		);
2402
	dxl_col_descr_array->Append(dxl_col_descr);
2403 2404

	// create the array of datum arrays
J
Jesse Zhang 已提交
2405 2406 2407
	CDXLDatum2dArray *dispatch_identifier_datum_arrays =
		GPOS_NEW(m_mp) CDXLDatum2dArray(m_mp);

2408
	// create a datum array
2409
	CDXLDatumArray *dxl_datum_array = GPOS_NEW(m_mp) CDXLDatumArray(m_mp);
2410

J
Jesse Zhang 已提交
2411 2412
	Const *const_expr =
		(Const *) gpdb::MakeBoolConst(true /*value*/, false /*isnull*/);
2413 2414
	CDXLDatum *datum_dxl = m_scalar_translator->TranslateConstToDXL(const_expr);
	gpdb::GPDBFree(const_expr);
2415

2416 2417
	dxl_datum_array->Append(datum_dxl);
	dispatch_identifier_datum_arrays->Append(dxl_datum_array);
2418

J
Jesse Zhang 已提交
2419 2420
	CDXLLogicalConstTable *dxlop = GPOS_NEW(m_mp) CDXLLogicalConstTable(
		m_mp, dxl_col_descr_array, dispatch_identifier_datum_arrays);
2421

2422
	return GPOS_NEW(m_mp) CDXLNode(m_mp, dxlop);
2423 2424 2425 2426
}

//---------------------------------------------------------------------------
//	@function:
2427
//		CTranslatorQueryToDXL::TranslateSetOpToDXL
2428 2429 2430 2431 2432 2433
//
//	@doc:
//		Translate a set operation
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
2434 2435 2436
CTranslatorQueryToDXL::TranslateSetOpToDXL(
	Node *setop_node, List *target_list,
	IntToUlongMap *output_attno_to_colid_mapping)
2437
{
2438
	GPOS_ASSERT(IsA(setop_node, SetOperationStmt));
J
Jesse Zhang 已提交
2439
	SetOperationStmt *psetopstmt = (SetOperationStmt *) setop_node;
2440 2441
	GPOS_ASSERT(SETOP_NONE != psetopstmt->op);

J
Jesse Zhang 已提交
2442 2443
	EdxlSetOpType setop_type =
		CTranslatorUtils::GetSetOpType(psetopstmt->op, psetopstmt->all);
2444 2445

	// translate the left and right child
2446 2447 2448 2449
	ULongPtrArray *leftchild_array = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
	ULongPtrArray *rightchild_array = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
	IMdIdArray *mdid_array_leftchild = GPOS_NEW(m_mp) IMdIdArray(m_mp);
	IMdIdArray *mdid_array_rightchild = GPOS_NEW(m_mp) IMdIdArray(m_mp);
2450

J
Jesse Zhang 已提交
2451 2452 2453 2454
	CDXLNode *left_child_dxlnode = TranslateSetOpChild(
		psetopstmt->larg, leftchild_array, mdid_array_leftchild, target_list);
	CDXLNode *right_child_dxlnode = TranslateSetOpChild(
		psetopstmt->rarg, rightchild_array, mdid_array_rightchild, target_list);
2455 2456

	// mark outer references in input columns from left child
2457 2458 2459 2460
	ULONG *colid = GPOS_NEW_ARRAY(m_mp, ULONG, leftchild_array->Size());
	BOOL *outer_ref_array = GPOS_NEW_ARRAY(m_mp, BOOL, leftchild_array->Size());
	const ULONG size = leftchild_array->Size();
	for (ULONG ul = 0; ul < size; ul++)
2461
	{
J
Jesse Zhang 已提交
2462
		colid[ul] = *(*leftchild_array)[ul];
2463
		outer_ref_array[ul] = true;
2464
	}
J
Jesse Zhang 已提交
2465 2466
	CTranslatorUtils::MarkOuterRefs(colid, outer_ref_array, size,
									left_child_dxlnode);
2467

2468 2469 2470
	ULongPtr2dArray *input_colids = GPOS_NEW(m_mp) ULongPtr2dArray(m_mp);
	input_colids->Append(leftchild_array);
	input_colids->Append(rightchild_array);
J
Jesse Zhang 已提交
2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484

	ULongPtrArray *output_colids = CTranslatorUtils::GenerateColIds(
		m_mp, target_list, mdid_array_leftchild, leftchild_array,
		outer_ref_array, m_colid_counter);
	GPOS_ASSERT(output_colids->Size() == leftchild_array->Size());

	GPOS_DELETE_ARRAY(colid);
	GPOS_DELETE_ARRAY(outer_ref_array);

	BOOL is_cast_across_input =
		SetOpNeedsCast(target_list, mdid_array_leftchild) ||
		SetOpNeedsCast(target_list, mdid_array_rightchild);

	CDXLNodeArray *children_dxlnodes = GPOS_NEW(m_mp) CDXLNodeArray(m_mp);
2485 2486
	children_dxlnodes->Append(left_child_dxlnode);
	children_dxlnodes->Append(right_child_dxlnode);
2487

J
Jesse Zhang 已提交
2488 2489 2490 2491
	CDXLNode *dxlnode = CreateDXLSetOpFromColumns(
		setop_type, target_list, output_colids, input_colids, children_dxlnodes,
		is_cast_across_input, false /* keep_res_junked */
	);
2492

2493
	CDXLLogicalSetOp *dxlop = CDXLLogicalSetOp::Cast(dxlnode->GetOperator());
J
Jesse Zhang 已提交
2494 2495
	const CDXLColDescrArray *dxl_col_descr_array =
		dxlop->GetDXLColumnDescrArray();
2496

2497 2498
	ULONG output_col_idx = 0;
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
2499
	ForEach(lc, target_list)
2500
	{
2501 2502 2503 2504
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		GPOS_ASSERT(IsA(target_entry, TargetEntry));
		GPOS_ASSERT(0 < target_entry->resno);
		ULONG resno = target_entry->resno;
2505

2506
		if (!target_entry->resjunk)
2507
		{
J
Jesse Zhang 已提交
2508 2509
			const CDXLColDescr *dxl_col_descr_new =
				(*dxl_col_descr_array)[output_col_idx];
2510 2511 2512
			ULONG colid = dxl_col_descr_new->Id();
			StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno, colid);
			output_col_idx++;
2513 2514 2515 2516
		}
	}

	// clean up
2517 2518 2519
	output_colids->Release();
	mdid_array_leftchild->Release();
	mdid_array_rightchild->Release();
2520

2521
	return dxlnode;
2522 2523 2524 2525 2526 2527 2528 2529 2530 2531
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::CreateDXLSetOpFromColumns
//
//	@doc:
//		Create a set op after adding dummy cast on input columns where needed
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
2532 2533 2534 2535 2536
CTranslatorQueryToDXL::CreateDXLSetOpFromColumns(
	EdxlSetOpType setop_type, List *output_target_list,
	ULongPtrArray *output_colids, ULongPtr2dArray *input_colids,
	CDXLNodeArray *children_dxlnodes, BOOL is_cast_across_input,
	BOOL keep_res_junked) const
2537
{
2538 2539 2540 2541 2542 2543
	GPOS_ASSERT(NULL != output_target_list);
	GPOS_ASSERT(NULL != output_colids);
	GPOS_ASSERT(NULL != input_colids);
	GPOS_ASSERT(NULL != children_dxlnodes);
	GPOS_ASSERT(1 < input_colids->Size());
	GPOS_ASSERT(1 < children_dxlnodes->Size());
2544 2545

	// positions of output columns in the target list
J
Jesse Zhang 已提交
2546 2547
	ULongPtrArray *output_col_pos = CTranslatorUtils::GetPosInTargetList(
		m_mp, output_target_list, keep_res_junked);
2548

2549 2550 2551 2552
	const ULONG num_of_cols = output_colids->Size();
	ULongPtrArray *input_first_child_array = (*input_colids)[0];
	GPOS_ASSERT(num_of_cols == input_first_child_array->Size());
	GPOS_ASSERT(num_of_cols == output_colids->Size());
2553

2554
	CBitSet *bitset = GPOS_NEW(m_mp) CBitSet(m_mp);
2555 2556

	// project list to maintain the casting of the duplicate input columns
J
Jesse Zhang 已提交
2557 2558
	CDXLNode *new_child_project_list_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
2559

J
Jesse Zhang 已提交
2560 2561 2562 2563
	ULongPtrArray *input_first_child_new_array =
		GPOS_NEW(m_mp) ULongPtrArray(m_mp);
	CDXLColDescrArray *output_col_descrs =
		GPOS_NEW(m_mp) CDXLColDescrArray(m_mp);
2564
	for (ULONG ul = 0; ul < num_of_cols; ul++)
2565
	{
2566 2567
		ULONG colid_output = *(*output_colids)[ul];
		ULONG colid_input = *(*input_first_child_array)[ul];
2568

2569 2570
		BOOL is_col_exists = bitset->Get(colid_input);
		BOOL is_casted_col = (colid_output != colid_input);
2571

2572
		ULONG target_list_pos = *(*output_col_pos)[ul];
J
Jesse Zhang 已提交
2573 2574
		TargetEntry *target_entry =
			(TargetEntry *) gpdb::ListNth(output_target_list, target_list_pos);
2575
		GPOS_ASSERT(NULL != target_entry);
2576

2577 2578
		CDXLColDescr *output_col_descr = NULL;
		if (!is_col_exists)
2579
		{
2580
			bitset->ExchangeSet(colid_input);
J
Jesse Zhang 已提交
2581 2582
			input_first_child_new_array->Append(GPOS_NEW(m_mp)
													ULONG(colid_input));
2583

J
Jesse Zhang 已提交
2584 2585
			output_col_descr = CTranslatorUtils::GetColumnDescrAt(
				m_mp, target_entry, colid_output, ul + 1);
2586 2587 2588 2589
		}
		else
		{
			// we add a dummy-cast to distinguish between the output columns of the union
2590
			ULONG colid_new = m_colid_counter->next_id();
J
Jesse Zhang 已提交
2591 2592
			input_first_child_new_array->Append(GPOS_NEW(m_mp)
													ULONG(colid_new));
2593

2594 2595
			ULONG colid_union_output = colid_new;
			if (is_casted_col)
2596 2597
			{
				// create new output column id since current colid denotes its duplicate
2598
				colid_union_output = m_colid_counter->next_id();
2599 2600
			}

J
Jesse Zhang 已提交
2601 2602 2603 2604 2605
			output_col_descr = CTranslatorUtils::GetColumnDescrAt(
				m_mp, target_entry, colid_union_output, ul + 1);
			CDXLNode *project_elem_dxlnode =
				CTranslatorUtils::CreateDummyProjectElem(
					m_mp, colid_input, colid_new, output_col_descr);
2606

2607
			new_child_project_list_dxlnode->AddChild(project_elem_dxlnode);
2608 2609
		}

2610
		output_col_descrs->Append(output_col_descr);
2611 2612
	}

2613
	input_colids->Replace(0, input_first_child_new_array);
2614

2615
	if (0 < new_child_project_list_dxlnode->Arity())
2616 2617
	{
		// create a project node for the dummy casted columns
2618 2619
		CDXLNode *first_child_dxlnode = (*children_dxlnodes)[0];
		first_child_dxlnode->AddRef();
J
Jesse Zhang 已提交
2620 2621
		CDXLNode *new_child_dxlnode = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalProject(m_mp));
2622 2623
		new_child_dxlnode->AddChild(new_child_project_list_dxlnode);
		new_child_dxlnode->AddChild(first_child_dxlnode);
2624

2625
		children_dxlnodes->Replace(0, new_child_dxlnode);
2626 2627 2628
	}
	else
	{
2629
		new_child_project_list_dxlnode->Release();
2630 2631
	}

J
Jesse Zhang 已提交
2632 2633 2634
	CDXLLogicalSetOp *dxlop =
		GPOS_NEW(m_mp) CDXLLogicalSetOp(m_mp, setop_type, output_col_descrs,
										input_colids, is_cast_across_input);
2635
	CDXLNode *dxlnode = GPOS_NEW(m_mp) CDXLNode(m_mp, dxlop, children_dxlnodes);
2636

2637 2638
	bitset->Release();
	output_col_pos->Release();
2639

2640
	return dxlnode;
2641 2642 2643 2644
}

//---------------------------------------------------------------------------
//	@function:
2645
//		CTranslatorQueryToDXL::SetOpNeedsCast
2646 2647 2648 2649 2650 2651
//
//	@doc:
//		Check if the set operation need to cast any of its input columns
//
//---------------------------------------------------------------------------
BOOL
J
Jesse Zhang 已提交
2652 2653
CTranslatorQueryToDXL::SetOpNeedsCast(List *target_list,
									  IMdIdArray *input_col_mdids) const
2654
{
2655 2656
	GPOS_ASSERT(NULL != input_col_mdids);
	GPOS_ASSERT(NULL != target_list);
J
Jesse Zhang 已提交
2657 2658 2659
	GPOS_ASSERT(
		input_col_mdids->Size() <=
		gpdb::ListLength(target_list));	 // there may be resjunked columns
2660

2661 2662
	ULONG col_pos_idx = 0;
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
2663
	ForEach(lc, target_list)
2664
	{
2665
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
J
Jesse Zhang 已提交
2666
		OID expr_type_oid = gpdb::ExprType((Node *) target_entry->expr);
2667
		if (!target_entry->resjunk)
2668
		{
2669 2670
			IMDId *mdid = (*input_col_mdids)[col_pos_idx];
			if (CMDIdGPDB::CastMdid(mdid)->Oid() != expr_type_oid)
2671 2672 2673
			{
				return true;
			}
2674
			col_pos_idx++;
2675 2676 2677 2678 2679 2680 2681 2682
		}
	}

	return false;
}

//---------------------------------------------------------------------------
//	@function:
2683
//		CTranslatorQueryToDXL::TranslateSetOpChild
2684 2685 2686 2687 2688 2689
//
//	@doc:
//		Translate the child of a set operation
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
2690 2691 2692 2693
CTranslatorQueryToDXL::TranslateSetOpChild(Node *child_node,
										   ULongPtrArray *colids,
										   IMdIdArray *input_col_mdids,
										   List *target_list)
2694
{
2695 2696
	GPOS_ASSERT(NULL != colids);
	GPOS_ASSERT(NULL != input_col_mdids);
2697

2698
	if (IsA(child_node, RangeTblRef))
2699
	{
J
Jesse Zhang 已提交
2700
		RangeTblRef *range_tbl_ref = (RangeTblRef *) child_node;
2701
		const ULONG rt_index = range_tbl_ref->rtindex;
J
Jesse Zhang 已提交
2702 2703
		const RangeTblEntry *rte =
			(RangeTblEntry *) gpdb::ListNth(m_query->rtable, rt_index - 1);
2704

2705
		if (RTE_SUBQUERY == rte->rtekind)
2706
		{
J
Jesse Zhang 已提交
2707 2708
			Query *query_derived_tbl = CTranslatorUtils::FixUnknownTypeConstant(
				rte->subquery, target_list);
2709
			GPOS_ASSERT(NULL != query_derived_tbl);
2710

J
Jesse Zhang 已提交
2711 2712 2713 2714 2715 2716
			CMappingVarColId *var_colid_mapping =
				m_var_to_colid_map->CopyMapColId(m_mp);
			CTranslatorQueryToDXL query_to_dxl_translator(
				m_mp, m_md_accessor, m_colid_counter, m_cte_id_counter,
				var_colid_mapping, query_derived_tbl, m_query_level + 1,
				IsDMLQuery(), m_query_level_to_cte_map);
2717 2718

			// translate query representing the derived table to its DXL representation
J
Jesse Zhang 已提交
2719 2720
			CDXLNode *query_dxlnode =
				query_to_dxl_translator.TranslateSelectQueryToDXL();
2721
			GPOS_ASSERT(NULL != query_dxlnode);
2722

J
Jesse Zhang 已提交
2723 2724
			CDXLNodeArray *cte_dxlnode_array =
				query_to_dxl_translator.GetCTEs();
2725
			CUtils::AddRefAppend(m_dxl_cte_producers, cte_dxlnode_array);
J
Jesse Zhang 已提交
2726 2727 2728
			m_has_distributed_tables =
				m_has_distributed_tables ||
				query_to_dxl_translator.HasDistributedTables();
2729 2730

			// get the output columns of the derived table
J
Jesse Zhang 已提交
2731 2732
			CDXLNodeArray *dxlnodes =
				query_to_dxl_translator.GetQueryOutputCols();
2733 2734 2735
			GPOS_ASSERT(dxlnodes != NULL);
			const ULONG length = dxlnodes->Size();
			for (ULONG ul = 0; ul < length; ul++)
2736
			{
2737
				CDXLNode *current_dxlnode = (*dxlnodes)[ul];
J
Jesse Zhang 已提交
2738 2739 2740 2741
				CDXLScalarIdent *dxl_scalar_ident =
					CDXLScalarIdent::Cast(current_dxlnode->GetOperator());
				ULONG *colid = GPOS_NEW(m_mp)
					ULONG(dxl_scalar_ident->GetDXLColRef()->Id());
2742 2743 2744 2745 2746 2747
				colids->Append(colid);

				IMDId *mdid_col = dxl_scalar_ident->MdidType();
				GPOS_ASSERT(NULL != mdid_col);
				mdid_col->AddRef();
				input_col_mdids->Append(mdid_col);
2748 2749
			}

2750
			return query_dxlnode;
2751 2752
		}
	}
2753
	else if (IsA(child_node, SetOperationStmt))
2754
	{
J
Jesse Zhang 已提交
2755 2756 2757 2758
		IntToUlongMap *output_attno_to_colid_mapping =
			GPOS_NEW(m_mp) IntToUlongMap(m_mp);
		CDXLNode *dxlnode = TranslateSetOpToDXL(child_node, target_list,
												output_attno_to_colid_mapping);
2759 2760

		// cleanup
2761
		output_attno_to_colid_mapping->Release();
2762

J
Jesse Zhang 已提交
2763 2764 2765
		const CDXLColDescrArray *dxl_col_descr_array =
			CDXLLogicalSetOp::Cast(dxlnode->GetOperator())
				->GetDXLColumnDescrArray();
2766 2767 2768
		GPOS_ASSERT(NULL != dxl_col_descr_array);
		const ULONG length = dxl_col_descr_array->Size();
		for (ULONG ul = 0; ul < length; ul++)
2769
		{
2770 2771 2772 2773 2774 2775 2776 2777
			const CDXLColDescr *dxl_col_descr = (*dxl_col_descr_array)[ul];
			ULONG *colid = GPOS_NEW(m_mp) ULONG(dxl_col_descr->Id());
			colids->Append(colid);

			IMDId *mdid_col = dxl_col_descr->MdidType();
			GPOS_ASSERT(NULL != mdid_col);
			mdid_col->AddRef();
			input_col_mdids->Append(mdid_col);
2778 2779
		}

2780
		return dxlnode;
2781 2782
	}

J
Jesse Zhang 已提交
2783 2784 2785 2786
	CHAR *temp_str =
		(CHAR *) gpdb::NodeToString(const_cast<Node *>(child_node));
	CWStringDynamic *str =
		CDXLUtils::CreateDynamicStringFromCharArray(m_mp, temp_str);
2787

J
Jesse Zhang 已提交
2788 2789
	GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
			   str->GetBuffer());
2790 2791 2792 2793 2794
	return NULL;
}

//---------------------------------------------------------------------------
//	@function:
2795
//		CTranslatorQueryToDXL::TranslateFromExprToDXL
2796 2797 2798 2799 2800 2801 2802
//
//	@doc:
//		Translate the FromExpr on a GPDB query into either a CDXLLogicalJoin
//		or a CDXLLogicalGet
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
2803
CTranslatorQueryToDXL::TranslateFromExprToDXL(FromExpr *from_expr)
2804
{
2805
	CDXLNode *dxlnode = NULL;
2806

2807
	if (0 == gpdb::ListLength(from_expr->fromlist))
2808
	{
2809
		dxlnode = DXLDummyConstTableGet();
2810 2811 2812
	}
	else
	{
2813
		if (1 == gpdb::ListLength(from_expr->fromlist))
2814
		{
J
Jesse Zhang 已提交
2815
			Node *node = (Node *) gpdb::ListNth(from_expr->fromlist, 0);
2816 2817
			GPOS_ASSERT(NULL != node);
			dxlnode = TranslateFromClauseToDXL(node);
2818 2819 2820 2821 2822 2823 2824
		}
		else
		{
			// In DXL, we represent an n-ary join (where n>2) by an inner join with condition true.
			// The join conditions represented in the FromExpr->quals is translated
			// into a CDXLLogicalSelect on top of the CDXLLogicalJoin

J
Jesse Zhang 已提交
2825 2826
			dxlnode = GPOS_NEW(m_mp) CDXLNode(
				m_mp, GPOS_NEW(m_mp) CDXLLogicalJoin(m_mp, EdxljtInner));
2827

2828
			ListCell *lc = NULL;
J
Jesse Zhang 已提交
2829
			ForEach(lc, from_expr->fromlist)
2830
			{
J
Jesse Zhang 已提交
2831
				Node *node = (Node *) lfirst(lc);
2832 2833
				CDXLNode *child_dxlnode = TranslateFromClauseToDXL(node);
				dxlnode->AddChild(child_dxlnode);
2834 2835 2836 2837 2838
			}
		}
	}

	// translate the quals
2839 2840 2841
	Node *qual_node = from_expr->quals;
	CDXLNode *condition_dxlnode = NULL;
	if (NULL != qual_node)
2842
	{
J
Jesse Zhang 已提交
2843
		condition_dxlnode = TranslateExprToDXL((Expr *) qual_node);
2844 2845
	}

2846
	if (1 >= gpdb::ListLength(from_expr->fromlist))
2847
	{
2848
		if (NULL != condition_dxlnode)
2849
		{
J
Jesse Zhang 已提交
2850 2851
			CDXLNode *select_dxlnode = GPOS_NEW(m_mp)
				CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalSelect(m_mp));
2852 2853
			select_dxlnode->AddChild(condition_dxlnode);
			select_dxlnode->AddChild(dxlnode);
2854

2855
			dxlnode = select_dxlnode;
2856 2857
		}
	}
J
Jesse Zhang 已提交
2858
	else  //n-ary joins
2859
	{
2860
		if (NULL == condition_dxlnode)
2861 2862
		{
			// A cross join (the scalar condition is true)
2863
			condition_dxlnode = CreateDXLConstValueTrue();
2864 2865
		}

2866
		dxlnode->AddChild(condition_dxlnode);
2867 2868
	}

2869
	return dxlnode;
2870 2871 2872 2873
}

//---------------------------------------------------------------------------
//	@function:
2874
//		CTranslatorQueryToDXL::TranslateFromClauseToDXL
2875 2876 2877 2878 2879 2880 2881
//
//	@doc:
//		Returns a CDXLNode representing a from clause entry which can either be
//		(1) a fromlist entry in the FromExpr or (2) left/right child of a JoinExpr
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
2882
CTranslatorQueryToDXL::TranslateFromClauseToDXL(Node *node)
2883
{
2884
	GPOS_ASSERT(NULL != node);
2885

2886
	if (IsA(node, RangeTblRef))
2887
	{
2888
		RangeTblRef *range_tbl_ref = (RangeTblRef *) node;
J
Jesse Zhang 已提交
2889 2890 2891
		ULONG rt_index = range_tbl_ref->rtindex;
		const RangeTblEntry *rte =
			(RangeTblEntry *) gpdb::ListNth(m_query->rtable, rt_index - 1);
2892
		GPOS_ASSERT(NULL != rte);
2893

2894
		if (rte->forceDistRandom)
2895
		{
J
Jesse Zhang 已提交
2896 2897
			GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
					   GPOS_WSZ_LIT("gp_dist_random"));
2898 2899
		}

J
Jesse Zhang 已提交
2900
		static const SRTETranslator dxlop_translator_func_mapping_array[] = {
2901 2902 2903 2904 2905
			{RTE_RELATION, &CTranslatorQueryToDXL::TranslateRTEToDXLLogicalGet},
			{RTE_VALUES, &CTranslatorQueryToDXL::TranslateValueScanRTEToDXL},
			{RTE_CTE, &CTranslatorQueryToDXL::TranslateCTEToDXL},
			{RTE_SUBQUERY, &CTranslatorQueryToDXL::TranslateDerivedTablesToDXL},
			{RTE_FUNCTION, &CTranslatorQueryToDXL::TranslateTVFToDXL},
2906
		};
J
Jesse Zhang 已提交
2907 2908 2909 2910

		const ULONG num_of_translators =
			GPOS_ARRAY_SIZE(dxlop_translator_func_mapping_array);

2911
		// find translator for the rtekind
2912 2913
		DXLNodeToLogicalFunc dxlnode_to_logical_funct = NULL;
		for (ULONG ul = 0; ul < num_of_translators; ul++)
2914
		{
2915 2916
			SRTETranslator elem = dxlop_translator_func_mapping_array[ul];
			if (rte->rtekind == elem.m_rtekind)
2917
			{
2918
				dxlnode_to_logical_funct = elem.dxlnode_to_logical_funct;
2919 2920 2921
				break;
			}
		}
J
Jesse Zhang 已提交
2922

2923
		if (NULL == dxlnode_to_logical_funct)
2924
		{
2925
			UnsupportedRTEKind(rte->rtekind);
2926 2927 2928

			return NULL;
		}
J
Jesse Zhang 已提交
2929

2930
		return (this->*dxlnode_to_logical_funct)(rte, rt_index, m_query_level);
2931 2932
	}

2933
	if (IsA(node, JoinExpr))
2934
	{
J
Jesse Zhang 已提交
2935
		return TranslateJoinExprInFromToDXL((JoinExpr *) node);
2936 2937
	}

J
Jesse Zhang 已提交
2938 2939 2940
	CHAR *sz = (CHAR *) gpdb::NodeToString(const_cast<Node *>(node));
	CWStringDynamic *str =
		CDXLUtils::CreateDynamicStringFromCharArray(m_mp, sz);
2941

J
Jesse Zhang 已提交
2942 2943
	GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
			   str->GetBuffer());
2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955

	return NULL;
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::UnsupportedRTEKind
//
//	@doc:
//		Raise exception for unsupported RangeTblEntries of a particular kind
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
2956
CTranslatorQueryToDXL::UnsupportedRTEKind(RTEKind rtekind) const
2957
{
J
Jesse Zhang 已提交
2958 2959 2960
	GPOS_ASSERT(!(RTE_RELATION == rtekind || RTE_CTE == rtekind ||
				  RTE_FUNCTION == rtekind || RTE_SUBQUERY == rtekind ||
				  RTE_VALUES == rtekind));
2961

J
Jesse Zhang 已提交
2962
	static const SRTENameElem rte_name_map[] = {
2963 2964 2965
		{RTE_JOIN, GPOS_WSZ_LIT("RangeTableEntry of type Join")},
		{RTE_SPECIAL, GPOS_WSZ_LIT("RangeTableEntry of type Special")},
		{RTE_VOID, GPOS_WSZ_LIT("RangeTableEntry of type Void")},
J
Jesse Zhang 已提交
2966 2967
		{RTE_TABLEFUNCTION,
		 GPOS_WSZ_LIT("RangeTableEntry of type Table Function")}};
2968

2969 2970
	const ULONG length = GPOS_ARRAY_SIZE(rte_name_map);
	for (ULONG ul = 0; ul < length; ul++)
2971
	{
2972
		SRTENameElem mapelem = rte_name_map[ul];
2973 2974 2975

		if (mapelem.m_rtekind == rtekind)
		{
J
Jesse Zhang 已提交
2976 2977
			GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
					   mapelem.m_rte_name);
2978 2979 2980 2981 2982 2983 2984 2985
		}
	}

	GPOS_ASSERT(!"Unrecognized RTE kind");
}

//---------------------------------------------------------------------------
//	@function:
2986
//		CTranslatorQueryToDXL::TranslateRTEToDXLLogicalGet
2987 2988 2989 2990 2991 2992
//
//	@doc:
//		Returns a CDXLNode representing a from relation range table entry
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
2993 2994 2995 2996
CTranslatorQueryToDXL::TranslateRTEToDXLLogicalGet(const RangeTblEntry *rte,
												   ULONG rt_index,
												   ULONG  //current_query_level
)
2997
{
J
Jesse Zhang 已提交
2998 2999
	CDXLTableDescr *table_descr = CTranslatorUtils::GetTableDescr(
		m_mp, m_md_accessor, m_colid_counter, rte, &m_has_distributed_tables);
3000 3001 3002

	CDXLLogicalGet *dxlop = NULL;
	const IMDRelation *md_rel = m_md_accessor->RetrieveRel(table_descr->MDId());
3003

3004

J
Jesse Zhang 已提交
3005 3006
	if (false == rte->inh &&
		IMDRelation::ErelstorageExternal != md_rel->RetrieveRelStorageType())
3007
	{
3008 3009 3010
		GPOS_ASSERT(RTE_RELATION == rte->rtekind);
		// RangeTblEntry::inh is set to false iff there is ONLY in the FROM
		// clause. c.f. transformTableEntry, called from transformFromClauseItem
J
Jesse Zhang 已提交
3011 3012
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("ONLY in the FROM clause"));
3013 3014
	}

3015 3016
	// construct table descriptor for the scan node from the range table entry
	if (IMDRelation::ErelstorageExternal == md_rel->RetrieveRelStorageType())
3017
	{
3018
		dxlop = GPOS_NEW(m_mp) CDXLLogicalExternalGet(m_mp, table_descr);
3019 3020 3021
	}
	else
	{
3022
		dxlop = GPOS_NEW(m_mp) CDXLLogicalGet(m_mp, table_descr);
3023 3024
	}

3025
	CDXLNode *get_dxlnode = GPOS_NEW(m_mp) CDXLNode(m_mp, dxlop);
3026 3027

	// make note of new columns from base relation
3028
	m_var_to_colid_map->LoadTblColumns(m_query_level, rt_index, table_descr);
3029

3030
	return get_dxlnode;
3031 3032 3033 3034
}

//---------------------------------------------------------------------------
//	@function:
3035
//		CTranslatorQueryToDXL::TranslateValueScanRTEToDXL
3036 3037 3038 3039 3040 3041
//
//	@doc:
//		Returns a CDXLNode representing a range table entry of values
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
3042 3043 3044
CTranslatorQueryToDXL::TranslateValueScanRTEToDXL(const RangeTblEntry *rte,
												  ULONG rt_index,
												  ULONG current_query_level)
3045
{
3046 3047
	List *tuples_list = rte->values_lists;
	GPOS_ASSERT(NULL != tuples_list);
3048

3049 3050
	const ULONG num_of_tuples = gpdb::ListLength(tuples_list);
	GPOS_ASSERT(0 < num_of_tuples);
3051 3052

	// children of the UNION ALL
3053
	CDXLNodeArray *dxlnodes = GPOS_NEW(m_mp) CDXLNodeArray(m_mp);
3054 3055

	// array of datum arrays for Values
J
Jesse Zhang 已提交
3056 3057
	CDXLDatum2dArray *dxl_values_datum_array =
		GPOS_NEW(m_mp) CDXLDatum2dArray(m_mp);
3058

3059
	// array of input colid arrays
3060
	ULongPtr2dArray *input_colids = GPOS_NEW(m_mp) ULongPtr2dArray(m_mp);
3061 3062

	// array of column descriptor for the UNION ALL operator
J
Jesse Zhang 已提交
3063 3064
	CDXLColDescrArray *dxl_col_descr_array =
		GPOS_NEW(m_mp) CDXLColDescrArray(m_mp);
3065

3066
	// translate the tuples in the value scan
3067 3068 3069
	ULONG tuple_pos = 0;
	ListCell *lc_tuple = NULL;
	GPOS_ASSERT(NULL != rte->eref);
3070 3071 3072

	// flag for checking value list has only constants. For all constants --> VALUESCAN operator else retain UnionAll
	BOOL fAllConstant = true;
J
Jesse Zhang 已提交
3073
	ForEach(lc_tuple, tuples_list)
3074
	{
3075 3076
		List *tuple_list = (List *) lfirst(lc_tuple);
		GPOS_ASSERT(IsA(tuple_list, List));
3077

J
Jesse Zhang 已提交
3078
		// array of column colids
3079
		ULongPtrArray *colid_array = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
3080 3081

		// array of project elements (for expression elements)
J
Jesse Zhang 已提交
3082 3083
		CDXLNodeArray *project_elem_dxlnode_array =
			GPOS_NEW(m_mp) CDXLNodeArray(m_mp);
3084

3085
		// array of datum (for datum constant values)
3086
		CDXLDatumArray *dxl_datum_array = GPOS_NEW(m_mp) CDXLDatumArray(m_mp);
3087

3088
		// array of column descriptors for the CTG containing the datum array
J
Jesse Zhang 已提交
3089 3090
		CDXLColDescrArray *dxl_column_descriptors =
			GPOS_NEW(m_mp) CDXLColDescrArray(m_mp);
3091

3092 3093
		List *col_names = rte->eref->colnames;
		GPOS_ASSERT(NULL != col_names);
J
Jesse Zhang 已提交
3094 3095
		GPOS_ASSERT(gpdb::ListLength(tuple_list) ==
					gpdb::ListLength(col_names));
3096

3097
		// translate the columns
3098 3099
		ULONG col_pos_idx = 0;
		ListCell *lc_column = NULL;
J
Jesse Zhang 已提交
3100
		ForEach(lc_column, tuple_list)
3101
		{
3102
			Expr *expr = (Expr *) lfirst(lc_column);
3103

J
Jesse Zhang 已提交
3104 3105
			CHAR *col_name_char_array =
				(CHAR *) strVal(gpdb::ListNth(col_names, col_pos_idx));
3106 3107
			ULONG colid = gpos::ulong_max;
			if (IsA(expr, Const))
3108 3109
			{
				// extract the datum
3110
				Const *const_expr = (Const *) expr;
J
Jesse Zhang 已提交
3111 3112
				CDXLDatum *datum_dxl =
					m_scalar_translator->TranslateConstToDXL(const_expr);
3113
				dxl_datum_array->Append(datum_dxl);
3114

3115
				colid = m_colid_counter->next_id();
3116

J
Jesse Zhang 已提交
3117 3118 3119
				CWStringDynamic *alias_str =
					CDXLUtils::CreateDynamicStringFromCharArray(
						m_mp, col_name_char_array);
3120 3121
				CMDName *mdname = GPOS_NEW(m_mp) CMDName(m_mp, alias_str);
				GPOS_DELETE(alias_str);
3122

J
Jesse Zhang 已提交
3123 3124 3125 3126 3127
				CDXLColDescr *dxl_col_descr = GPOS_NEW(m_mp) CDXLColDescr(
					m_mp, mdname, colid, col_pos_idx + 1 /* attno */,
					GPOS_NEW(m_mp) CMDIdGPDB(const_expr->consttype),
					const_expr->consttypmod, false /* is_dropped */
				);
3128

3129
				if (0 == tuple_pos)
3130
				{
3131 3132
					dxl_col_descr->AddRef();
					dxl_col_descr_array->Append(dxl_col_descr);
3133
				}
3134
				dxl_column_descriptors->Append(dxl_col_descr);
3135 3136 3137
			}
			else
			{
3138
				fAllConstant = false;
3139
				// translate the scalar expression into a project element
J
Jesse Zhang 已提交
3140 3141
				CDXLNode *project_elem_dxlnode = TranslateExprToDXLProject(
					expr, col_name_char_array, true /* insist_new_colids */);
3142
				project_elem_dxlnode_array->Append(project_elem_dxlnode);
J
Jesse Zhang 已提交
3143 3144 3145
				colid = CDXLScalarProjElem::Cast(
							project_elem_dxlnode->GetOperator())
							->Id();
3146

3147
				if (0 == tuple_pos)
3148
				{
J
Jesse Zhang 已提交
3149 3150 3151
					CWStringDynamic *alias_str =
						CDXLUtils::CreateDynamicStringFromCharArray(
							m_mp, col_name_char_array);
3152 3153
					CMDName *mdname = GPOS_NEW(m_mp) CMDName(m_mp, alias_str);
					GPOS_DELETE(alias_str);
3154

J
Jesse Zhang 已提交
3155 3156 3157 3158 3159
					CDXLColDescr *dxl_col_descr = GPOS_NEW(m_mp) CDXLColDescr(
						m_mp, mdname, colid, col_pos_idx + 1 /* attno */,
						GPOS_NEW(m_mp) CMDIdGPDB(gpdb::ExprType((Node *) expr)),
						gpdb::ExprTypeMod((Node *) expr), false /* is_dropped */
					);
3160
					dxl_col_descr_array->Append(dxl_col_descr);
3161
				}
3162
			}
3163

3164
			GPOS_ASSERT(gpos::ulong_max != colid);
3165

3166 3167
			colid_array->Append(GPOS_NEW(m_mp) ULONG(colid));
			col_pos_idx++;
3168
		}
3169

J
Jesse Zhang 已提交
3170 3171 3172
		dxlnodes->Append(
			TranslateColumnValuesToDXL(dxl_datum_array, dxl_column_descriptors,
									   project_elem_dxlnode_array));
3173 3174
		if (fAllConstant)
		{
3175 3176
			dxl_datum_array->AddRef();
			dxl_values_datum_array->Append(dxl_datum_array);
3177 3178
		}

3179 3180
		input_colids->Append(colid_array);
		tuple_pos++;
3181

3182
		// cleanup
3183 3184 3185
		dxl_datum_array->Release();
		project_elem_dxlnode_array->Release();
		dxl_column_descriptors->Release();
3186
	}
3187

3188
	GPOS_ASSERT(NULL != dxl_col_descr_array);
3189

3190 3191 3192
	if (fAllConstant)
	{
		// create Const Table DXL Node
J
Jesse Zhang 已提交
3193 3194
		CDXLLogicalConstTable *dxlop = GPOS_NEW(m_mp) CDXLLogicalConstTable(
			m_mp, dxl_col_descr_array, dxl_values_datum_array);
3195
		CDXLNode *dxlnode = GPOS_NEW(m_mp) CDXLNode(m_mp, dxlop);
3196 3197

		// make note of new columns from Value Scan
J
Jesse Zhang 已提交
3198 3199
		m_var_to_colid_map->LoadColumns(m_query_level, rt_index,
										dxlop->GetDXLColumnDescrArray());
3200 3201

		// cleanup
3202 3203
		dxlnodes->Release();
		input_colids->Release();
3204

3205
		return dxlnode;
3206
	}
3207
	else if (1 < num_of_tuples)
3208 3209
	{
		// create a UNION ALL operator
J
Jesse Zhang 已提交
3210 3211
		CDXLLogicalSetOp *dxlop = GPOS_NEW(m_mp) CDXLLogicalSetOp(
			m_mp, EdxlsetopUnionAll, dxl_col_descr_array, input_colids, false);
3212
		CDXLNode *dxlnode = GPOS_NEW(m_mp) CDXLNode(m_mp, dxlop, dxlnodes);
3213

3214
		// make note of new columns from UNION ALL
J
Jesse Zhang 已提交
3215 3216
		m_var_to_colid_map->LoadColumns(m_query_level, rt_index,
										dxlop->GetDXLColumnDescrArray());
3217
		dxl_values_datum_array->Release();
3218

3219
		return dxlnode;
3220 3221
	}

3222
	GPOS_ASSERT(1 == dxlnodes->Size());
3223

3224 3225
	CDXLNode *dxlnode = (*dxlnodes)[0];
	dxlnode->AddRef();
3226

3227
	// make note of new columns
J
Jesse Zhang 已提交
3228 3229
	m_var_to_colid_map->LoadColumns(m_query_level, rt_index,
									dxl_col_descr_array);
3230

3231
	//cleanup
3232 3233 3234 3235
	dxl_values_datum_array->Release();
	dxlnodes->Release();
	input_colids->Release();
	dxl_col_descr_array->Release();
3236

3237
	return dxlnode;
3238 3239 3240 3241
}

//---------------------------------------------------------------------------
//	@function:
3242
//		CTranslatorQueryToDXL::TranslateColumnValuesToDXL
3243 3244
//
//	@doc:
J
Jesse Zhang 已提交
3245
//		Generate a DXL node from column values, where each column value is
3246 3247 3248 3249
//		either a datum or scalar expression represented as project element.
//		Each datum is associated with a column descriptors used by the CTG
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
3250
CTranslatorQueryToDXL::TranslateColumnValuesToDXL(
3251 3252
	CDXLDatumArray *dxl_datum_array_const_tbl_get,
	CDXLColDescrArray *dxl_column_descriptors,
J
Jesse Zhang 已提交
3253
	CDXLNodeArray *project_elem_dxlnode_array) const
3254
{
3255 3256
	GPOS_ASSERT(NULL != dxl_datum_array_const_tbl_get);
	GPOS_ASSERT(NULL != project_elem_dxlnode_array);
J
Jesse Zhang 已提交
3257

3258 3259
	CDXLNode *const_tbl_get_dxlnode = NULL;
	if (0 == dxl_datum_array_const_tbl_get->Size())
3260 3261
	{
		// add a dummy CTG
3262
		const_tbl_get_dxlnode = DXLDummyConstTableGet();
3263
	}
J
Jesse Zhang 已提交
3264
	else
3265 3266
	{
		// create the array of datum arrays
J
Jesse Zhang 已提交
3267 3268 3269
		CDXLDatum2dArray *dxl_datum_arrays_const_tbl_get =
			GPOS_NEW(m_mp) CDXLDatum2dArray(m_mp);

3270 3271
		dxl_datum_array_const_tbl_get->AddRef();
		dxl_datum_arrays_const_tbl_get->Append(dxl_datum_array_const_tbl_get);
J
Jesse Zhang 已提交
3272

3273
		dxl_column_descriptors->AddRef();
J
Jesse Zhang 已提交
3274 3275 3276
		CDXLLogicalConstTable *dxlop = GPOS_NEW(m_mp) CDXLLogicalConstTable(
			m_mp, dxl_column_descriptors, dxl_datum_arrays_const_tbl_get);

3277
		const_tbl_get_dxlnode = GPOS_NEW(m_mp) CDXLNode(m_mp, dxlop);
3278 3279
	}

3280
	if (0 == project_elem_dxlnode_array->Size())
3281
	{
3282
		return const_tbl_get_dxlnode;
3283 3284 3285
	}

	// create a project node for the list of project elements
3286
	project_elem_dxlnode_array->AddRef();
J
Jesse Zhang 已提交
3287 3288 3289 3290 3291 3292 3293
	CDXLNode *project_list_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp),
								project_elem_dxlnode_array);

	CDXLNode *project_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalProject(m_mp),
								project_list_dxlnode, const_tbl_get_dxlnode);
3294

3295
	return project_dxlnode;
3296 3297 3298 3299
}

//---------------------------------------------------------------------------
//	@function:
3300
//		CTranslatorQueryToDXL::TranslateTVFToDXL
3301 3302 3303 3304 3305
//
//	@doc:
//		Returns a CDXLNode representing a from relation range table entry
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
3306 3307 3308 3309
CTranslatorQueryToDXL::TranslateTVFToDXL(const RangeTblEntry *rte,
										 ULONG rt_index,
										 ULONG	//current_query_level
)
3310
{
3311
	GPOS_ASSERT(NULL != rte->funcexpr);
3312 3313

	// if this is a folded function expression, generate a project over a CTG
3314
	if (!IsA(rte->funcexpr, FuncExpr))
3315
	{
3316
		CDXLNode *const_tbl_get_dxlnode = DXLDummyConstTableGet();
3317

J
Jesse Zhang 已提交
3318 3319
		CDXLNode *project_list_dxlnode = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
3320

J
Jesse Zhang 已提交
3321 3322 3323
		CDXLNode *project_elem_dxlnode = TranslateExprToDXLProject(
			(Expr *) rte->funcexpr, rte->eref->aliasname,
			true /* insist_new_colids */);
3324
		project_list_dxlnode->AddChild(project_elem_dxlnode);
3325

J
Jesse Zhang 已提交
3326 3327
		CDXLNode *project_dxlnode = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalProject(m_mp));
3328 3329
		project_dxlnode->AddChild(project_list_dxlnode);
		project_dxlnode->AddChild(const_tbl_get_dxlnode);
3330

J
Jesse Zhang 已提交
3331 3332
		m_var_to_colid_map->LoadProjectElements(m_query_level, rt_index,
												project_list_dxlnode);
3333

3334
		return project_dxlnode;
3335 3336
	}

J
Jesse Zhang 已提交
3337 3338
	CDXLLogicalTVF *tvf_dxlop = CTranslatorUtils::ConvertToCDXLLogicalTVF(
		m_mp, m_md_accessor, m_colid_counter, rte);
3339
	CDXLNode *tvf_dxlnode = GPOS_NEW(m_mp) CDXLNode(m_mp, tvf_dxlop);
3340 3341

	// make note of new columns from function
J
Jesse Zhang 已提交
3342 3343
	m_var_to_colid_map->LoadColumns(m_query_level, rt_index,
									tvf_dxlop->GetDXLColumnDescrArray());
3344

3345 3346
	FuncExpr *func_expr = (FuncExpr *) rte->funcexpr;
	BOOL is_subquery_in_args = false;
3347 3348

	// check if arguments contain SIRV functions
3349
	if (NIL != func_expr->args && HasSirvFunctions((Node *) func_expr->args))
3350
	{
J
Jesse Zhang 已提交
3351 3352
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("SIRV functions"));
3353 3354
	}

3355
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
3356
	ForEach(lc, func_expr->args)
3357
	{
3358
		Node *arg_node = (Node *) lfirst(lc);
J
Jesse Zhang 已提交
3359 3360
		is_subquery_in_args =
			is_subquery_in_args || CTranslatorUtils::HasSubquery(arg_node);
3361
		CDXLNode *func_expr_arg_dxlnode =
J
Jesse Zhang 已提交
3362 3363 3364
			m_scalar_translator->TranslateScalarToDXL(
				(Expr *) arg_node, m_var_to_colid_map,
				&m_has_distributed_tables);
3365 3366
		GPOS_ASSERT(NULL != func_expr_arg_dxlnode);
		tvf_dxlnode->AddChild(func_expr_arg_dxlnode);
3367 3368
	}

3369 3370
	CMDIdGPDB *mdid_func = GPOS_NEW(m_mp) CMDIdGPDB(func_expr->funcid);
	const IMDFunction *pmdfunc = m_md_accessor->RetrieveFunc(mdid_func);
J
Jesse Zhang 已提交
3371 3372
	if (is_subquery_in_args &&
		IMDFunction::EfsVolatile == pmdfunc->GetFuncStability())
3373
	{
J
Jesse Zhang 已提交
3374 3375 3376
		GPOS_RAISE(
			gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
			GPOS_WSZ_LIT("Volatile functions with subqueries in arguments"));
3377
	}
3378
	mdid_func->Release();
3379

3380
	return tvf_dxlnode;
3381 3382 3383 3384
}

//---------------------------------------------------------------------------
//	@function:
3385
//		CTranslatorQueryToDXL::TranslateCTEToDXL
3386 3387 3388 3389 3390 3391
//
//	@doc:
//		Translate a common table expression into CDXLNode
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
3392 3393 3394
CTranslatorQueryToDXL::TranslateCTEToDXL(const RangeTblEntry *rte,
										 ULONG rt_index,
										 ULONG current_query_level)
3395
{
3396
	const ULONG cte_query_level = current_query_level - rte->ctelevelsup;
J
Jesse Zhang 已提交
3397 3398
	const CCTEListEntry *cte_list_entry =
		m_query_level_to_cte_map->Find(&cte_query_level);
3399
	if (NULL == cte_list_entry)
3400 3401
	{
		// TODO: Sept 09 2013, remove temporary fix  (revert exception to assert) to avoid crash during algebrization
J
Jesse Zhang 已提交
3402 3403
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLError,
				   GPOS_WSZ_LIT("No CTE"));
3404 3405
	}

J
Jesse Zhang 已提交
3406 3407 3408 3409 3410 3411 3412
	const CDXLNode *cte_producer_dxlnode =
		cte_list_entry->GetCTEProducer(rte->ctename);
	const List *cte_producer_target_list =
		cte_list_entry->GetCTEProducerTargetList(rte->ctename);

	GPOS_ASSERT(NULL != cte_producer_dxlnode &&
				NULL != cte_producer_target_list);
3413

J
Jesse Zhang 已提交
3414 3415
	CDXLLogicalCTEProducer *cte_producer_dxlop =
		CDXLLogicalCTEProducer::Cast(cte_producer_dxlnode->GetOperator());
3416
	ULONG cte_id = cte_producer_dxlop->Id();
J
Jesse Zhang 已提交
3417 3418 3419
	ULongPtrArray *op_colid_array_cte_producer =
		cte_producer_dxlop->GetOutputColIdsArray();

3420
	// construct output column array
J
Jesse Zhang 已提交
3421 3422 3423
	ULongPtrArray *colid_array_cte_consumer =
		GenerateColIds(m_mp, op_colid_array_cte_producer->Size());

3424
	// load the new columns from the CTE
J
Jesse Zhang 已提交
3425 3426 3427
	m_var_to_colid_map->LoadCTEColumns(
		current_query_level, rt_index, colid_array_cte_consumer,
		const_cast<List *>(cte_producer_target_list));
3428

J
Jesse Zhang 已提交
3429 3430
	CDXLLogicalCTEConsumer *cte_consumer_dxlop = GPOS_NEW(m_mp)
		CDXLLogicalCTEConsumer(m_mp, cte_id, colid_array_cte_consumer);
3431
	CDXLNode *cte_dxlnode = GPOS_NEW(m_mp) CDXLNode(m_mp, cte_consumer_dxlop);
3432

3433
	return cte_dxlnode;
3434 3435 3436 3437
}

//---------------------------------------------------------------------------
//	@function:
3438
//		CTranslatorQueryToDXL::TranslateDerivedTablesToDXL
3439 3440 3441 3442 3443 3444
//
//	@doc:
//		Translate a derived table into CDXLNode
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
3445 3446 3447
CTranslatorQueryToDXL::TranslateDerivedTablesToDXL(const RangeTblEntry *rte,
												   ULONG rt_index,
												   ULONG current_query_level)
3448
{
3449 3450 3451
	CMappingVarColId *var_to_colid_map = m_var_to_colid_map->CopyMapColId(m_mp);
	Query *query_derived_tbl = rte->subquery;
	GPOS_ASSERT(NULL != query_derived_tbl);
3452

J
Jesse Zhang 已提交
3453 3454 3455 3456
	CTranslatorQueryToDXL query_to_dxl_translator(
		m_mp, m_md_accessor, m_colid_counter, m_cte_id_counter,
		var_to_colid_map, query_derived_tbl, m_query_level + 1, IsDMLQuery(),
		m_query_level_to_cte_map);
3457 3458

	// translate query representing the derived table to its DXL representation
J
Jesse Zhang 已提交
3459 3460
	CDXLNode *derived_tbl_dxlnode =
		query_to_dxl_translator.TranslateSelectQueryToDXL();
3461 3462

	// get the output columns of the derived table
J
Jesse Zhang 已提交
3463 3464
	CDXLNodeArray *query_output_cols_dxlnode_array =
		query_to_dxl_translator.GetQueryOutputCols();
3465
	CDXLNodeArray *cte_dxlnode_array = query_to_dxl_translator.GetCTEs();
J
Jesse Zhang 已提交
3466 3467
	GPOS_ASSERT(NULL != derived_tbl_dxlnode &&
				query_output_cols_dxlnode_array != NULL);
3468

3469
	CUtils::AddRefAppend(m_dxl_cte_producers, cte_dxlnode_array);
J
Jesse Zhang 已提交
3470 3471 3472

	m_has_distributed_tables = m_has_distributed_tables ||
							   query_to_dxl_translator.HasDistributedTables();
3473 3474

	// make note of new columns from derived table
J
Jesse Zhang 已提交
3475 3476 3477
	m_var_to_colid_map->LoadDerivedTblColumns(
		current_query_level, rt_index, query_output_cols_dxlnode_array,
		query_to_dxl_translator.Pquery()->targetList);
3478

3479
	return derived_tbl_dxlnode;
3480 3481 3482 3483
}

//---------------------------------------------------------------------------
//	@function:
3484
//		CTranslatorQueryToDXL::TranslateExprToDXL
3485 3486 3487 3488 3489 3490
//
//	@doc:
//		Translate the Expr into a CDXLScalar node
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
3491
CTranslatorQueryToDXL::TranslateExprToDXL(Expr *expr)
3492
{
J
Jesse Zhang 已提交
3493 3494
	CDXLNode *scalar_dxlnode = m_scalar_translator->TranslateScalarToDXL(
		expr, m_var_to_colid_map, &m_has_distributed_tables);
3495
	GPOS_ASSERT(NULL != scalar_dxlnode);
3496

3497
	return scalar_dxlnode;
3498 3499 3500 3501
}

//---------------------------------------------------------------------------
//	@function:
3502
//		CTranslatorQueryToDXL::TranslateJoinExprInFromToDXL
3503 3504 3505 3506 3507 3508
//
//	@doc:
//		Translate the JoinExpr on a GPDB query into a CDXLLogicalJoin node
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
3509
CTranslatorQueryToDXL::TranslateJoinExprInFromToDXL(JoinExpr *join_expr)
3510
{
3511
	GPOS_ASSERT(NULL != join_expr);
3512

3513 3514
	CDXLNode *left_child_dxlnode = TranslateFromClauseToDXL(join_expr->larg);
	CDXLNode *right_child_dxlnode = TranslateFromClauseToDXL(join_expr->rarg);
J
Jesse Zhang 已提交
3515 3516 3517 3518
	EdxlJoinType join_type =
		CTranslatorUtils::ConvertToDXLJoinType(join_expr->jointype);
	CDXLNode *join_dxlnode = GPOS_NEW(m_mp)
		CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalJoin(m_mp, join_type));
3519

3520
	GPOS_ASSERT(NULL != left_child_dxlnode && NULL != right_child_dxlnode);
3521

3522 3523
	join_dxlnode->AddChild(left_child_dxlnode);
	join_dxlnode->AddChild(right_child_dxlnode);
3524

J
Jesse Zhang 已提交
3525
	Node *node = join_expr->quals;
3526 3527

	// translate the join condition
3528
	if (NULL != node)
3529
	{
J
Jesse Zhang 已提交
3530
		join_dxlnode->AddChild(TranslateExprToDXL((Expr *) node));
3531 3532 3533 3534
	}
	else
	{
		// a cross join therefore add a CDXLScalarConstValue representing the value "true"
3535
		join_dxlnode->AddChild(CreateDXLConstValueTrue());
3536 3537 3538 3539 3540 3541
	}

	// extract the range table entry for the join expr to:
	// 1. Process the alias names of the columns
	// 2. Generate a project list for the join expr and maintain it in our hash map

3542
	const ULONG rtindex = join_expr->rtindex;
J
Jesse Zhang 已提交
3543 3544
	RangeTblEntry *rte =
		(RangeTblEntry *) gpdb::ListNth(m_query->rtable, rtindex - 1);
3545
	GPOS_ASSERT(NULL != rte);
3546

3547 3548
	Alias *alias = rte->eref;
	GPOS_ASSERT(NULL != alias);
J
Jesse Zhang 已提交
3549 3550 3551 3552
	GPOS_ASSERT(NULL != alias->colnames &&
				0 < gpdb::ListLength(alias->colnames));
	GPOS_ASSERT(gpdb::ListLength(rte->joinaliasvars) ==
				gpdb::ListLength(alias->colnames));
3553

J
Jesse Zhang 已提交
3554 3555 3556 3557
	CDXLNode *project_list_computed_cols_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
	CDXLNode *project_list_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
3558 3559

	// construct a proj element node for each entry in the joinaliasvars
3560 3561
	ListCell *lc_node = NULL;
	ListCell *lc_col_name = NULL;
J
Jesse Zhang 已提交
3562
	ForBoth(lc_node, rte->joinaliasvars, lc_col_name, alias->colnames)
3563
	{
3564
		Node *join_alias_node = (Node *) lfirst(lc_node);
J
Jesse Zhang 已提交
3565 3566
		GPOS_ASSERT(IsA(join_alias_node, Var) ||
					IsA(join_alias_node, CoalesceExpr));
3567 3568
		Value *value = (Value *) lfirst(lc_col_name);
		CHAR *col_name_char_array = strVal(value);
3569 3570

		// create the DXL node holding the target list entry and add it to proj list
J
Jesse Zhang 已提交
3571 3572
		CDXLNode *project_elem_dxlnode = TranslateExprToDXLProject(
			(Expr *) join_alias_node, col_name_char_array);
3573
		project_list_dxlnode->AddChild(project_elem_dxlnode);
3574

3575
		if (IsA(join_alias_node, CoalesceExpr))
3576 3577
		{
			// add coalesce expression to the computed columns
3578 3579
			project_elem_dxlnode->AddRef();
			project_list_computed_cols_dxlnode->AddChild(project_elem_dxlnode);
3580 3581
		}
	}
J
Jesse Zhang 已提交
3582 3583
	m_var_to_colid_map->LoadProjectElements(m_query_level, rtindex,
											project_list_dxlnode);
3584
	project_list_dxlnode->Release();
3585

3586
	if (0 == project_list_computed_cols_dxlnode->Arity())
3587
	{
3588 3589
		project_list_computed_cols_dxlnode->Release();
		return join_dxlnode;
3590 3591
	}

J
Jesse Zhang 已提交
3592 3593
	CDXLNode *project_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalProject(m_mp));
3594 3595
	project_dxlnode->AddChild(project_list_computed_cols_dxlnode);
	project_dxlnode->AddChild(join_dxlnode);
3596

3597
	return project_dxlnode;
3598 3599 3600 3601
}

//---------------------------------------------------------------------------
//	@function:
3602
//		CTranslatorQueryToDXL::TranslateTargetListToDXLProject
3603 3604 3605 3606 3607 3608 3609
//
//	@doc:
//		Create a DXL project list from the target list. The function allocates
//		memory in the translator memory pool and caller responsible for freeing it.
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
3610 3611
CTranslatorQueryToDXL::TranslateTargetListToDXLProject(
	List *target_list, CDXLNode *child_dxlnode,
3612
	IntToUlongMap *sort_grpref_to_colid_mapping,
J
Jesse Zhang 已提交
3613 3614
	IntToUlongMap *output_attno_to_colid_mapping, List *plgrpcl,
	BOOL is_expand_aggref_expr)
3615
{
J
Jesse Zhang 已提交
3616 3617
	BOOL is_groupby =
		(0 != gpdb::ListLength(m_query->groupClause) || m_query->hasAggs);
3618

J
Jesse Zhang 已提交
3619 3620
	CDXLNode *project_list_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
3621 3622

	// construct a proj element node for each entry in the target list
3623
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
3624

3625 3626
	// target entries that are result of flattening join alias
	// and are equivalent to a defined grouping column target entry
3627
	List *omitted_te_list = NIL;
3628 3629

	// list for all vars used in aggref expressions
3630 3631 3632
	List *vars_list = NULL;
	ULONG resno = 0;
	ForEach(lc, target_list)
3633
	{
3634 3635 3636 3637
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		GPOS_ASSERT(IsA(target_entry, TargetEntry));
		GPOS_ASSERT(0 < target_entry->resno);
		resno = target_entry->resno;
3638

J
Jesse Zhang 已提交
3639 3640
		BOOL is_grouping_col =
			CTranslatorUtils::IsGroupingColumn(target_entry, plgrpcl);
3641
		if (!is_groupby || (is_groupby && is_grouping_col))
3642
		{
3643 3644 3645 3646 3647
			// Insist projection for any outer refs to ensure any decorelation of a
			// subquery results in a correct plan using the projected reference,
			// instead of the outer ref directly.
			// TODO: Remove is_grouping_col from this check once const projections in
			// subqueries no longer prevent decorrelation
J
Jesse Zhang 已提交
3648 3649
			BOOL insist_proj =
				(IsA(target_entry->expr, Var) &&
3650 3651
				 ((Var *) (target_entry->expr))->varlevelsup > 0 &&
				 !is_grouping_col);
J
Jesse Zhang 已提交
3652 3653 3654 3655 3656 3657
			CDXLNode *project_elem_dxlnode = TranslateExprToDXLProject(
				target_entry->expr, target_entry->resname,
				insist_proj /* insist_new_colids */);
			ULONG colid =
				CDXLScalarProjElem::Cast(project_elem_dxlnode->GetOperator())
					->Id();
3658

J
Jesse Zhang 已提交
3659 3660
			AddSortingGroupingColumn(target_entry, sort_grpref_to_colid_mapping,
									 colid);
3661 3662

			// add column to the list of output columns of the query
3663
			StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno, colid);
3664

3665
			if (!IsA(target_entry->expr, Var) || insist_proj)
3666
			{
3667
				// only add computed columns to the project list or if it's an outerref
3668
				project_list_dxlnode->AddChild(project_elem_dxlnode);
3669 3670 3671
			}
			else
			{
3672
				project_elem_dxlnode->Release();
3673 3674
			}
		}
3675
		else if (is_expand_aggref_expr && IsA(target_entry->expr, Aggref))
3676
		{
J
Jesse Zhang 已提交
3677 3678 3679 3680
			vars_list = gpdb::ListConcat(
				vars_list,
				gpdb::ExtractNodesExpression((Node *) target_entry->expr, T_Var,
											 false /*descendIntoSubqueries*/));
3681
		}
3682
		else if (!IsA(target_entry->expr, Aggref))
3683
		{
3684
			omitted_te_list = gpdb::LAppend(omitted_te_list, target_entry);
3685 3686 3687 3688
		}
	}

	// process target entries that are a result of flattening join alias
3689 3690
	lc = NULL;
	ForEach(lc, omitted_te_list)
3691
	{
3692 3693
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		INT sort_group_ref = (INT) target_entry->ressortgroupref;
3694

J
Jesse Zhang 已提交
3695 3696 3697
		TargetEntry *te_grouping_col =
			CTranslatorUtils::GetGroupingColumnTargetEntry(
				(Node *) target_entry->expr, plgrpcl, target_list);
3698
		if (NULL != te_grouping_col)
3699
		{
J
Jesse Zhang 已提交
3700 3701 3702 3703 3704 3705 3706
			const ULONG colid = CTranslatorUtils::GetColId(
				(INT) te_grouping_col->ressortgroupref,
				sort_grpref_to_colid_mapping);
			StoreAttnoColIdMapping(output_attno_to_colid_mapping,
								   target_entry->resno, colid);
			if (0 < sort_group_ref && 0 < colid &&
				NULL == sort_grpref_to_colid_mapping->Find(&sort_group_ref))
3707
			{
J
Jesse Zhang 已提交
3708 3709
				AddSortingGroupingColumn(target_entry,
										 sort_grpref_to_colid_mapping, colid);
3710 3711 3712
			}
		}
	}
3713
	if (NIL != omitted_te_list)
3714
	{
3715
		gpdb::GPDBFree(omitted_te_list);
3716 3717
	}

3718
	GPOS_ASSERT_IMP(!is_expand_aggref_expr, NULL == vars_list);
J
Jesse Zhang 已提交
3719

3720
	// process all additional vars in aggref expressions
3721
	ListCell *lc_var = NULL;
J
Jesse Zhang 已提交
3722
	ForEach(lc_var, vars_list)
3723
	{
3724 3725
		resno++;
		Var *var = (Var *) lfirst(lc_var);
3726 3727

		// TODO: Dec 28, 2012; figure out column's name
J
Jesse Zhang 已提交
3728 3729 3730 3731 3732
		CDXLNode *project_elem_dxlnode =
			TranslateExprToDXLProject((Expr *) var, "?col?");

		ULONG colid =
			CDXLScalarProjElem::Cast(project_elem_dxlnode->GetOperator())->Id();
3733 3734

		// add column to the list of output columns of the query
3735
		StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno, colid);
J
Jesse Zhang 已提交
3736

3737
		project_elem_dxlnode->Release();
3738
	}
J
Jesse Zhang 已提交
3739

3740
	if (0 < project_list_dxlnode->Arity())
3741 3742 3743
	{
		// create a node with the CDXLLogicalProject operator and add as its children:
		// the CDXLProjectList node and the node representing the input to the project node
J
Jesse Zhang 已提交
3744 3745
		CDXLNode *project_dxlnode = GPOS_NEW(m_mp)
			CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalProject(m_mp));
3746 3747 3748 3749
		project_dxlnode->AddChild(project_list_dxlnode);
		project_dxlnode->AddChild(child_dxlnode);
		GPOS_ASSERT(NULL != project_dxlnode);
		return project_dxlnode;
3750 3751
	}

3752 3753
	project_list_dxlnode->Release();
	return child_dxlnode;
3754 3755 3756 3757
}

//---------------------------------------------------------------------------
//	@function:
3758
//		CTranslatorQueryToDXL::CreateDXLProjectNullsForGroupingSets
3759 3760 3761 3762 3763 3764 3765
//
//	@doc:
//		Construct a DXL project node projecting NULL values for the columns in the
//		given bitset
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
3766 3767 3768 3769 3770 3771 3772 3773 3774
CTranslatorQueryToDXL::CreateDXLProjectNullsForGroupingSets(
	List *target_list, CDXLNode *child_dxlnode,
	CBitSet *bitset,  // group by columns
	IntToUlongMap
		*sort_grouping_col_mapping,	 // mapping of sorting and grouping columns
	IntToUlongMap *output_attno_to_colid_mapping,  // mapping of output columns
	UlongToUlongMap *
		grpcol_index_to_colid_mapping  // mapping of unique grouping col positions
) const
3775
{
J
Jesse Zhang 已提交
3776 3777
	CDXLNode *project_list_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
3778 3779 3780

	// construct a proj element node for those non-aggregate entries in the target list which
	// are not included in the grouping set
3781
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
3782
	ForEach(lc, target_list)
3783
	{
3784 3785
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		GPOS_ASSERT(IsA(target_entry, TargetEntry));
3786

3787 3788
		BOOL is_grouping_col = bitset->Get(target_entry->ressortgroupref);
		ULONG resno = target_entry->resno;
3789

3790
		ULONG colid = 0;
J
Jesse Zhang 已提交
3791

3792
		if (IsA(target_entry->expr, GroupingFunc))
3793
		{
3794
			colid = m_colid_counter->next_id();
J
Jesse Zhang 已提交
3795 3796
			CDXLNode *grouping_func_dxlnode = TranslateGroupingFuncToDXL(
				target_entry->expr, bitset, grpcol_index_to_colid_mapping);
3797
			CMDName *mdname_alias = NULL;
3798

3799
			if (NULL == target_entry->resname)
3800
			{
3801 3802
				CWStringConst str_unnamed_col(GPOS_WSZ_LIT("grouping"));
				mdname_alias = GPOS_NEW(m_mp) CMDName(m_mp, &str_unnamed_col);
3803 3804 3805
			}
			else
			{
J
Jesse Zhang 已提交
3806 3807 3808
				CWStringDynamic *alias_str =
					CDXLUtils::CreateDynamicStringFromCharArray(
						m_mp, target_entry->resname);
3809 3810
				mdname_alias = GPOS_NEW(m_mp) CMDName(m_mp, alias_str);
				GPOS_DELETE(alias_str);
3811
			}
J
Jesse Zhang 已提交
3812 3813 3814 3815
			CDXLNode *project_elem_dxlnode = GPOS_NEW(m_mp) CDXLNode(
				m_mp,
				GPOS_NEW(m_mp) CDXLScalarProjElem(m_mp, colid, mdname_alias),
				grouping_func_dxlnode);
3816 3817
			project_list_dxlnode->AddChild(project_elem_dxlnode);
			StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno, colid);
3818
		}
3819
		else if (!is_grouping_col && !IsA(target_entry->expr, Aggref))
3820
		{
3821
			OID oid_type = gpdb::ExprType((Node *) target_entry->expr);
3822

3823
			colid = m_colid_counter->next_id();
3824

3825
			CMDIdGPDB *mdid = GPOS_NEW(m_mp) CMDIdGPDB(oid_type);
J
Jesse Zhang 已提交
3826 3827 3828
			CDXLNode *project_elem_dxlnode =
				CTranslatorUtils::CreateDXLProjElemConstNULL(
					m_mp, m_md_accessor, mdid, colid, target_entry->resname);
3829
			mdid->Release();
J
Jesse Zhang 已提交
3830

3831 3832
			project_list_dxlnode->AddChild(project_elem_dxlnode);
			StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno, colid);
3833
		}
J
Jesse Zhang 已提交
3834 3835 3836 3837 3838 3839 3840 3841 3842 3843

		INT sort_group_ref = INT(target_entry->ressortgroupref);

		GPOS_ASSERT_IMP(
			0 == sort_grouping_col_mapping,
			NULL != sort_grouping_col_mapping->Find(&sort_group_ref) &&
				"Grouping column with no mapping");

		if (0 < sort_group_ref && 0 < colid &&
			NULL == sort_grouping_col_mapping->Find(&sort_group_ref))
3844
		{
J
Jesse Zhang 已提交
3845 3846
			AddSortingGroupingColumn(target_entry, sort_grouping_col_mapping,
									 colid);
3847 3848 3849
		}
	}

3850
	if (0 == project_list_dxlnode->Arity())
3851 3852
	{
		// no project necessary
3853 3854
		project_list_dxlnode->Release();
		return child_dxlnode;
3855 3856
	}

J
Jesse Zhang 已提交
3857 3858 3859
	return GPOS_NEW(m_mp)
		CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalProject(m_mp),
				 project_list_dxlnode, child_dxlnode);
3860 3861 3862 3863
}

//---------------------------------------------------------------------------
//	@function:
3864
//		CTranslatorQueryToDXL::CreateDXLProjectGroupingFuncs
3865 3866 3867
//
//	@doc:
//		Construct a DXL project node projecting values for the grouping funcs in
J
Jesse Zhang 已提交
3868
//		the target list
3869 3870 3871
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
3872 3873
CTranslatorQueryToDXL::CreateDXLProjectGroupingFuncs(
	List *target_list, CDXLNode *child_dxlnode, CBitSet *bitset,
3874 3875
	IntToUlongMap *output_attno_to_colid_mapping,
	UlongToUlongMap *grpcol_index_to_colid_mapping,
J
Jesse Zhang 已提交
3876
	IntToUlongMap *sort_grpref_to_colid_mapping) const
3877
{
J
Jesse Zhang 已提交
3878 3879
	CDXLNode *project_list_dxlnode =
		GPOS_NEW(m_mp) CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));
3880 3881 3882

	// construct a proj element node for those non-aggregate entries in the target list which
	// are not included in the grouping set
3883
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
3884
	ForEach(lc, target_list)
3885
	{
3886 3887
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		GPOS_ASSERT(IsA(target_entry, TargetEntry));
3888

3889
		ULONG resno = target_entry->resno;
3890

3891
		if (IsA(target_entry->expr, GroupingFunc))
3892
		{
3893
			ULONG colid = m_colid_counter->next_id();
J
Jesse Zhang 已提交
3894 3895
			CDXLNode *grouping_func_dxlnode = TranslateGroupingFuncToDXL(
				target_entry->expr, bitset, grpcol_index_to_colid_mapping);
3896
			CMDName *mdname_alias = NULL;
3897

3898
			if (NULL == target_entry->resname)
3899
			{
3900 3901
				CWStringConst str_unnamed_col(GPOS_WSZ_LIT("grouping"));
				mdname_alias = GPOS_NEW(m_mp) CMDName(m_mp, &str_unnamed_col);
3902 3903 3904
			}
			else
			{
J
Jesse Zhang 已提交
3905 3906 3907
				CWStringDynamic *alias_str =
					CDXLUtils::CreateDynamicStringFromCharArray(
						m_mp, target_entry->resname);
3908 3909
				mdname_alias = GPOS_NEW(m_mp) CMDName(m_mp, alias_str);
				GPOS_DELETE(alias_str);
3910
			}
J
Jesse Zhang 已提交
3911 3912 3913 3914
			CDXLNode *project_elem_dxlnode = GPOS_NEW(m_mp) CDXLNode(
				m_mp,
				GPOS_NEW(m_mp) CDXLScalarProjElem(m_mp, colid, mdname_alias),
				grouping_func_dxlnode);
3915 3916
			project_list_dxlnode->AddChild(project_elem_dxlnode);
			StoreAttnoColIdMapping(output_attno_to_colid_mapping, resno, colid);
J
Jesse Zhang 已提交
3917 3918
			AddSortingGroupingColumn(target_entry, sort_grpref_to_colid_mapping,
									 colid);
3919 3920 3921
		}
	}

3922
	if (0 == project_list_dxlnode->Arity())
3923 3924
	{
		// no project necessary
3925 3926
		project_list_dxlnode->Release();
		return child_dxlnode;
3927 3928
	}

J
Jesse Zhang 已提交
3929 3930 3931
	return GPOS_NEW(m_mp)
		CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLLogicalProject(m_mp),
				 project_list_dxlnode, child_dxlnode);
3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::StoreAttnoColIdMapping
//
//	@doc:
//		Store mapping between attno and generate colid
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
3943 3944
CTranslatorQueryToDXL::StoreAttnoColIdMapping(
	IntToUlongMap *attno_to_colid_mapping, INT attno, ULONG colid) const
3945
{
3946
	GPOS_ASSERT(NULL != attno_to_colid_mapping);
3947 3948

#ifdef GPOS_DEBUG
3949
	BOOL result =
J
Jesse Zhang 已提交
3950 3951 3952
#endif	// GPOS_DEBUG
		attno_to_colid_mapping->Insert(GPOS_NEW(m_mp) INT(attno),
									   GPOS_NEW(m_mp) ULONG(colid));
3953

3954
	GPOS_ASSERT(result);
3955 3956 3957 3958
}

//---------------------------------------------------------------------------
//	@function:
3959
//		CTranslatorQueryToDXL::CreateDXLOutputCols
3960 3961 3962 3963 3964
//
//	@doc:
//		Construct an array of DXL nodes representing the query output
//
//---------------------------------------------------------------------------
3965
CDXLNodeArray *
J
Jesse Zhang 已提交
3966 3967
CTranslatorQueryToDXL::CreateDXLOutputCols(
	List *target_list, IntToUlongMap *attno_to_colid_mapping) const
3968
{
3969 3970
	GPOS_ASSERT(NULL != target_list);
	GPOS_ASSERT(NULL != attno_to_colid_mapping);
3971

3972
	CDXLNodeArray *dxlnodes = GPOS_NEW(m_mp) CDXLNodeArray(m_mp);
3973

3974
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
3975
	ForEach(lc, target_list)
3976
	{
3977 3978 3979
		TargetEntry *target_entry = (TargetEntry *) lfirst(lc);
		GPOS_ASSERT(0 < target_entry->resno);
		ULONG resno = target_entry->resno;
3980

3981
		if (target_entry->resjunk)
3982 3983 3984 3985
		{
			continue;
		}

3986 3987 3988
		GPOS_ASSERT(NULL != target_entry);
		CMDName *mdname = NULL;
		if (NULL == target_entry->resname)
3989
		{
3990 3991
			CWStringConst str_unnamed_col(GPOS_WSZ_LIT("?column?"));
			mdname = GPOS_NEW(m_mp) CMDName(m_mp, &str_unnamed_col);
3992 3993 3994
		}
		else
		{
J
Jesse Zhang 已提交
3995 3996 3997
			CWStringDynamic *alias_str =
				CDXLUtils::CreateDynamicStringFromCharArray(
					m_mp, target_entry->resname);
3998
			mdname = GPOS_NEW(m_mp) CMDName(m_mp, alias_str);
3999
			// CName constructor copies string
4000
			GPOS_DELETE(alias_str);
4001 4002
		}

J
Jesse Zhang 已提交
4003 4004
		const ULONG colid =
			CTranslatorUtils::GetColId(resno, attno_to_colid_mapping);
4005 4006

		// create a column reference
J
Jesse Zhang 已提交
4007 4008 4009 4010 4011 4012 4013
		IMDId *mdid_type = GPOS_NEW(m_mp)
			CMDIdGPDB(gpdb::ExprType((Node *) target_entry->expr));
		INT type_modifier = gpdb::ExprTypeMod((Node *) target_entry->expr);
		CDXLColRef *dxl_colref = GPOS_NEW(m_mp)
			CDXLColRef(m_mp, mdname, colid, mdid_type, type_modifier);
		CDXLScalarIdent *dxl_ident =
			GPOS_NEW(m_mp) CDXLScalarIdent(m_mp, dxl_colref);
4014 4015

		// create the DXL node holding the scalar ident operator
4016
		CDXLNode *dxlnode = GPOS_NEW(m_mp) CDXLNode(m_mp, dxl_ident);
4017

4018
		dxlnodes->Append(dxlnode);
4019 4020
	}

4021
	return dxlnodes;
4022 4023 4024 4025
}

//---------------------------------------------------------------------------
//	@function:
4026
//		CTranslatorQueryToDXL::TranslateExprToDXLProject
4027 4028 4029 4030 4031 4032 4033
//
//	@doc:
//		Create a DXL project element node from the target list entry or var.
//		The function allocates memory in the translator memory pool, and the caller
//		is responsible for freeing it.
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
4034 4035 4036
CTranslatorQueryToDXL::TranslateExprToDXLProject(Expr *expr,
												 const CHAR *alias_name,
												 BOOL insist_new_colids)
4037
{
4038
	GPOS_ASSERT(NULL != expr);
4039 4040

	// construct a scalar operator
4041
	CDXLNode *child_dxlnode = TranslateExprToDXL(expr);
4042 4043

	// get the id and alias for the proj elem
4044 4045
	ULONG project_elem_id;
	CMDName *mdname_alias = NULL;
4046

4047
	if (NULL == alias_name)
4048
	{
4049 4050
		CWStringConst str_unnamed_col(GPOS_WSZ_LIT("?column?"));
		mdname_alias = GPOS_NEW(m_mp) CMDName(m_mp, &str_unnamed_col);
4051 4052 4053
	}
	else
	{
J
Jesse Zhang 已提交
4054 4055
		CWStringDynamic *alias_str =
			CDXLUtils::CreateDynamicStringFromCharArray(m_mp, alias_name);
4056 4057
		mdname_alias = GPOS_NEW(m_mp) CMDName(m_mp, alias_str);
		GPOS_DELETE(alias_str);
4058 4059
	}

4060
	if (IsA(expr, Var) && !insist_new_colids)
4061 4062
	{
		// project elem is a a reference to a column - use the colref id
J
Jesse Zhang 已提交
4063 4064 4065 4066
		GPOS_ASSERT(EdxlopScalarIdent ==
					child_dxlnode->GetOperator()->GetDXLOperator());
		CDXLScalarIdent *dxl_ident =
			(CDXLScalarIdent *) child_dxlnode->GetOperator();
4067
		project_elem_id = dxl_ident->GetDXLColRef()->Id();
4068 4069 4070 4071
	}
	else
	{
		// project elem is a defined column - get a new id
4072
		project_elem_id = m_colid_counter->next_id();
4073 4074
	}

J
Jesse Zhang 已提交
4075 4076 4077
	CDXLNode *project_elem_dxlnode = GPOS_NEW(m_mp) CDXLNode(
		m_mp,
		GPOS_NEW(m_mp) CDXLScalarProjElem(m_mp, project_elem_id, mdname_alias));
4078
	project_elem_dxlnode->AddChild(child_dxlnode);
4079

4080
	return project_elem_dxlnode;
4081 4082 4083 4084
}

//---------------------------------------------------------------------------
//	@function:
4085
//		CTranslatorQueryToDXL::CreateDXLConstValueTrue
4086 4087 4088 4089 4090
//
//	@doc:
//		Returns a CDXLNode representing scalar condition "true"
//---------------------------------------------------------------------------
CDXLNode *
4091
CTranslatorQueryToDXL::CreateDXLConstValueTrue()
4092
{
J
Jesse Zhang 已提交
4093 4094 4095
	Const *const_expr =
		(Const *) gpdb::MakeBoolConst(true /*value*/, false /*isnull*/);
	CDXLNode *dxlnode = TranslateExprToDXL((Expr *) const_expr);
4096
	gpdb::GPDBFree(const_expr);
4097

4098
	return dxlnode;
4099 4100 4101 4102
}

//---------------------------------------------------------------------------
//	@function:
4103
//		CTranslatorQueryToDXL::TranslateGroupingFuncToDXL
4104 4105 4106 4107 4108 4109
//
//	@doc:
//		Translate grouping func
//
//---------------------------------------------------------------------------
CDXLNode *
J
Jesse Zhang 已提交
4110 4111 4112
CTranslatorQueryToDXL::TranslateGroupingFuncToDXL(
	const Expr *expr, CBitSet *bitset,
	UlongToUlongMap *grpcol_index_to_colid_mapping) const
4113
{
4114 4115
	GPOS_ASSERT(IsA(expr, GroupingFunc));
	GPOS_ASSERT(NULL != grpcol_index_to_colid_mapping);
4116

4117
	const GroupingFunc *grouping_func = (GroupingFunc *) expr;
4118

4119
	if (1 < gpdb::ListLength(grouping_func->args))
4120
	{
J
Jesse Zhang 已提交
4121 4122
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
				   GPOS_WSZ_LIT("Grouping function with multiple arguments"));
4123
	}
J
Jesse Zhang 已提交
4124

4125 4126
	Node *node = (Node *) gpdb::ListNth(grouping_func->args, 0);
	ULONG grouping_idx = gpdb::GetIntFromValue(node);
J
Jesse Zhang 已提交
4127

4128 4129
	// generate a constant value for the result of the grouping function as follows:
	// if the grouping function argument is a group-by column, result is 0
J
Jesse Zhang 已提交
4130
	// otherwise, the result is 1
4131
	LINT l_value = 0;
J
Jesse Zhang 已提交
4132

4133 4134 4135 4136
	ULONG *sort_group_ref = grpcol_index_to_colid_mapping->Find(&grouping_idx);
	GPOS_ASSERT(NULL != sort_group_ref);
	BOOL is_grouping_col = bitset->Get(*sort_group_ref);
	if (!is_grouping_col)
4137 4138
	{
		// not a grouping column
4139
		l_value = 1;
4140 4141
	}

4142 4143 4144
	const IMDType *md_type = m_md_accessor->PtMDType<IMDTypeInt8>(m_sysid);
	CMDIdGPDB *mdid_cast = CMDIdGPDB::CastMdid(md_type->MDId());
	CMDIdGPDB *mdid = GPOS_NEW(m_mp) CMDIdGPDB(*mdid_cast);
J
Jesse Zhang 已提交
4145 4146 4147 4148 4149

	CDXLDatum *datum_dxl =
		GPOS_NEW(m_mp) CDXLDatumInt8(m_mp, mdid, false /* is_null */, l_value);
	CDXLScalarConstValue *dxlop =
		GPOS_NEW(m_mp) CDXLScalarConstValue(m_mp, datum_dxl);
4150
	return GPOS_NEW(m_mp) CDXLNode(m_mp, dxlop);
4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::ConstructCTEProducerList
//
//	@doc:
//		Construct a list of CTE producers from the query's CTE list
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
4162 4163
CTranslatorQueryToDXL::ConstructCTEProducerList(List *cte_list,
												ULONG cte_query_level)
4164
{
J
Jesse Zhang 已提交
4165 4166 4167
	GPOS_ASSERT(NULL != m_dxl_cte_producers &&
				"CTE Producer list not initialized");

4168
	if (NULL == cte_list)
4169 4170 4171
	{
		return;
	}
J
Jesse Zhang 已提交
4172

4173
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
4174 4175

	ForEach(lc, cte_list)
4176
	{
4177 4178
		CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
		GPOS_ASSERT(IsA(cte->ctequery, Query));
J
Jesse Zhang 已提交
4179

4180
		if (cte->cterecursive)
H
Haisheng Yuan 已提交
4181
		{
J
Jesse Zhang 已提交
4182 4183
			GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
					   GPOS_WSZ_LIT("WITH RECURSIVE"));
H
Haisheng Yuan 已提交
4184 4185
		}

J
Jesse Zhang 已提交
4186 4187 4188
		Query *cte_query = CQueryMutators::NormalizeQuery(
			m_mp, m_md_accessor, (Query *) cte->ctequery, cte_query_level + 1);

4189 4190
		// the query representing the cte can only access variables defined in the current level as well as
		// those defined at prior query levels
J
Jesse Zhang 已提交
4191 4192 4193 4194 4195 4196 4197
		CMappingVarColId *var_colid_mapping =
			m_var_to_colid_map->CopyMapColId(cte_query_level);

		CTranslatorQueryToDXL query_to_dxl_translator(
			m_mp, m_md_accessor, m_colid_counter, m_cte_id_counter,
			var_colid_mapping, cte_query, cte_query_level + 1, IsDMLQuery(),
			m_query_level_to_cte_map);
4198 4199

		// translate query representing the cte table to its DXL representation
J
Jesse Zhang 已提交
4200 4201
		CDXLNode *cte_child_dxlnode =
			query_to_dxl_translator.TranslateSelectQueryToDXL();
4202 4203

		// get the output columns of the cte table
J
Jesse Zhang 已提交
4204 4205
		CDXLNodeArray *cte_query_output_colds_dxlnode_array =
			query_to_dxl_translator.GetQueryOutputCols();
4206
		CDXLNodeArray *cte_dxlnode_array = query_to_dxl_translator.GetCTEs();
J
Jesse Zhang 已提交
4207 4208 4209 4210 4211 4212 4213
		m_has_distributed_tables =
			m_has_distributed_tables ||
			query_to_dxl_translator.HasDistributedTables();

		GPOS_ASSERT(NULL != cte_child_dxlnode &&
					NULL != cte_query_output_colds_dxlnode_array &&
					NULL != cte_dxlnode_array);
4214 4215

		// append any nested CTE
4216
		CUtils::AddRefAppend(m_dxl_cte_producers, cte_dxlnode_array);
J
Jesse Zhang 已提交
4217

4218
		ULongPtrArray *colid_array = GPOS_NEW(m_mp) ULongPtrArray(m_mp);
J
Jesse Zhang 已提交
4219 4220 4221

		const ULONG output_columns =
			cte_query_output_colds_dxlnode_array->Size();
4222
		for (ULONG ul = 0; ul < output_columns; ul++)
4223
		{
J
Jesse Zhang 已提交
4224 4225 4226 4227 4228 4229
			CDXLNode *output_col_dxlnode =
				(*cte_query_output_colds_dxlnode_array)[ul];
			CDXLScalarIdent *dxl_scalar_ident =
				CDXLScalarIdent::Cast(output_col_dxlnode->GetOperator());
			colid_array->Append(
				GPOS_NEW(m_mp) ULONG(dxl_scalar_ident->GetDXLColRef()->Id()));
4230
		}
J
Jesse Zhang 已提交
4231 4232 4233 4234 4235 4236 4237

		CDXLLogicalCTEProducer *lg_cte_prod_dxlop =
			GPOS_NEW(m_mp) CDXLLogicalCTEProducer(
				m_mp, m_cte_id_counter->next_id(), colid_array);
		CDXLNode *cte_producer_dxlnode =
			GPOS_NEW(m_mp) CDXLNode(m_mp, lg_cte_prod_dxlop, cte_child_dxlnode);

4238
		m_dxl_cte_producers->Append(cte_producer_dxlnode);
4239
#ifdef GPOS_DEBUG
4240
		BOOL result =
4241
#endif
J
Jesse Zhang 已提交
4242 4243 4244
			m_cteid_at_current_query_level_map->Insert(
				GPOS_NEW(m_mp) ULONG(lg_cte_prod_dxlop->Id()),
				GPOS_NEW(m_mp) BOOL(true));
4245
		GPOS_ASSERT(result);
J
Jesse Zhang 已提交
4246

4247
		// update CTE producer mappings
J
Jesse Zhang 已提交
4248 4249
		CCTEListEntry *cte_list_entry =
			m_query_level_to_cte_map->Find(&cte_query_level);
4250
		if (NULL == cte_list_entry)
4251
		{
J
Jesse Zhang 已提交
4252 4253
			cte_list_entry = GPOS_NEW(m_mp)
				CCTEListEntry(m_mp, cte_query_level, cte, cte_producer_dxlnode);
4254
#ifdef GPOS_DEBUG
J
Jesse Zhang 已提交
4255
			BOOL is_res =
4256
#endif
J
Jesse Zhang 已提交
4257 4258
				m_query_level_to_cte_map->Insert(
					GPOS_NEW(m_mp) ULONG(cte_query_level), cte_list_entry);
4259
			GPOS_ASSERT(is_res);
4260 4261 4262
		}
		else
		{
4263
			cte_list_entry->AddCTEProducer(m_mp, cte, cte_producer_dxlnode);
4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276
		}
	}
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorQueryToDXL::ConstructCTEAnchors
//
//	@doc:
//		Construct a stack of CTE anchors for each CTE producer in the given array
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
4277 4278 4279
CTranslatorQueryToDXL::ConstructCTEAnchors(CDXLNodeArray *dxlnodes,
										   CDXLNode **dxl_cte_anchor_top,
										   CDXLNode **dxl_cte_anchor_bottom)
4280
{
4281 4282
	GPOS_ASSERT(NULL == *dxl_cte_anchor_top);
	GPOS_ASSERT(NULL == *dxl_cte_anchor_bottom);
4283

4284
	if (NULL == dxlnodes || 0 == dxlnodes->Size())
4285 4286 4287
	{
		return;
	}
J
Jesse Zhang 已提交
4288

4289
	const ULONG num_of_ctes = dxlnodes->Size();
J
Jesse Zhang 已提交
4290

4291
	for (ULONG ul = num_of_ctes; ul > 0; ul--)
4292 4293
	{
		// construct a new CTE anchor on top of the previous one
J
Jesse Zhang 已提交
4294 4295 4296
		CDXLNode *cte_producer_dxlnode = (*dxlnodes)[ul - 1];
		CDXLLogicalCTEProducer *cte_prod_dxlop =
			CDXLLogicalCTEProducer::Cast(cte_producer_dxlnode->GetOperator());
4297
		ULONG cte_producer_id = cte_prod_dxlop->Id();
J
Jesse Zhang 已提交
4298

4299
		if (NULL == m_cteid_at_current_query_level_map->Find(&cte_producer_id))
4300 4301 4302 4303
		{
			// cte not defined at this level: CTE anchor was already added
			continue;
		}
J
Jesse Zhang 已提交
4304 4305 4306 4307

		CDXLNode *cte_anchor_new_dxlnode = GPOS_NEW(m_mp) CDXLNode(
			m_mp, GPOS_NEW(m_mp) CDXLLogicalCTEAnchor(m_mp, cte_producer_id));

4308
		if (NULL == *dxl_cte_anchor_bottom)
4309
		{
4310
			*dxl_cte_anchor_bottom = cte_anchor_new_dxlnode;
4311 4312
		}

4313
		if (NULL != *dxl_cte_anchor_top)
4314
		{
4315
			cte_anchor_new_dxlnode->AddChild(*dxl_cte_anchor_top);
4316
		}
4317
		*dxl_cte_anchor_top = cte_anchor_new_dxlnode;
4318 4319 4320 4321 4322
	}
}

//---------------------------------------------------------------------------
//	@function:
4323
//		CTranslatorQueryToDXL::GenerateColIds
4324 4325 4326 4327 4328
//
//	@doc:
//		Generate an array of new column ids of the given size
//
//---------------------------------------------------------------------------
4329
ULongPtrArray *
CTranslatorQueryToDXL::GenerateColIds(CMemoryPool *mp, ULONG size) const
{
	// the array and its elements are allocated from the caller-provided pool
	ULongPtrArray *new_colids = GPOS_NEW(mp) ULongPtrArray(mp);

	// draw exactly `size` ids from the column id counter, in order
	ULONG remaining = size;
	while (0 < remaining)
	{
		new_colids->Append(GPOS_NEW(mp) ULONG(m_colid_counter->next_id()));
		remaining--;
	}

	return new_colids;
}

//---------------------------------------------------------------------------
//	@function:
4344
//		CTranslatorQueryToDXL::ExtractColIds
4345 4346 4347 4348 4349
//
//	@doc:
//		Extract column ids from the given mapping
//
//---------------------------------------------------------------------------
4350
ULongPtrArray *
CTranslatorQueryToDXL::ExtractColIds(
	CMemoryPool *mp, IntToUlongMap *attno_to_colid_mapping) const
{
	// Returns a new array holding each distinct column id that appears as a
	// value in attno_to_colid_mapping, in the iteration order of that map.
	// Every allocation made here comes from the caller-provided pool `mp`, so
	// the returned array and its elements live in the same pool.

	// set of column ids already emitted; only key presence is consulted
	UlongToUlongMap *seen_colids = GPOS_NEW(mp) UlongToUlongMap(mp);

	ULongPtrArray *colid_array = GPOS_NEW(mp) ULongPtrArray(mp);

	IntUlongHashmapIter att_iter(attno_to_colid_mapping);
	while (att_iter.Advance())
	{
		ULONG colid = *(att_iter.Value());

		// do not insert colid if already inserted
		if (NULL == seen_colids->Find(&colid))
		{
			colid_array->Append(GPOS_NEW(mp) ULONG(colid));
			seen_colids->Insert(GPOS_NEW(mp) ULONG(colid),
								GPOS_NEW(mp) ULONG(colid));
		}
	}

	// the bookkeeping map is local to this call
	seen_colids->Release();
	return colid_array;
}

//---------------------------------------------------------------------------
//	@function:
4378
//		CTranslatorQueryToDXL::RemapColIds
4379 4380 4381 4382 4383 4384
//
//	@doc:
//		Construct a new hashmap which replaces the values in the From array
//		with the corresponding value in the To array
//
//---------------------------------------------------------------------------
4385
IntToUlongMap *
CTranslatorQueryToDXL::RemapColIds(CMemoryPool *mp,
								   IntToUlongMap *attno_to_colid_mapping,
								   ULongPtrArray *from_list_colids,
								   ULongPtrArray *to_list_colids) const
{
	// Returns a new attno -> colid map, allocated from `mp`, in which every
	// column id of attno_to_colid_mapping is replaced by the id found at the
	// same position in to_list_colids as the original id has in
	// from_list_colids. The input map is left unmodified.
	GPOS_ASSERT(NULL != attno_to_colid_mapping);
	GPOS_ASSERT(NULL != from_list_colids && NULL != to_list_colids);
	GPOS_ASSERT(from_list_colids->Size() == to_list_colids->Size());

	// build the old colid -> new colid translation table by pairing the two
	// arrays position by position
	UlongToUlongMap *old_new_col_mapping = GPOS_NEW(mp) UlongToUlongMap(mp);
	const ULONG size = from_list_colids->Size();
	for (ULONG ul = 0; ul < size; ul++)
	{
#ifdef GPOS_DEBUG
		BOOL result =
#endif	// GPOS_DEBUG
			old_new_col_mapping->Insert(
				GPOS_NEW(mp) ULONG(*((*from_list_colids)[ul])),
				GPOS_NEW(mp) ULONG(*((*to_list_colids)[ul])));
		GPOS_ASSERT(result);
	}

	IntToUlongMap *result_attno_to_colid_mapping =
		GPOS_NEW(mp) IntToUlongMap(mp);
	IntUlongHashmapIter mi(attno_to_colid_mapping);
	while (mi.Advance())
	{
		INT *key = GPOS_NEW(mp) INT(*(mi.Key()));
		const ULONG *value = mi.Value();
		GPOS_ASSERT(NULL != value);

		// every column id of the input map is expected to occur in the From
		// array; a miss would otherwise be dereferenced as NULL below
		const ULONG *new_colid = old_new_col_mapping->Find(value);
		GPOS_ASSERT(NULL != new_colid &&
					"Column id not found in the From array");

		ULONG *remapped_value = GPOS_NEW(mp) ULONG(*new_colid);
		result_attno_to_colid_mapping->Insert(key, remapped_value);
	}

	// the translation table is only needed while remapping
	old_new_col_mapping->Release();

	return result_attno_to_colid_mapping;
}

//---------------------------------------------------------------------------
//	@function:
4430
//		CTranslatorQueryToDXL::IsDMLQuery
4431 4432 4433 4434 4435 4436
//
//	@doc:
//		True iff this query or one of its ancestors is a DML query
//
//---------------------------------------------------------------------------
BOOL
CTranslatorQueryToDXL::IsDMLQuery()
{
	// the flag recorded for the top query wins without inspecting this query
	if (m_is_top_query_dml)
	{
		return true;
	}

	// otherwise this query counts as DML when it has a result relation
	return 0 != m_query->resultRelation;
}

// EOF