CTranslatorRelcacheToDXL.cpp 97.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
//---------------------------------------------------------------------------
//	Greenplum Database
//	Copyright (C) 2011 EMC Corp.
//
//	@filename:
//		CTranslatorRelcacheToDXL.cpp
//
//	@doc:
//		Class translating relcache entries into DXL objects
//
//	@test:
//
//
//---------------------------------------------------------------------------

#include "postgres.h"
#include "utils/array.h"
#include "utils/rel.h"
#include "utils/relcache.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
#include "utils/datum.h"
#include "utils/elog.h"
#include "utils/guc.h"
#include "cdb/cdbhash.h"
#include "access/heapam.h"
#include "catalog/pg_exttable.h"
29
#include "catalog/pg_proc.h"
30 31 32 33 34

#include "cdb/cdbpartition.h"
#include "catalog/namespace.h"
#include "catalog/pg_statistic.h"

35 36 37 38 39 40
#include "naucrates/md/CMDIdCast.h"
#include "naucrates/md/CMDIdScCmp.h"

#include "naucrates/dxl/gpdb_types.h"

#include "naucrates/md/CMDCastGPDB.h"
41
#include "naucrates/md/CMDArrayCoerceCastGPDB.h"
42 43 44 45 46 47 48
#include "naucrates/md/CMDScCmpGPDB.h"

#include "gpopt/translate/CTranslatorUtils.h"
#include "gpopt/translate/CTranslatorRelcacheToDXL.h"
#include "gpopt/translate/CTranslatorScalarToDXL.h"
#include "gpopt/mdcache/CMDAccessor.h"

49 50 51
#include "gpos/base.h"
#include "gpos/error/CException.h"

52
#include "naucrates/exception.h"
53

54 55
#include "naucrates/dxl/CDXLUtils.h"
#include "naucrates/dxl/xml/dxltokens.h"
56

57 58 59 60 61 62 63 64 65 66 67 68
#include "naucrates/md/CMDTypeBoolGPDB.h"
#include "naucrates/md/CMDTypeGenericGPDB.h"
#include "naucrates/md/CMDTypeInt2GPDB.h"
#include "naucrates/md/CMDTypeInt4GPDB.h"
#include "naucrates/md/CMDTypeInt8GPDB.h"
#include "naucrates/md/CMDTypeOidGPDB.h"
#include "naucrates/md/CMDIndexGPDB.h"
#include "naucrates/md/CMDPartConstraintGPDB.h"
#include "naucrates/md/CMDIdRelStats.h"
#include "naucrates/md/CDXLRelStats.h"
#include "naucrates/md/CMDIdColStats.h"
#include "naucrates/md/CDXLColStats.h"
69 70 71 72 73 74 75 76 77

#include "gpopt/base/CUtils.h"

#include "gpopt/gpdbwrappers.h"

using namespace gpdxl;
using namespace gpopt;


J
Jesse Zhang 已提交
78 79 80 81
// Each row pairs an IMDType comparison-type constant with the matching
// Cmpt* constant.
static const ULONG cmp_type_mappings[][2] = {
	{IMDType::EcmptEq, CmptEq},
	{IMDType::EcmptNEq, CmptNEq},
	{IMDType::EcmptL, CmptLT},
	{IMDType::EcmptG, CmptGT},
	{IMDType::EcmptGEq, CmptGEq},
	{IMDType::EcmptLEq, CmptLEq},
};
82 83 84

//---------------------------------------------------------------------------
//	@function:
85
//		CTranslatorRelcacheToDXL::RetrieveObject
86 87 88 89 90 91
//
//	@doc:
//		Retrieve a metadata object from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
IMDCacheObject *
CTranslatorRelcacheToDXL::RetrieveObject(CMemoryPool *mp,
										 CMDAccessor *md_accessor, IMDId *mdid)
{
	GPOS_ASSERT(NULL != md_accessor);

#ifdef FAULT_INJECTOR
	gpdb::InjectFaultInOptTasks(OptRelcacheTranslatorCatalogAccess);
#endif	// FAULT_INJECTOR

	// dispatch on the kind of metadata id; an unrecognized kind leaves the
	// result NULL, which is reported below as a missing cache entry
	IMDCacheObject *cache_obj = NULL;
	switch (mdid->MdidType())
	{
		case IMDId::EmdidGPDB:
			cache_obj = RetrieveObjectGPDB(mp, md_accessor, mdid);
			break;

		case IMDId::EmdidRelStats:
			cache_obj = RetrieveRelStats(mp, mdid);
			break;

		case IMDId::EmdidColStats:
			cache_obj = RetrieveColStats(mp, md_accessor, mdid);
			break;

		case IMDId::EmdidCastFunc:
			cache_obj = RetrieveCast(mp, mdid);
			break;

		case IMDId::EmdidScCmp:
			cache_obj = RetrieveScCmp(mp, mdid);
			break;

		default:
			break;
	}

	if (NULL == cache_obj)
	{
		// nothing could be retrieved for this id
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
	}

	return cache_obj;
}

//---------------------------------------------------------------------------
//	@function:
140
//		CTranslatorRelcacheToDXL::RetrieveObjectGPDB
141 142 143 144 145 146
//
//	@doc:
//		Retrieve a GPDB metadata object from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
IMDCacheObject *
CTranslatorRelcacheToDXL::RetrieveObjectGPDB(CMemoryPool *mp,
											 CMDAccessor *md_accessor,
											 IMDId *mdid)
{
	GPOS_ASSERT(mdid->MdidType() == CMDIdGPDB::EmdidGPDB);

	OID oid = CMDIdGPDB::CastMdid(mdid)->Oid();
	GPOS_ASSERT(0 != oid);

	// Probe the catalog, in this fixed order, to find out which kind of
	// object the oid denotes; the first kind that matches is translated.
	// The result stays NULL when no kind matches.
	IMDCacheObject *found = NULL;

	if (gpdb::IndexExists(oid))
	{
		found = RetrieveIndex(mp, md_accessor, mdid);
	}
	else if (gpdb::TypeExists(oid))
	{
		found = RetrieveType(mp, mdid);
	}
	else if (gpdb::RelationExists(oid))
	{
		found = RetrieveRel(mp, md_accessor, mdid);
	}
	else if (gpdb::OperatorExists(oid))
	{
		found = RetrieveScOp(mp, mdid);
	}
	else if (gpdb::AggregateExists(oid))
	{
		found = RetrieveAgg(mp, mdid);
	}
	else if (gpdb::FunctionExists(oid))
	{
		found = RetrieveFunc(mp, mdid);
	}
	else if (gpdb::TriggerExists(oid))
	{
		found = RetrieveTrigger(mp, mdid);
	}
	else if (gpdb::CheckConstraintExists(oid))
	{
		found = RetrieveCheckConstraints(mp, md_accessor, mdid);
	}

	return found;
}

//---------------------------------------------------------------------------
//	@function:
205
//		CTranslatorRelcacheToDXL::GetRelName
206 207 208 209 210 211
//
//	@doc:
//		Return a relation name
//
//---------------------------------------------------------------------------
CMDName *
CTranslatorRelcacheToDXL::GetRelName(CMemoryPool *mp, Relation rel)
{
	GPOS_ASSERT(NULL != rel);

	// build a temporary wide string from the relcache name; it is deleted
	// again as soon as the CMDName has been constructed from it
	CWStringDynamic *wide_name = CDXLUtils::CreateDynamicStringFromCharArray(
		mp, NameStr(rel->rd_rel->relname));
	CMDName *result = GPOS_NEW(mp) CMDName(mp, wide_name);
	GPOS_DELETE(wide_name);

	return result;
}

//---------------------------------------------------------------------------
//	@function:
225
//		CTranslatorRelcacheToDXL::RetrieveRelIndexInfo
226 227 228 229 230
//
//	@doc:
//		Return the indexes defined on the given relation
//
//---------------------------------------------------------------------------
231
CMDIndexInfoArray *
CTranslatorRelcacheToDXL::RetrieveRelIndexInfo(CMemoryPool *mp, Relation rel)
{
	GPOS_ASSERT(NULL != rel);

	// non-partitioned table or leaf partition
	if (gpdb::RelPartIsNone(rel->rd_id) || gpdb::IsLeafPartition(rel->rd_id))
	{
		return RetrieveRelIndexInfoForNonPartTable(mp, rel);
	}

	// root of a partitioned table
	if (gpdb::RelPartIsRoot(rel->rd_id))
	{
		return RetrieveRelIndexInfoForPartTable(mp, rel);
	}

	// interior partition: indexes are not considered, return an empty array
	return GPOS_NEW(mp) CMDIndexInfoArray(mp);
}

// return index info list of indexes defined on a partitioned table
254
CMDIndexInfoArray *
CTranslatorRelcacheToDXL::RetrieveRelIndexInfoForPartTable(CMemoryPool *mp,
														   Relation root_rel)
{
	CMDIndexInfoArray *result = GPOS_NEW(mp) CMDIndexInfoArray(mp);

	// logical indexes gathered for the root of the partitioned table
	List *logical_index_list = RetrievePartTableIndexInfo(root_rel);

	ListCell *cell = NULL;
	ForEach(cell, logical_index_list)
	{
		LogicalIndexInfo *info = (LogicalIndexInfo *) lfirst(cell);
		OID index_oid = info->logicalIndexOid;

		Relation index_rel = gpdb::GetRelation(index_oid);
		if (NULL == index_rel)
		{
			// report the oid of the index that could not be opened
			WCHAR oid_buf[1024];
			CWStringStatic oid_str(oid_buf, 1024);
			COstreamString oid_stream(&oid_str);
			oid_stream << (ULONG) index_oid;
			GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
					   oid_str.GetBuffer());
		}

		GPOS_ASSERT(NULL != index_rel->rd_indextuple);

		// the index relation is closed on both the normal and the error path
		GPOS_TRY
		{
			// unsupported indexes are skipped
			if (IsIndexSupported(index_rel))
			{
				// the index counts as partial when it carries a partition
				// constraint or default levels
				BOOL is_partial = (NULL != info->partCons) ||
								  (NIL != info->defaultLevels);
				CMDIdGPDB *mdid_index = GPOS_NEW(mp) CMDIdGPDB(index_oid);
				result->Append(GPOS_NEW(mp)
								   CMDIndexInfo(mdid_index, is_partial));
			}

			gpdb::CloseRelation(index_rel);
		}
		GPOS_CATCH_EX(ex)
		{
			gpdb::CloseRelation(index_rel);
			GPOS_RETHROW(ex);
		}
		GPOS_CATCH_END;
	}

	return result;
}

// return index info list of indexes defined on regular, external tables or leaf partitions
310
CMDIndexInfoArray *
CTranslatorRelcacheToDXL::RetrieveRelIndexInfoForNonPartTable(CMemoryPool *mp,
															  Relation rel)
{
	CMDIndexInfoArray *result = GPOS_NEW(mp) CMDIndexInfoArray(mp);

	// no partitioning involved: the index oids come straight from the relation
	List *index_oid_list = gpdb::GetRelationIndexes(rel);

	ListCell *cell = NULL;
	ForEach(cell, index_oid_list)
	{
		OID index_oid = lfirst_oid(cell);

		Relation index_rel = gpdb::GetRelation(index_oid);
		if (NULL == index_rel)
		{
			// report the oid of the index that could not be opened
			WCHAR oid_buf[1024];
			CWStringStatic oid_str(oid_buf, 1024);
			COstreamString oid_stream(&oid_str);
			oid_stream << (ULONG) index_oid;
			GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
					   oid_str.GetBuffer());
		}

		GPOS_ASSERT(NULL != index_rel->rd_indextuple);

		// the index relation is closed on both the normal and the error path
		GPOS_TRY
		{
			// unsupported indexes are skipped
			if (IsIndexSupported(index_rel))
			{
				// indexes on a regular table, external table or leaf
				// partition are never recorded as partial
				CMDIdGPDB *mdid_index = GPOS_NEW(mp) CMDIdGPDB(index_oid);
				result->Append(GPOS_NEW(mp) CMDIndexInfo(
					mdid_index, false /* is_partial */));
			}

			gpdb::CloseRelation(index_rel);
		}
		GPOS_CATCH_EX(ex)
		{
			gpdb::CloseRelation(index_rel);
			GPOS_RETHROW(ex);
		}
		GPOS_CATCH_END;
	}

	return result;
}

//---------------------------------------------------------------------------
//	@function:
366
//		CTranslatorRelcacheToDXL::RetrievePartTableIndexInfo
367 368
//
//	@doc:
369
//		Return the list of logical index infos defined on a partitioned table
370 371 372
//
//---------------------------------------------------------------------------
List *
CTranslatorRelcacheToDXL::RetrievePartTableIndexInfo(Relation rel)
{
	LogicalIndexes *part_indexes = gpdb::GetLogicalPartIndexes(rel->rd_id);
	if (NULL == part_indexes)
	{
		// nothing to report
		return NIL;
	}
	GPOS_ASSERT(0 <= part_indexes->numLogicalIndexes);

	// move every entry of the array into a list
	List *result = NIL;
	const ULONG count = (ULONG) part_indexes->numLogicalIndexes;
	for (ULONG idx = 0; idx < count; ++idx)
	{
		LogicalIndexInfo *entry = (part_indexes->logicalIndexInfo)[idx];
		result = gpdb::LAppend(result, entry);
	}

	// only the container is freed here; its entries are now held by the list
	gpdb::GPDBFree(part_indexes);

	return result;
}

//---------------------------------------------------------------------------
//	@function:
400
//		CTranslatorRelcacheToDXL::RetrieveRelTriggers
401 402 403 404 405
//
//	@doc:
//		Return the triggers defined on the given relation
//
//---------------------------------------------------------------------------
406
IMdIdArray *
CTranslatorRelcacheToDXL::RetrieveRelTriggers(CMemoryPool *mp, Relation rel)
{
	GPOS_ASSERT(NULL != rel);

	// the relation reports triggers but has no trigger descriptor yet:
	// build it, and reset the count if none could be built
	const BOOL needs_trigger_build =
		(0 < rel->rd_rel->reltriggers) && (NULL == rel->trigdesc);
	if (needs_trigger_build)
	{
		gpdb::BuildRelationTriggers(rel);
		if (NULL == rel->trigdesc)
		{
			rel->rd_rel->reltriggers = 0;
		}
	}

	IMdIdArray *trigger_mdids = GPOS_NEW(mp) IMdIdArray(mp);

	// one metadata id per trigger, in descriptor order
	const ULONG num_triggers = rel->rd_rel->reltriggers;
	for (ULONG idx = 0; idx < num_triggers; ++idx)
	{
		OID trigger_oid = rel->trigdesc->triggers[idx].tgoid;
		trigger_mdids->Append(GPOS_NEW(mp) CMDIdGPDB(trigger_oid));
	}

	return trigger_mdids;
}

//---------------------------------------------------------------------------
//	@function:
435
//		CTranslatorRelcacheToDXL::RetrieveRelCheckConstraints
436 437 438 439 440
//
//	@doc:
//		Return the check constraints defined on the relation with the given oid
//
//---------------------------------------------------------------------------
441
IMdIdArray *
CTranslatorRelcacheToDXL::RetrieveRelCheckConstraints(CMemoryPool *mp, OID oid)
{
	IMdIdArray *result = GPOS_NEW(mp) IMdIdArray(mp);

	// wrap every check constraint oid of the relation in a metadata id
	List *constraint_oids = gpdb::GetCheckConstraintOids(oid);
	ListCell *cell = NULL;
	ForEach(cell, constraint_oids)
	{
		OID constraint_oid = lfirst_oid(cell);
		GPOS_ASSERT(0 != constraint_oid);
		result->Append(GPOS_NEW(mp) CMDIdGPDB(constraint_oid));
	}

	return result;
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorRelcacheToDXL::CheckUnsupportedRelation
//
//	@doc:
//		Check and fall back to planner for unsupported relations
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
469
CTranslatorRelcacheToDXL::CheckUnsupportedRelation(OID rel_oid)
470
{
471
	if (gpdb::RelPartIsInterior(rel_oid))
472
	{
J
Jesse Zhang 已提交
473 474
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDObjUnsupported,
				   GPOS_WSZ_LIT("Query on intermediate partition"));
475 476
	}

477 478
	List *part_keys = gpdb::GetPartitionAttrs(rel_oid);
	ULONG num_of_levels = gpdb::ListLength(part_keys);
479

480
	if (0 == num_of_levels && gpdb::FHasSubclass(rel_oid))
481
	{
J
Jesse Zhang 已提交
482 483
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDObjUnsupported,
				   GPOS_WSZ_LIT("Inherited tables"));
484 485
	}

486
	if (1 < num_of_levels)
487 488 489
	{
		if (!optimizer_multilevel_partitioning)
		{
J
Jesse Zhang 已提交
490 491
			GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDObjUnsupported,
					   GPOS_WSZ_LIT("Multi-level partitioned tables"));
492 493
		}

494
		if (!gpdb::IsMultilevelPartitionUniform(rel_oid))
495
		{
J
Jesse Zhang 已提交
496 497 498 499
			GPOS_RAISE(
				gpdxl::ExmaMD, gpdxl::ExmiMDObjUnsupported,
				GPOS_WSZ_LIT(
					"Multi-level partitioned tables with non-uniform partitioning structure"));
500 501 502 503 504 505
		}
	}
}

//---------------------------------------------------------------------------
//	@function:
506
//		CTranslatorRelcacheToDXL::RetrieveRel
507 508 509 510 511 512
//
//	@doc:
//		Retrieve a relation from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
IMDRelation *
CTranslatorRelcacheToDXL::RetrieveRel(CMemoryPool *mp, CMDAccessor *md_accessor,
									  IMDId *mdid)
{
	OID oid = CMDIdGPDB::CastMdid(mdid)->Oid();
	GPOS_ASSERT(InvalidOid != oid);

	// raises for relations that are not supported
	CheckUnsupportedRelation(oid);

	Relation rel = gpdb::GetRelation(oid);
	if (NULL == rel)
	{
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
	}

	// Everything gathered while the relation is open. Declared up front so
	// the values are still available after the try block has closed it.
	CMDName *mdname = NULL;
	IMDRelation::Erelstoragetype rel_storage_type =
		IMDRelation::ErelstorageSentinel;
	CMDColumnArray *mdcol_array = NULL;
	IMDRelation::Ereldistrpolicy dist = IMDRelation::EreldistrSentinel;
	ULongPtrArray *distr_cols = NULL;
	CMDIndexInfoArray *md_index_info_array = NULL;
	IMdIdArray *mdid_triggers_array = NULL;
	ULongPtrArray *part_keys = NULL;
	CharPtrArray *part_types = NULL;
	ULONG num_leaf_partitions = 0;
	BOOL convert_hash_to_random = false;
	ULongPtr2dArray *keyset_array = NULL;
	IMdIdArray *check_constraint_mdids = NULL;
	BOOL is_temporary = false;
	BOOL has_oids = false;
	BOOL is_partitioned = false;
	IMDRelation *md_rel = NULL;

	GPOS_TRY
	{
		// name and storage type
		mdname = GetRelName(mp, rel);
		rel_storage_type = RetrieveRelStorageType(rel->rd_rel->relstorage);

		// columns, plus the attno -> column position mapping used for keysets
		mdcol_array =
			RetrieveRelColumns(mp, md_accessor, rel, rel_storage_type);
		const ULONG max_cols =
			GPDXL_SYSTEM_COLUMNS + (ULONG) rel->rd_att->natts + 1;
		ULONG *attno_mapping = ConstructAttnoMapping(mp, mdcol_array, max_cols);

		// distribution policy; distribution columns only exist for hash
		GpPolicy *gp_policy = gpdb::GetDistributionPolicy(rel);
		dist = GetRelDistribution(gp_policy);
		if (IMDRelation::EreldistrHash == dist)
		{
			distr_cols = RetrieveRelDistrbutionCols(mp, gp_policy, mdcol_array,
													max_cols);
		}

		convert_hash_to_random = gpdb::IsChildPartDistributionMismatched(rel);

		// indexes and triggers
		md_index_info_array = RetrieveRelIndexInfo(mp, rel);
		mdid_triggers_array = RetrieveRelTriggers(mp, rel);

		// partition keys are not looked up for external tables
		if (IMDRelation::ErelstorageExternal != rel_storage_type)
		{
			RetrievePartKeysAndTypes(mp, rel, oid, &part_keys, &part_types);
		}
		is_partitioned = (NULL != part_keys && 0 < part_keys->Size());

		// a partitioned relation is marked as Parquet if one of its children
		// is Parquet
		if (is_partitioned &&
			IMDRelation::ErelstorageAppendOnlyParquet != rel_storage_type &&
			IMDRelation::ErelstorageExternal != rel_storage_type &&
			gpdb::HasParquetChildren(oid))
		{
			rel_storage_type = IMDRelation::ErelstorageAppendOnlyParquet;
		}

		// leaf partitions are only counted for a partitioning root
		if (gpdb::RelPartIsRoot(oid))
		{
			num_leaf_partitions = gpdb::CountLeafPartTables(oid);
		}

		// key sets
		BOOL add_default_keys = RelHasSystemColumns(rel->rd_rel->relkind);
		keyset_array = RetrieveRelKeysets(mp, oid, add_default_keys,
										  is_partitioned, attno_mapping);

		// check constraints
		check_constraint_mdids = RetrieveRelCheckConstraints(mp, oid);

		is_temporary = rel->rd_istemp;
		has_oids = rel->rd_rel->relhasoids;

		GPOS_DELETE_ARRAY(attno_mapping);
		gpdb::CloseRelation(rel);
	}
	GPOS_CATCH_EX(ex)
	{
		// close the relation before propagating the error
		gpdb::CloseRelation(rel);
		GPOS_RETHROW(ex);
	}
	GPOS_CATCH_END;

	GPOS_ASSERT(IMDRelation::ErelstorageSentinel != rel_storage_type);
	GPOS_ASSERT(IMDRelation::EreldistrSentinel != dist);

	// the metadata id is handed to the relation object created below
	mdid->AddRef();

	if (IMDRelation::ErelstorageExternal == rel_storage_type)
	{
		ExtTableEntry *ext_entry = gpdb::GetExternalTableEntry(oid);

		// format error table id, if the external table has one
		IMDId *mdid_fmt_err_table = NULL;
		if (InvalidOid != ext_entry->fmterrtbl)
		{
			mdid_fmt_err_table = GPOS_NEW(mp) CMDIdGPDB(ext_entry->fmterrtbl);
		}

		md_rel = GPOS_NEW(mp) CMDRelationExternalGPDB(
			mp, mdid, mdname, dist, mdcol_array, distr_cols,
			convert_hash_to_random, keyset_array, md_index_info_array,
			mdid_triggers_array, check_constraint_mdids, ext_entry->rejectlimit,
			('r' == ext_entry->rejectlimittype), mdid_fmt_err_table);
	}
	else
	{
		// part constraints are only retrieved for partitioned relations
		CMDPartConstraintGPDB *mdpart_constraint = NULL;
		if (is_partitioned)
		{
			mdpart_constraint = RetrievePartConstraintForRel(
				mp, md_accessor, oid, mdcol_array,
				md_index_info_array->Size() > 0 /*has_index*/);
		}

		md_rel = GPOS_NEW(mp) CMDRelationGPDB(
			mp, mdid, mdname, is_temporary, rel_storage_type, dist, mdcol_array,
			distr_cols, part_keys, part_types, num_leaf_partitions,
			convert_hash_to_random, keyset_array, md_index_info_array,
			mdid_triggers_array, check_constraint_mdids, mdpart_constraint,
			has_oids);
	}

	return md_rel;
}

//---------------------------------------------------------------------------
//	@function:
674
//		CTranslatorRelcacheToDXL::RetrieveRelColumns
675 676 677 678 679
//
//	@doc:
//		Get relation columns
//
//---------------------------------------------------------------------------
680
CMDColumnArray *
CTranslatorRelcacheToDXL::RetrieveRelColumns(
	CMemoryPool *mp, CMDAccessor *md_accessor, Relation rel,
	IMDRelation::Erelstoragetype rel_storage_type)
{
	CMDColumnArray *columns = GPOS_NEW(mp) CMDColumnArray(mp);

	const ULONG num_atts = (ULONG) rel->rd_att->natts;
	for (ULONG pos = 0; pos < num_atts; ++pos)
	{
		Form_pg_attribute att = rel->rd_att->attrs[pos];
		CMDName *col_name =
			CDXLUtils::CreateMDNameFromCharArray(mp, NameStr(att->attname));

		// a default value is only translated for columns that are not dropped
		CDXLNode *default_value = NULL;
		if (!att->attisdropped)
		{
			default_value = GetDefaultColumnValue(mp, md_accessor, rel->rd_att,
												  att->attnum);
		}

		ULONG width = gpos::ulong_max;
		CMDIdGPDB *mdid_type = GPOS_NEW(mp) CMDIdGPDB(att->atttypid);
		HeapTuple stats_tuple = gpdb::GetAttStats(rel->rd_id, pos + 1);

		// The column width is chosen from the first source that applies:
		//   1. the width stored in the column's statistics tuple;
		//   2. for bpchar/varchar with a type modifier above VARHDRSZ, the
		//      modifier minus VARHDRSZ (e.g. 10 for varchar(10));
		//   3. for a non-dropped column of fixed-length type, the type length;
		//   4. otherwise CStatistics::DefaultColumnWidth.
		if (HeapTupleIsValid(stats_tuple))
		{
			Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(stats_tuple);
			width = stats->stawidth;
			gpdb::FreeHeapTuple(stats_tuple);
		}
		else if ((mdid_type->Equals(&CMDIdGPDB::m_mdid_bpchar) ||
				  mdid_type->Equals(&CMDIdGPDB::m_mdid_varchar)) &&
				 (VARHDRSZ < att->atttypmod))
		{
			width = (ULONG) att->atttypmod - VARHDRSZ;
		}
		else
		{
			DOUBLE default_width = CStatistics::DefaultColumnWidth.Get();
			width = (ULONG) default_width;

			if (!att->attisdropped)
			{
				IMDType *col_type =
					CTranslatorRelcacheToDXL::RetrieveType(mp, mdid_type);
				if (col_type->IsFixedLength())
				{
					width = col_type->Length();
				}
				col_type->Release();
			}
		}

		columns->Append(GPOS_NEW(mp) CMDColumn(
			col_name, att->attnum, mdid_type, att->atttypmod,
			!att->attnotnull, att->attisdropped,
			default_value /* default value */, width));
	}

	// system columns are appended after the user columns
	if (RelHasSystemColumns(rel->rd_rel->relkind))
	{
		BOOL is_ao_table =
			IMDRelation::ErelstorageAppendOnlyRows == rel_storage_type ||
			IMDRelation::ErelstorageAppendOnlyCols == rel_storage_type;
		AddSystemColumns(mp, columns, rel, is_ao_table);
	}

	return columns;
}

//---------------------------------------------------------------------------
//	@function:
767
//		CTranslatorRelcacheToDXL::GetDefaultColumnValue
768 769 770 771 772 773
//
//	@doc:
//		Return the dxl representation of column's default value
//
//---------------------------------------------------------------------------
CDXLNode *
CTranslatorRelcacheToDXL::GetDefaultColumnValue(CMemoryPool *mp,
												CMDAccessor *md_accessor,
												TupleDesc rd_att,
												AttrNumber attno)
{
	GPOS_ASSERT(attno > 0);

	Node *default_expr = NULL;

	// look for a default recorded for this column on the relation
	if (NULL != rd_att->constr && 0 < rd_att->constr->num_defval)
	{
		AttrDefault *defaults = rd_att->constr->defval;
		GPOS_ASSERT(NULL != defaults);

		const ULONG num_defaults = (ULONG) rd_att->constr->num_defval;
		for (ULONG idx = 0; idx < num_defaults; ++idx)
		{
			if (attno == defaults[idx].adnum)
			{
				// turn the stored string representation into a node tree
				default_expr = gpdb::StringToNode(defaults[idx].adbin);
				break;
			}
		}
	}

	if (NULL == default_expr)
	{
		// no column default: fall back to the default of the column's type
		default_expr = gpdb::GetTypeDefault(rd_att->attrs[attno - 1]->atttypid);
		if (NULL == default_expr)
		{
			return NULL;
		}
	}

	// translate the default value expression into DXL
	CTranslatorScalarToDXL scalar_translator(mp, md_accessor,
											 NULL, /* pulidgtorCol */
											 NULL, /* pulidgtorCTE */
											 0,	   /* query_level */
											 true, /* m_fQuery */
											 NULL, /* query_level_to_cte_map */
											 NULL  /* cte_dxlnode_array */
	);

	// no var_colid_mapping is passed: subqueries and external variables are
	// not supported in default expressions
	return scalar_translator.TranslateScalarToDXL((Expr *) default_expr,
												  NULL /* var_colid_mapping */
	);
}

//---------------------------------------------------------------------------
//	@function:
831
//		CTranslatorRelcacheToDXL::GetRelDistribution
832 833 834 835 836 837
//
//	@doc:
//		Return the distribution policy of the relation
//
//---------------------------------------------------------------------------
IMDRelation::Ereldistrpolicy
CTranslatorRelcacheToDXL::GetRelDistribution(GpPolicy *gp_policy)
{
	// a missing policy and an entry policy both map to master-only
	if (NULL == gp_policy || POLICYTYPE_ENTRY == gp_policy->ptype)
	{
		return IMDRelation::EreldistrMasterOnly;
	}

	// a partitioned policy without distribution attributes is random,
	// otherwise it is hash distributed
	if (POLICYTYPE_PARTITIONED == gp_policy->ptype)
	{
		return (0 == gp_policy->nattrs) ? IMDRelation::EreldistrRandom
										: IMDRelation::EreldistrHash;
	}

	GPOS_ASSERT(!"Unrecognized distribution policy");
	return IMDRelation::EreldistrSentinel;
}

//---------------------------------------------------------------------------
//	@function:
866
//		CTranslatorRelcacheToDXL::RetrieveRelDistrbutionCols
867 868 869 870 871
//
//	@doc:
//		Get distribution columns
//
//---------------------------------------------------------------------------
872
// Translate the distribution attributes of a policy into column positions.
//
//   mp           memory pool used for the temporary mapping and the result
//   gp_policy    distribution policy; its attrs[0 .. nattrs) are translated
//   mdcol_array  columns of the relation, in metadata order
//   size         number of slots in the temporary attno -> position mapping;
//                every column must satisfy
//                GPDXL_SYSTEM_COLUMNS + AttrNum() < size
//
// Returns a newly allocated array holding, for each distribution attribute,
// the value produced by GetAttributePosition() for that attribute.
ULongPtrArray *
CTranslatorRelcacheToDXL::RetrieveRelDistrbutionCols(
	CMemoryPool *mp, GpPolicy *gp_policy, CMDColumnArray *mdcol_array,
	ULONG size)
{
	GPOS_ASSERT(NULL != gp_policy);
	GPOS_ASSERT(NULL != mdcol_array);

	ULONG *attno_mapping = GPOS_NEW_ARRAY(mp, ULONG, size);

	// Start with every slot marked as unmapped. Only slots that correspond to
	// an existing column are filled in below, so without this a lookup of any
	// other attribute number would read uninitialized memory.
	for (ULONG ul = 0; ul < size; ul++)
	{
		attno_mapping[ul] = gpos::ulong_max;
	}

	// record the position of every column under its shifted attribute number
	const ULONG num_cols = mdcol_array->Size();
	for (ULONG ul = 0; ul < num_cols; ul++)
	{
		const IMDColumn *md_col = (*mdcol_array)[ul];
		INT attno = md_col->AttrNum();

		// shifting by GPDXL_SYSTEM_COLUMNS keeps the index non-negative for
		// system columns, whose attribute numbers are negative
		ULONG idx = (ULONG)(GPDXL_SYSTEM_COLUMNS + attno);
		GPOS_ASSERT(idx < size);
		attno_mapping[idx] = ul;
	}

	ULongPtrArray *distr_cols = GPOS_NEW(mp) ULongPtrArray(mp);

	for (ULONG ul = 0; ul < (ULONG) gp_policy->nattrs; ul++)
	{
		AttrNumber attno = gp_policy->attrs[ul];
		distr_cols->Append(
			GPOS_NEW(mp) ULONG(GetAttributePosition(attno, attno_mapping)));
	}

	// the mapping is only needed while translating
	GPOS_DELETE_ARRAY(attno_mapping);
	return distr_cols;
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorRelcacheToDXL::AddSystemColumns
//
//	@doc:
//		Adding system columns (oid, tid, xmin, etc) in table descriptors
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
910 911 912
CTranslatorRelcacheToDXL::AddSystemColumns(CMemoryPool *mp,
										   CMDColumnArray *mdcol_array,
										   Relation rel, BOOL is_ao_table)
913
{
914 915
	BOOL has_oids = rel->rd_att->tdhasoid;
	is_ao_table = is_ao_table || gpdb::IsAppendOnlyPartitionTable(rel->rd_id);
916

J
Jesse Zhang 已提交
917 918
	for (INT i = SelfItemPointerAttributeNumber;
		 i > FirstLowInvalidHeapAttributeNumber; i--)
919 920 921 922
	{
		AttrNumber attno = AttrNumber(i);
		GPOS_ASSERT(0 != attno);

923
		if (ObjectIdAttributeNumber == i && !has_oids)
924 925 926
		{
			continue;
		}
927

928
		if (IsTransactionVisibilityAttribute(i) && is_ao_table)
929 930 931 932
		{
			// skip transaction attrbutes like xmin, xmax, cmin, cmax for AO tables
			continue;
		}
933

934
		// get system name for that attribute
J
Jesse Zhang 已提交
935 936
		const CWStringConst *sys_colname =
			CTranslatorUtils::GetSystemColName(attno);
937
		GPOS_ASSERT(NULL != sys_colname);
938 939

		// copy string into column name
940
		CMDName *md_colname = GPOS_NEW(mp) CMDName(mp, sys_colname);
941

J
Jesse Zhang 已提交
942 943 944 945 946 947 948
		CMDColumn *md_col = GPOS_NEW(mp) CMDColumn(
			md_colname, attno, CTranslatorUtils::GetSystemColType(mp, attno),
			default_type_modifier,
			false,	// is_nullable
			false,	// is_dropped
			NULL,	// default value
			CTranslatorUtils::GetSystemColLength(attno));
949

950
		mdcol_array->Append(md_col);
951 952 953 954 955
	}
}

//---------------------------------------------------------------------------
//	@function:
956
//		CTranslatorRelcacheToDXL::IsTransactionVisibilityAttribute
957 958
//
//	@doc:
J
Jesse Zhang 已提交
959
//		Check if attribute number is one of the system attributes related to
960 961 962 963
//		transaction visibility such as xmin, xmax, cmin, cmax
//
//---------------------------------------------------------------------------
BOOL
CTranslatorRelcacheToDXL::IsTransactionVisibilityAttribute(INT attno)
{
	// true only for the xmin, xmax, cmin and cmax attribute numbers
	switch (attno)
	{
		case MinTransactionIdAttributeNumber:
		case MaxTransactionIdAttributeNumber:
		case MinCommandIdAttributeNumber:
		case MaxCommandIdAttributeNumber:
			return true;

		default:
			return false;
	}
}

//---------------------------------------------------------------------------
//	@function:
974
//		CTranslatorRelcacheToDXL::RetrieveIndex
975 976 977 978 979 980
//
//	@doc:
//		Retrieve an index from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
IMDIndex *
J
Jesse Zhang 已提交
981 982 983
CTranslatorRelcacheToDXL::RetrieveIndex(CMemoryPool *mp,
										CMDAccessor *md_accessor,
										IMDId *mdid_index)
984
{
985 986 987
	OID index_oid = CMDIdGPDB::CastMdid(mdid_index)->Oid();
	GPOS_ASSERT(0 != index_oid);
	Relation index_rel = gpdb::GetRelation(index_oid);
988

989
	if (NULL == index_rel)
990
	{
J
Jesse Zhang 已提交
991 992
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid_index->GetBuffer());
993 994
	}

995 996 997 998 999
	const IMDRelation *md_rel = NULL;
	Form_pg_index form_pg_index = NULL;
	CMDName *mdname = NULL;
	IMDIndex::EmdindexType index_type = IMDIndex::EmdindSentinel;
	IMDId *mdid_item_type = NULL;
1000 1001 1002
	bool index_clustered = false;
	ULongPtrArray *index_key_cols_array = NULL;
	ULONG *attno_mapping = NULL;
1003 1004 1005

	GPOS_TRY
	{
1006
		if (!IsIndexSupported(index_rel))
1007
		{
J
Jesse Zhang 已提交
1008 1009
			GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDObjUnsupported,
					   GPOS_WSZ_LIT("Index type"));
1010 1011
		}

1012
		form_pg_index = index_rel->rd_index;
J
Jesse Zhang 已提交
1013
		GPOS_ASSERT(NULL != form_pg_index);
1014
		index_clustered = form_pg_index->indisclustered;
1015

1016
		OID rel_oid = form_pg_index->indrelid;
1017

1018
		if (gpdb::IsLeafPartition(rel_oid))
1019
		{
1020
			rel_oid = gpdb::GetRootPartition(rel_oid);
1021 1022
		}

1023
		CMDIdGPDB *mdid_rel = GPOS_NEW(mp) CMDIdGPDB(rel_oid);
1024

1025
		md_rel = md_accessor->RetrieveRel(mdid_rel);
J
Jesse Zhang 已提交
1026

1027
		if (md_rel->IsPartitioned())
1028
		{
J
Jesse Zhang 已提交
1029 1030
			LogicalIndexes *logical_indexes =
				gpdb::GetLogicalPartIndexes(rel_oid);
1031
			GPOS_ASSERT(NULL != logical_indexes);
1032

J
Jesse Zhang 已提交
1033 1034
			IMDIndex *index = RetrievePartTableIndex(
				mp, md_accessor, mdid_index, md_rel, logical_indexes);
1035 1036

			// cleanup
1037
			gpdb::GPDBFree(logical_indexes);
1038

1039
			if (NULL != index)
1040
			{
1041 1042 1043
				mdid_rel->Release();
				gpdb::CloseRelation(index_rel);
				return index;
1044
			}
1045
		}
J
Jesse Zhang 已提交
1046

1047
		index_type = IMDIndex::EmdindBtree;
1048
		mdid_item_type = GPOS_NEW(mp) CMDIdGPDB(GPDB_ANY);
1049
		if (GIST_AM_OID == index_rel->rd_rel->relam)
A
Ashuka Xue 已提交
1050
		{
1051
			index_type = IMDIndex::EmdindGist;
A
Ashuka Xue 已提交
1052
		}
1053
		else if (BITMAP_AM_OID == index_rel->rd_rel->relam)
1054
		{
1055
			index_type = IMDIndex::EmdindBitmap;
1056
		}
A
Ashuka Xue 已提交
1057

1058
		// get the index name
1059
		CHAR *index_name = NameStr(index_rel->rd_rel->relname);
J
Jesse Zhang 已提交
1060 1061
		CWStringDynamic *str_name =
			CDXLUtils::CreateDynamicStringFromCharArray(mp, index_name);
1062 1063
		mdname = GPOS_NEW(mp) CMDName(mp, str_name);
		GPOS_DELETE(str_name);
1064

J
Jesse Zhang 已提交
1065 1066
		Relation table =
			gpdb::GetRelation(CMDIdGPDB::CastMdid(md_rel->MDId())->Oid());
1067
		ULONG size = GPDXL_SYSTEM_COLUMNS + (ULONG) table->rd_att->natts + 1;
J
Jesse Zhang 已提交
1068
		gpdb::CloseRelation(table);	 // close relation as early as possible
1069 1070 1071 1072 1073 1074

		attno_mapping = PopulateAttnoPositionMap(mp, md_rel, size);

		// extract the position of the key columns
		index_key_cols_array = GPOS_NEW(mp) ULongPtrArray(mp);

1075
		for (int i = 0; i < form_pg_index->indnatts; i++)
1076
		{
1077
			INT attno = form_pg_index->indkey.values[i];
1078 1079
			GPOS_ASSERT(0 != attno && "Index expressions not supported");

J
Jesse Zhang 已提交
1080 1081
			index_key_cols_array->Append(
				GPOS_NEW(mp) ULONG(GetAttributePosition(attno, attno_mapping)));
1082
		}
1083 1084
		mdid_rel->Release();
		gpdb::CloseRelation(index_rel);
1085 1086 1087
	}
	GPOS_CATCH_EX(ex)
	{
1088
		gpdb::CloseRelation(index_rel);
1089 1090 1091 1092
		GPOS_RETHROW(ex);
	}
	GPOS_CATCH_END;

1093 1094 1095
	ULongPtrArray *included_cols = ComputeIncludedCols(mp, md_rel);
	mdid_index->AddRef();
	IMdIdArray *op_families_mdids = RetrieveIndexOpFamilies(mp, mdid_index);
1096

J
Jesse Zhang 已提交
1097 1098 1099 1100 1101
	CMDIndexGPDB *index = GPOS_NEW(mp) CMDIndexGPDB(
		mp, mdid_index, mdname, index_clustered, index_type, mdid_item_type,
		index_key_cols_array, included_cols, op_families_mdids,
		NULL  // mdpart_constraint
	);
1102

1103 1104
	GPOS_DELETE_ARRAY(attno_mapping);
	return index;
1105 1106 1107 1108
}

//---------------------------------------------------------------------------
//	@function:
1109
//		CTranslatorRelcacheToDXL::RetrievePartTableIndex
1110 1111
//
//	@doc:
J
Jesse Zhang 已提交
1112
//		Retrieve an index over a partitioned table from the relcache given its
1113 1114 1115 1116
//		mdid
//
//---------------------------------------------------------------------------
IMDIndex *
J
Jesse Zhang 已提交
1117 1118 1119
CTranslatorRelcacheToDXL::RetrievePartTableIndex(
	CMemoryPool *mp, CMDAccessor *md_accessor, IMDId *mdid_index,
	const IMDRelation *md_rel, LogicalIndexes *logical_indexes)
1120
{
1121 1122
	GPOS_ASSERT(NULL != logical_indexes);
	GPOS_ASSERT(0 < logical_indexes->numLogicalIndexes);
J
Jesse Zhang 已提交
1123

1124
	OID oid = CMDIdGPDB::CastMdid(mdid_index)->Oid();
J
Jesse Zhang 已提交
1125

1126 1127
	LogicalIndexInfo *index_info = LookupLogicalIndexById(logical_indexes, oid);
	if (NULL == index_info)
1128
	{
J
Jesse Zhang 已提交
1129
		return NULL;
1130
	}
J
Jesse Zhang 已提交
1131 1132 1133

	return RetrievePartTableIndex(mp, md_accessor, index_info, mdid_index,
								  md_rel);
1134 1135 1136 1137
}

//---------------------------------------------------------------------------
//	@function:
1138
//		CTranslatorRelcacheToDXL::LookupLogicalIndexById
1139 1140 1141 1142 1143 1144
//
//	@doc:
//		Lookup an index given its id from the logical indexes structure
//
//---------------------------------------------------------------------------
LogicalIndexInfo *
J
Jesse Zhang 已提交
1145 1146
CTranslatorRelcacheToDXL::LookupLogicalIndexById(
	LogicalIndexes *logical_indexes, OID oid)
1147
{
J
Jesse Zhang 已提交
1148 1149 1150
	GPOS_ASSERT(NULL != logical_indexes &&
				0 <= logical_indexes->numLogicalIndexes);

1151
	const ULONG num_index = logical_indexes->numLogicalIndexes;
J
Jesse Zhang 已提交
1152

1153
	for (ULONG ul = 0; ul < num_index; ul++)
1154
	{
1155
		LogicalIndexInfo *index_info = (logical_indexes->logicalIndexInfo)[ul];
J
Jesse Zhang 已提交
1156

1157
		if (oid == index_info->logicalIndexOid)
1158
		{
1159
			return index_info;
1160 1161
		}
	}
J
Jesse Zhang 已提交
1162

1163 1164 1165 1166 1167
	return NULL;
}

//---------------------------------------------------------------------------
//	@function:
1168
//		CTranslatorRelcacheToDXL::RetrievePartTableIndex
1169 1170 1171 1172 1173 1174
//
//	@doc:
//		Construct an MD cache index object given its logical index representation
//
//---------------------------------------------------------------------------
IMDIndex *
J
Jesse Zhang 已提交
1175 1176 1177 1178 1179
CTranslatorRelcacheToDXL::RetrievePartTableIndex(CMemoryPool *mp,
												 CMDAccessor *md_accessor,
												 LogicalIndexInfo *index_info,
												 IMDId *mdid_index,
												 const IMDRelation *md_rel)
1180
{
1181
	OID index_oid = index_info->logicalIndexOid;
J
Jesse Zhang 已提交
1182

1183
	Relation index_rel = gpdb::GetRelation(index_oid);
1184

1185
	if (NULL == index_rel)
1186
	{
J
Jesse Zhang 已提交
1187 1188
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid_index->GetBuffer());
1189 1190
	}

1191
	if (!IsIndexSupported(index_rel))
1192
	{
1193
		gpdb::CloseRelation(index_rel);
J
Jesse Zhang 已提交
1194 1195
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDObjUnsupported,
				   GPOS_WSZ_LIT("Index type"));
1196
	}
J
Jesse Zhang 已提交
1197

1198
	// get the index name
1199 1200
	GPOS_ASSERT(NULL != index_rel->rd_index);
	Form_pg_index form_pg_index = index_rel->rd_index;
J
Jesse Zhang 已提交
1201

1202 1203 1204
	CHAR *index_name = NameStr(index_rel->rd_rel->relname);
	CMDName *mdname = CDXLUtils::CreateMDNameFromCharArray(mp, index_name);
	gpdb::CloseRelation(index_rel);
1205

1206 1207 1208 1209
	OID rel_oid = CMDIdGPDB::CastMdid(md_rel->MDId())->Oid();
	Relation table = gpdb::GetRelation(rel_oid);
	ULONG size = GPDXL_SYSTEM_COLUMNS + (ULONG) table->rd_att->natts + 1;
	gpdb::CloseRelation(table);
1210

1211
	ULONG *attno_mapping = PopulateAttnoPositionMap(mp, md_rel, size);
1212

1213
	ULongPtrArray *included_cols = ComputeIncludedCols(mp, md_rel);
1214 1215

	// extract the position of the key columns
1216 1217
	ULongPtrArray *index_key_cols_array = GPOS_NEW(mp) ULongPtrArray(mp);

1218
	for (int i = 0; i < index_info->nColumns; i++)
1219
	{
1220
		INT attno = index_info->indexKeys[i];
1221
		GPOS_ASSERT(0 != attno && "Index expressions not supported");
1222

J
Jesse Zhang 已提交
1223 1224
		index_key_cols_array->Append(
			GPOS_NEW(mp) ULONG(GetAttributePosition(attno, attno_mapping)));
1225
	}
J
Jesse Zhang 已提交
1226

1227
	/*
1228
	 * If an index exists only on a leaf part, part_constraint refers to the expression
1229 1230 1231
	 * identifying the path to reach the partition holding the index. For indexes
	 * available on all parts it is set to NULL.
	 */
1232
	Node *part_constraint = index_info->partCons;
J
Jesse Zhang 已提交
1233

1234 1235 1236 1237 1238 1239
	/*
	 * If an index exists all on the parts including default, the logical index
	 * info created marks defaultLevels as NIL. However, if an index exists only on
	 * leaf parts plDefaultLevel contains the default part level which come across while
	 * reaching to the leaf part from root.
	 */
1240
	List *default_levels = index_info->defaultLevels;
J
Jesse Zhang 已提交
1241

1242
	// get number of partitioning levels
1243 1244 1245
	List *part_keys = gpdb::GetPartitionAttrs(rel_oid);
	const ULONG num_of_levels = gpdb::ListLength(part_keys);
	gpdb::ListFree(part_keys);
1246

1247
	/* get relation constraints
1248
	 * default_levels_rel indicates the levels on which default partitions exists
1249 1250
	 * for the partitioned table
	 */
1251
	List *default_levels_rel = NIL;
J
Jesse Zhang 已提交
1252 1253
	Node *part_constraints_rel =
		gpdb::GetRelationPartContraints(rel_oid, &default_levels_rel);
1254

1255 1256
	BOOL is_unbounded = (NULL == part_constraint) && (NIL == default_levels);
	for (ULONG ul = 0; ul < num_of_levels; ul++)
1257
	{
J
Jesse Zhang 已提交
1258 1259
		is_unbounded =
			is_unbounded && LevelHasDefaultPartition(default_levels_rel, ul);
1260 1261
	}

1262
	/*
1263
	 * If part_constraint is NULL and default_levels is NIL,
1264 1265 1266 1267 1268
	 * it indicates that the index is available on all the parts including
	 * default part. So, we can say that levels on which default partitions
	 * exists for the relation applies to the index as well and the relative
	 * scan will not be partial.
	 */
1269 1270 1271
	List *default_levels_derived_list = NIL;
	if (NULL == part_constraint && NIL == default_levels)
		default_levels_derived_list = default_levels_rel;
1272
	else
1273
		default_levels_derived_list = default_levels;
J
Jesse Zhang 已提交
1274

1275 1276
	ULongPtrArray *default_levels_derived = GPOS_NEW(mp) ULongPtrArray(mp);
	for (ULONG ul = 0; ul < num_of_levels; ul++)
1277
	{
J
Jesse Zhang 已提交
1278 1279
		if (is_unbounded ||
			LevelHasDefaultPartition(default_levels_derived_list, ul))
1280
		{
1281
			default_levels_derived->Append(GPOS_NEW(mp) ULONG(ul));
1282 1283
		}
	}
1284
	gpdb::ListFree(default_levels_derived_list);
1285

1286
	if (NULL == part_constraint)
1287
	{
1288
		if (NIL == default_levels)
1289 1290
		{
			// NULL part constraints means all non-default partitions -> get constraint from the part table
1291
			part_constraint = part_constraints_rel;
1292 1293 1294
		}
		else
		{
J
Jesse Zhang 已提交
1295 1296
			part_constraint =
				gpdb::MakeBoolConst(false /*value*/, false /*isull*/);
1297 1298
		}
	}
J
Jesse Zhang 已提交
1299 1300 1301 1302

	CMDPartConstraintGPDB *mdpart_constraint =
		RetrievePartConstraintForIndex(mp, md_accessor, md_rel, part_constraint,
									   default_levels_derived, is_unbounded);
A
Ashuka Xue 已提交
1303

1304 1305
	default_levels_derived->Release();
	mdid_index->AddRef();
J
Jesse Zhang 已提交
1306 1307 1308 1309 1310

	GPOS_ASSERT(INDTYPE_BITMAP == index_info->indType ||
				INDTYPE_BTREE == index_info->indType ||
				INDTYPE_GIST == index_info->indType);

1311
	IMDIndex::EmdindexType index_type = IMDIndex::EmdindBtree;
J
Jesse Zhang 已提交
1312 1313
	IMDId *mdid_item_type = GPOS_NEW(mp) CMDIdGPDB(GPDB_ANY);
	;
1314
	if (INDTYPE_BITMAP == index_info->indType)
1315
	{
1316
		index_type = IMDIndex::EmdindBitmap;
1317
	}
1318
	else if (INDTYPE_GIST == index_info->indType)
A
Ashuka Xue 已提交
1319
	{
1320
		index_type = IMDIndex::EmdindGist;
A
Ashuka Xue 已提交
1321
	}
J
Jesse Zhang 已提交
1322

1323
	IMdIdArray *pdrgpmdidOpFamilies = RetrieveIndexOpFamilies(mp, mdid_index);
J
Jesse Zhang 已提交
1324 1325 1326 1327 1328 1329

	CMDIndexGPDB *index = GPOS_NEW(mp)
		CMDIndexGPDB(mp, mdid_index, mdname, form_pg_index->indisclustered,
					 index_type, mdid_item_type, index_key_cols_array,
					 included_cols, pdrgpmdidOpFamilies, mdpart_constraint);

1330
	GPOS_DELETE_ARRAY(attno_mapping);
J
Jesse Zhang 已提交
1331

1332
	return index;
1333 1334 1335 1336
}

//---------------------------------------------------------------------------
//	@function:
1337
//		CTranslatorRelcacheToDXL::LevelHasDefaultPartition
1338 1339 1340 1341 1342 1343
//
//	@doc:
//		Check whether the default partition at level one is included
//
//---------------------------------------------------------------------------
BOOL
J
Jesse Zhang 已提交
1344 1345
CTranslatorRelcacheToDXL::LevelHasDefaultPartition(List *default_levels,
												   ULONG level)
1346
{
1347
	if (NIL == default_levels)
1348 1349 1350
	{
		return false;
	}
J
Jesse Zhang 已提交
1351

1352
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
1353
	ForEach(lc, default_levels)
1354
	{
1355 1356
		ULONG default_level = (ULONG) lfirst_int(lc);
		if (level == default_level)
1357 1358 1359 1360
		{
			return true;
		}
	}
J
Jesse Zhang 已提交
1361

1362 1363 1364 1365 1366
	return false;
}

//---------------------------------------------------------------------------
//	@function:
1367
//		CTranslatorRelcacheToDXL::ComputeIncludedCols
1368 1369 1370 1371 1372
//
//	@doc:
//		Compute the included colunms in an index
//
//---------------------------------------------------------------------------
1373
ULongPtrArray *
J
Jesse Zhang 已提交
1374 1375
CTranslatorRelcacheToDXL::ComputeIncludedCols(CMemoryPool *mp,
											  const IMDRelation *md_rel)
1376 1377 1378 1379
{
	// TODO: 3/19/2012; currently we assume that all the columns
	// in the table are available from the index.

1380 1381
	ULongPtrArray *included_cols = GPOS_NEW(mp) ULongPtrArray(mp);
	const ULONG num_included_cols = md_rel->ColumnCount();
J
Jesse Zhang 已提交
1382
	for (ULONG ul = 0; ul < num_included_cols; ul++)
1383
	{
1384
		if (!md_rel->GetMdCol(ul)->IsDropped())
1385
		{
1386
			included_cols->Append(GPOS_NEW(mp) ULONG(ul));
1387 1388
		}
	}
J
Jesse Zhang 已提交
1389

1390
	return included_cols;
1391 1392 1393 1394 1395
}


//---------------------------------------------------------------------------
//	@function:
1396
//		CTranslatorRelcacheToDXL::GetAttributePosition
1397 1398 1399 1400 1401 1402
//
//	@doc:
//		Return the position of a given attribute
//
//---------------------------------------------------------------------------
ULONG
J
Jesse Zhang 已提交
1403 1404
CTranslatorRelcacheToDXL::GetAttributePosition(INT attno,
											   ULONG *GetAttributePosition)
1405
{
J
Jesse Zhang 已提交
1406
	ULONG idx = (ULONG)(GPDXL_SYSTEM_COLUMNS + attno);
1407 1408
	ULONG pos = GetAttributePosition[idx];
	GPOS_ASSERT(gpos::ulong_max != pos);
1409

1410
	return pos;
1411 1412 1413 1414
}

//---------------------------------------------------------------------------
//	@function:
1415
//		CTranslatorRelcacheToDXL::PopulateAttnoPositionMap
1416 1417 1418 1419 1420 1421
//
//	@doc:
//		Populate the attribute to position mapping
//
//---------------------------------------------------------------------------
ULONG *
J
Jesse Zhang 已提交
1422 1423 1424
CTranslatorRelcacheToDXL::PopulateAttnoPositionMap(CMemoryPool *mp,
												   const IMDRelation *md_rel,
												   ULONG size)
1425
{
1426 1427
	GPOS_ASSERT(NULL != md_rel);
	const ULONG num_included_cols = md_rel->ColumnCount();
1428

1429
	GPOS_ASSERT(num_included_cols <= size);
J
Jesse Zhang 已提交
1430
	ULONG *attno_mapping = GPOS_NEW_ARRAY(mp, ULONG, size);
1431

1432
	for (ULONG ul = 0; ul < size; ul++)
1433
	{
1434
		attno_mapping[ul] = gpos::ulong_max;
1435 1436
	}

J
Jesse Zhang 已提交
1437
	for (ULONG ul = 0; ul < num_included_cols; ul++)
1438
	{
1439
		const IMDColumn *md_col = md_rel->GetMdCol(ul);
1440

1441
		INT attno = md_col->AttrNum();
1442

J
Jesse Zhang 已提交
1443
		ULONG idx = (ULONG)(GPDXL_SYSTEM_COLUMNS + attno);
1444 1445
		GPOS_ASSERT(size > idx);
		attno_mapping[idx] = ul;
1446 1447
	}

1448
	return attno_mapping;
1449 1450 1451 1452 1453
}


//---------------------------------------------------------------------------
//	@function:
1454
//		CTranslatorRelcacheToDXL::RetrieveType
1455 1456 1457 1458 1459 1460
//
//	@doc:
//		Retrieve a type from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
IMDType *
J
Jesse Zhang 已提交
1461
CTranslatorRelcacheToDXL::RetrieveType(CMemoryPool *mp, IMDId *mdid)
1462
{
1463 1464
	OID oid_type = CMDIdGPDB::CastMdid(mdid)->Oid();
	GPOS_ASSERT(InvalidOid != oid_type);
J
Jesse Zhang 已提交
1465

1466
	// check for supported base types
1467
	switch (oid_type)
1468 1469
	{
		case GPDB_INT2_OID:
1470
			return GPOS_NEW(mp) CMDTypeInt2GPDB(mp);
1471 1472

		case GPDB_INT4_OID:
1473
			return GPOS_NEW(mp) CMDTypeInt4GPDB(mp);
1474 1475

		case GPDB_INT8_OID:
1476
			return GPOS_NEW(mp) CMDTypeInt8GPDB(mp);
1477 1478

		case GPDB_BOOL:
1479
			return GPOS_NEW(mp) CMDTypeBoolGPDB(mp);
1480 1481

		case GPDB_OID_OID:
1482
			return GPOS_NEW(mp) CMDTypeOidGPDB(mp);
1483 1484 1485 1486
	}

	// continue to construct a generic type
	INT iFlags = TYPECACHE_EQ_OPR | TYPECACHE_LT_OPR | TYPECACHE_GT_OPR |
J
Jesse Zhang 已提交
1487 1488
				 TYPECACHE_CMP_PROC | TYPECACHE_EQ_OPR_FINFO |
				 TYPECACHE_CMP_PROC_FINFO | TYPECACHE_TUPDESC;
1489

1490
	TypeCacheEntry *ptce = gpdb::LookupTypeCache(oid_type, iFlags);
1491 1492

	// get type name
1493
	CMDName *mdname = GetTypeName(mp, mdid);
1494

1495 1496
	BOOL is_fixed_length = false;
	ULONG length = 0;
1497 1498 1499

	if (0 < ptce->typlen)
	{
1500 1501
		is_fixed_length = true;
		length = ptce->typlen;
1502 1503
	}

1504
	BOOL is_passed_by_value = ptce->typbyval;
1505 1506

	// collect ids of different comparison operators for types
1507
	CMDIdGPDB *mdid_op_eq = GPOS_NEW(mp) CMDIdGPDB(ptce->eq_opr);
J
Jesse Zhang 已提交
1508 1509
	CMDIdGPDB *mdid_op_neq =
		GPOS_NEW(mp) CMDIdGPDB(gpdb::GetInverseOp(ptce->eq_opr));
1510
	CMDIdGPDB *mdid_op_lt = GPOS_NEW(mp) CMDIdGPDB(ptce->lt_opr);
J
Jesse Zhang 已提交
1511 1512
	CMDIdGPDB *mdid_op_leq =
		GPOS_NEW(mp) CMDIdGPDB(gpdb::GetInverseOp(ptce->gt_opr));
1513
	CMDIdGPDB *mdid_op_gt = GPOS_NEW(mp) CMDIdGPDB(ptce->gt_opr);
J
Jesse Zhang 已提交
1514 1515
	CMDIdGPDB *mdid_op_geq =
		GPOS_NEW(mp) CMDIdGPDB(gpdb::GetInverseOp(ptce->lt_opr));
1516 1517
	CMDIdGPDB *mdid_op_cmp = GPOS_NEW(mp) CMDIdGPDB(ptce->cmp_proc);
	BOOL is_hashable = gpdb::IsOpHashJoinable(ptce->eq_opr);
A
Ashuka Xue 已提交
1518
	BOOL is_merge_joinable = gpdb::IsOpMergeJoinable(ptce->eq_opr);
1519
	BOOL is_composite_type = gpdb::IsCompositeType(oid_type);
1520
	BOOL is_text_related_type = gpdb::IsTextRelatedType(oid_type);
1521 1522

	// get standard aggregates
J
Jesse Zhang 已提交
1523 1524 1525 1526 1527 1528 1529 1530 1531
	CMDIdGPDB *mdid_min =
		GPOS_NEW(mp) CMDIdGPDB(gpdb::GetAggregate("min", oid_type));
	CMDIdGPDB *mdid_max =
		GPOS_NEW(mp) CMDIdGPDB(gpdb::GetAggregate("max", oid_type));
	CMDIdGPDB *mdid_avg =
		GPOS_NEW(mp) CMDIdGPDB(gpdb::GetAggregate("avg", oid_type));
	CMDIdGPDB *mdid_sum =
		GPOS_NEW(mp) CMDIdGPDB(gpdb::GetAggregate("sum", oid_type));

1532
	// count aggregate is the same for all types
1533
	CMDIdGPDB *mdid_count = GPOS_NEW(mp) CMDIdGPDB(COUNT_ANY_OID);
J
Jesse Zhang 已提交
1534

1535
	// check if type is composite
1536 1537
	CMDIdGPDB *mdid_type_relid = NULL;
	if (is_composite_type)
1538
	{
1539
		mdid_type_relid = GPOS_NEW(mp) CMDIdGPDB(gpdb::GetTypeRelid(oid_type));
1540 1541 1542
	}

	// get array type mdid
J
Jesse Zhang 已提交
1543 1544
	CMDIdGPDB *mdid_type_array =
		GPOS_NEW(mp) CMDIdGPDB(gpdb::GetArrayType(oid_type));
1545

1546
	BOOL is_redistributable = gpdb::IsGreenplumDbHashable(oid_type);
1547

1548
	mdid->AddRef();
1549

J
Jesse Zhang 已提交
1550 1551 1552 1553 1554 1555
	return GPOS_NEW(mp) CMDTypeGenericGPDB(
		mp, mdid, mdname, is_redistributable, is_fixed_length, length,
		is_passed_by_value, mdid_op_eq, mdid_op_neq, mdid_op_lt, mdid_op_leq,
		mdid_op_gt, mdid_op_geq, mdid_op_cmp, mdid_min, mdid_max, mdid_avg,
		mdid_sum, mdid_count, is_hashable, is_merge_joinable, is_composite_type,
		is_text_related_type, mdid_type_relid, mdid_type_array, ptce->typlen);
1556 1557 1558 1559 1560
}


//---------------------------------------------------------------------------
//	@function:
1561
//		CTranslatorRelcacheToDXL::RetrieveScOp
1562 1563 1564 1565 1566 1567
//
//	@doc:
//		Retrieve a scalar operator from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
CMDScalarOpGPDB *
J
Jesse Zhang 已提交
1568
CTranslatorRelcacheToDXL::RetrieveScOp(CMemoryPool *mp, IMDId *mdid)
1569
{
1570
	OID op_oid = CMDIdGPDB::CastMdid(mdid)->Oid();
1571

1572
	GPOS_ASSERT(InvalidOid != op_oid);
1573 1574

	// get operator name
1575
	CHAR *name = gpdb::GetOpName(op_oid);
1576

1577
	if (NULL == name)
1578
	{
J
Jesse Zhang 已提交
1579 1580
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
1581 1582
	}

1583
	CMDName *mdname = CDXLUtils::CreateMDNameFromCharArray(mp, name);
J
Jesse Zhang 已提交
1584

1585 1586
	OID left_oid = InvalidOid;
	OID right_oid = InvalidOid;
1587 1588

	// get operator argument types
1589
	gpdb::GetOpInputTypes(op_oid, &left_oid, &right_oid);
1590

1591 1592
	CMDIdGPDB *mdid_type_left = NULL;
	CMDIdGPDB *mdid_type_right = NULL;
1593

1594
	if (InvalidOid != left_oid)
1595
	{
1596
		mdid_type_left = GPOS_NEW(mp) CMDIdGPDB(left_oid);
1597 1598
	}

1599
	if (InvalidOid != right_oid)
1600
	{
1601
		mdid_type_right = GPOS_NEW(mp) CMDIdGPDB(right_oid);
1602 1603 1604
	}

	// get comparison type
J
Jesse Zhang 已提交
1605 1606
	CmpType cmpt =
		(CmpType) gpdb::GetComparisonType(op_oid, left_oid, right_oid);
1607
	IMDType::ECmpType cmp_type = ParseCmpType(cmpt);
J
Jesse Zhang 已提交
1608

1609
	// get func oid
1610 1611
	OID func_oid = gpdb::GetOpFunc(op_oid);
	GPOS_ASSERT(InvalidOid != func_oid);
1612

1613
	CMDIdGPDB *mdid_func = GPOS_NEW(mp) CMDIdGPDB(func_oid);
1614 1615

	// get result type
1616
	OID result_oid = gpdb::GetFuncRetType(func_oid);
1617

1618
	GPOS_ASSERT(InvalidOid != result_oid);
1619

1620
	CMDIdGPDB *result_type_mdid = GPOS_NEW(mp) CMDIdGPDB(result_oid);
1621 1622

	// get commutator and inverse
1623
	CMDIdGPDB *mdid_commute_opr = NULL;
1624

1625
	OID commute_oid = gpdb::GetCommutatorOp(op_oid);
1626

J
Jesse Zhang 已提交
1627
	if (InvalidOid != commute_oid)
1628
	{
1629
		mdid_commute_opr = GPOS_NEW(mp) CMDIdGPDB(commute_oid);
1630 1631
	}

1632
	CMDIdGPDB *m_mdid_inverse_opr = NULL;
1633

1634
	OID inverse_oid = gpdb::GetInverseOp(op_oid);
1635

J
Jesse Zhang 已提交
1636
	if (InvalidOid != inverse_oid)
1637
	{
1638
		m_mdid_inverse_opr = GPOS_NEW(mp) CMDIdGPDB(inverse_oid);
1639 1640
	}

1641
	BOOL returns_null_on_null_input = gpdb::IsOpStrict(op_oid);
1642
	BOOL is_ndv_preserving = gpdb::IsOpNDVPreserving(op_oid);
1643

1644
	mdid->AddRef();
J
Jesse Zhang 已提交
1645 1646 1647 1648 1649
	CMDScalarOpGPDB *md_scalar_op = GPOS_NEW(mp) CMDScalarOpGPDB(
		mp, mdid, mdname, mdid_type_left, mdid_type_right, result_type_mdid,
		mdid_func, mdid_commute_opr, m_mdid_inverse_opr, cmp_type,
		returns_null_on_null_input, is_ndv_preserving,
		RetrieveScOpOpFamilies(mp, mdid));
1650
	return md_scalar_op;
1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662
}


//---------------------------------------------------------------------------
//	@function:
//		CTranslatorRelcacheToDXL::LookupFuncProps
//
//	@doc:
//		Lookup function properties
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
1663
CTranslatorRelcacheToDXL::LookupFuncProps(
1664
	OID func_oid,
J
Jesse Zhang 已提交
1665 1666 1667 1668 1669 1670
	IMDFunction::EFuncStbl *stability,	// output: function stability
	IMDFunction::EFuncDataAcc *access,	// output: function datya access
	BOOL *is_strict,					// output: is function strict?
	BOOL *is_ndv_preserving,			// output: preserves NDVs of inputs
	BOOL *returns_set					// output: does function return set?
)
1671
{
1672 1673 1674
	GPOS_ASSERT(NULL != stability);
	GPOS_ASSERT(NULL != access);
	GPOS_ASSERT(NULL != is_strict);
1675
	GPOS_ASSERT(NULL != is_ndv_preserving);
1676
	GPOS_ASSERT(NULL != returns_set);
1677

1678 1679
	CHAR cFuncStability = gpdb::FuncStability(func_oid);
	*stability = GetFuncStability(cFuncStability);
1680

1681 1682
	CHAR cFuncDataAccess = gpdb::FuncDataAccess(func_oid);
	*access = GetEFuncDataAccess(cFuncDataAccess);
1683

1684 1685
	*returns_set = gpdb::GetFuncRetset(func_oid);
	*is_strict = gpdb::FuncStrict(func_oid);
1686
	*is_ndv_preserving = gpdb::IsFuncNDVPreserving(func_oid);
1687 1688 1689 1690 1691
}


//---------------------------------------------------------------------------
//	@function:
1692
//		CTranslatorRelcacheToDXL::RetrieveFunc
1693 1694 1695 1696 1697 1698
//
//	@doc:
//		Retrieve a function from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
CMDFunctionGPDB *
J
Jesse Zhang 已提交
1699
CTranslatorRelcacheToDXL::RetrieveFunc(CMemoryPool *mp, IMDId *mdid)
1700
{
1701
	OID func_oid = CMDIdGPDB::CastMdid(mdid)->Oid();
1702

1703
	GPOS_ASSERT(InvalidOid != func_oid);
1704 1705

	// get func name
1706
	CHAR *name = gpdb::GetFuncName(func_oid);
1707

1708
	if (NULL == name)
1709
	{
J
Jesse Zhang 已提交
1710 1711
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
1712 1713
	}

J
Jesse Zhang 已提交
1714 1715
	CWStringDynamic *func_name_str =
		CDXLUtils::CreateDynamicStringFromCharArray(mp, name);
1716
	CMDName *mdname = GPOS_NEW(mp) CMDName(mp, func_name_str);
1717 1718

	// CMDName ctor created a copy of the string
1719
	GPOS_DELETE(func_name_str);
1720 1721

	// get result type
1722
	OID result_oid = gpdb::GetFuncRetType(func_oid);
1723

1724
	GPOS_ASSERT(InvalidOid != result_oid);
1725

1726
	CMDIdGPDB *result_type_mdid = GPOS_NEW(mp) CMDIdGPDB(result_oid);
1727 1728

	// get output argument types if any
1729
	List *out_arg_types_list = gpdb::GetFuncOutputArgTypes(func_oid);
1730

1731 1732
	IMdIdArray *arg_type_mdids = NULL;
	if (NULL != out_arg_types_list)
1733
	{
1734 1735
		ListCell *lc = NULL;
		arg_type_mdids = GPOS_NEW(mp) IMdIdArray(mp);
1736

J
Jesse Zhang 已提交
1737
		ForEach(lc, out_arg_types_list)
1738
		{
1739
			OID oidArgType = lfirst_oid(lc);
1740
			GPOS_ASSERT(InvalidOid != oidArgType);
1741 1742
			CMDIdGPDB *pmdidArgType = GPOS_NEW(mp) CMDIdGPDB(oidArgType);
			arg_type_mdids->Append(pmdidArgType);
1743 1744
		}

1745
		gpdb::GPDBFree(out_arg_types_list);
1746 1747
	}

1748 1749 1750 1751
	IMDFunction::EFuncStbl stability = IMDFunction::EfsImmutable;
	IMDFunction::EFuncDataAcc access = IMDFunction::EfdaNoSQL;
	BOOL is_strict = true;
	BOOL returns_set = true;
1752
	BOOL is_ndv_preserving = true;
J
Jesse Zhang 已提交
1753 1754
	LookupFuncProps(func_oid, &stability, &access, &is_strict,
					&is_ndv_preserving, &returns_set);
1755

1756
	mdid->AddRef();
J
Jesse Zhang 已提交
1757 1758 1759
	CMDFunctionGPDB *md_func = GPOS_NEW(mp) CMDFunctionGPDB(
		mp, mdid, mdname, result_type_mdid, arg_type_mdids, returns_set,
		stability, access, is_strict, is_ndv_preserving);
1760

1761
	return md_func;
1762 1763 1764 1765
}

//---------------------------------------------------------------------------
//	@function:
1766
//		CTranslatorRelcacheToDXL::RetrieveAgg
1767 1768 1769 1770 1771 1772
//
//	@doc:
//		Retrieve an aggregate from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
CMDAggregateGPDB *
J
Jesse Zhang 已提交
1773
CTranslatorRelcacheToDXL::RetrieveAgg(CMemoryPool *mp, IMDId *mdid)
1774
{
1775
	OID agg_oid = CMDIdGPDB::CastMdid(mdid)->Oid();
1776

1777
	GPOS_ASSERT(InvalidOid != agg_oid);
1778 1779

	// get agg name
1780
	CHAR *name = gpdb::GetFuncName(agg_oid);
1781

1782
	if (NULL == name)
1783
	{
J
Jesse Zhang 已提交
1784 1785
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
1786 1787
	}

J
Jesse Zhang 已提交
1788 1789
	CWStringDynamic *agg_name_str =
		CDXLUtils::CreateDynamicStringFromCharArray(mp, name);
1790
	CMDName *mdname = GPOS_NEW(mp) CMDName(mp, agg_name_str);
1791 1792

	// CMDName ctor created a copy of the string
1793
	GPOS_DELETE(agg_name_str);
1794 1795

	// get result type
1796
	OID result_oid = gpdb::GetFuncRetType(agg_oid);
1797

1798
	GPOS_ASSERT(InvalidOid != result_oid);
1799

1800
	CMDIdGPDB *result_type_mdid = GPOS_NEW(mp) CMDIdGPDB(result_oid);
J
Jesse Zhang 已提交
1801 1802
	IMDId *intermediate_result_type_mdid =
		RetrieveAggIntermediateResultType(mp, mdid);
1803

1804
	mdid->AddRef();
J
Jesse Zhang 已提交
1805

1806
	BOOL is_ordered = gpdb::IsOrderedAgg(agg_oid);
J
Jesse Zhang 已提交
1807

1808 1809
	// GPDB does not support splitting of ordered aggs and aggs without a
	// preliminary function
1810
	BOOL is_splittable = !is_ordered && gpdb::AggHasPrelimFunc(agg_oid);
J
Jesse Zhang 已提交
1811

1812 1813
	// cannot use hash agg for ordered aggs or aggs without a prelim func
	// due to the fact that hashAgg may spill
1814
	BOOL is_hash_agg_capable = !is_ordered && gpdb::AggHasPrelimFunc(agg_oid);
1815

J
Jesse Zhang 已提交
1816 1817 1818
	CMDAggregateGPDB *pmdagg = GPOS_NEW(mp) CMDAggregateGPDB(
		mp, mdid, mdname, result_type_mdid, intermediate_result_type_mdid,
		is_ordered, is_splittable, is_hash_agg_capable);
1819 1820 1821 1822 1823
	return pmdagg;
}

//---------------------------------------------------------------------------
//	@function:
1824
//		CTranslatorRelcacheToDXL::RetrieveTrigger
1825 1826 1827 1828 1829 1830
//
//	@doc:
//		Retrieve a trigger from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
CMDTriggerGPDB *
J
Jesse Zhang 已提交
1831
CTranslatorRelcacheToDXL::RetrieveTrigger(CMemoryPool *mp, IMDId *mdid)
1832
{
1833
	OID trigger_oid = CMDIdGPDB::CastMdid(mdid)->Oid();
1834

1835
	GPOS_ASSERT(InvalidOid != trigger_oid);
1836 1837

	// get trigger name
1838
	CHAR *name = gpdb::GetTriggerName(trigger_oid);
1839

1840
	if (NULL == name)
1841
	{
J
Jesse Zhang 已提交
1842 1843
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
1844 1845
	}

J
Jesse Zhang 已提交
1846 1847
	CWStringDynamic *trigger_name_str =
		CDXLUtils::CreateDynamicStringFromCharArray(mp, name);
1848 1849
	CMDName *mdname = GPOS_NEW(mp) CMDName(mp, trigger_name_str);
	GPOS_DELETE(trigger_name_str);
1850 1851

	// get relation oid
1852 1853 1854
	OID rel_oid = gpdb::GetTriggerRelid(trigger_oid);
	GPOS_ASSERT(InvalidOid != rel_oid);
	CMDIdGPDB *mdid_rel = GPOS_NEW(mp) CMDIdGPDB(rel_oid);
1855 1856

	// get function oid
1857 1858 1859
	OID func_oid = gpdb::GetTriggerFuncid(trigger_oid);
	GPOS_ASSERT(InvalidOid != func_oid);
	CMDIdGPDB *mdid_func = GPOS_NEW(mp) CMDIdGPDB(func_oid);
1860 1861

	// get type
1862
	INT trigger_type = gpdb::GetTriggerType(trigger_oid);
1863 1864

	// is trigger enabled
1865
	BOOL is_enabled = gpdb::IsTriggerEnabled(trigger_oid);
1866

1867
	mdid->AddRef();
J
Jesse Zhang 已提交
1868 1869
	CMDTriggerGPDB *pmdtrigger = GPOS_NEW(mp) CMDTriggerGPDB(
		mp, mdid, mdname, mdid_rel, mdid_func, trigger_type, is_enabled);
1870 1871 1872 1873 1874
	return pmdtrigger;
}

//---------------------------------------------------------------------------
//	@function:
1875
//		CTranslatorRelcacheToDXL::RetrieveCheckConstraints
1876 1877 1878 1879 1880 1881
//
//	@doc:
//		Retrieve a check constraint from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
CMDCheckConstraintGPDB *
J
Jesse Zhang 已提交
1882 1883 1884
CTranslatorRelcacheToDXL::RetrieveCheckConstraints(CMemoryPool *mp,
												   CMDAccessor *md_accessor,
												   IMDId *mdid)
1885
{
1886 1887
	OID check_constraint_oid = CMDIdGPDB::CastMdid(mdid)->Oid();
	GPOS_ASSERT(InvalidOid != check_constraint_oid);
1888 1889

	// get name of the check constraint
1890 1891
	CHAR *name = gpdb::GetCheckConstraintName(check_constraint_oid);
	if (NULL == name)
1892
	{
J
Jesse Zhang 已提交
1893 1894
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
1895
	}
J
Jesse Zhang 已提交
1896 1897
	CWStringDynamic *check_constr_name =
		CDXLUtils::CreateDynamicStringFromCharArray(mp, name);
1898 1899
	CMDName *mdname = GPOS_NEW(mp) CMDName(mp, check_constr_name);
	GPOS_DELETE(check_constr_name);
1900 1901

	// get relation oid associated with the check constraint
1902 1903 1904
	OID rel_oid = gpdb::GetCheckConstraintRelid(check_constraint_oid);
	GPOS_ASSERT(InvalidOid != rel_oid);
	CMDIdGPDB *mdid_rel = GPOS_NEW(mp) CMDIdGPDB(rel_oid);
1905 1906

	// translate the check constraint expression
1907 1908
	Node *node = gpdb::PnodeCheckConstraint(check_constraint_oid);
	GPOS_ASSERT(NULL != node);
1909

J
Jesse Zhang 已提交
1910 1911 1912 1913 1914 1915 1916 1917
	CTranslatorScalarToDXL scalar_translator(mp, md_accessor,
											 NULL, /* pulidgtorCol */
											 NULL, /* pulidgtorCTE */
											 0,	   /* query_level */
											 true, /* m_fQuery */
											 NULL, /* query_level_to_cte_map */
											 NULL  /* cte_dxlnode_array */
	);
1918 1919

	// generate a mock mapping between var to column information
1920 1921 1922 1923 1924
	CMappingVarColId *var_colid_mapping = GPOS_NEW(mp) CMappingVarColId(mp);
	CDXLColDescrArray *dxl_col_descr_array = GPOS_NEW(mp) CDXLColDescrArray(mp);
	const IMDRelation *md_rel = md_accessor->RetrieveRel(mdid_rel);
	const ULONG length = md_rel->ColumnCount();
	for (ULONG ul = 0; ul < length; ul++)
1925
	{
1926
		const IMDColumn *md_col = md_rel->GetMdCol(ul);
J
Jesse Zhang 已提交
1927 1928
		CMDName *md_colname =
			GPOS_NEW(mp) CMDName(mp, md_col->Mdname().GetMDName());
1929 1930
		CMDIdGPDB *mdid_col_type = CMDIdGPDB::CastMdid(md_col->MdidType());
		mdid_col_type->AddRef();
1931 1932

		// create a column descriptor for the column
J
Jesse Zhang 已提交
1933 1934 1935 1936
		CDXLColDescr *dxl_col_descr = GPOS_NEW(mp) CDXLColDescr(
			mp, md_colname, ul + 1 /*colid*/, md_col->AttrNum(), mdid_col_type,
			md_col->TypeModifier(), false /* fColDropped */
		);
1937
		dxl_col_descr_array->Append(dxl_col_descr);
1938
	}
J
Jesse Zhang 已提交
1939 1940
	var_colid_mapping->LoadColumns(0 /*query_level */, 1 /* rteIndex */,
								   dxl_col_descr_array);
1941 1942

	// translate the check constraint expression
J
Jesse Zhang 已提交
1943 1944
	CDXLNode *scalar_dxlnode = scalar_translator.TranslateScalarToDXL(
		(Expr *) node, var_colid_mapping);
1945 1946

	// cleanup
1947 1948
	dxl_col_descr_array->Release();
	GPOS_DELETE(var_colid_mapping);
1949

1950
	mdid->AddRef();
1951

J
Jesse Zhang 已提交
1952 1953
	return GPOS_NEW(mp)
		CMDCheckConstraintGPDB(mp, mdid, mdname, mdid_rel, scalar_dxlnode);
1954 1955 1956 1957
}

//---------------------------------------------------------------------------
//	@function:
1958
//		CTranslatorRelcacheToDXL::GetTypeName
1959 1960 1961 1962 1963 1964
//
//	@doc:
//		Retrieve a type's name from the relcache given its metadata id.
//
//---------------------------------------------------------------------------
CMDName *
J
Jesse Zhang 已提交
1965
CTranslatorRelcacheToDXL::GetTypeName(CMemoryPool *mp, IMDId *mdid)
1966
{
1967
	OID oid_type = CMDIdGPDB::CastMdid(mdid)->Oid();
1968

1969
	GPOS_ASSERT(InvalidOid != oid_type);
1970

1971 1972
	CHAR *typename_str = gpdb::GetTypeName(oid_type);
	GPOS_ASSERT(NULL != typename_str);
1973

J
Jesse Zhang 已提交
1974 1975
	CWStringDynamic *str_name =
		CDXLUtils::CreateDynamicStringFromCharArray(mp, typename_str);
1976
	CMDName *mdname = GPOS_NEW(mp) CMDName(mp, str_name);
1977 1978

	// cleanup
1979 1980
	GPOS_DELETE(str_name);
	return mdname;
1981 1982 1983 1984
}

//---------------------------------------------------------------------------
//	@function:
1985
//		CTranslatorRelcacheToDXL::GetFuncStability
1986 1987 1988 1989 1990 1991
//
//	@doc:
//		Get function stability property from the GPDB character representation
//
//---------------------------------------------------------------------------
CMDFunctionGPDB::EFuncStbl
J
Jesse Zhang 已提交
1992
CTranslatorRelcacheToDXL::GetFuncStability(CHAR c)
1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
{
	CMDFunctionGPDB::EFuncStbl efuncstbl = CMDFunctionGPDB::EfsSentinel;

	switch (c)
	{
		case 's':
			efuncstbl = CMDFunctionGPDB::EfsStable;
			break;
		case 'i':
			efuncstbl = CMDFunctionGPDB::EfsImmutable;
			break;
		case 'v':
			efuncstbl = CMDFunctionGPDB::EfsVolatile;
			break;
		default:
			GPOS_ASSERT(!"Invalid stability property");
	}

	return efuncstbl;
}

//---------------------------------------------------------------------------
//	@function:
2016
//		CTranslatorRelcacheToDXL::GetEFuncDataAccess
2017 2018 2019 2020 2021 2022
//
//	@doc:
//		Get function data access property from the GPDB character representation
//
//---------------------------------------------------------------------------
CMDFunctionGPDB::EFuncDataAcc
J
Jesse Zhang 已提交
2023
CTranslatorRelcacheToDXL::GetEFuncDataAccess(CHAR c)
2024
{
2025
	CMDFunctionGPDB::EFuncDataAcc access = CMDFunctionGPDB::EfdaSentinel;
2026 2027 2028 2029

	switch (c)
	{
		case 'n':
2030
			access = CMDFunctionGPDB::EfdaNoSQL;
2031 2032
			break;
		case 'c':
2033
			access = CMDFunctionGPDB::EfdaContainsSQL;
2034 2035
			break;
		case 'r':
2036
			access = CMDFunctionGPDB::EfdaReadsSQLData;
2037 2038
			break;
		case 'm':
2039
			access = CMDFunctionGPDB::EfdaModifiesSQLData;
2040 2041
			break;
		case 's':
J
Jesse Zhang 已提交
2042 2043
			GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
					   GPOS_WSZ_LIT("unknown data access"));
2044 2045 2046 2047
		default:
			GPOS_ASSERT(!"Invalid data access property");
	}

2048
	return access;
2049 2050 2051 2052
}

//---------------------------------------------------------------------------
//	@function:
2053
//		CTranslatorRelcacheToDXL::RetrieveAggIntermediateResultType
2054 2055 2056 2057 2058 2059
//
//	@doc:
//		Retrieve the type id of an aggregate's intermediate results
//
//---------------------------------------------------------------------------
IMDId *
J
Jesse Zhang 已提交
2060 2061
CTranslatorRelcacheToDXL::RetrieveAggIntermediateResultType(CMemoryPool *mp,
															IMDId *mdid)
2062
{
2063
	OID agg_oid = CMDIdGPDB::CastMdid(mdid)->Oid();
2064

2065 2066
	GPOS_ASSERT(InvalidOid != agg_oid);
	return GPOS_NEW(mp) CMDIdGPDB(gpdb::GetAggIntermediateResultType(agg_oid));
2067 2068 2069 2070
}

//---------------------------------------------------------------------------
//	@function:
2071
//		CTranslatorRelcacheToDXL::RetrieveRelStats
2072 2073 2074 2075 2076 2077
//
//	@doc:
//		Retrieve relation statistics from relcache
//
//---------------------------------------------------------------------------
IMDCacheObject *
J
Jesse Zhang 已提交
2078
CTranslatorRelcacheToDXL::RetrieveRelStats(CMemoryPool *mp, IMDId *mdid)
2079
{
2080 2081 2082
	CMDIdRelStats *m_rel_stats_mdid = CMDIdRelStats::CastMdid(mdid);
	IMDId *mdid_rel = m_rel_stats_mdid->GetRelMdId();
	OID rel_oid = CMDIdGPDB::CastMdid(mdid_rel)->Oid();
2083

2084
	Relation rel = gpdb::GetRelation(rel_oid);
2085 2086
	if (NULL == rel)
	{
J
Jesse Zhang 已提交
2087 2088
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
2089 2090
	}

2091 2092
	double num_rows = 0.0;
	CMDName *mdname = NULL;
2093 2094 2095 2096

	GPOS_TRY
	{
		// get rel name
2097
		CHAR *relname = NameStr(rel->rd_rel->relname);
J
Jesse Zhang 已提交
2098 2099
		CWStringDynamic *relname_str =
			CDXLUtils::CreateDynamicStringFromCharArray(mp, relname);
2100
		mdname = GPOS_NEW(mp) CMDName(mp, relname_str);
2101
		// CMDName ctor created a copy of the string
2102
		GPOS_DELETE(relname_str);
2103 2104

		BlockNumber pages = 0;
2105
		GpPolicy *gp_policy = gpdb::GetDistributionPolicy(rel);
J
Jesse Zhang 已提交
2106
		if (!gp_policy || gp_policy->ptype != POLICYTYPE_PARTITIONED)
2107
		{
2108
			gpdb::EstimateRelationSize(rel, NULL, &pages, &num_rows);
2109 2110 2111
		}
		else
		{
2112
			num_rows = rel->rd_rel->reltuples;
2113

2114
			if (num_rows == 0 && gp_enable_relsize_collection)
2115 2116
			{
				RelOptInfo *relOptInfo = makeNode(RelOptInfo);
2117
				relOptInfo->cdbpolicy = gpdb::GetDistributionPolicy(rel);
2118
				bool default_stats_used = false;
J
Jesse Zhang 已提交
2119 2120
				gpdb::CdbEstimateRelationSize(relOptInfo, rel, NULL, &pages,
											  &num_rows, &default_stats_used);
2121 2122
				pfree(relOptInfo);
			}
2123 2124
		}

2125
		m_rel_stats_mdid->AddRef();
2126 2127 2128 2129 2130 2131 2132 2133
		gpdb::CloseRelation(rel);
	}
	GPOS_CATCH_EX(ex)
	{
		gpdb::CloseRelation(rel);
		GPOS_RETHROW(ex);
	}
	GPOS_CATCH_END;
J
Jesse Zhang 已提交
2134

2135 2136
	BOOL stats_empty = false;
	if (num_rows == 0.0)
2137
	{
2138
		stats_empty = true;
2139
	}
J
Jesse Zhang 已提交
2140 2141 2142

	CDXLRelStats *dxl_rel_stats = GPOS_NEW(mp) CDXLRelStats(
		mp, m_rel_stats_mdid, mdname, CDouble(num_rows), stats_empty);
2143 2144


2145
	return dxl_rel_stats;
2146 2147
}

O
Omer Arap 已提交
2148 2149 2150 2151 2152
// Retrieve column statistics from relcache
// If all statistics are missing, create dummy statistics
// Also, if the statistics are broken, create dummy statistics
// However, if any statistics are present and not broken,
// create column statistics using these statistics
2153
IMDCacheObject *
J
Jesse Zhang 已提交
2154 2155 2156
CTranslatorRelcacheToDXL::RetrieveColStats(CMemoryPool *mp,
										   CMDAccessor *md_accessor,
										   IMDId *mdid)
2157
{
2158 2159 2160 2161
	CMDIdColStats *mdid_col_stats = CMDIdColStats::CastMdid(mdid);
	IMDId *mdid_rel = mdid_col_stats->GetRelMdId();
	ULONG pos = mdid_col_stats->Position();
	OID rel_oid = CMDIdGPDB::CastMdid(mdid_rel)->Oid();
2162

2163
	Relation rel = gpdb::GetRelation(rel_oid);
2164 2165
	if (NULL == rel)
	{
J
Jesse Zhang 已提交
2166 2167
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
2168 2169
	}

2170 2171 2172
	const IMDRelation *md_rel = md_accessor->RetrieveRel(mdid_rel);
	const IMDColumn *md_col = md_rel->GetMdCol(pos);
	AttrNumber attno = (AttrNumber) md_col->AttrNum();
2173 2174

	// number of rows from pg_class
2175
	CDouble num_rows(rel->rd_rel->reltuples);
2176 2177

	// extract column name and type
J
Jesse Zhang 已提交
2178 2179
	CMDName *md_colname =
		GPOS_NEW(mp) CMDName(mp, md_col->Mdname().GetMDName());
2180
	OID att_type = CMDIdGPDB::CastMdid(md_col->MdidType())->Oid();
2181 2182
	gpdb::CloseRelation(rel);

2183
	CDXLBucketArray *dxl_stats_bucket_array = GPOS_NEW(mp) CDXLBucketArray(mp);
2184

2185
	if (0 > attno)
2186
	{
2187
		mdid_col_stats->AddRef();
J
Jesse Zhang 已提交
2188 2189 2190
		return GenerateStatsForSystemCols(mp, rel_oid, mdid_col_stats,
										  md_colname, att_type, attno,
										  dxl_stats_bucket_array, num_rows);
2191 2192 2193
	}

	// extract out histogram and mcv information from pg_statistic
2194
	HeapTuple stats_tup = gpdb::GetAttStats(rel_oid, attno);
2195 2196

	// if there is no colstats
2197
	if (!HeapTupleIsValid(stats_tup))
2198
	{
2199 2200
		dxl_stats_bucket_array->Release();
		mdid_col_stats->AddRef();
2201

2202
		CDouble width = CStatistics::DefaultColumnWidth;
2203

2204
		if (!md_col->IsDropped())
2205
		{
2206 2207 2208 2209 2210
			CMDIdGPDB *mdid_atttype = GPOS_NEW(mp) CMDIdGPDB(att_type);
			IMDType *md_type = RetrieveType(mp, mdid_atttype);
			width = CStatisticsUtils::DefaultColumnWidth(md_type);
			md_type->Release();
			mdid_atttype->Release();
2211 2212
		}

J
Jesse Zhang 已提交
2213 2214
		return CDXLColStats::CreateDXLDummyColStats(mp, mdid_col_stats,
													md_colname, width);
2215 2216
	}

2217
	Form_pg_statistic form_pg_stats = (Form_pg_statistic) GETSTRUCT(stats_tup);
2218 2219

	// null frequency and NDV
2220 2221 2222
	CDouble null_freq(0.0);
	int null_ndv = 0;
	if (CStatistics::Epsilon < form_pg_stats->stanullfrac)
2223
	{
2224 2225
		null_freq = form_pg_stats->stanullfrac;
		null_ndv = 1;
2226 2227 2228
	}

	// column width
2229
	CDouble width = CDouble(form_pg_stats->stawidth);
2230 2231

	// calculate total number of distinct values
2232 2233
	CDouble num_distinct(1.0);
	if (form_pg_stats->stadistinct < 0)
2234
	{
2235
		GPOS_ASSERT(form_pg_stats->stadistinct > -1.01);
J
Jesse Zhang 已提交
2236 2237
		num_distinct =
			num_rows * (1 - null_freq) * CDouble(-form_pg_stats->stadistinct);
2238 2239 2240
	}
	else
	{
2241
		num_distinct = CDouble(form_pg_stats->stadistinct);
2242
	}
2243
	num_distinct = num_distinct.Ceil();
2244

2245
	BOOL is_dummy_stats = false;
O
Omer Arap 已提交
2246 2247
	// most common values and their frequencies extracted from the pg_statistic
	// tuple for a given column
2248
	AttStatsSlot mcv_slot;
O
Omer Arap 已提交
2249

J
Jesse Zhang 已提交
2250 2251 2252
	(void) gpdb::GetAttrStatsSlot(&mcv_slot, stats_tup, STATISTIC_KIND_MCV,
								  InvalidOid,
								  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2253
	if (InvalidOid != mcv_slot.valuetype && mcv_slot.valuetype != att_type)
O
Omer Arap 已提交
2254 2255
	{
		char msgbuf[NAMEDATALEN * 2 + 100];
J
Jesse Zhang 已提交
2256 2257 2258 2259 2260 2261 2262
		snprintf(
			msgbuf, sizeof(msgbuf),
			"Type mismatch between attribute %ls of table %ls having type %d and statistic having type %d, please ANALYZE the table again",
			md_col->Mdname().GetMDName()->GetBuffer(),
			md_rel->Mdname().GetMDName()->GetBuffer(), att_type,
			mcv_slot.valuetype);
		GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION, NOTICE, msgbuf, NULL);
O
Omer Arap 已提交
2263

2264 2265
		gpdb::FreeAttrStatsSlot(&mcv_slot);
		is_dummy_stats = true;
O
Omer Arap 已提交
2266 2267
	}

2268
	else if (mcv_slot.nvalues != mcv_slot.nnumbers)
O
Omer Arap 已提交
2269 2270
	{
		char msgbuf[NAMEDATALEN * 2 + 100];
J
Jesse Zhang 已提交
2271 2272 2273 2274 2275 2276
		snprintf(
			msgbuf, sizeof(msgbuf),
			"The number of most common values and frequencies do not match on column %ls of table %ls.",
			md_col->Mdname().GetMDName()->GetBuffer(),
			md_rel->Mdname().GetMDName()->GetBuffer());
		GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION, NOTICE, msgbuf, NULL);
O
Omer Arap 已提交
2277 2278

		// if the number of MCVs(nvalues) and number of MCFs(nnumbers) do not match, we discard the MCVs and MCFs
2279 2280
		gpdb::FreeAttrStatsSlot(&mcv_slot);
		is_dummy_stats = true;
O
Omer Arap 已提交
2281 2282
	}
	else
2283
	{
O
Omer Arap 已提交
2284
		// fix mcv and null frequencies (sometimes they can add up to more than 1.0)
J
Jesse Zhang 已提交
2285 2286
		NormalizeFrequencies(mcv_slot.numbers, (ULONG) mcv_slot.nvalues,
							 &null_freq);
O
Omer Arap 已提交
2287 2288

		// total MCV frequency
2289 2290
		CDouble sum_mcv_freq = 0.0;
		for (int i = 0; i < mcv_slot.nvalues; i++)
O
Omer Arap 已提交
2291
		{
2292
			sum_mcv_freq = sum_mcv_freq + CDouble(mcv_slot.numbers[i]);
O
Omer Arap 已提交
2293
		}
2294 2295
	}

O
Omer Arap 已提交
2296
	// histogram values extracted from the pg_statistic tuple for a given column
2297
	AttStatsSlot hist_slot;
O
Omer Arap 已提交
2298

2299
	// get histogram datums from pg_statistic entry
J
Jesse Zhang 已提交
2300 2301 2302
	(void) gpdb::GetAttrStatsSlot(&hist_slot, stats_tup,
								  STATISTIC_KIND_HISTOGRAM, InvalidOid,
								  ATTSTATSSLOT_VALUES);
2303

2304
	if (InvalidOid != hist_slot.valuetype && hist_slot.valuetype != att_type)
O
Omer Arap 已提交
2305 2306
	{
		char msgbuf[NAMEDATALEN * 2 + 100];
J
Jesse Zhang 已提交
2307 2308 2309 2310 2311 2312 2313
		snprintf(
			msgbuf, sizeof(msgbuf),
			"Type mismatch between attribute %ls of table %ls having type %d and statistic having type %d, please ANALYZE the table again",
			md_col->Mdname().GetMDName()->GetBuffer(),
			md_rel->Mdname().GetMDName()->GetBuffer(), att_type,
			hist_slot.valuetype);
		GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION, NOTICE, msgbuf, NULL);
O
Omer Arap 已提交
2314

2315 2316
		gpdb::FreeAttrStatsSlot(&hist_slot);
		is_dummy_stats = true;
O
Omer Arap 已提交
2317 2318
	}

2319
	if (is_dummy_stats)
O
Omer Arap 已提交
2320
	{
2321 2322
		dxl_stats_bucket_array->Release();
		mdid_col_stats->AddRef();
O
Omer Arap 已提交
2323

2324 2325
		CDouble col_width = CStatistics::DefaultColumnWidth;
		gpdb::FreeHeapTuple(stats_tup);
J
Jesse Zhang 已提交
2326 2327
		return CDXLColStats::CreateDXLDummyColStats(mp, mdid_col_stats,
													md_colname, col_width);
O
Omer Arap 已提交
2328 2329
	}

2330 2331 2332 2333
	CDouble num_ndv_buckets(0.0);
	CDouble num_freq_buckets(0.0);
	CDouble distinct_remaining(0.0);
	CDouble freq_remaining(0.0);
2334

2335 2336 2337
	// transform all the bits and pieces from pg_statistic
	// to a single bucket structure
	CDXLBucketArray *dxl_stats_bucket_array_transformed =
J
Jesse Zhang 已提交
2338 2339 2340 2341
		TransformStatsToDXLBucketArray(
			mp, att_type, num_distinct, null_freq, mcv_slot.values,
			mcv_slot.numbers, ULONG(mcv_slot.nvalues), hist_slot.values,
			ULONG(hist_slot.nvalues));
2342

2343
	GPOS_ASSERT(NULL != dxl_stats_bucket_array_transformed);
2344

2345 2346 2347 2348 2349 2350
	const ULONG num_buckets = dxl_stats_bucket_array_transformed->Size();
	for (ULONG ul = 0; ul < num_buckets; ul++)
	{
		CDXLBucket *dxl_bucket = (*dxl_stats_bucket_array_transformed)[ul];
		num_ndv_buckets = num_ndv_buckets + dxl_bucket->GetNumDistinct();
		num_freq_buckets = num_freq_buckets + dxl_bucket->GetFrequency();
2351
	}
2352

J
Jesse Zhang 已提交
2353 2354
	CUtils::AddRefAppend(dxl_stats_bucket_array,
						 dxl_stats_bucket_array_transformed);
2355 2356 2357 2358 2359 2360
	dxl_stats_bucket_array_transformed->Release();

	// there will be remaining tuples if the merged histogram and the NULLS do not cover
	// the total number of distinct values
	if ((1 - CStatistics::Epsilon > num_freq_buckets + null_freq) &&
		(0 < num_distinct - num_ndv_buckets))
2361
	{
J
Jesse Zhang 已提交
2362 2363 2364 2365
		distinct_remaining =
			std::max(CDouble(0.0), (num_distinct - num_ndv_buckets));
		freq_remaining =
			std::max(CDouble(0.0), (1 - num_freq_buckets - null_freq));
2366 2367 2368
	}

	// free up allocated datum and float4 arrays
2369 2370
	gpdb::FreeAttrStatsSlot(&mcv_slot);
	gpdb::FreeAttrStatsSlot(&hist_slot);
2371

2372
	gpdb::FreeHeapTuple(stats_tup);
2373 2374

	// create col stats object
2375
	mdid_col_stats->AddRef();
J
Jesse Zhang 已提交
2376 2377 2378 2379
	CDXLColStats *dxl_col_stats = GPOS_NEW(mp) CDXLColStats(
		mp, mdid_col_stats, md_colname, width, null_freq, distinct_remaining,
		freq_remaining, dxl_stats_bucket_array, false /* is_col_stats_missing */
	);
2380

2381
	return dxl_col_stats;
2382 2383 2384 2385 2386
}


//---------------------------------------------------------------------------
//      @function:
2387
//              CTranslatorRelcacheToDXL::GenerateStatsForSystemCols
2388 2389 2390 2391 2392 2393
//
//      @doc:
//              Generate statistics for the system level columns
//
//---------------------------------------------------------------------------
CDXLColStats *
J
Jesse Zhang 已提交
2394 2395 2396 2397
CTranslatorRelcacheToDXL::GenerateStatsForSystemCols(
	CMemoryPool *mp, OID rel_oid, CMDIdColStats *mdid_col_stats,
	CMDName *md_colname, OID att_type, AttrNumber attno,
	CDXLBucketArray *dxl_stats_bucket_array, CDouble num_rows)
2398
{
J
Jesse Zhang 已提交
2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419
	GPOS_ASSERT(NULL != mdid_col_stats);
	GPOS_ASSERT(NULL != md_colname);
	GPOS_ASSERT(InvalidOid != att_type);
	GPOS_ASSERT(0 > attno);
	GPOS_ASSERT(NULL != dxl_stats_bucket_array);

	CMDIdGPDB *mdid_atttype = GPOS_NEW(mp) CMDIdGPDB(att_type);
	IMDType *md_type = RetrieveType(mp, mdid_atttype);
	GPOS_ASSERT(md_type->IsFixedLength());

	BOOL is_col_stats_missing = true;
	CDouble null_freq(0.0);
	CDouble width(md_type->Length());
	CDouble distinct_remaining(0.0);
	CDouble freq_remaining(0.0);

	if (CStatistics::MinRows <= num_rows)
	{
		switch (attno)
		{
			case GpSegmentIdAttributeNumber:  // gp_segment_id
2420
			{
J
Jesse Zhang 已提交
2421 2422 2423 2424
				is_col_stats_missing = false;
				freq_remaining = CDouble(1.0);
				distinct_remaining = CDouble(gpdb::GetGPSegmentCount());
				break;
2425
			}
J
Jesse Zhang 已提交
2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452
			case TableOidAttributeNumber:  // tableoid
			{
				is_col_stats_missing = false;
				freq_remaining = CDouble(1.0);
				distinct_remaining =
					CDouble(RetrieveNumChildPartitions(rel_oid));
				break;
			}
			case SelfItemPointerAttributeNumber:  // ctid
			{
				is_col_stats_missing = false;
				freq_remaining = CDouble(1.0);
				distinct_remaining = num_rows;
				break;
			}
			default:
				break;
		}
	}

	// cleanup
	mdid_atttype->Release();
	md_type->Release();

	return GPOS_NEW(mp) CDXLColStats(
		mp, mdid_col_stats, md_colname, width, null_freq, distinct_remaining,
		freq_remaining, dxl_stats_bucket_array, is_col_stats_missing);
2453 2454 2455 2456 2457
}


//---------------------------------------------------------------------------
//     @function:
2458
//     CTranslatorRelcacheToDXL::RetrieveNumChildPartitions
2459 2460 2461 2462 2463 2464 2465
//
//  @doc:
//      For non-leaf partition tables return the number of child partitions
//      else return 1
//
//---------------------------------------------------------------------------
ULONG
J
Jesse Zhang 已提交
2466
CTranslatorRelcacheToDXL::RetrieveNumChildPartitions(OID rel_oid)
2467
{
J
Jesse Zhang 已提交
2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487
	GPOS_ASSERT(InvalidOid != rel_oid);

	ULONG num_part_tables = gpos::ulong_max;
	if (gpdb::RelPartIsNone(rel_oid))
	{
		// not a partitioned table
		num_part_tables = 1;
	}
	else if (gpdb::IsLeafPartition(rel_oid))
	{
		// leaf partition
		num_part_tables = 1;
	}
	else
	{
		num_part_tables = gpdb::CountLeafPartTables(rel_oid);
	}
	GPOS_ASSERT(gpos::ulong_max != num_part_tables);

	return num_part_tables;
2488 2489 2490 2491 2492
}


//---------------------------------------------------------------------------
//	@function:
2493
//		CTranslatorRelcacheToDXL::RetrieveCast
2494 2495 2496 2497 2498 2499
//
//	@doc:
//		Retrieve a cast function from relcache
//
//---------------------------------------------------------------------------
IMDCacheObject *
J
Jesse Zhang 已提交
2500
CTranslatorRelcacheToDXL::RetrieveCast(CMemoryPool *mp, IMDId *mdid)
2501
{
2502 2503 2504
	CMDIdCast *mdid_cast = CMDIdCast::CastMdid(mdid);
	IMDId *mdid_src = mdid_cast->MdidSrc();
	IMDId *mdid_dest = mdid_cast->MdidDest();
2505
	IMDCast::EmdCoercepathType coercePathType;
2506

2507 2508
	OID src_oid = CMDIdGPDB::CastMdid(mdid_src)->Oid();
	OID dest_oid = CMDIdGPDB::CastMdid(mdid_dest)->Oid();
J
Jesse Zhang 已提交
2509
	CoercionPathType pathtype;
2510

2511 2512
	OID cast_fn_oid = 0;
	BOOL is_binary_coercible = false;
J
Jesse Zhang 已提交
2513 2514 2515 2516

	BOOL cast_exists = gpdb::GetCastFunc(
		src_oid, dest_oid, &is_binary_coercible, &cast_fn_oid, &pathtype);

2517
	if (!cast_exists)
2518
	{
J
Jesse Zhang 已提交
2519 2520 2521 2522
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
	}

2523 2524
	CHAR *func_name = NULL;
	if (InvalidOid != cast_fn_oid)
2525
	{
2526
		func_name = gpdb::GetFuncName(cast_fn_oid);
2527 2528 2529 2530
	}
	else
	{
		// no explicit cast function: use the destination type name as the cast name
2531
		func_name = gpdb::GetTypeName(dest_oid);
2532
	}
J
Jesse Zhang 已提交
2533

2534
	if (NULL == func_name)
2535
	{
J
Jesse Zhang 已提交
2536 2537
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
2538 2539
	}

2540 2541 2542
	mdid->AddRef();
	mdid_src->AddRef();
	mdid_dest->AddRef();
2543

2544
	CMDName *mdname = CDXLUtils::CreateMDNameFromCharArray(mp, func_name);
J
Jesse Zhang 已提交
2545 2546 2547

	switch (pathtype)
	{
2548 2549 2550
		case COERCION_PATH_ARRAYCOERCE:
		{
			coercePathType = IMDCast::EmdtArrayCoerce;
J
Jesse Zhang 已提交
2551 2552 2553 2554
			return GPOS_NEW(mp) CMDArrayCoerceCastGPDB(
				mp, mdid, mdname, mdid_src, mdid_dest, is_binary_coercible,
				GPOS_NEW(mp) CMDIdGPDB(cast_fn_oid), IMDCast::EmdtArrayCoerce,
				default_type_modifier, false, EdxlcfImplicitCast, -1);
2555
		}
J
Jesse Zhang 已提交
2556
		break;
2557
		case COERCION_PATH_FUNC:
J
Jesse Zhang 已提交
2558 2559 2560
			return GPOS_NEW(mp) CMDCastGPDB(
				mp, mdid, mdname, mdid_src, mdid_dest, is_binary_coercible,
				GPOS_NEW(mp) CMDIdGPDB(cast_fn_oid), IMDCast::EmdtFunc);
2561 2562 2563 2564 2565 2566
			break;
		default:
			break;
	}

	// fall back for none path types
J
Jesse Zhang 已提交
2567 2568 2569
	return GPOS_NEW(mp)
		CMDCastGPDB(mp, mdid, mdname, mdid_src, mdid_dest, is_binary_coercible,
					GPOS_NEW(mp) CMDIdGPDB(cast_fn_oid));
2570 2571 2572 2573
}

//---------------------------------------------------------------------------
//	@function:
2574
//		CTranslatorRelcacheToDXL::RetrieveScCmp
2575 2576 2577 2578 2579 2580
//
//	@doc:
//		Retrieve a scalar comparison from relcache
//
//---------------------------------------------------------------------------
IMDCacheObject *
J
Jesse Zhang 已提交
2581
CTranslatorRelcacheToDXL::RetrieveScCmp(CMemoryPool *mp, IMDId *mdid)
2582
{
2583 2584 2585
	CMDIdScCmp *mdid_scalar_cmp = CMDIdScCmp::CastMdid(mdid);
	IMDId *mdid_left = mdid_scalar_cmp->GetLeftMdid();
	IMDId *mdid_right = mdid_scalar_cmp->GetRightMdid();
J
Jesse Zhang 已提交
2586

2587
	IMDType::ECmpType cmp_type = mdid_scalar_cmp->ParseCmpType();
2588

2589 2590 2591
	OID left_oid = CMDIdGPDB::CastMdid(mdid_left)->Oid();
	OID right_oid = CMDIdGPDB::CastMdid(mdid_right)->Oid();
	CmpType cmpt = (CmpType) GetComparisonType(cmp_type);
J
Jesse Zhang 已提交
2592

2593
	OID scalar_cmp_oid = gpdb::GetComparisonOperator(left_oid, right_oid, cmpt);
J
Jesse Zhang 已提交
2594

2595
	if (InvalidOid == scalar_cmp_oid)
2596
	{
J
Jesse Zhang 已提交
2597 2598 2599
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
	}
2600

2601
	CHAR *name = gpdb::GetOpName(scalar_cmp_oid);
2602

2603
	if (NULL == name)
2604
	{
J
Jesse Zhang 已提交
2605 2606
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDCacheEntryNotFound,
				   mdid->GetBuffer());
2607 2608
	}

2609 2610 2611
	mdid->AddRef();
	mdid_left->AddRef();
	mdid_right->AddRef();
2612

2613
	CMDName *mdname = CDXLUtils::CreateMDNameFromCharArray(mp, name);
2614

J
Jesse Zhang 已提交
2615 2616 2617
	return GPOS_NEW(mp)
		CMDScCmpGPDB(mp, mdid, mdname, mdid_left, mdid_right, cmp_type,
					 GPOS_NEW(mp) CMDIdGPDB(scalar_cmp_oid));
2618 2619 2620 2621
}

//---------------------------------------------------------------------------
//	@function:
2622
//		CTranslatorRelcacheToDXL::TransformStatsToDXLBucketArray
2623 2624 2625 2626 2627
//
//	@doc:
//		transform stats from pg_stats form to optimizer's preferred form
//
//---------------------------------------------------------------------------
2628
CDXLBucketArray *
J
Jesse Zhang 已提交
2629 2630 2631 2632
CTranslatorRelcacheToDXL::TransformStatsToDXLBucketArray(
	CMemoryPool *mp, OID att_type, CDouble num_distinct, CDouble null_freq,
	const Datum *mcv_values, const float4 *mcv_frequencies,
	ULONG num_mcv_values, const Datum *hist_values, ULONG num_hist_values)
2633
{
2634 2635
	CMDIdGPDB *mdid_atttype = GPOS_NEW(mp) CMDIdGPDB(att_type);
	IMDType *md_type = RetrieveType(mp, mdid_atttype);
2636 2637

	// translate MCVs to Orca histogram. Create an empty histogram if there are no MCVs.
J
Jesse Zhang 已提交
2638 2639
	CHistogram *gpdb_mcv_hist = TransformMcvToOrcaHistogram(
		mp, md_type, mcv_values, mcv_frequencies, num_mcv_values);
2640

2641
	GPOS_ASSERT(gpdb_mcv_hist->IsValid());
2642

2643 2644
	CDouble mcv_freq = gpdb_mcv_hist->GetFrequency();
	BOOL has_mcv = 0 < num_mcv_values && CStatistics::Epsilon < mcv_freq;
2645

2646 2647
	CDouble hist_freq = 0.0;
	if (1 < num_hist_values)
2648
	{
2649
		hist_freq = CDouble(1.0) - null_freq - mcv_freq;
2650
	}
J
Jesse Zhang 已提交
2651 2652 2653 2654 2655 2656

	BOOL is_text_type = mdid_atttype->Equals(&CMDIdGPDB::m_mdid_varchar) ||
						mdid_atttype->Equals(&CMDIdGPDB::m_mdid_bpchar) ||
						mdid_atttype->Equals(&CMDIdGPDB::m_mdid_text);
	BOOL has_hist = !is_text_type && 1 < num_hist_values &&
					CStatistics::Epsilon < hist_freq;
2657

2658
	CHistogram *histogram = NULL;
2659 2660

	// if histogram has any significant information, then extract it
2661
	if (has_hist)
2662 2663
	{
		// histogram from gpdb histogram
J
Jesse Zhang 已提交
2664 2665
		histogram = TransformHistToOrcaHistogram(
			mp, md_type, hist_values, num_hist_values, num_distinct, hist_freq);
A
Ashuka Xue 已提交
2666
		if (0 == histogram->GetNumBuckets())
2667
		{
2668
			has_hist = false;
2669
		}
2670 2671
	}

2672
	CDXLBucketArray *dxl_stats_bucket_array = NULL;
2673

2674
	if (has_hist && !has_mcv)
2675 2676
	{
		// if histogram exists and dominates, use histogram only
J
Jesse Zhang 已提交
2677 2678
		dxl_stats_bucket_array =
			TransformHistogramToDXLBucketArray(mp, md_type, histogram);
2679
	}
2680
	else if (!has_hist && has_mcv)
2681 2682
	{
		// if MCVs exist and dominate, use MCVs only
J
Jesse Zhang 已提交
2683 2684
		dxl_stats_bucket_array =
			TransformHistogramToDXLBucketArray(mp, md_type, gpdb_mcv_hist);
2685
	}
2686
	else if (has_hist && has_mcv)
2687 2688
	{
		// both histogram and MCVs exist and have significant info, merge MCV and histogram buckets
J
Jesse Zhang 已提交
2689 2690 2691 2692
		CHistogram *merged_hist =
			CStatisticsUtils::MergeMCVHist(mp, gpdb_mcv_hist, histogram);
		dxl_stats_bucket_array =
			TransformHistogramToDXLBucketArray(mp, md_type, merged_hist);
2693
		GPOS_DELETE(merged_hist);
2694 2695 2696 2697
	}
	else
	{
		// no MCVs nor histogram
2698 2699
		GPOS_ASSERT(!has_hist && !has_mcv);
		dxl_stats_bucket_array = GPOS_NEW(mp) CDXLBucketArray(mp);
2700 2701 2702
	}

	// cleanup
2703 2704 2705
	mdid_atttype->Release();
	md_type->Release();
	GPOS_DELETE(gpdb_mcv_hist);
2706

2707
	if (NULL != histogram)
2708
	{
2709
		GPOS_DELETE(histogram);
2710 2711
	}

2712
	return dxl_stats_bucket_array;
2713 2714 2715 2716
}

//---------------------------------------------------------------------------
//	@function:
2717
//		CTranslatorRelcacheToDXL::TransformMcvToOrcaHistogram
2718 2719 2720 2721 2722 2723
//
//	@doc:
//		Transform gpdb's mcv info to optimizer histogram
//
//---------------------------------------------------------------------------
CHistogram *
J
Jesse Zhang 已提交
2724 2725 2726
CTranslatorRelcacheToDXL::TransformMcvToOrcaHistogram(
	CMemoryPool *mp, const IMDType *md_type, const Datum *mcv_values,
	const float4 *mcv_frequencies, ULONG num_mcv_values)
2727
{
2728 2729
	IDatumArray *datums = GPOS_NEW(mp) IDatumArray(mp);
	CDoubleArray *freqs = GPOS_NEW(mp) CDoubleArray(mp);
2730

2731
	for (ULONG ul = 0; ul < num_mcv_values; ul++)
2732
	{
2733
		Datum datumMCV = mcv_values[ul];
J
Jesse Zhang 已提交
2734 2735
		IDatum *datum = CTranslatorScalarToDXL::CreateIDatumFromGpdbDatum(
			mp, md_type, false /* is_null */, datumMCV);
2736 2737
		datums->Append(datum);
		freqs->Append(GPOS_NEW(mp) CDouble(mcv_frequencies[ul]));
2738

2739
		if (!datum->StatsAreComparable(datum))
2740 2741 2742
		{
			// if less than operation is not supported on this datum, then no point
			// building a histogram. return an empty histogram
2743 2744
			datums->Release();
			freqs->Release();
2745
			return GPOS_NEW(mp) CHistogram(mp);
2746 2747 2748
		}
	}

J
Jesse Zhang 已提交
2749 2750
	CHistogram *hist = CStatisticsUtils::TransformMCVToHist(
		mp, md_type, datums, freqs, num_mcv_values);
2751

2752 2753 2754
	datums->Release();
	freqs->Release();
	return hist;
2755 2756 2757 2758
}

//---------------------------------------------------------------------------
//	@function:
2759
//		CTranslatorRelcacheToDXL::TransformHistToOrcaHistogram
2760 2761 2762 2763 2764 2765
//
//	@doc:
//		Transform GPDB's hist info to optimizer's histogram
//
//---------------------------------------------------------------------------
CHistogram *
J
Jesse Zhang 已提交
2766 2767 2768
CTranslatorRelcacheToDXL::TransformHistToOrcaHistogram(
	CMemoryPool *mp, const IMDType *md_type, const Datum *hist_values,
	ULONG num_hist_values, CDouble num_distinct, CDouble hist_freq)
2769
{
2770
	GPOS_ASSERT(1 < num_hist_values);
2771

2772 2773 2774
	const ULONG num_buckets = num_hist_values - 1;
	CDouble distinct_per_bucket = num_distinct / CDouble(num_buckets);
	CDouble freq_per_bucket = hist_freq / CDouble(num_buckets);
2775

2776
	BOOL last_bucket_was_singleton = false;
2777
	// create buckets
2778 2779
	CBucketArray *buckets = GPOS_NEW(mp) CBucketArray(mp);
	for (ULONG ul = 0; ul < num_buckets; ul++)
2780
	{
J
Jesse Zhang 已提交
2781 2782 2783 2784
		IDatum *min_datum = CTranslatorScalarToDXL::CreateIDatumFromGpdbDatum(
			mp, md_type, false /* is_null */, hist_values[ul]);
		IDatum *max_datum = CTranslatorScalarToDXL::CreateIDatumFromGpdbDatum(
			mp, md_type, false /* is_null */, hist_values[ul + 1]);
2785
		BOOL is_lower_closed, is_upper_closed;
2786

2787
		if (min_datum->StatsAreEqual(max_datum))
2788 2789
		{
			// Singleton bucket !!!!!!!!!!!!!
2790 2791 2792
			is_lower_closed = true;
			is_upper_closed = true;
			last_bucket_was_singleton = true;
2793
		}
2794
		else if (last_bucket_was_singleton)
2795 2796
		{
			// Last bucket was a singleton, so lower must be open now.
2797 2798 2799
			is_lower_closed = false;
			is_upper_closed = false;
			last_bucket_was_singleton = false;
2800 2801 2802 2803 2804
		}
		else
		{
			// Normal bucket
			// GPDB histograms assumes lower bound to be closed and upper bound to be open
2805 2806
			is_lower_closed = true;
			is_upper_closed = false;
2807
		}
2808

2809
		if (ul == num_buckets - 1)
2810 2811
		{
			// last bucket upper bound is also closed
2812
			is_upper_closed = true;
2813 2814
		}

J
Jesse Zhang 已提交
2815 2816 2817 2818
		CBucket *bucket = GPOS_NEW(mp)
			CBucket(GPOS_NEW(mp) CPoint(min_datum),
					GPOS_NEW(mp) CPoint(max_datum), is_lower_closed,
					is_upper_closed, freq_per_bucket, distinct_per_bucket);
2819
		buckets->Append(bucket);
2820

J
Jesse Zhang 已提交
2821 2822
		if (!min_datum->StatsAreComparable(max_datum) ||
			!min_datum->StatsAreLessThan(max_datum))
2823 2824 2825 2826 2827 2828 2829 2830
		{
			// if less than operation is not supported on this datum,
			// or the translated histogram does not conform to GPDB sort order (e.g. text column in Linux platform),
			// then no point building a histogram. return an empty histogram

			// TODO: 03/01/2014 translate histogram into Orca even if sort
			// order is different in GPDB, and use const expression eval to compare
			// datums in Orca (MPP-22780)
2831
			buckets->Release();
2832
			return GPOS_NEW(mp) CHistogram(mp);
2833 2834 2835
		}
	}

2836
	CHistogram *hist = GPOS_NEW(mp) CHistogram(mp, buckets);
2837
	return hist;
2838 2839 2840 2841 2842
}


//---------------------------------------------------------------------------
//	@function:
2843
//		CTranslatorRelcacheToDXL::TransformHistogramToDXLBucketArray
2844 2845 2846 2847 2848
//
//	@doc:
//		Histogram to array of dxl buckets
//
//---------------------------------------------------------------------------
2849
CDXLBucketArray *
J
Jesse Zhang 已提交
2850 2851
CTranslatorRelcacheToDXL::TransformHistogramToDXLBucketArray(
	CMemoryPool *mp, const IMDType *md_type, const CHistogram *hist)
2852
{
2853
	CDXLBucketArray *dxl_stats_bucket_array = GPOS_NEW(mp) CDXLBucketArray(mp);
A
Ashuka Xue 已提交
2854
	const CBucketArray *buckets = hist->GetBuckets();
2855 2856 2857 2858 2859 2860 2861 2862
	ULONG num_buckets = buckets->Size();
	for (ULONG ul = 0; ul < num_buckets; ul++)
	{
		CBucket *bucket = (*buckets)[ul];
		IDatum *datum_lower = bucket->GetLowerBound()->GetDatum();
		CDXLDatum *dxl_lower = md_type->GetDatumVal(mp, datum_lower);
		IDatum *datum_upper = bucket->GetUpperBound()->GetDatum();
		CDXLDatum *dxl_upper = md_type->GetDatumVal(mp, datum_upper);
J
Jesse Zhang 已提交
2863 2864 2865 2866
		CDXLBucket *dxl_bucket = GPOS_NEW(mp)
			CDXLBucket(dxl_lower, dxl_upper, bucket->IsLowerClosed(),
					   bucket->IsUpperClosed(), bucket->GetFrequency(),
					   bucket->GetNumDistinct());
2867
		dxl_stats_bucket_array->Append(dxl_bucket);
2868
	}
2869
	return dxl_stats_bucket_array;
2870 2871 2872 2873
}

//---------------------------------------------------------------------------
//	@function:
2874
//		CTranslatorRelcacheToDXL::RetrieveRelStorageType
2875 2876 2877 2878 2879 2880
//
//	@doc:
//		Get relation storage type
//
//---------------------------------------------------------------------------
IMDRelation::Erelstoragetype
J
Jesse Zhang 已提交
2881
CTranslatorRelcacheToDXL::RetrieveRelStorageType(CHAR storage_type)
2882
{
J
Jesse Zhang 已提交
2883 2884
	IMDRelation::Erelstoragetype rel_storage_type =
		IMDRelation::ErelstorageSentinel;
2885

2886
	switch (storage_type)
2887 2888
	{
		case RELSTORAGE_HEAP:
2889
			rel_storage_type = IMDRelation::ErelstorageHeap;
2890 2891
			break;
		case RELSTORAGE_AOCOLS:
2892
			rel_storage_type = IMDRelation::ErelstorageAppendOnlyCols;
2893 2894
			break;
		case RELSTORAGE_AOROWS:
2895
			rel_storage_type = IMDRelation::ErelstorageAppendOnlyRows;
2896 2897
			break;
		case RELSTORAGE_VIRTUAL:
2898
			rel_storage_type = IMDRelation::ErelstorageVirtual;
2899 2900
			break;
		case RELSTORAGE_EXTERNAL:
2901
			rel_storage_type = IMDRelation::ErelstorageExternal;
2902 2903 2904 2905 2906
			break;
		default:
			GPOS_ASSERT(!"Unsupported relation type");
	}

2907
	return rel_storage_type;
2908 2909 2910 2911
}

//---------------------------------------------------------------------------
//	@function:
2912
//		CTranslatorRelcacheToDXL::RetrievePartKeysAndTypes
2913 2914
//
//	@doc:
2915
//		Get partition keys and types for relation or NULL if relation not partitioned.
2916 2917 2918
//		Caller responsible for closing the relation if an exception is raised
//
//---------------------------------------------------------------------------
2919
void
J
Jesse Zhang 已提交
2920 2921 2922 2923
CTranslatorRelcacheToDXL::RetrievePartKeysAndTypes(CMemoryPool *mp,
												   Relation rel, OID oid,
												   ULongPtrArray **part_keys,
												   CharPtrArray **part_types)
2924 2925 2926
{
	GPOS_ASSERT(NULL != rel);

2927
	if (!gpdb::RelPartIsRoot(oid))
2928 2929
	{
		// not a partitioned table
2930 2931
		*part_keys = NULL;
		*part_types = NULL;
2932
		return;
2933 2934 2935 2936
	}

	// TODO: Feb 23, 2012; support intermediate levels

2937 2938
	*part_keys = GPOS_NEW(mp) ULongPtrArray(mp);
	*part_types = GPOS_NEW(mp) CharPtrArray(mp);
2939

J
Jesse Zhang 已提交
2940 2941 2942
	PartitionNode *pn =
		gpdb::GetParts(oid, 0 /*level*/, 0 /*parent*/, false /*inctemplate*/,
					   true /*includesubparts*/);
2943
	GPOS_ASSERT(NULL != pn);
2944

2945 2946
	if (gpdb::FHashPartitioned(pn->part->parkind))
	{
J
Jesse Zhang 已提交
2947 2948
		GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDObjUnsupported,
				   GPOS_WSZ_LIT("Hash partitioning"));
2949 2950
	}

2951 2952 2953 2954 2955 2956
	List *part_keys_list = NIL;
	List *part_types_list = NIL;
	gpdb::GetOrderedPartKeysAndKinds(oid, &part_keys_list, &part_types_list);

	ListCell *lc_key = NULL;
	ListCell *lc_type = NULL;
J
Jesse Zhang 已提交
2957
	ForBoth(lc_key, part_keys_list, lc_type, part_types_list)
2958
	{
2959
		List *part_key = (List *) lfirst(lc_key);
2960

2961
		if (1 < gpdb::ListLength(part_key))
2962
		{
J
Jesse Zhang 已提交
2963 2964
			GPOS_RAISE(gpdxl::ExmaMD, gpdxl::ExmiMDObjUnsupported,
					   GPOS_WSZ_LIT("Composite part key"));
2965 2966
		}

2967 2968 2969 2970 2971
		INT attno = linitial_int(part_key);
		CHAR part_type = (CHAR) lfirst_int(lc_type);
		GPOS_ASSERT(0 < attno);
		(*part_keys)->Append(GPOS_NEW(mp) ULONG(attno - 1));
		(*part_types)->Append(GPOS_NEW(mp) CHAR(part_type));
2972 2973
	}

2974 2975
	gpdb::ListFree(part_keys_list);
	gpdb::ListFree(part_types_list);
2976 2977 2978 2979 2980
}


//---------------------------------------------------------------------------
//	@function:
2981
//		CTranslatorRelcacheToDXL::ConstructAttnoMapping
2982 2983 2984 2985 2986 2987
//
//	@doc:
//		Construct a mapping for GPDB attnos to positions in the columns array
//
//---------------------------------------------------------------------------
ULONG *
J
Jesse Zhang 已提交
2988 2989 2990
CTranslatorRelcacheToDXL::ConstructAttnoMapping(CMemoryPool *mp,
												CMDColumnArray *mdcol_array,
												ULONG max_cols)
2991
{
2992 2993 2994
	GPOS_ASSERT(NULL != mdcol_array);
	GPOS_ASSERT(0 < mdcol_array->Size());
	GPOS_ASSERT(max_cols > mdcol_array->Size());
2995 2996

	// build a mapping for attnos->positions
2997 2998
	const ULONG num_of_cols = mdcol_array->Size();
	ULONG *attno_mapping = GPOS_NEW_ARRAY(mp, ULONG, max_cols);
2999

3000
	// initialize all positions to gpos::ulong_max
J
Jesse Zhang 已提交
3001
	for (ULONG ul = 0; ul < max_cols; ul++)
3002
	{
3003
		attno_mapping[ul] = gpos::ulong_max;
3004
	}
J
Jesse Zhang 已提交
3005 3006

	for (ULONG ul = 0; ul < num_of_cols; ul++)
3007
	{
3008 3009
		const IMDColumn *md_col = (*mdcol_array)[ul];
		INT attno = md_col->AttrNum();
3010

J
Jesse Zhang 已提交
3011
		ULONG idx = (ULONG)(GPDXL_SYSTEM_COLUMNS + attno);
3012
		attno_mapping[idx] = ul;
3013 3014
	}

3015
	return attno_mapping;
3016 3017 3018 3019
}

//---------------------------------------------------------------------------
//	@function:
3020
//		CTranslatorRelcacheToDXL::RetrieveRelKeysets
3021 3022 3023 3024 3025
//
//	@doc:
//		Get key sets for relation
//
//---------------------------------------------------------------------------
3026
ULongPtr2dArray *
J
Jesse Zhang 已提交
3027 3028 3029 3030
CTranslatorRelcacheToDXL::RetrieveRelKeysets(CMemoryPool *mp, OID oid,
											 BOOL should_add_default_keys,
											 BOOL is_partitioned,
											 ULONG *attno_mapping)
3031
{
3032
	ULongPtr2dArray *key_sets = GPOS_NEW(mp) ULongPtr2dArray(mp);
3033

3034
	List *rel_keys = gpdb::GetRelationKeys(oid);
3035

3036
	ListCell *lc_key = NULL;
J
Jesse Zhang 已提交
3037
	ForEach(lc_key, rel_keys)
3038
	{
3039
		List *key_elem_list = (List *) lfirst(lc_key);
3040

3041
		ULongPtrArray *key_set = GPOS_NEW(mp) ULongPtrArray(mp);
3042

3043
		ListCell *lc_key_elem = NULL;
J
Jesse Zhang 已提交
3044
		ForEach(lc_key_elem, key_elem_list)
3045
		{
3046 3047 3048
			INT key_idx = lfirst_int(lc_key_elem);
			ULONG pos = GetAttributePosition(key_idx, attno_mapping);
			key_set->Append(GPOS_NEW(mp) ULONG(pos));
3049
		}
3050
		GPOS_ASSERT(0 < key_set->Size());
3051

3052
		key_sets->Append(key_set);
3053
	}
J
Jesse Zhang 已提交
3054

3055
	// add {segid, ctid} as a key
J
Jesse Zhang 已提交
3056

3057
	if (should_add_default_keys)
3058
	{
3059 3060
		ULongPtrArray *key_set = GPOS_NEW(mp) ULongPtrArray(mp);
		if (is_partitioned)
3061 3062
		{
			// TableOid is part of default key for partitioned tables
J
Jesse Zhang 已提交
3063 3064
			ULONG table_oid_pos =
				GetAttributePosition(TableOidAttributeNumber, attno_mapping);
3065
			key_set->Append(GPOS_NEW(mp) ULONG(table_oid_pos));
3066
		}
J
Jesse Zhang 已提交
3067 3068 3069 3070
		ULONG seg_id_pos =
			GetAttributePosition(GpSegmentIdAttributeNumber, attno_mapping);
		ULONG ctid_pos =
			GetAttributePosition(SelfItemPointerAttributeNumber, attno_mapping);
3071 3072
		key_set->Append(GPOS_NEW(mp) ULONG(seg_id_pos));
		key_set->Append(GPOS_NEW(mp) ULONG(ctid_pos));
J
Jesse Zhang 已提交
3073

3074
		key_sets->Append(key_set);
3075
	}
J
Jesse Zhang 已提交
3076

3077
	return key_sets;
3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorRelcacheToDXL::NormalizeFrequencies
//
//	@doc:
//		Sometimes a set of frequencies can add up to more than 1.0.
//		Fix these cases
//
//---------------------------------------------------------------------------
void
J
Jesse Zhang 已提交
3090 3091
CTranslatorRelcacheToDXL::NormalizeFrequencies(float4 *freqs, ULONG length,
											   CDouble *null_freq)
3092
{
3093
	if (length == 0 && (*null_freq) < 1.0)
3094 3095 3096 3097
	{
		return;
	}

3098 3099
	CDouble total = *null_freq;
	for (ULONG ul = 0; ul < length; ul++)
3100
	{
3101
		total = total + CDouble(freqs[ul]);
3102 3103
	}

3104
	if (total > CDouble(1.0))
3105
	{
J
Jesse Zhang 已提交
3106
		float4 denom = (float4)(total + CStatistics::Epsilon).Get();
3107 3108

		// divide all values by the total
3109
		for (ULONG ul = 0; ul < length; ul++)
3110
		{
3111
			freqs[ul] = freqs[ul] / denom;
3112
		}
3113
		*null_freq = *null_freq / denom;
3114 3115 3116 3117
	}

#ifdef GPOS_DEBUG
	// recheck
3118 3119
	CDouble recheck_total = *null_freq;
	for (ULONG ul = 0; ul < length; ul++)
3120
	{
3121
		recheck_total = recheck_total + CDouble(freqs[ul]);
3122
	}
3123
	GPOS_ASSERT(recheck_total <= CDouble(1.0));
3124 3125 3126 3127 3128
#endif
}

//---------------------------------------------------------------------------
//	@function:
3129
//		CTranslatorRelcacheToDXL::IsIndexSupported
3130 3131 3132 3133 3134 3135
//
//	@doc:
//		Check if index type is supported
//
//---------------------------------------------------------------------------
BOOL
J
Jesse Zhang 已提交
3136
CTranslatorRelcacheToDXL::IsIndexSupported(Relation index_rel)
3137
{
3138
	HeapTupleData *tup = index_rel->rd_indextuple;
J
Jesse Zhang 已提交
3139

3140
	// index expressions and index constraints not supported
3141
	return gpdb::HeapAttIsNull(tup, Anum_pg_index_indexprs) &&
J
Jesse Zhang 已提交
3142 3143 3144 3145 3146
		   gpdb::HeapAttIsNull(tup, Anum_pg_index_indpred) &&
		   index_rel->rd_index->indisvalid &&
		   (BTREE_AM_OID == index_rel->rd_rel->relam ||
			BITMAP_AM_OID == index_rel->rd_rel->relam ||
			GIST_AM_OID == index_rel->rd_rel->relam);
3147 3148 3149 3150
}

//---------------------------------------------------------------------------
//	@function:
3151
//		CTranslatorRelcacheToDXL::RetrievePartConstraintForIndex
3152 3153 3154 3155 3156 3157
//
//	@doc:
//		Retrieve part constraint for index
//
//---------------------------------------------------------------------------
CMDPartConstraintGPDB *
J
Jesse Zhang 已提交
3158 3159 3160 3161
CTranslatorRelcacheToDXL::RetrievePartConstraintForIndex(
	CMemoryPool *mp, CMDAccessor *md_accessor, const IMDRelation *md_rel,
	Node *part_constraint, ULongPtrArray *level_with_default_part_array,
	BOOL is_unbounded)
3162
{
3163 3164
	CDXLColDescrArray *dxl_col_descr_array = GPOS_NEW(mp) CDXLColDescrArray(mp);
	const ULONG num_columns = md_rel->ColumnCount();
J
Jesse Zhang 已提交
3165

3166
	for (ULONG ul = 0; ul < num_columns; ul++)
3167
	{
3168
		const IMDColumn *md_col = md_rel->GetMdCol(ul);
J
Jesse Zhang 已提交
3169 3170
		CMDName *md_colname =
			GPOS_NEW(mp) CMDName(mp, md_col->Mdname().GetMDName());
3171 3172
		CMDIdGPDB *mdid_col_type = CMDIdGPDB::CastMdid(md_col->MdidType());
		mdid_col_type->AddRef();
3173 3174

		// create a column descriptor for the column
J
Jesse Zhang 已提交
3175 3176 3177 3178 3179 3180
		CDXLColDescr *dxl_col_descr = GPOS_NEW(mp) CDXLColDescr(
			mp, md_colname,
			ul + 1,	 // colid
			md_col->AttrNum(), mdid_col_type, md_col->TypeModifier(),
			false  // fColDropped
		);
3181
		dxl_col_descr_array->Append(dxl_col_descr);
3182
	}
3183

J
Jesse Zhang 已提交
3184 3185 3186 3187
	CMDPartConstraintGPDB *mdpart_constraint = RetrievePartConstraintFromNode(
		mp, md_accessor, dxl_col_descr_array, part_constraint,
		level_with_default_part_array, is_unbounded);

3188
	dxl_col_descr_array->Release();
3189

3190
	return mdpart_constraint;
3191 3192 3193 3194
}

//---------------------------------------------------------------------------
//	@function:
3195
//		CTranslatorRelcacheToDXL::RetrievePartConstraintForRel
3196 3197 3198 3199 3200 3201
//
//	@doc:
//		Retrieve part constraint for relation
//
//---------------------------------------------------------------------------
CMDPartConstraintGPDB *
J
Jesse Zhang 已提交
3202 3203 3204
CTranslatorRelcacheToDXL::RetrievePartConstraintForRel(
	CMemoryPool *mp, CMDAccessor *md_accessor, OID rel_oid,
	CMDColumnArray *mdcol_array, bool has_index)
3205 3206
{
	// get the part constraints
3207 3208
	List *default_levels_rel = NIL;
	Node *node = gpdb::GetRelationPartContraints(rel_oid, &default_levels_rel);
3209

3210 3211
	// don't retrieve part constraints if there are no indices
	// and no default partitions at any level
3212
	if (!has_index && NIL == default_levels_rel)
3213 3214 3215 3216
	{
		return NULL;
	}

3217 3218 3219
	List *part_keys = gpdb::GetPartitionAttrs(rel_oid);
	const ULONG num_of_levels = gpdb::ListLength(part_keys);
	gpdb::ListFree(part_keys);
3220

3221 3222 3223
	BOOL is_unbounded = true;
	ULongPtrArray *default_levels_derived = GPOS_NEW(mp) ULongPtrArray(mp);
	for (ULONG ul = 0; ul < num_of_levels; ul++)
3224
	{
3225
		if (LevelHasDefaultPartition(default_levels_rel, ul))
3226
		{
3227
			default_levels_derived->Append(GPOS_NEW(mp) ULONG(ul));
3228 3229 3230
		}
		else
		{
3231
			is_unbounded = false;
3232 3233 3234
		}
	}

3235
	CMDPartConstraintGPDB *mdpart_constraint = NULL;
3236

3237
	if (!has_index)
3238
	{
3239 3240 3241
		// if there are no indices then we don't need to construct the partition constraint
		// expression since ORCA is never going to use it.
		// only send the default partition information.
3242
		default_levels_derived->AddRef();
J
Jesse Zhang 已提交
3243 3244
		mdpart_constraint = GPOS_NEW(mp) CMDPartConstraintGPDB(
			mp, default_levels_derived, is_unbounded, NULL);
3245 3246 3247
	}
	else
	{
J
Jesse Zhang 已提交
3248 3249
		CDXLColDescrArray *dxl_col_descr_array =
			GPOS_NEW(mp) CDXLColDescrArray(mp);
3250 3251
		const ULONG num_columns = mdcol_array->Size();
		for (ULONG ul = 0; ul < num_columns; ul++)
3252
		{
3253
			const IMDColumn *md_col = (*mdcol_array)[ul];
J
Jesse Zhang 已提交
3254 3255
			CMDName *md_colname =
				GPOS_NEW(mp) CMDName(mp, md_col->Mdname().GetMDName());
3256 3257
			CMDIdGPDB *mdid_col_type = CMDIdGPDB::CastMdid(md_col->MdidType());
			mdid_col_type->AddRef();
3258

3259
			// create a column descriptor for the column
J
Jesse Zhang 已提交
3260 3261 3262 3263 3264 3265
			CDXLColDescr *dxl_col_descr = GPOS_NEW(mp) CDXLColDescr(
				mp, md_colname,
				ul + 1,	 // colid
				md_col->AttrNum(), mdid_col_type, md_col->TypeModifier(),
				false  // fColDropped
			);
3266
			dxl_col_descr_array->Append(dxl_col_descr);
3267 3268
		}

J
Jesse Zhang 已提交
3269 3270 3271
		mdpart_constraint = RetrievePartConstraintFromNode(
			mp, md_accessor, dxl_col_descr_array, node, default_levels_derived,
			is_unbounded);
3272
		dxl_col_descr_array->Release();
3273 3274
	}

3275 3276
	gpdb::ListFree(default_levels_rel);
	default_levels_derived->Release();
3277

3278
	return mdpart_constraint;
3279 3280 3281 3282
}

//---------------------------------------------------------------------------
//	@function:
3283
//		CTranslatorRelcacheToDXL::RetrievePartConstraintFromNode
3284 3285 3286 3287 3288 3289
//
//	@doc:
//		Retrieve part constraint from GPDB node
//
//---------------------------------------------------------------------------
CMDPartConstraintGPDB *
J
Jesse Zhang 已提交
3290 3291 3292 3293
CTranslatorRelcacheToDXL::RetrievePartConstraintFromNode(
	CMemoryPool *mp, CMDAccessor *md_accessor,
	CDXLColDescrArray *dxl_col_descr_array, Node *part_constraints,
	ULongPtrArray *level_with_default_part_array, BOOL is_unbounded)
3294
{
3295
	if (NULL == part_constraints)
3296 3297 3298 3299
	{
		return NULL;
	}

J
Jesse Zhang 已提交
3300 3301 3302 3303 3304 3305 3306 3307
	CTranslatorScalarToDXL scalar_translator(mp, md_accessor,
											 NULL,	// pulidgtorCol
											 NULL,	// pulidgtorCTE
											 0,		// query_level
											 true,	// m_fQuery
											 NULL,	// query_level_to_cte_map
											 NULL	// cte_dxlnode_array
	);
3308 3309

	// generate a mock mapping between var to column information
3310
	CMappingVarColId *var_colid_mapping = GPOS_NEW(mp) CMappingVarColId(mp);
3311

J
Jesse Zhang 已提交
3312 3313
	var_colid_mapping->LoadColumns(0 /*query_level */, 1 /* rteIndex */,
								   dxl_col_descr_array);
3314 3315

	// translate the check constraint expression
J
Jesse Zhang 已提交
3316 3317
	CDXLNode *scalar_dxlnode = scalar_translator.TranslateScalarToDXL(
		(Expr *) part_constraints, var_colid_mapping);
3318 3319

	// cleanup
3320
	GPOS_DELETE(var_colid_mapping);
3321

3322
	level_with_default_part_array->AddRef();
J
Jesse Zhang 已提交
3323 3324
	return GPOS_NEW(mp) CMDPartConstraintGPDB(mp, level_with_default_part_array,
											  is_unbounded, scalar_dxlnode);
3325 3326 3327 3328
}

//---------------------------------------------------------------------------
//	@function:
3329
//		CTranslatorRelcacheToDXL::RelHasSystemColumns
3330 3331 3332 3333 3334 3335 3336 3337
//
//	@doc:
//		Does given relation type have system columns.
//		Currently only regular relations, sequences, toast values relations and
//		AO segment relations have system columns
//
//---------------------------------------------------------------------------
BOOL
J
Jesse Zhang 已提交
3338
CTranslatorRelcacheToDXL::RelHasSystemColumns(char rel_kind)
3339
{
J
Jesse Zhang 已提交
3340 3341
	return RELKIND_RELATION == rel_kind || RELKIND_SEQUENCE == rel_kind ||
		   RELKIND_AOSEGMENTS == rel_kind || RELKIND_TOASTVALUE == rel_kind;
3342 3343 3344 3345
}

//---------------------------------------------------------------------------
//	@function:
3346
//		CTranslatorRelcacheToDXL::ParseCmpType
3347 3348 3349 3350 3351 3352
//
//	@doc:
//		Translate GPDB comparison types into optimizer comparison types
//
//---------------------------------------------------------------------------
IMDType::ECmpType
J
Jesse Zhang 已提交
3353
CTranslatorRelcacheToDXL::ParseCmpType(ULONG cmpt)
3354
{
3355
	for (ULONG ul = 0; ul < GPOS_ARRAY_SIZE(cmp_type_mappings); ul++)
3356
	{
3357 3358
		const ULONG *mapping = cmp_type_mappings[ul];
		if (mapping[1] == cmpt)
3359
		{
3360
			return (IMDType::ECmpType) mapping[0];
3361 3362
		}
	}
J
Jesse Zhang 已提交
3363

3364 3365 3366 3367 3368
	return IMDType::EcmptOther;
}

//---------------------------------------------------------------------------
//	@function:
3369
//		CTranslatorRelcacheToDXL::GetComparisonType
3370 3371 3372 3373 3374
//
//	@doc:
//		Translate optimizer comparison types into GPDB comparison types
//
//---------------------------------------------------------------------------
J
Jesse Zhang 已提交
3375 3376
ULONG
CTranslatorRelcacheToDXL::GetComparisonType(IMDType::ECmpType cmp_type)
3377
{
3378
	for (ULONG ul = 0; ul < GPOS_ARRAY_SIZE(cmp_type_mappings); ul++)
3379
	{
3380 3381
		const ULONG *mapping = cmp_type_mappings[ul];
		if (mapping[0] == cmp_type)
3382
		{
3383
			return (ULONG) mapping[1];
3384 3385
		}
	}
J
Jesse Zhang 已提交
3386

3387 3388 3389 3390 3391
	return CmptOther;
}

//---------------------------------------------------------------------------
//	@function:
3392
//		CTranslatorRelcacheToDXL::RetrieveIndexOpFamilies
3393 3394
//
//	@doc:
3395
//		Retrieve the opfamilies for the keys of the given index
3396 3397
//
//---------------------------------------------------------------------------
3398
IMdIdArray *
J
Jesse Zhang 已提交
3399 3400
CTranslatorRelcacheToDXL::RetrieveIndexOpFamilies(CMemoryPool *mp,
												  IMDId *mdid_index)
3401
{
J
Jesse Zhang 已提交
3402 3403
	List *op_families =
		gpdb::GetIndexOpFamilies(CMDIdGPDB::CastMdid(mdid_index)->Oid());
3404
	IMdIdArray *input_col_mdids = GPOS_NEW(mp) IMdIdArray(mp);
J
Jesse Zhang 已提交
3405

3406
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
3407

3408
	ForEach(lc, op_families)
3409
	{
3410 3411
		OID op_family_oid = lfirst_oid(lc);
		input_col_mdids->Append(GPOS_NEW(mp) CMDIdGPDB(op_family_oid));
3412
	}
J
Jesse Zhang 已提交
3413

3414
	return input_col_mdids;
3415 3416 3417 3418
}

//---------------------------------------------------------------------------
//	@function:
3419
//		CTranslatorRelcacheToDXL::RetrieveScOpOpFamilies
3420 3421
//
//	@doc:
3422
//		Retrieve the families for the keys of the given scalar operator
3423 3424
//
//---------------------------------------------------------------------------
3425
IMdIdArray *
J
Jesse Zhang 已提交
3426 3427
CTranslatorRelcacheToDXL::RetrieveScOpOpFamilies(CMemoryPool *mp,
												 IMDId *mdid_scalar_op)
3428
{
J
Jesse Zhang 已提交
3429 3430
	List *op_families =
		gpdb::GetOpFamiliesForScOp(CMDIdGPDB::CastMdid(mdid_scalar_op)->Oid());
3431
	IMdIdArray *input_col_mdids = GPOS_NEW(mp) IMdIdArray(mp);
J
Jesse Zhang 已提交
3432

3433
	ListCell *lc = NULL;
J
Jesse Zhang 已提交
3434

3435
	ForEach(lc, op_families)
3436
	{
3437 3438
		OID op_family_oid = lfirst_oid(lc);
		input_col_mdids->Append(GPOS_NEW(mp) CMDIdGPDB(op_family_oid));
3439
	}
J
Jesse Zhang 已提交
3440

3441
	return input_col_mdids;
3442 3443 3444
}

// EOF