提交 5447a83f 编写于 作者: E Ekta Khanna and Jemish Patel 提交者: khannaekta

For Text/Varchar/Char/Bpchar columns, we should skip generating StatsBuckets in DXL

Instead, for these column types we should rely on NDVRemain and NullFreq to do
cardinality estimation.

Adding function to check if we need to create stats bucket in DXL

Function `FCreateStatsBucket` returns true if column data type is
not a text/varchar/char/bpchar type.
Signed-off-by: Ekta Khanna <ekhanna@pivotal.io>
上级 03bf5e9c
......@@ -2366,32 +2366,42 @@ CTranslatorRelcacheToDXL::PimdobjColStats
&pdrgdatumHistValues, &iNumHistValues,
NULL, NULL);
// transform all the bits and pieces from pg_statistic
// to a single bucket structure
DrgPdxlbucket *pdrgpdxlbucketTransformed =
PdrgpdxlbucketTransformStats
(
pmp,
oidAttType,
dDistinct,
dNullFrequency,
pdrgdatumMCVValues,
pdrgfMCVFrequencies,
ULONG(iNumMCVValues),
pdrgdatumHistValues,
ULONG(iNumHistValues)
);
GPOS_ASSERT(NULL != pdrgpdxlbucketTransformed);
CDouble dNDVBuckets(0.0);
CDouble dFreqBuckets(0.0);
const ULONG ulBuckets = pdrgpdxlbucketTransformed->UlLength();
for (ULONG ul = 0; ul < ulBuckets; ul++)
{
CDXLBucket *pdxlbucket = (*pdrgpdxlbucketTransformed)[ul];
dNDVBuckets = dNDVBuckets + pdxlbucket->DDistinct();
dFreqBuckets = dFreqBuckets + pdxlbucket->DFrequency();
// We only want to create statistics buckets if the column is NOT a text, varchar, char or bpchar type
// For the above column types we will use NDVRemain and NullFreq to do cardinality estimation.
if (CTranslatorUtils::FCreateStatsBucket(oidAttType))
{
// transform all the bits and pieces from pg_statistic
// to a single bucket structure
DrgPdxlbucket *pdrgpdxlbucketTransformed =
PdrgpdxlbucketTransformStats
(
pmp,
oidAttType,
dDistinct,
dNullFrequency,
pdrgdatumMCVValues,
pdrgfMCVFrequencies,
ULONG(iNumMCVValues),
pdrgdatumHistValues,
ULONG(iNumHistValues)
);
GPOS_ASSERT(NULL != pdrgpdxlbucketTransformed);
const ULONG ulBuckets = pdrgpdxlbucketTransformed->UlLength();
for (ULONG ul = 0; ul < ulBuckets; ul++)
{
CDXLBucket *pdxlbucket = (*pdrgpdxlbucketTransformed)[ul];
dNDVBuckets = dNDVBuckets + pdxlbucket->DDistinct();
dFreqBuckets = dFreqBuckets + pdxlbucket->DFrequency();
}
CUtils::AddRefAppend(pdrgpdxlbucket, pdrgpdxlbucketTransformed);
pdrgpdxlbucketTransformed->Release();
}
// there will be remaining tuples if the merged histogram and the NULLS do not cover
......@@ -2406,16 +2416,12 @@ CTranslatorRelcacheToDXL::PimdobjColStats
dFreqRemain = std::max(CDouble(0.0), (1 - dFreqBuckets - dNullFrequency));
}
CUtils::AddRefAppend(pdrgpdxlbucket, pdrgpdxlbucketTransformed);
// free up allocated datum and float4 arrays
gpdb::FreeAttrStatsSlot(oidAttType, pdrgdatumMCVValues, iNumMCVValues, pdrgfMCVFrequencies, iNumMCVFrequencies);
gpdb::FreeAttrStatsSlot(oidAttType, pdrgdatumHistValues, iNumHistValues, NULL, 0);
gpdb::FreeHeapTuple(heaptupleStats);
pdrgpdxlbucketTransformed->Release();
// create col stats object
pmdidColStats->AddRef();
CDXLColStats *pdxlcolstats = GPOS_NEW(pmp) CDXLColStats
......
......@@ -2806,4 +2806,18 @@ CTranslatorUtils::UlNonSystemColumns
return ulNonSystemCols;
}
// Decide whether statistics buckets should be created in DXL for a column
// whose data type has the given OID.
// Yields false when the type is text, char, varchar or bpchar, and true
// for every other type OID.
BOOL
CTranslatorUtils::FCreateStatsBucket
	(
	OID oidAttType
	)
{
	// the four character/string type OIDs are the only ones that opt out
	if (TEXTOID == oidAttType ||
		CHAROID == oidAttType ||
		VARCHAROID == oidAttType ||
		BPCHAROID == oidAttType)
	{
		return false;
	}

	return true;
}
// EOF
......@@ -410,6 +410,10 @@ namespace gpdxl
// return the count of non-system columns in the relation
static
ULONG UlNonSystemColumns(const IMDRelation *pmdrel);
// check if we need to create stats buckets in DXL for the column attribute;
// oidAttType is the OID of the column's data type: the result is false for
// the text, char, varchar and bpchar types and true for any other type
static
BOOL FCreateStatsBucket(OID oidAttType);
};
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册