提交 8e36c3d7 编写于 作者: Omer Arap 提交者: Melanie Plageman

Create dummy stats for type mismatch

If the column statistics in `pg_statistic` have values whose type differs
from the column type, the metadata accessor should not translate the
stats and should create dummy stats instead.

This commit also reorders stats collection from `pg_statistic` to
align with how ANALYZE generates stats. MCV and histogram translation are
moved to the end, after NDV, null fraction, and column width extraction.
Signed-off-by: Melanie Plageman <mplageman@pivotal.io>
上级 58fa2611
......@@ -2308,14 +2308,11 @@ CTranslatorRelcacheToDXL::PimdobjRelStats
return pdxlrelstats;
}
//---------------------------------------------------------------------------
// @function:
// CTranslatorRelcacheToDXL::PimdobjColStats
//
// @doc:
// Retrieve column statistics from relcache
//
//---------------------------------------------------------------------------
// Retrieve column statistics from relcache
// If all statistics are missing, create dummy statistics
// Also, if the statistics are broken, create dummy statistics
// However, if any statistics are present and not broken,
// create column statistics using these statistics
IMDCacheObject *
CTranslatorRelcacheToDXL::PimdobjColStats
(
......@@ -2388,42 +2385,6 @@ CTranslatorRelcacheToDXL::PimdobjColStats
return CDXLColStats::PdxlcolstatsDummy(pmp, pmdidColStats, pmdnameCol, dWidth);
}
// histogram values extracted from the pg_statistic tuple for a given column
AttStatsSlot histSlot;
// most common values and their frequencies extracted from the pg_statistic
// tuple for a given column
AttStatsSlot mcvSlot;
(void) gpdb::FGetAttrStatsSlot
(
&mcvSlot,
heaptupleStats,
STATISTIC_KIND_MCV,
InvalidOid,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS
);
if (mcvSlot.nvalues != mcvSlot.nnumbers)
{
// if the number of MCVs(nvalues) and number of MCFs(nnumbers) do not match, we discard the MCVs and MCFs
gpdb::FreeAttrStatsSlot(&mcvSlot);
mcvSlot.numbers = NULL;
mcvSlot.values = NULL;
mcvSlot.values_arr = NULL;
mcvSlot.numbers_arr = NULL;
mcvSlot.nnumbers = 0;
mcvSlot.nvalues = 0;
char msgbuf[NAMEDATALEN * 2 + 100];
snprintf(msgbuf, sizeof(msgbuf), "The number of most common values and frequencies do not match on column %ls of table %ls.",
pmdcol->Mdname().Pstr()->Wsz(), pmdrel->Mdname().Pstr()->Wsz());
GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION,
LOG,
msgbuf,
NULL);
}
Form_pg_statistic fpsStats = (Form_pg_statistic) GETSTRUCT(heaptupleStats);
// null frequency and NDV
......@@ -2435,9 +2396,6 @@ CTranslatorRelcacheToDXL::PimdobjColStats
iNullNDV = 1;
}
// fix mcv and null frequencies (sometimes they can add up to more than 1.0)
NormalizeFrequencies(mcvSlot.numbers, (ULONG) mcvSlot.nvalues, &dNullFrequency);
// column width
CDouble dWidth = CDouble(fpsStats->stawidth);
......@@ -2454,13 +2412,63 @@ CTranslatorRelcacheToDXL::PimdobjColStats
}
dDistinct = dDistinct.FpCeil();
// total MCV frequency
CDouble dMCFSum = 0.0;
for (int i = 0; i < mcvSlot.nvalues; i++)
BOOL fDummyStats = false;
// most common values and their frequencies extracted from the pg_statistic
// tuple for a given column
AttStatsSlot mcvSlot;
(void) gpdb::FGetAttrStatsSlot
(
&mcvSlot,
heaptupleStats,
STATISTIC_KIND_MCV,
InvalidOid,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS
);
if (InvalidOid != mcvSlot.valuetype && mcvSlot.valuetype != oidAttType)
{
char msgbuf[NAMEDATALEN * 2 + 100];
snprintf(msgbuf, sizeof(msgbuf), "Type mismatch between attribute %ls of table %ls having type %d and statistic having type %d, please ANALYZE the table again",
pmdcol->Mdname().Pstr()->Wsz(), pmdrel->Mdname().Pstr()->Wsz(), oidAttType, mcvSlot.valuetype);
GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION,
NOTICE,
msgbuf,
NULL);
gpdb::FreeAttrStatsSlot(&mcvSlot);
fDummyStats = true;
}
else if (mcvSlot.nvalues != mcvSlot.nnumbers)
{
char msgbuf[NAMEDATALEN * 2 + 100];
snprintf(msgbuf, sizeof(msgbuf), "The number of most common values and frequencies do not match on column %ls of table %ls.",
pmdcol->Mdname().Pstr()->Wsz(), pmdrel->Mdname().Pstr()->Wsz());
GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION,
NOTICE,
msgbuf,
NULL);
// if the number of MCVs(nvalues) and number of MCFs(nnumbers) do not match, we discard the MCVs and MCFs
gpdb::FreeAttrStatsSlot(&mcvSlot);
fDummyStats = true;
}
else
{
dMCFSum = dMCFSum + CDouble(mcvSlot.numbers[i]);
// fix mcv and null frequencies (sometimes they can add up to more than 1.0)
NormalizeFrequencies(mcvSlot.numbers, (ULONG) mcvSlot.nvalues, &dNullFrequency);
// total MCV frequency
CDouble dMCFSum = 0.0;
for (int i = 0; i < mcvSlot.nvalues; i++)
{
dMCFSum = dMCFSum + CDouble(mcvSlot.numbers[i]);
}
}
// histogram values extracted from the pg_statistic tuple for a given column
AttStatsSlot histSlot;
// get histogram datums from pg_statistic entry
(void) gpdb::FGetAttrStatsSlot
(
......@@ -2471,6 +2479,30 @@ CTranslatorRelcacheToDXL::PimdobjColStats
ATTSTATSSLOT_VALUES
);
if (InvalidOid != histSlot.valuetype && histSlot.valuetype != oidAttType)
{
char msgbuf[NAMEDATALEN * 2 + 100];
snprintf(msgbuf, sizeof(msgbuf), "Type mismatch between attribute %ls of table %ls having type %d and statistic having type %d, please ANALYZE the table again",
pmdcol->Mdname().Pstr()->Wsz(), pmdrel->Mdname().Pstr()->Wsz(), oidAttType, histSlot.valuetype);
GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION,
NOTICE,
msgbuf,
NULL);
gpdb::FreeAttrStatsSlot(&histSlot);
fDummyStats = true;
}
if (fDummyStats)
{
pdrgpdxlbucket->Release();
pmdidColStats->AddRef();
CDouble dWidth = CStatistics::DDefaultColumnWidth;
gpdb::FreeHeapTuple(heaptupleStats);
return CDXLColStats::PdxlcolstatsDummy(pmp, pmdidColStats, pmdnameCol, dWidth);
}
CDouble dNDVBuckets(0.0);
CDouble dFreqBuckets(0.0);
CDouble dDistinctRemain(0.0);
......
......@@ -440,7 +440,11 @@ INSERT INTO test_broken_stats VALUES(1, 'abc'), (2, 'cde'), (3, 'efg'), (3, 'efg
ANALYZE test_broken_stats;
SET allow_system_table_mods='DML';
-- Simulate broken stats by changing the data type of MCV slot to a different type than in pg_attribute
UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='bfv_statistic.test_broken_stats'::regclass AND staattnum=2;
-- Broken MCVs
UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2;
-- Broken histogram
UPDATE pg_statistic SET stakind2=2 WHERE starelid ='test_broken_stats'::regclass AND staattnum=2;
UPDATE pg_statistic SET stavalues2='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2 and stakind2=2;
SELECT * FROM test_broken_stats t1, good_tab t2 WHERE t1.b = t2.b;
a | b | a | b
---+---+---+---
......
......@@ -57,6 +57,9 @@ set allow_system_table_mods=DML;
update pg_statistic set stavalues1='{6,3,1,5,4,2}'::int[] where starelid='bfv_statistics_foo2'::regclass;
-- excercise the translator
explain select a from bfv_statistics_foo2 where a > 1 order by a;
NOTICE: The number of most common values and frequencies do not match on column a of table bfv_statistics_foo2.
NOTICE: One or more columns in the following table(s) do not have statistics: bfv_statistics_foo2
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=8 width=4)
......@@ -440,8 +443,16 @@ INSERT INTO test_broken_stats VALUES(1, 'abc'), (2, 'cde'), (3, 'efg'), (3, 'efg
ANALYZE test_broken_stats;
SET allow_system_table_mods='DML';
-- Simulate broken stats by changing the data type of MCV slot to a different type than in pg_attribute
UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='bfv_statistic.test_broken_stats'::regclass AND staattnum=2;
-- Broken MCVs
UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2;
-- Broken histogram
UPDATE pg_statistic SET stakind2=2 WHERE starelid ='test_broken_stats'::regclass AND staattnum=2;
UPDATE pg_statistic SET stavalues2='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2 and stakind2=2;
SELECT * FROM test_broken_stats t1, good_tab t2 WHERE t1.b = t2.b;
NOTICE: Type mismatch between attribute b of table test_broken_stats having type 25 and statistic having type 23, please ANALYZE the table again
NOTICE: Type mismatch between attribute b of table test_broken_stats having type 25 and statistic having type 23, please ANALYZE the table again
NOTICE: One or more columns in the following table(s) do not have statistics: test_broken_stats
HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For partitioned tables, run analyze rootpartition <table_name>(<column_list>). See log for columns missing statistics.
a | b | a | b
---+---+---+---
(0 rows)
......
......@@ -272,7 +272,11 @@ INSERT INTO test_broken_stats VALUES(1, 'abc'), (2, 'cde'), (3, 'efg'), (3, 'efg
ANALYZE test_broken_stats;
SET allow_system_table_mods='DML';
-- Simulate broken stats by changing the data type of MCV slot to a different type than in pg_attribute
UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='bfv_statistic.test_broken_stats'::regclass AND staattnum=2;
-- Broken MCVs
UPDATE pg_statistic SET stavalues1='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2;
-- Broken histogram
UPDATE pg_statistic SET stakind2=2 WHERE starelid ='test_broken_stats'::regclass AND staattnum=2;
UPDATE pg_statistic SET stavalues2='{1,2,3}'::int[] WHERE starelid ='test_broken_stats'::regclass AND staattnum=2 and stakind2=2;
SELECT * FROM test_broken_stats t1, good_tab t2 WHERE t1.b = t2.b;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册