Disable changing distribution keys implicitly when creating unique index (#10510)

In previous GPDB versions, the distribution keys may be changed implicitly when creating a unique index on a hash-distributed empty table.

```SQL
create table foo(a int, b int) distributed by(a);
create unique index on foo(b);
-- now, foo is hash distributed by b, not by a
```

It might be useful (maybe) to avoid having to change the distribution keys manually. On the other hand, it is very surprising if the user does not notice a NOTICE message such as "NOTICE: updating distribution policy to match new UNIQUE index". What's worse, this behavior could cause data inconsistency. For example:

```SQL
create table foo(a int, b int) distributed by(a);
insert into foo select i,i from generate_series(1,5)i;
create table foopart (i int4, j int4) distributed by (i)
  partition by range (i) (start (1) end (3) every (1));
create unique index on foopart_1_prt_1 (j);
insert into foopart values(1,2),(2,1);
```

The data inconsistency is:

```
gpadmin=# select gp_segment_id, * from foopart_1_prt_1;
 gp_segment_id | i | j
---------------+---+---
             1 | 1 | 2
(1 row)

gpadmin=# select * from foo f, foopart_1_prt_1 p where f.a = p.j;
 a | b | i | j
---+---+---+---
(0 rows)
```

Implicitly changing the distribution keys is not very useful, and can be harmful. This PR disables changing the distribution keys when creating a unique index.

Reviewed-by: N Hubert Zhang <hzhang@pivotal.io>

Disable changing distribution keys implicitly when creating unique index (#10510)
In previous GPDB versions, the distribution keys may be changed implicitly when creating a unique index on a hash-distributed empty table.

```SQL
create table foo(a int, b int) distributed by(a);
create unique index on foo(b);
-- now, foo is hash distributed by b, not by a
```

It might be useful (maybe) to avoid having to change the distribution keys manually. On the other hand, it is very surprising if the user does not notice a NOTICE message such as "NOTICE: updating distribution policy to match new UNIQUE index". What's worse, this behavior could cause data inconsistency. For example:

```SQL
create table foo(a int, b int) distributed by(a);
insert into foo select i,i from generate_series(1,5)i;
create table foopart (i int4, j int4) distributed by (i)
  partition by range (i) (start (1) end (3) every (1));
create unique index on foopart_1_prt_1 (j);
insert into foopart values(1,2),(2,1);
```

The data inconsistency is:

```
gpadmin=# select gp_segment_id, * from foopart_1_prt_1;
 gp_segment_id | i | j
---------------+---+---
             1 | 1 | 2
(1 row)

gpadmin=# select * from foo f, foopart_1_prt_1 p where f.a = p.j;
 a | b | i | j
---+---+---+---
(0 rows)
```

Implicitly changing the distribution keys is not very useful, and can be harmful. This PR disables changing the distribution keys when creating a unique index.

Reviewed-by: N Hubert Zhang <hzhang@pivotal.io>
84d2a23f · Hao Wu · GitHub · 4facbcd7 · 84d2a23f · 84d2a23f
18 changed files
--- a/src/backend/cdb/cdbcat.c
+++ b/src/backend/cdb/cdbcat.c
@@ -1010,87 +1010,3 @@ errdetails_index_policy(char *attname,
 	return 0;
 }

-/*
- * If the proposed index does not match the distribution policy but the relation
- * is empty and does not have a primary key or unique index, update the
- * distribution policy to match the index definition (MPP-101).
- */
-bool
-change_policy_to_match_index(Relation rel,
-							 AttrNumber *indattr,
-							 Oid *indclasses,
-							 Oid *exclop,
-							 int nidxatts)
-{
-	TupleDesc	desc = RelationGetDescr(rel);
-	GpPolicy *policy;
-	int			i;
-	MemoryContext oldcontext;
-
-	/*
-	 * Don't do anything with exclusion constraints, for now. We could
-	 * do the analogous thing we do with unique indexes, if any of the
-	 * exclusion operators have a compatible hash opclass. But we don't
-	 * bother.
-	 */
-	if (exclop)
-		return false;
-
-	policy = makeGpPolicy(POLICYTYPE_PARTITIONED, nidxatts,
-						  rel->rd_cdbpolicy->numsegments);
-	for (i = 0; i < nidxatts; i++)
-	{
-		AttrNumber	attno = indattr[i];
-		Oid			typeid = desc->attrs[attno - 1]->atttypid;
-		Oid			policy_opclass;
-		Oid			policy_opfamily;
-		Oid			policy_eqop;
-		Oid			indopfamily;
-		Oid			indeqop;
-
-		policy_opclass = cdb_default_distribution_opclass_for_type(typeid);
-		if (!policy_opclass)
-		{
-			/*
-			 * The datatype has no default opclass. Can't use it in the
-			 * distribution key.
-			 */
-			return false;
-		}
-
-		policy_opfamily = get_opclass_family(policy_opclass);
-		policy_eqop = get_opfamily_member(policy_opfamily,
-										  typeid,
-										  typeid,
-										  HTEqualStrategyNumber);
-
-		indopfamily = get_opclass_family(indclasses[i]);
-		indeqop = get_opfamily_member(indopfamily,
-									  typeid,
-									  typeid,
-									  BTEqualStrategyNumber);
-		if (policy_eqop != indeqop)
-		{
-			/*
-			 * The default hash opclass isn't compatible with the index opclass.
-			 * That is, they use a different equality operator. Give up.
-			 *
-			 * We could perhaps work a bit harder, and search for a different
-			 * hash opclass that would be compatible. But doesn't seem worth
-			 * the trouble.
-			 */
-			return false;
-		}
-
-		policy->attrs[i] = attno;
-		policy->opclasses[i] = policy_opclass;
-	}
-
-	GpPolicyReplace(rel->rd_id, policy);
-
-	oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
-	rel->rd_cdbpolicy = GpPolicyCopy(policy);
-	MemoryContextSwitchTo(oldcontext);
-
-	return true;
-}
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -772,7 +772,7 @@ DefineIndex(Oid relationId,
 			stmt->unique ||
 			stmt->excludeOpNames)
 		{
-			bool		compatible;
+			index_check_policy_compatible_context ctx;

 			/* Don't allow indexes on system attributes. Except OIDs. */
 			for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
@@ -784,84 +784,20 @@ DefineIndex(Oid relationId,
 							 errmsg("cannot create constraint or unique index on system column")));
 			}

-			compatible =
-				index_check_policy_compatible(rel->rd_cdbpolicy,
-											  RelationGetDescr(rel),
-											  indexInfo->ii_KeyAttrNumbers,
-											  classObjectId,
-											  indexInfo->ii_ExclusionOps,
-											  indexInfo->ii_NumIndexAttrs,
-											  false, /* report_error */
-											  NULL);
-
-			/*
-			 * If the constraint isn't compatible with the current distribution policy,
-			 * try to change the distribution policy to match the constraint.
-			 *
-			 * The table must be empty, and it mustn't have any other constraints,
-			 * and the index mustn't contain expressions.
-			 */
-			if (!compatible &&
-				!GpPolicyIsRandomPartitioned(rel->rd_cdbpolicy) &&
-				(Gp_role == GP_ROLE_EXECUTE || cdbRelMaxSegSize(rel) == 0) &&
-				!relationHasPrimaryKey(rel) &&
-				!relationHasUniqueIndex(rel) &&
-				list_length(indexInfo->ii_Expressions) == 0)
-			{
-				compatible =
-					change_policy_to_match_index(rel,
+			memset(&ctx, 0, sizeof(ctx));
+			ctx.for_alter_dist_policy = false;
+			ctx.is_constraint = stmt->isconstraint;
+			ctx.is_unique = stmt->unique;
+			ctx.is_primarykey = stmt->primary;
+			ctx.constraint_name = indexRelationName;
+			(void) index_check_policy_compatible(rel->rd_cdbpolicy,
+												 RelationGetDescr(rel),
 												 indexInfo->ii_KeyAttrNumbers,
 												 classObjectId,
 												 indexInfo->ii_ExclusionOps,
-												 indexInfo->ii_NumIndexAttrs);
-				if (compatible && Gp_role == GP_ROLE_DISPATCH)
-				{
-					if (stmt->primary)
-						elog(NOTICE, "updating distribution policy to match new PRIMARY KEY");
-					else if (stmt->excludeOpNames)
-						elog(NOTICE, "updating distribution policy to match new exclusion constraint");
-					else
-					{
-						Assert(stmt->unique);
-						if (stmt->isconstraint)
-							elog(NOTICE, "updating distribution policy to match new UNIQUE constraint");
-						else
-							elog(NOTICE, "updating distribution policy to match new UNIQUE index");
-					}
-				}
-			}
-
-			if (!compatible)
-			{
-				/*
-				 * Not compatible, and couldn't change the distribution policy to match.
-				 * Report the error to the user. Do that by calling
-				 * index_check_policy_compatible() again, but pass report_error=true so
-				 * that it will throw an error. index_check_policy_compatible() can
-				 * give a better error message than we could here.
-				 */
-				index_check_policy_compatible_context ctx;
-
-				memset(&ctx, 0, sizeof(ctx));
-				ctx.for_alter_dist_policy = false;
-				ctx.is_constraint = stmt->isconstraint;
-				ctx.is_unique = stmt->unique;
-				ctx.is_primarykey = stmt->primary;
-				ctx.constraint_name = indexRelationName;
-				(void) index_check_policy_compatible(rel->rd_cdbpolicy,
-													 RelationGetDescr(rel),
-													 indexInfo->ii_KeyAttrNumbers,
-													 classObjectId,
-													 indexInfo->ii_ExclusionOps,
-													 indexInfo->ii_NumIndexAttrs,
-													 true, /* report_error */
-													 &ctx);
-				/*
-				 * index_check_policy_compatible() should not return, because the earlier
-				 * call already determined that it's incompatible. But just in case..
-				 */
-				elog(ERROR, "constraint is not compatible with distribution key");
-			}
+												 indexInfo->ii_NumIndexAttrs,
+												 true, /* report_error */
+												 &ctx);
 		}
 	}


--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -2103,6 +2103,87 @@ transformDistributedBy(ParseState *pstate,
 		distrkeys = likeDistributedBy->keyCols;
 	}

+	/*
+	 * Check the UNIQUE / PRIMARY KEY indexes collected in cxt->inh_indexes.
+	 * If distrkeys was already determined above, narrow it down to the keys
+	 * that also appear as plain columns in every such index; otherwise seed it
+	 * from the plain columns of the first one. UNIQUE/PRIMARY KEY is a global
+	 * constraint for the table, and we require the hash distribution keys to map
+	 * equal values of the unique columns to the same segment. So, the set of
+	 * distribution keys must be a subset of the columns of the unique constraint.
+	 *
+	 * Note: a UNIQUE/PRIMARY KEY index is not only an index, but also a constraint.
+	 * Even if the CREATE TABLE LIKE clause includes only constraints, not indexes,
+	 * we still check the uniqueness to compute the distribution keys.
+	 */
+	foreach(lc, cxt->inh_indexes)
+	{
+		IndexStmt  *index_stmt;
+		ListCell *cell;
+		List *new_distrkeys = NIL;
+
+		index_stmt = (IndexStmt *) lfirst(lc);
+		if (!index_stmt->unique && !index_stmt->primary)
+			continue;
+
+		if (distrkeys)
+		{
+			foreach(cell, index_stmt->indexParams)
+			{
+				IndexElem *iparam = lfirst(cell);
+				ListCell *dkcell;
+
+				/*
+				 * The index element could be either a column name or an expression.
+				 * An element that is not a plain column name is skipped when
+				 * computing the common columns. For example,
+				 *
+				 *   create table t(i int, j int, k int) distributed by (i,j);
+				 *   create unique index on t(i, func1(j));
+				 *
+				 * The first index element is a name, the second is an expression.
+				 * The set of distribution keys is not a subset of the column names
+				 * in the index, so the index is incompatible and creation fails.
+				 * But `create unique index on t(i, j);` will succeed.
+				 */
+				if (!iparam || !iparam->name)
+					continue;
+				foreach(dkcell, distrkeys)
+				{
+					DistributionKeyElem  *dk = (DistributionKeyElem *) lfirst(dkcell);
+					if (strcmp(dk->name, iparam->name) == 0)
+					{
+						new_distrkeys = lappend(new_distrkeys, dk);
+						break;
+					}
+				}
+			}
+			/* If there were no common columns, we're out of luck. */
+			if (new_distrkeys == NIL)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("UNIQUE or PRIMARY KEY definitions are incompatible with each other"),
+						 errhint("When there are multiple PRIMARY KEY / UNIQUE constraints, they must have at least one column in common.")));
+		}
+		else
+		{
+			foreach(cell, index_stmt->indexParams)
+			{
+				IndexElem *iparam = lfirst(cell);
+				if (iparam && iparam->name)
+				{
+					/* distrkeys holds DistributionKeyElem nodes, not IndexElem */
+					DistributionKeyElem *distrkey = makeNode(DistributionKeyElem);
+					distrkey->name = iparam->name;
+					distrkey->opclass = NULL;
+					new_distrkeys = lappend(new_distrkeys, distrkey);
+				}
+			}
+		}
+
+		distrkeys = new_distrkeys;
+	}
+
 	if (gp_create_table_random_default_distribution && NIL == distrkeys)
 	{
 		Assert(NULL == likeDistributedBy);

--- a/src/include/cdb/cdbcat.h
+++ b/src/include/cdb/cdbcat.h
@@ -42,10 +42,4 @@ extern bool index_check_policy_compatible(GpPolicy *policy,
 							  bool report_error,
 							  index_check_policy_compatible_context *error_context);

-extern bool change_policy_to_match_index(Relation rel,
-							 AttrNumber *indattr,
-							 Oid *indclasses,
-							 Oid *exclop,
-							 int nidxatts);
-
 #endif   /* CDBCAT_H */
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -2944,6 +2944,8 @@ NOTICE:  drop cascades to table test_drop_constr_child
 --
 ALTER TABLE IF EXISTS tt8 ADD COLUMN f int;
 NOTICE:  relation "tt8" does not exist, skipping
+ALTER TABLE IF EXISTS tt8 SET DISTRIBUTED BY(f);
+NOTICE:  relation "tt8" does not exist, skipping
 ALTER TABLE IF EXISTS tt8 ADD CONSTRAINT xxx PRIMARY KEY(f);
 NOTICE:  relation "tt8" does not exist, skipping
 ALTER TABLE IF EXISTS tt8 ADD CHECK (f BETWEEN 0 AND 10);
@@ -2957,8 +2959,8 @@ NOTICE:  relation "tt8" does not exist, skipping
 CREATE TABLE tt8(a int);
 CREATE SCHEMA alter2;
 ALTER TABLE IF EXISTS tt8 ADD COLUMN f int;
+ALTER TABLE IF EXISTS tt8 SET DISTRIBUTED BY(f);
 ALTER TABLE IF EXISTS tt8 ADD CONSTRAINT xxx PRIMARY KEY(f);
-NOTICE:  updating distribution policy to match new PRIMARY KEY
 ALTER TABLE IF EXISTS tt8 ADD CHECK (f BETWEEN 0 AND 10);
 ALTER TABLE IF EXISTS tt8 ALTER COLUMN f SET DEFAULT 0;
 ALTER TABLE IF EXISTS tt8 RENAME COLUMN f TO f1;

--- a/src/test/regress/expected/bfv_olap.out
+++ b/src/test/regress/expected/bfv_olap.out
@@ -208,8 +208,8 @@ CREATE TABLE r
    d NUMERIC(10,0), 
    e DATE
 ) DISTRIBUTED BY (a,b);
+ALTER TABLE r SET DISTRIBUTED BY (b);
 ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
-NOTICE:  updating distribution policy to match new PRIMARY KEY
 --TEST
 SELECT MAX(a) AS m FROM r GROUP BY b ORDER BY m;
 m 

--- a/src/test/regress/expected/bfv_olap_optimizer.out
+++ b/src/test/regress/expected/bfv_olap_optimizer.out
@@ -208,8 +208,8 @@ CREATE TABLE r
    d NUMERIC(10,0), 
    e DATE
 ) DISTRIBUTED BY (a,b);
+ALTER TABLE r SET DISTRIBUTED BY (b);
 ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);
-NOTICE:  updating distribution policy to match new PRIMARY KEY
 --TEST
 SELECT MAX(a) AS m FROM r GROUP BY b ORDER BY m;
 m 

--- a/src/test/regress/expected/create_table_like.out
+++ b/src/test/regress/expected/create_table_like.out
@@ -82,12 +82,11 @@ DETAIL:  Distribution key column "yy" is not included in the constraint.
 /* Ok to create multiple unique indexes */
 /* GPDB: This query will fail because unique index must contain all distribution key */
 CREATE TABLE inhg (x text UNIQUE, LIKE inhz INCLUDING INDEXES);
-ERROR:  UNIQUE constraint must contain all columns in the table's distribution key
-DETAIL:  Distribution key column "x" is not included in the constraint.
+ERROR:  UNIQUE or PRIMARY KEY definitions are incompatible with each other
+HINT:  When there are multiple PRIMARY KEY / UNIQUE constraints, they must have at least one column in common.
 CREATE TABLE inhg (x text, LIKE inhz INCLUDING INDEXES);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Greenplum Database data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-NOTICE:  updating distribution policy to match new UNIQUE constraint
 INSERT INTO inhg (xx, yy, x) VALUES ('test', 5, 10);
 INSERT INTO inhg (xx, yy, x) VALUES ('test', 10, 15);
 INSERT INTO inhg (xx, yy, x) VALUES ('foo', 10, 15); -- should fail

--- a/src/test/regress/expected/gp_create_table.out
+++ b/src/test/regress/expected/gp_create_table.out
@@ -45,46 +45,6 @@ select distkey, distclass from gp_distribution_policy where localoid = 'distpol'
 2       | 10027
 (1 row)

-drop table distpol;
-- now test that MPP-101 /actually/ works
-create table distpol (i int, j int, k int) distributed by (i);
-alter table distpol add primary key (j);
-NOTICE:  updating distribution policy to match new PRIMARY KEY
-select distkey, distclass from gp_distribution_policy where localoid = 'distpol'::regclass;
- distkey | distclass 
---------+-----------
- 2       | 10027
-(1 row)
-
-- make sure we can't overwrite it
-create unique index distpol_uidx on distpol(k);
-ERROR:  UNIQUE index must contain all columns in the table's distribution key
-DETAIL:  Distribution key column "j" is not included in the constraint.
-- should be able to now
-alter table distpol drop constraint distpol_pkey;
-create unique index distpol_uidx on distpol(k);
-NOTICE:  updating distribution policy to match new UNIQUE index
-select distkey, distclass from gp_distribution_policy where localoid = 'distpol'::regclass;
- distkey | distclass 
---------+-----------
- 3       | 10027
-(1 row)
-
-drop index distpol_uidx;
-- expressions shouldn't be able to update the distribution key
-create unique index distpol_uidx on distpol(ln(k));
-ERROR:  UNIQUE index must contain all columns in the table's distribution key
-DETAIL:  Distribution key column "k" is not included in the constraint.
-drop index distpol_uidx;
-ERROR:  index "distpol_uidx" does not exist
-- lets make sure we don't change the policy when the table is full
-insert into distpol values(1, 2, 3);
-create unique index distpol_uidx on distpol(i);
-ERROR:  UNIQUE index must contain all columns in the table's distribution key
-DETAIL:  Distribution key column "k" is not included in the constraint.
-alter table distpol add primary key (i);
-ERROR:  PRIMARY KEY definition must contain all columns in the table's distribution key
-DETAIL:  Distribution key column "k" is not included in the constraint.
 drop table distpol;
 -- if the datatype of the index column is not hashable, can't update distribution
 -- key to it.

--- a/src/test/regress/expected/gp_index.out
+++ b/src/test/regress/expected/gp_index.out
@@ -37,3 +37,74 @@ Table "public.tbl_drop_ind_concur"
 Distributed by: (dk)

 DROP TABLE tbl_drop_ind_concur;
+-- Creating UNIQUE/PRIMARY KEY index is disallowed to change the distribution
+-- keys implicitly
+CREATE TABLE tbl_create_index(i int, j int, k int) distributed by(i, j);
+-- should fail
+CREATE UNIQUE INDEX ON tbl_create_index(i);
+ERROR:  UNIQUE index must contain all columns in the table's distribution key
+DETAIL:  Distribution key column "j" is not included in the constraint.
+CREATE UNIQUE INDEX ON tbl_create_index(k);
+ERROR:  UNIQUE index must contain all columns in the table's distribution key
+DETAIL:  Distribution key column "i" is not included in the constraint.
+CREATE UNIQUE INDEX ON tbl_create_index(i, k);
+ERROR:  UNIQUE index must contain all columns in the table's distribution key
+DETAIL:  Distribution key column "j" is not included in the constraint.
+ALTER TABLE tbl_create_index ADD CONSTRAINT PKEY PRIMARY KEY(i);
+ERROR:  PRIMARY KEY definition must contain all columns in the table's distribution key
+DETAIL:  Distribution key column "j" is not included in the constraint.
+ALTER TABLE tbl_create_index ADD CONSTRAINT PKEY PRIMARY KEY(k);
+ERROR:  PRIMARY KEY definition must contain all columns in the table's distribution key
+DETAIL:  Distribution key column "i" is not included in the constraint.
+ALTER TABLE tbl_create_index ADD CONSTRAINT PKEY PRIMARY KEY(i, k);
+ERROR:  PRIMARY KEY definition must contain all columns in the table's distribution key
+DETAIL:  Distribution key column "j" is not included in the constraint.
+-- should success
+CREATE UNIQUE INDEX tbl_create_index_ij ON tbl_create_index(i, j);
+CREATE UNIQUE INDEX tbl_create_index_ijk ON tbl_create_index(i, j, k);
+\d tbl_create_index
+Table "public.tbl_create_index"
+ Column |  Type   | Modifiers 
+--------+---------+-----------
+ i      | integer | 
+ j      | integer | 
+ k      | integer | 
+Indexes:
+    "tbl_create_index_ij" UNIQUE, btree (i, j)
+    "tbl_create_index_ijk" UNIQUE, btree (i, j, k)
+Distributed by: (i, j)
+
+DROP INDEX tbl_create_index_ij;
+DROP INDEX tbl_create_index_ijk;
+ALTER TABLE tbl_create_index ADD CONSTRAINT PKEY PRIMARY KEY(i, j, k);
+\d tbl_create_index
+Table "public.tbl_create_index"
+ Column |  Type   | Modifiers 
+--------+---------+-----------
+ i      | integer | not null
+ j      | integer | not null
+ k      | integer | not null
+Indexes:
+    "pkey" PRIMARY KEY, btree (i, j, k)
+Distributed by: (i, j)
+
+ALTER TABLE tbl_create_index DROP CONSTRAINT PKEY;
+-- after changing the distribution keys, the above failed clause should success
+ALTER TABLE tbl_create_index SET DISTRIBUTED BY(k);
+CREATE UNIQUE INDEX ON tbl_create_index(k);
+CREATE UNIQUE INDEX ON tbl_create_index(i, k);
+ALTER TABLE tbl_create_index ADD CONSTRAINT PKEY PRIMARY KEY(i, k);
+\d tbl_create_index
+Table "public.tbl_create_index"
+ Column |  Type   | Modifiers 
+--------+---------+-----------
+ i      | integer | not null
+ j      | integer | not null
+ k      | integer | not null
+Indexes:
+    "pkey" PRIMARY KEY, btree (i, k)
+    "tbl_create_index_i_k_idx" UNIQUE, btree (i, k)
+    "tbl_create_index_k_idx" UNIQUE, btree (k)
+Distributed by: (k)
+
+DROP TABLE tbl_create_index;
--- a/src/test/regress/input/external_table.source
+++ b/src/test/regress/input/external_table.source
@@ -461,6 +461,7 @@ SELECT COUNT(*) FROM exttab_constraints_1;
 -- Error log should have a couple of rows
 SELECT COUNT(*) from gp_read_error_log('exttab_constraints_1');
 CREATE TABLE exttab_constraints_insert_1 (LIKE exttab_constraints_1) distributed by (i);
+ALTER TABLE exttab_constraints_insert_1 SET DISTRIBUTED BY(j);
 ALTER TABLE exttab_constraints_insert_1 ADD CONSTRAINT exttab_uniq_constraint_1 UNIQUE (j);
 -- This should fail
 select gp_truncate_error_log('exttab_constraints_1');

--- a/src/test/regress/input/partindex_test.source
+++ b/src/test/regress/input/partindex_test.source
@@ -369,10 +369,7 @@ create table part_table4
 -- create an index on all parts
 create index id_index on part_table4(id);

-- create 2 indexes on part_table4_1_prt_girls_2_prt_1
-create unique index id_index_unique on part_table4_1_prt_girls_2_prt_1(id);
-
-- create another index on column id
+-- create an index on column id
 create index id_rank_index on part_table4_1_prt_girls_2_prt_1(id, rank); 

 -- given index on part_table4_1_prt_boys_2_prt_3_id_idx, return similar index on part_table4_1_prt_girls_2_prt_1
@@ -384,14 +381,6 @@ pg_class.oid =
 	from pg_index
 	where indexrelid = ('part_table4_1_prt_boys_2_prt_3_id_idx'::regclass));

-- return the unique index on column "id" on part "part_table4_1_prt_girls_2_prt_1"
-select relname from
-pg_class where 
-pg_class.oid =
-(select gp_get_physical_index_relid('part_table4'::regclass, 'part_table4_1_prt_girls_2_prt_1'::regclass, indkey, indpred, indexprs, true)
-	from pg_index
-	where indexrelid = ('part_table4_1_prt_boys_2_prt_3_id_idx'::regclass));
-
 -- ************************************************************
 -- * Scenario 6
 -- * 	- a part has an existing constraint

--- a/src/test/regress/output/external_table.source
+++ b/src/test/regress/output/external_table.source
@@ -792,8 +792,8 @@ SELECT COUNT(*) from gp_read_error_log('exttab_constraints_1');
 (1 row)

 CREATE TABLE exttab_constraints_insert_1 (LIKE exttab_constraints_1) distributed by (i);
+ALTER TABLE exttab_constraints_insert_1 SET DISTRIBUTED BY(j);
 ALTER TABLE exttab_constraints_insert_1 ADD CONSTRAINT exttab_uniq_constraint_1 UNIQUE (j);
-NOTICE:  updating distribution policy to match new UNIQUE constraint
 -- This should fail
 select gp_truncate_error_log('exttab_constraints_1');
 gp_truncate_error_log 

--- a/src/test/regress/output/partindex_test.source
+++ b/src/test/regress/output/partindex_test.source
@@ -827,10 +827,7 @@ NOTICE:  building index for child partition "part_table4_1_prt_girls_2_prt_3"
 NOTICE:  building index for child partition "part_table4_1_prt_girls_2_prt_4"
 NOTICE:  building index for child partition "part_table4_1_prt_girls_2_prt_5"
 NOTICE:  building index for child partition "part_table4_1_prt_girls_2_prt_1"
-- create 2 indexes on part_table4_1_prt_girls_2_prt_1
-create unique index id_index_unique on part_table4_1_prt_girls_2_prt_1(id);
-NOTICE:  updating distribution policy to match new UNIQUE index
-- create another index on column id
+-- create an index on column id
 create index id_rank_index on part_table4_1_prt_girls_2_prt_1(id, rank); 
 -- given index on part_table4_1_prt_boys_2_prt_3_id_idx, return similar index on part_table4_1_prt_girls_2_prt_1
 -- return the non-unique index on column "id" on part "part_table4_1_prt_girls_2_prt_1"
@@ -845,18 +842,6 @@ pg_class.oid =
 part_table4_1_prt_girls_2_prt_1_id_idx
 (1 row)

-- return the unique index on column "id" on part "part_table4_1_prt_girls_2_prt_1"
-select relname from
-pg_class where 
-pg_class.oid =
-(select gp_get_physical_index_relid('part_table4'::regclass, 'part_table4_1_prt_girls_2_prt_1'::regclass, indkey, indpred, indexprs, true)
-	from pg_index
-	where indexrelid = ('part_table4_1_prt_boys_2_prt_3_id_idx'::regclass));
-     relname     
-----------------
- id_index_unique
-(1 row)
-
 -- ************************************************************
 -- * Scenario 6
 -- * 	- a part has an existing constraint

--- a/src/test/regress/sql/alter_table.sql
+++ b/src/test/regress/sql/alter_table.sql
@@ -1771,6 +1771,7 @@ DROP TABLE test_drop_constr_parent CASCADE;
 -- IF EXISTS test
 --
 ALTER TABLE IF EXISTS tt8 ADD COLUMN f int;
+ALTER TABLE IF EXISTS tt8 SET DISTRIBUTED BY(f);
 ALTER TABLE IF EXISTS tt8 ADD CONSTRAINT xxx PRIMARY KEY(f);
 ALTER TABLE IF EXISTS tt8 ADD CHECK (f BETWEEN 0 AND 10);
 ALTER TABLE IF EXISTS tt8 ALTER COLUMN f SET DEFAULT 0;
@@ -1781,6 +1782,7 @@ CREATE TABLE tt8(a int);
 CREATE SCHEMA alter2;

 ALTER TABLE IF EXISTS tt8 ADD COLUMN f int;
+ALTER TABLE IF EXISTS tt8 SET DISTRIBUTED BY(f);
 ALTER TABLE IF EXISTS tt8 ADD CONSTRAINT xxx PRIMARY KEY(f);
 ALTER TABLE IF EXISTS tt8 ADD CHECK (f BETWEEN 0 AND 10);
 ALTER TABLE IF EXISTS tt8 ALTER COLUMN f SET DEFAULT 0;

--- a/src/test/regress/sql/bfv_olap.sql
+++ b/src/test/regress/sql/bfv_olap.sql
@@ -169,6 +169,7 @@ CREATE TABLE r
    d NUMERIC(10,0), 
    e DATE
 ) DISTRIBUTED BY (a,b);
+ALTER TABLE r SET DISTRIBUTED BY (b);
 ALTER TABLE r ADD CONSTRAINT PKEY PRIMARY KEY (b);

 --TEST

--- a/src/test/regress/sql/gp_create_table.sql
+++ b/src/test/regress/sql/gp_create_table.sql
@@ -31,25 +31,6 @@ drop table distpol;
 create table distpol as select random(), 2 as foo distributed by (foo);
 select distkey, distclass from gp_distribution_policy where localoid = 'distpol'::regclass;
 drop table distpol;
-- now test that MPP-101 /actually/ works
-create table distpol (i int, j int, k int) distributed by (i);
-alter table distpol add primary key (j);
-select distkey, distclass from gp_distribution_policy where localoid = 'distpol'::regclass;
-- make sure we can't overwrite it
-create unique index distpol_uidx on distpol(k);
-- should be able to now
-alter table distpol drop constraint distpol_pkey;
-create unique index distpol_uidx on distpol(k);
-select distkey, distclass from gp_distribution_policy where localoid = 'distpol'::regclass;
-drop index distpol_uidx;
-- expressions shouldn't be able to update the distribution key
-create unique index distpol_uidx on distpol(ln(k));
-drop index distpol_uidx;
-- lets make sure we don't change the policy when the table is full
-insert into distpol values(1, 2, 3);
-create unique index distpol_uidx on distpol(i);
-alter table distpol add primary key (i);
-drop table distpol;

 -- if the datatype of the index column is not hashable, can't update distribution
 -- key to it.

--- a/src/test/regress/sql/gp_index.sql
+++ b/src/test/regress/sql/gp_index.sql
@@ -31,3 +31,34 @@ DROP INDEX CONCURRENTLY "tbl_drop_index1";
 \d tbl_drop_ind_concur

 DROP TABLE tbl_drop_ind_concur;
+
+-- Creating UNIQUE/PRIMARY KEY index is disallowed to change the distribution
+-- keys implicitly
+CREATE TABLE tbl_create_index(i int, j int, k int) distributed by(i, j);
+-- should fail
+CREATE UNIQUE INDEX ON tbl_create_index(i);
+CREATE UNIQUE INDEX ON tbl_create_index(k);
+CREATE UNIQUE INDEX ON tbl_create_index(i, k);
+ALTER TABLE tbl_create_index ADD CONSTRAINT PKEY PRIMARY KEY(i);
+ALTER TABLE tbl_create_index ADD CONSTRAINT PKEY PRIMARY KEY(k);
+ALTER TABLE tbl_create_index ADD CONSTRAINT PKEY PRIMARY KEY(i, k);
+-- should success
+CREATE UNIQUE INDEX tbl_create_index_ij ON tbl_create_index(i, j);
+CREATE UNIQUE INDEX tbl_create_index_ijk ON tbl_create_index(i, j, k);
+\d tbl_create_index
+DROP INDEX tbl_create_index_ij;
+DROP INDEX tbl_create_index_ijk;
+
+ALTER TABLE tbl_create_index ADD CONSTRAINT PKEY PRIMARY KEY(i, j, k);
+\d tbl_create_index
+ALTER TABLE tbl_create_index DROP CONSTRAINT PKEY;
+
+-- after changing the distribution keys, the above failed clause should success
+ALTER TABLE tbl_create_index SET DISTRIBUTED BY(k);
+CREATE UNIQUE INDEX ON tbl_create_index(k);
+CREATE UNIQUE INDEX ON tbl_create_index(i, k);
+ALTER TABLE tbl_create_index ADD CONSTRAINT PKEY PRIMARY KEY(i, k);
+\d tbl_create_index
+
+DROP TABLE tbl_create_index;
+