/*-------------------------------------------------------------------------
 *
 * execUtils.c
 *	  miscellaneous executor utility routines
 *
 * Portions Copyright (c) 2005-2008, Greenplum inc
 * Portions Copyright (c) 2012-Present Pivotal Software, Inc.
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/execUtils.c
 *
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *		CreateExecutorState		Create/delete executor working state
 *		FreeExecutorState
 *		CreateExprContext
 *		CreateStandaloneExprContext
 *		FreeExprContext
 *		ReScanExprContext
 *
 *		ExecAssignExprContext	Common code for plan node init routines.
 *		ExecAssignResultType
 *		etc
 *
 *		ExecOpenScanRelation	Common code for scan node init routines.
 *		ExecCloseScanRelation
 *
 *		ExecOpenIndices			\
 *		ExecCloseIndices		 | referenced by InitPlan, EndPlan,
 *		ExecInsertIndexTuples	/  ExecInsert, ExecUpdate
 *
 *		RegisterExprContextCallback    Register function shutdown callback
 *		UnregisterExprContextCallback  Deregister function shutdown callback
 *
 *	 NOTES
 *		This file has traditionally been the place to stick misc.
 *		executor support stuff that doesn't really go anyplace else.
 */

#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/appendonlywriter.h"
#include "access/relscan.h"
#include "access/transam.h"
#include "catalog/index.h"
#include "executor/execdebug.h"
#include "executor/execUtils.h"
#include "nodes/nodeFuncs.h"
#include "parser/parsetree.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"
#include "utils/tqual.h"

#include "nodes/primnodes.h"
#include "nodes/execnodes.h"

#include "cdb/cdbutil.h"
#include "cdb/cdbvars.h"
#include "cdb/cdbdisp_query.h"
#include "cdb/cdbdispatchresult.h"
#include "cdb/ml_ipc.h"
#include "cdb/cdbmotion.h"
#include "cdb/cdbsreh.h"
#include "cdb/memquota.h"
#include "executor/instrument.h"
#include "executor/spi.h"
#include "utils/elog.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "storage/ipc.h"
#include "cdb/cdbllize.h"
#include "utils/workfile_mgr.h"
#include "utils/metrics_utils.h"

static bool get_last_attnums(Node *node, ProjectionInfo *projInfo);
static bool index_recheck_constraint(Relation index, Oid *constr_procs,
						 Datum *existing_values, bool *existing_isnull,
						 Datum *new_values);
static void ShutdownExprContext(ExprContext *econtext, bool isCommit);


/* ----------------------------------------------------------------
 *				 Executor state and memory management functions
 * ----------------------------------------------------------------
 */

/* ----------------
 *		CreateExecutorState
 *
 *		Create and initialize an EState node, which is the root of
 *		working storage for an entire Executor invocation.
 *
 * Principally, this creates the per-query memory context that will be
 * used to hold all working data that lives till the end of the query.
 * Note that the per-query context will become a child of the caller's
 * CurrentMemoryContext.
 * ----------------
 */
EState *
CreateExecutorState(void)
{
	EState	   *estate;
	MemoryContext qcontext;
	MemoryContext oldcontext;

	/*
	 * Create the per-query context for this Executor run.
	 */
	qcontext = AllocSetContextCreate(CurrentMemoryContext,
									 "ExecutorState",
									 ALLOCSET_DEFAULT_MINSIZE,
									 ALLOCSET_DEFAULT_INITSIZE,
									 ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * Make the EState node within the per-query context.  This way, we don't
	 * need a separate pfree() operation for it at shutdown.
	 */
	oldcontext = MemoryContextSwitchTo(qcontext);

	estate = makeNode(EState);

	/*
	 * Initialize dynamicTableScanInfo.
	 */
	estate->dynamicTableScanInfo = palloc0(sizeof(DynamicTableScanInfo));

	/*
	 * Initialize all fields of the Executor State structure
	 */
	estate->es_direction = ForwardScanDirection;
	estate->es_snapshot = SnapshotNow;
	estate->es_crosscheck_snapshot = InvalidSnapshot;	/* no crosscheck */
	estate->es_range_table = NIL;
	estate->es_plannedstmt = NULL;

	estate->es_junkFilter = NULL;

	estate->es_output_cid = (CommandId) 0;

	estate->es_result_relations = NULL;
	estate->es_num_result_relations = 0;
	estate->es_result_relation_info = NULL;

	estate->es_trig_target_relations = NIL;
	estate->es_trig_tuple_slot = NULL;
	estate->es_trig_oldtup_slot = NULL;
	estate->es_trig_newtup_slot = NULL;

	estate->es_param_list_info = NULL;
	estate->es_param_exec_vals = NULL;

	estate->es_query_cxt = qcontext;

	estate->es_tupleTable = NIL;

	estate->es_rowMarks = NIL;

	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	estate->es_top_eflags = 0;
	estate->es_instrument = 0;
	estate->es_finished = false;

	estate->es_exprcontexts = NIL;

	estate->es_subplanstates = NIL;

	estate->es_auxmodifytables = NIL;

	estate->es_per_tuple_exprcontext = NULL;

	estate->es_epqTuple = NULL;
	estate->es_epqTupleSet = NULL;
	estate->es_epqScanDone = NULL;

	estate->es_sliceTable = NULL;
	estate->interconnect_context = NULL;
	estate->motionlayer_context = NULL;
	estate->es_interconnect_is_setup = false;
	estate->active_recv_id = -1;
	estate->es_got_eos = false;
	estate->cancelUnfinished = false;

	estate->dispatcherState = NULL;

	estate->currentSliceIdInPlan = 0;
	estate->currentExecutingSliceId = 0;
	estate->currentSubplanLevel = 0;
	estate->rootSliceId = 0;
	estate->eliminateAliens = false;

	/*
	 * Return the executor state structure
	 */
	MemoryContextSwitchTo(oldcontext);

	return estate;
}

/* ----------------
 *		FreeExecutorState
 *
 *		Release an EState along with all remaining working storage.
 *
 * Note: this is not responsible for releasing non-memory resources,
 * such as open relations or buffer pins.  But it will shut down any
 * still-active ExprContexts within the EState.  That is sufficient
 * cleanup for situations where the EState has only been used for expression
 * evaluation, and not to run a complete Plan.
 *
 * This can be called in any memory context ... so long as it's not one
 * of the ones to be freed.
 *
 * In Greenplum, this also clears the PartitionState, even though that's a
 * non-memory resource, as that can be allocated for expression evaluation even
 * when there is no Plan.
 * ----------------
 */
void
FreeExecutorState(EState *estate)
{
	/*
	 * Shut down and free any remaining ExprContexts.  We do this explicitly
	 * to ensure that any remaining shutdown callbacks get called (since they
	 * might need to release resources that aren't simply memory within the
	 * per-query memory context).
	 */
	while (estate->es_exprcontexts)
	{
		/*
		 * XXX: seems there ought to be a faster way to implement this than
		 * repeated list_delete(), no?
		 */
		FreeExprContext((ExprContext *) linitial(estate->es_exprcontexts),
						true);
		/* FreeExprContext removed the list link for us */
	}

	estate->dispatcherState = NULL;
	estate->dynamicTableScanInfo = NULL;

	/*
	 * Free the per-query memory context, thereby releasing all working
	 * memory, including the EState node itself.
	 */
	MemoryContextDelete(estate->es_query_cxt);
}
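
/*
 * Illustrative sketch (not part of the build): the typical lifecycle of an
 * EState used only for standalone expression evaluation.  The names
 * "estate", "econtext", and "expr" are hypothetical.
 *
 *		EState	   *estate = CreateExecutorState();
 *		ExprContext *econtext = CreateExprContext(estate);
 *		ExprState  *exprstate = ExecPrepareExpr(expr, estate);
 *
 *		... call ExecEvalExpr(exprstate, econtext, &isNull, NULL) ...
 *
 *		FreeExecutorState(estate);
 *
 * FreeExecutorState() also shuts down econtext's callbacks, so no separate
 * FreeExprContext() call is needed here.
 */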

/* ----------------
 *		CreateExprContext
 *
 *		Create a context for expression evaluation within an EState.
 *
 * An executor run may require multiple ExprContexts (we usually make one
 * for each Plan node, and a separate one for per-output-tuple processing
 * such as constraint checking).  Each ExprContext has its own "per-tuple"
 * memory context.
 *
 * Note we make no assumption about the caller's memory context.
 * ----------------
 */
ExprContext *
CreateExprContext(EState *estate)
{
	ExprContext *econtext;
	MemoryContext oldcontext;

	/* Create the ExprContext node within the per-query memory context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	econtext = makeNode(ExprContext);

	/* Initialize fields of ExprContext */
	econtext->ecxt_scantuple = NULL;
	econtext->ecxt_innertuple = NULL;
	econtext->ecxt_outertuple = NULL;

	econtext->ecxt_per_query_memory = estate->es_query_cxt;

	/*
	 * Create working memory for expression evaluation in this context.
	 */
	econtext->ecxt_per_tuple_memory =
		AllocSetContextCreate(estate->es_query_cxt,
							  "ExprContext",
							  ALLOCSET_DEFAULT_MINSIZE,
							  ALLOCSET_DEFAULT_INITSIZE,
							  ALLOCSET_DEFAULT_MAXSIZE);

	econtext->ecxt_param_exec_vals = estate->es_param_exec_vals;
	econtext->ecxt_param_list_info = estate->es_param_list_info;

	econtext->ecxt_aggvalues = NULL;
	econtext->ecxt_aggnulls = NULL;

	econtext->caseValue_datum = (Datum) 0;
	econtext->caseValue_isNull = true;

	econtext->domainValue_datum = (Datum) 0;
	econtext->domainValue_isNull = true;

	econtext->ecxt_estate = estate;

	econtext->ecxt_callbacks = NULL;

	/*
	 * Link the ExprContext into the EState to ensure it is shut down when the
	 * EState is freed.  Because we use lcons(), shutdowns will occur in
	 * reverse order of creation, which may not be essential but can't hurt.
	 */
	estate->es_exprcontexts = lcons(econtext, estate->es_exprcontexts);

	MemoryContextSwitchTo(oldcontext);

	return econtext;
}

/* ----------------
 *		CreateStandaloneExprContext
 *
 *		Create a context for standalone expression evaluation.
 *
 * An ExprContext made this way can be used for evaluation of expressions
 * that contain no Params, subplans, or Var references (it might work to
 * put tuple references into the scantuple field, but it seems unwise).
 *
 * The ExprContext struct is allocated in the caller's current memory
 * context, which also becomes its "per query" context.
 *
 * It is caller's responsibility to free the ExprContext when done,
 * or at least ensure that any shutdown callbacks have been called
 * (ReScanExprContext() is suitable).  Otherwise, non-memory resources
 * might be leaked.
 * ----------------
 */
ExprContext *
CreateStandaloneExprContext(void)
{
	ExprContext *econtext;

	/* Create the ExprContext node within the caller's memory context */
	econtext = makeNode(ExprContext);

	/* Initialize fields of ExprContext */
	econtext->ecxt_scantuple = NULL;
	econtext->ecxt_innertuple = NULL;
	econtext->ecxt_outertuple = NULL;

	econtext->ecxt_per_query_memory = CurrentMemoryContext;

	/*
	 * Create working memory for expression evaluation in this context.
	 */
	econtext->ecxt_per_tuple_memory =
		AllocSetContextCreate(CurrentMemoryContext,
							  "ExprContext",
							  ALLOCSET_DEFAULT_MINSIZE,
							  ALLOCSET_DEFAULT_INITSIZE,
							  ALLOCSET_DEFAULT_MAXSIZE);

	econtext->ecxt_param_exec_vals = NULL;
	econtext->ecxt_param_list_info = NULL;

	econtext->ecxt_aggvalues = NULL;
	econtext->ecxt_aggnulls = NULL;

	econtext->caseValue_datum = (Datum) 0;
	econtext->caseValue_isNull = true;

	econtext->domainValue_datum = (Datum) 0;
	econtext->domainValue_isNull = true;

	econtext->ecxt_estate = NULL;

	econtext->ecxt_callbacks = NULL;

	return econtext;
}
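
/*
 * Illustrative sketch (not part of the build): evaluating expressions with
 * no Params or Vars, outside any EState.  "econtext" is hypothetical.
 *
 *		ExprContext *econtext = CreateStandaloneExprContext();
 *
 *		... evaluate expressions using econtext ...
 *
 *		ReScanExprContext(econtext);
 *
 * The ReScanExprContext() call runs any registered shutdown callbacks;
 * afterwards the context's memory is released along with the caller's
 * memory context.
 */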

/* ----------------
 *		FreeExprContext
 *
 *		Free an expression context, including calling any remaining
 *		shutdown callbacks.
 *
 * Since we free the temporary context used for expression evaluation,
 * any previously computed pass-by-reference expression result will go away!
 *
 * If isCommit is false, we are being called in error cleanup, and should
 * not call callbacks but only release memory.  (It might be better to call
 * the callbacks and pass the isCommit flag to them, but that would require
 * more invasive code changes than currently seems justified.)
 *
 * Note we make no assumption about the caller's memory context.
 * ----------------
 */
void
FreeExprContext(ExprContext *econtext, bool isCommit)
{
	EState	   *estate;

	/* Call any registered callbacks */
	ShutdownExprContext(econtext, isCommit);
	/* And clean up the memory used */
	MemoryContextDelete(econtext->ecxt_per_tuple_memory);
	/* Unlink self from owning EState, if any */
	estate = econtext->ecxt_estate;
	if (estate)
		estate->es_exprcontexts = list_delete_ptr(estate->es_exprcontexts,
												  econtext);
	/* And delete the ExprContext node */
	pfree(econtext);
}

/*
 * ReScanExprContext
 *
 *		Reset an expression context in preparation for a rescan of its
 *		plan node.  This requires calling any registered shutdown callbacks,
 *		since any partially complete set-returning-functions must be canceled.
 *
 * Note we make no assumption about the caller's memory context.
 */
void
ReScanExprContext(ExprContext *econtext)
{
	/* Call any registered callbacks */
	ShutdownExprContext(econtext, true);
	/* And clean up the memory used */
	MemoryContextReset(econtext->ecxt_per_tuple_memory);
}

/*
 * Build a per-output-tuple ExprContext for an EState.
 *
 * This is normally invoked via GetPerTupleExprContext() macro,
 * not directly.
 */
ExprContext *
MakePerTupleExprContext(EState *estate)
{
	if (estate->es_per_tuple_exprcontext == NULL)
		estate->es_per_tuple_exprcontext = CreateExprContext(estate);

	return estate->es_per_tuple_exprcontext;
}
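
/*
 * Illustrative sketch (not part of the build): the usual per-output-tuple
 * pattern.  Callers reach this context via the GetPerTupleExprContext()
 * macro and reset it between tuples so that pass-by-reference results do
 * not accumulate:
 *
 *		for each output tuple:
 *			ResetPerTupleExprContext(estate);
 *			econtext = GetPerTupleExprContext(estate);
 *			... evaluate constraint/junk expressions in econtext ...
 */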


/* ----------------------------------------------------------------
 *				 miscellaneous node-init support functions
 *
 * Note: all of these are expected to be called with CurrentMemoryContext
 * equal to the per-query memory context.
 * ----------------------------------------------------------------
 */

/* ----------------
 *		ExecAssignExprContext
 *
 *		This initializes the ps_ExprContext field.  It is only necessary
 *		to do this for nodes which use ExecQual or ExecProject
 *		because those routines require an econtext. Other nodes that
 *		don't have to evaluate expressions don't need to do this.
 * ----------------
 */
void
ExecAssignExprContext(EState *estate, PlanState *planstate)
{
	planstate->ps_ExprContext = CreateExprContext(estate);
}

/* ----------------
 *		ExecAssignResultType
 * ----------------
 */
void
ExecAssignResultType(PlanState *planstate, TupleDesc tupDesc)
{
	TupleTableSlot *slot = planstate->ps_ResultTupleSlot;

	ExecSetSlotDescriptor(slot, tupDesc);
}

/* ----------------
 *		ExecAssignResultTypeFromTL
 * ----------------
 */
void
ExecAssignResultTypeFromTL(PlanState *planstate)
{
	bool		hasoid;
	TupleDesc	tupDesc;

	if (ExecContextForcesOids(planstate, &hasoid))
	{
		/* context forces OID choice; hasoid is now set correctly */
	}
	else
	{
		/* given free choice, don't leave space for OIDs in result tuples */
		hasoid = false;
	}

	/*
	 * ExecTypeFromTL needs the parse-time representation of the tlist, not a
	 * list of ExprStates.	This is good because some plan nodes don't bother
	 * to set up planstate->targetlist ...
	 */
	tupDesc = ExecTypeFromTL(planstate->plan->targetlist, hasoid);
	ExecAssignResultType(planstate, tupDesc);
}

/* ----------------
 *		ExecGetResultType
 * ----------------
 */
TupleDesc
ExecGetResultType(PlanState *planstate)
{
	TupleTableSlot *slot = planstate->ps_ResultTupleSlot;

	return slot->tts_tupleDescriptor;
}

/* ----------------
 *		ExecBuildProjectionInfo
 *
 * Build a ProjectionInfo node for evaluating the given tlist in the given
 * econtext, and storing the result into the tuple slot.  (Caller must have
 * ensured that tuple slot has a descriptor matching the tlist!)  Note that
 * the given tlist should be a list of ExprState nodes, not Expr nodes.
 *
 * inputDesc can be NULL, but if it is not, we check to see whether simple
 * Vars in the tlist match the descriptor.  It is important to provide
 * inputDesc for relation-scan plan nodes, as a cross check that the relation
 * hasn't been changed since the plan was made.  At higher levels of a plan,
 * there is no need to recheck.
 * ----------------
 */
ProjectionInfo *
ExecBuildProjectionInfo(List *targetList,
						ExprContext *econtext,
						TupleTableSlot *slot,
						TupleDesc inputDesc)
{
	ProjectionInfo *projInfo = makeNode(ProjectionInfo);
	int			len = ExecTargetListLength(targetList);
	int		   *workspace;
	int		   *varSlotOffsets;
	int		   *varNumbers;
	int		   *varOutputCols;
	List	   *exprlist;
	int			numSimpleVars;
	bool		directMap;
	ListCell   *tl;

	projInfo->pi_exprContext = econtext;
	projInfo->pi_slot = slot;
	/* since these are all int arrays, we need do just one palloc */
	workspace = (int *) palloc(len * 3 * sizeof(int));
	projInfo->pi_varSlotOffsets = varSlotOffsets = workspace;
	projInfo->pi_varNumbers = varNumbers = workspace + len;
	projInfo->pi_varOutputCols = varOutputCols = workspace + len * 2;
	projInfo->pi_lastInnerVar = 0;
	projInfo->pi_lastOuterVar = 0;
	projInfo->pi_lastScanVar = 0;

	/*
	 * We separate the target list elements into simple Var references and
	 * expressions which require the full ExecTargetList machinery.  To be a
	 * simple Var, a Var has to be a user attribute and not mismatch the
	 * inputDesc.  (Note: if there is a type mismatch then ExecEvalScalarVar
	 * will probably throw an error at runtime, but we leave that to it.)
	 */
	exprlist = NIL;
	numSimpleVars = 0;
	directMap = true;
	foreach(tl, targetList)
	{
		GenericExprState *gstate = (GenericExprState *) lfirst(tl);
		Var		   *variable = (Var *) gstate->arg->expr;
		bool		isSimpleVar = false;

		if (variable != NULL &&
			IsA(variable, Var) &&
			variable->varattno > 0)
		{
			if (!inputDesc)
				isSimpleVar = true;		/* can't check type, assume OK */
			else if (variable->varattno <= inputDesc->natts)
			{
				Form_pg_attribute attr;

				attr = inputDesc->attrs[variable->varattno - 1];
				if (!attr->attisdropped && variable->vartype == attr->atttypid)
					isSimpleVar = true;
			}
		}

		if (isSimpleVar)
		{
			TargetEntry *tle = (TargetEntry *) gstate->xprstate.expr;
			AttrNumber	attnum = variable->varattno;

			varNumbers[numSimpleVars] = attnum;
			varOutputCols[numSimpleVars] = tle->resno;
			if (tle->resno != numSimpleVars + 1)
				directMap = false;

			switch (variable->varno)
			{
				case INNER_VAR:
					varSlotOffsets[numSimpleVars] = offsetof(ExprContext,
															 ecxt_innertuple);
					if (projInfo->pi_lastInnerVar < attnum)
						projInfo->pi_lastInnerVar = attnum;
					break;

				case OUTER_VAR:
					varSlotOffsets[numSimpleVars] = offsetof(ExprContext,
															 ecxt_outertuple);
					if (projInfo->pi_lastOuterVar < attnum)
						projInfo->pi_lastOuterVar = attnum;
					break;

					/* INDEX_VAR is handled by default case */

				default:
					varSlotOffsets[numSimpleVars] = offsetof(ExprContext,
															 ecxt_scantuple);
					if (projInfo->pi_lastScanVar < attnum)
						projInfo->pi_lastScanVar = attnum;
					break;
			}
			numSimpleVars++;
		}
		else
		{
			/* Not a simple variable, add it to generic targetlist */
			exprlist = lappend(exprlist, gstate);
			/* Examine expr to include contained Vars in lastXXXVar counts */
			get_last_attnums((Node *) variable, projInfo);
		}
	}
	projInfo->pi_targetlist = exprlist;
	projInfo->pi_numSimpleVars = numSimpleVars;
	projInfo->pi_directMap = directMap;

	if (exprlist == NIL)
		projInfo->pi_itemIsDone = NULL; /* not needed */
	else
		projInfo->pi_itemIsDone = (ExprDoneCond *)
			palloc(len * sizeof(ExprDoneCond));

	return projInfo;
}
/*
 * get_last_attnums: expression walker for ExecBuildProjectionInfo
 *
 *	Update the lastXXXVar counts to be at least as large as the largest
 *	attribute numbers found in the expression
 */
static bool
get_last_attnums(Node *node, ProjectionInfo *projInfo)
{
	if (node == NULL)
		return false;
	if (IsA(node, Var))
	{
		Var		   *variable = (Var *) node;
		AttrNumber	attnum = variable->varattno;

		switch (variable->varno)
		{
			case INNER_VAR:
				if (projInfo->pi_lastInnerVar < attnum)
					projInfo->pi_lastInnerVar = attnum;
				break;

			case OUTER_VAR:
				if (projInfo->pi_lastOuterVar < attnum)
					projInfo->pi_lastOuterVar = attnum;
				break;

				/* INDEX_VAR is handled by default case */

			default:
				if (projInfo->pi_lastScanVar < attnum)
					projInfo->pi_lastScanVar = attnum;
				break;
		}
		return false;
	}

	/*
	 * Don't examine the arguments of Aggrefs or WindowFuncs, because those do
	 * not represent expressions to be evaluated within the overall
	 * targetlist's econtext.
	 */
	if (IsA(node, Aggref))
		return false;
	if (IsA(node, WindowFunc))
		return false;
	return expression_tree_walker(node, get_last_attnums,
								  (void *) projInfo);
}

/* ----------------
 *		ExecAssignProjectionInfo
 *
 * forms the projection information from the node's targetlist
 *
 * Notes for inputDesc are same as for ExecBuildProjectionInfo: supply it
 * for a relation-scan node, can pass NULL for upper-level nodes
 * ----------------
 */
void
ExecAssignProjectionInfo(PlanState *planstate,
						 TupleDesc inputDesc)
{
	ProjectionInfo *pi = planstate->ps_ProjInfo;

	if (NULL != pi)
	{
		/*
		 * Note that pi->pi_varSlotOffsets, pi->pi_varNumbers, and
		 * pi->pi_varOutputCols are all pointers into the same allocation.
		 */
		if (NULL != pi->pi_varSlotOffsets)
		{
			pfree(pi->pi_varSlotOffsets);
		}
		if (NULL != pi->pi_itemIsDone)
		{
			pfree(pi->pi_itemIsDone);
		}
		pfree(pi);
	}

	planstate->ps_ProjInfo =
		ExecBuildProjectionInfo(planstate->targetlist,
								planstate->ps_ExprContext,
								planstate->ps_ResultTupleSlot,
								inputDesc);
}

/* ----------------
 *		ExecFreeExprContext
 *
 * A plan node's ExprContext should be freed explicitly during executor
 * shutdown because there may be shutdown callbacks to call.  (Other resources
 * made by the above routines, such as projection info, don't need to be freed
 * explicitly because they're just memory in the per-query memory context.)
 *
 * However ... there is no particular need to do it during ExecEndNode,
 * because FreeExecutorState will free any remaining ExprContexts within
 * the EState.  Letting FreeExecutorState do it allows the ExprContexts to
 * be freed in reverse order of creation, rather than order of creation as
 * will happen if we delete them here, which saves O(N^2) work in the list
 * cleanup inside FreeExprContext.
 * ----------------
 */
void
ExecFreeExprContext(PlanState *planstate)
{
	/*
	 * Per above discussion, don't actually delete the ExprContext. We do
	 * unlink it from the plan node, though.
	 */
	planstate->ps_ExprContext = NULL;
}

/* ----------------------------------------------------------------
 *		the following scan type support functions are for
 *		those nodes which are stubborn and return tuples in
 *		their Scan tuple slot instead of their Result tuple
 *		slot.  Lucky for us, these nodes do not do projections
 *		so we don't have to worry about getting the ProjectionInfo
 *		right for them...  -cim 6/3/91
 * ----------------------------------------------------------------
 */

/* ----------------
 *		ExecGetScanType
 * ----------------
 */
TupleDesc
ExecGetScanType(ScanState *scanstate)
{
	TupleTableSlot *slot = scanstate->ss_ScanTupleSlot;

	return slot->tts_tupleDescriptor;
}

/* ----------------
 *		ExecAssignScanType
 * ----------------
 */
void
ExecAssignScanType(ScanState *scanstate, TupleDesc tupDesc)
{
	TupleTableSlot *slot = scanstate->ss_ScanTupleSlot;

	ExecSetSlotDescriptor(slot, tupDesc);
}

/* ----------------
 *		ExecAssignScanTypeFromOuterPlan
 * ----------------
 */
void
ExecAssignScanTypeFromOuterPlan(ScanState *scanstate)
{
	PlanState  *outerPlan;
	TupleDesc	tupDesc;

	outerPlan = outerPlanState(scanstate);
	tupDesc = ExecGetResultType(outerPlan);

	ExecAssignScanType(scanstate, tupDesc);
}


/* ----------------------------------------------------------------
 *				  Scan node support
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *		ExecRelationIsTargetRelation
 *
 *		Detect whether a relation (identified by rangetable index)
 *		is one of the target relations of the query.
 * ----------------------------------------------------------------
 */
bool
ExecRelationIsTargetRelation(EState *estate, Index scanrelid)
{
	ResultRelInfo *resultRelInfos;
	int			i;

	resultRelInfos = estate->es_result_relations;
	for (i = 0; i < estate->es_num_result_relations; i++)
	{
		if (resultRelInfos[i].ri_RangeTableIndex == scanrelid)
			return true;
	}
	return false;
}

/* ----------------------------------------------------------------
 *		ExecOpenScanRelation
 *
 *		Open the heap relation to be scanned by a base-level scan plan node.
 *		This should be called during the node's ExecInit routine.
 *
 * By default, this acquires AccessShareLock on the relation.  However,
 * if the relation was already locked by InitPlan, we don't need to acquire
 * any additional lock.  This saves trips to the shared lock manager.
 * ----------------------------------------------------------------
 */
Relation
ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
{
	Relation	rel;
	Oid			reloid;
	LOCKMODE	lockmode;

	/*
	 * Determine the lock type we need.  First, scan to see if target relation
	 * is a result relation.  If not, check if it's a FOR UPDATE/FOR SHARE
	 * relation.  In either of those cases, we got the lock already.
	 */
	lockmode = AccessShareLock;
	if (ExecRelationIsTargetRelation(estate, scanrelid))
		lockmode = NoLock;
	else
	{
		ListCell   *l;

		foreach(l, estate->es_rowMarks)
		{
			ExecRowMark *erm = lfirst(l);

			if (erm->rti == scanrelid)
			{
				lockmode = NoLock;
				break;
			}
		}
	}

	/* Open the relation and acquire lock as needed */
	reloid = getrelid(scanrelid, estate->es_range_table);
	rel = heap_open(reloid, lockmode);

	/*
	 * Complain if we're attempting a scan of an unscannable relation, except
	 * when the query won't actually be run.  This is a slightly klugy place
	 * to do this, perhaps, but there is no better place.
	 */
	if ((eflags & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA)) == 0 &&
		!RelationIsScannable(rel))
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("materialized view \"%s\" has not been populated",
						RelationGetRelationName(rel)),
				 errhint("Use the REFRESH MATERIALIZED VIEW command.")));

	return rel;
}

/*
 * same as above, but for external table scans
 */
Relation
ExecOpenScanExternalRelation(EState *estate, Index scanrelid)
{
	RangeTblEntry *rtentry;
	Oid			reloid;
	rtentry = rt_fetch(scanrelid, estate->es_range_table);
	reloid = rtentry->relid;

	return relation_open(reloid, NoLock);
}

/* ----------------------------------------------------------------
 *		ExecCloseScanRelation
 *
 *		Close the heap relation scanned by a base-level scan plan node.
 *		This should be called during the node's ExecEnd routine.
 *
 * Currently, we do not release the lock acquired by ExecOpenScanRelation.
 * This lock should be held till end of transaction.  (There is a faction
 * that considers this too much locking, however.)
 *
 * If we did want to release the lock, we'd have to repeat the logic in
 * ExecOpenScanRelation in order to figure out what to release.
 * ----------------------------------------------------------------
 */
void
ExecCloseScanRelation(Relation scanrel)
{
	heap_close(scanrel, NoLock);
}


/* ----------------------------------------------------------------
 *				  ExecInsertIndexTuples support
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *		ExecOpenIndices
 *
 *		Find the indices associated with a result relation, open them,
 *		and save information about them in the result ResultRelInfo.
 *
 *		At entry, caller has already opened and locked
 *		resultRelInfo->ri_RelationDesc.
 * ----------------------------------------------------------------
 */
void
ExecOpenIndices(ResultRelInfo *resultRelInfo)
{
	Relation	resultRelation = resultRelInfo->ri_RelationDesc;
	List	   *indexoidlist;
	ListCell   *l;
	int			len,
				i;
	RelationPtr relationDescs;
	IndexInfo **indexInfoArray;

	resultRelInfo->ri_NumIndices = 0;

	/* fast path if no indexes */
	/* fast path if no indexes */
	if (!RelationGetForm(resultRelation)->relhasindex)
		return;

	/*
	 * Get cached list of index OIDs
	 */
	indexoidlist = RelationGetIndexList(resultRelation);
	len = list_length(indexoidlist);
	if (len == 0)
		return;

	/*
	 * allocate space for result arrays
	 */
	relationDescs = (RelationPtr) palloc(len * sizeof(Relation));
	indexInfoArray = (IndexInfo **) palloc(len * sizeof(IndexInfo *));

	resultRelInfo->ri_NumIndices = len;
	resultRelInfo->ri_IndexRelationDescs = relationDescs;
	resultRelInfo->ri_IndexRelationInfo = indexInfoArray;

	/*
	 * For each index, open the index relation and save pg_index info. We
	 * acquire RowExclusiveLock, signifying we will update the index.
	 *
	 * Note: we do this even if the index is not IndexIsReady; it's not worth
	 * the trouble to optimize for the case where it isn't.
	 */
	i = 0;
	foreach(l, indexoidlist)
	{
		Oid			indexOid = lfirst_oid(l);
		Relation	indexDesc;
		IndexInfo  *ii;

		indexDesc = index_open(indexOid, RowExclusiveLock);

		/* extract index key information from the index's pg_index info */
		ii = BuildIndexInfo(indexDesc);

		relationDescs[i] = indexDesc;
		indexInfoArray[i] = ii;
		i++;
	}

	list_free(indexoidlist);
}

/* ----------------------------------------------------------------
 *		ExecCloseIndices
 *
 *		Close the index relations stored in resultRelInfo
 * ----------------------------------------------------------------
 */
void
ExecCloseIndices(ResultRelInfo *resultRelInfo)
{
	int			i;
	int			numIndices;
	RelationPtr indexDescs;

	numIndices = resultRelInfo->ri_NumIndices;
	indexDescs = resultRelInfo->ri_IndexRelationDescs;

	for (i = 0; i < numIndices; i++)
	{
		if (indexDescs[i] == NULL)
			continue;			/* shouldn't happen? */

		/* Drop lock acquired by ExecOpenIndices */
		index_close(indexDescs[i], RowExclusiveLock);
	}

	/*
	 * XXX should free indexInfo array here too?  Currently we assume that
	 * such stuff will be cleaned up automatically in FreeExecutorState.
	 */
}

/* ----------------------------------------------------------------
 *		ExecInsertIndexTuples
 *
 *		This routine takes care of inserting index tuples
 *		into all the relations indexing the result relation
 *		when a heap tuple is inserted into the result relation.
 *		Much of this code should be moved into the genam
 *		stuff as it only exists here because the genam stuff
 *		doesn't provide the functionality needed by the
 *		executor.. -cim 9/27/89
 *
 *		This returns a list of index OIDs for any unique or exclusion
 *		constraints that are deferred and that had
 *		potential (unconfirmed) conflicts.
 *
 *		CAUTION: this must not be called for a HOT update.
 *		We can't defend against that here for lack of info.
 *		Should we change the API to make it safer?
 * ----------------------------------------------------------------
 */
1092
ExecInsertIndexTuples(TupleTableSlot *slot,
1093
					  ItemPointer tupleid,
1094
					  EState *estate)
1095
{
1096
	List	   *result = NIL;
1097
	ResultRelInfo *resultRelInfo;
1098 1099 1100 1101 1102 1103
	int			i;
	int			numIndices;
	RelationPtr relationDescs;
	Relation	heapRelation;
	IndexInfo **indexInfoArray;
	ExprContext *econtext;
1104 1105
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
1106

1107 1108
	/*
	 * Get information from the result relation info structure.
1109
	 */
1110 1111 1112 1113 1114
	resultRelInfo = estate->es_result_relation_info;
	numIndices = resultRelInfo->ri_NumIndices;
	relationDescs = resultRelInfo->ri_IndexRelationDescs;
	indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
	heapRelation = resultRelInfo->ri_RelationDesc;
1115

1116
	/*
B
Bruce Momjian 已提交
1117 1118
	 * We will use the EState's per-tuple context for evaluating predicates
	 * and index expressions (creating it if it's not already there).
1119
	 */
1120
	econtext = GetPerTupleExprContext(estate);
1121 1122 1123

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;
1124

1125 1126
	/*
	 * for each index, form and insert the index tuple
1127
	 */
1128 1129
	for (i = 0; i < numIndices; i++)
	{
1130
		Relation	indexRelation = relationDescs[i];
1131
		IndexInfo  *indexInfo;
1132
		IndexUniqueCheck checkUnique;
1133
		bool		satisfiesConstraint;
1134

1135
		if (indexRelation == NULL)
1136 1137 1138
			continue;

		indexInfo = indexInfoArray[i];
1139

1140 1141 1142 1143
		/* If the index is marked as read-only, ignore it */
		if (!indexInfo->ii_ReadyForInserts)
			continue;

1144 1145
		/* Check for partial index */
		if (indexInfo->ii_Predicate != NIL)
1146
		{
1147 1148 1149
			List	   *predicate;

			/*
			 * If predicate state not set up yet, create it (in the estate's
			 * per-query context)
			 */
			predicate = indexInfo->ii_PredicateState;
			if (predicate == NIL)
			{
				predicate = (List *)
					ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
									estate);
				indexInfo->ii_PredicateState = predicate;
			}

			/* Skip this index-update if the predicate isn't satisfied */
			if (!ExecQual(predicate, econtext, false))
				continue;
		}

		/*
		 * FormIndexDatum fills in its values and isnull parameters with the
		 * appropriate values for the column(s) of the index.
		 */
		FormIndexDatum(indexInfo,
					   slot,
					   estate,
					   values,
					   isnull);

		/*
		 * The index AM does the actual insertion, plus uniqueness checking.
		 *
		 * For an immediate-mode unique index, we just tell the index AM to
		 * throw error if not unique.
		 *
		 * For a deferrable unique index, we tell the index AM to just detect
		 * possible non-uniqueness, and we add the index OID to the result
		 * list if further checking is needed.
		 */
		if (!indexRelation->rd_index->indisunique)
			checkUnique = UNIQUE_CHECK_NO;
		else if (indexRelation->rd_index->indimmediate)
			checkUnique = UNIQUE_CHECK_YES;
		else
			checkUnique = UNIQUE_CHECK_PARTIAL;

		satisfiesConstraint =
			index_insert(indexRelation, /* index relation */
						 values,	/* array of index Datums */
						 isnull,	/* null flags */
						 tupleid,		/* tid of heap tuple */
						 heapRelation,	/* heap relation */
						 checkUnique);	/* type of uniqueness check to do */

		/*
		 * If the index has an associated exclusion constraint, check that.
		 * This is simpler than the process for uniqueness checks since we
		 * always insert first and then check.  If the constraint is deferred,
		 * we check now anyway, but don't throw error on violation; instead
		 * we'll queue a recheck event.
		 *
		 * An index for an exclusion constraint can't also be UNIQUE (not an
		 * essential property, we just don't allow it in the grammar), so no
		 * need to preserve the prior state of satisfiesConstraint.
		 */
		if (indexInfo->ii_ExclusionOps != NULL)
		{
			bool		errorOK = !indexRelation->rd_index->indimmediate;

			satisfiesConstraint =
				check_exclusion_constraint(heapRelation,
										   indexRelation, indexInfo,
										   tupleid, values, isnull,
										   estate, false, errorOK);
		}

		if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
			 indexInfo->ii_ExclusionOps != NULL) &&
			!satisfiesConstraint)
		{
			/*
			 * The tuple potentially violates the uniqueness or exclusion
			 * constraint, so make a note of the index so that we can re-check
			 * it later.
			 */
			result = lappend_oid(result, RelationGetRelid(indexRelation));
		}
	}

	return result;
}
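
/*
 * Illustrative sketch (not part of the build): how a caller such as
 * ExecInsert ties ExecOpenIndices, ExecInsertIndexTuples, and
 * ExecCloseIndices together.  Variable names are hypothetical and error
 * handling is omitted.
 *
 *		ExecOpenIndices(resultRelInfo);			(at plan initialization)
 *		...
 *		(store the heap tuple, obtaining its TID)
 *		if (resultRelInfo->ri_NumIndices > 0)
 *			recheckIndexes = ExecInsertIndexTuples(slot, &tid, estate);
 *		...
 *		ExecCloseIndices(resultRelInfo);		(at plan shutdown)
 *
 * Any OIDs returned in recheckIndexes identify deferred unique/exclusion
 * constraints that must be re-checked later.
 */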
/*
 * Check for violation of an exclusion constraint
 *
 * heap: the table containing the new tuple
 * index: the index supporting the exclusion constraint
 * indexInfo: info about the index, including the exclusion properties
 * tupleid: heap TID of the new tuple we have just inserted
 * values, isnull: the *index* column values computed for the new tuple
 * estate: an EState we can do evaluation in
 * newIndex: if true, we are trying to build a new index (this affects
 *		only the wording of error messages)
 * errorOK: if true, don't throw error for violation
 *
 * Returns true if OK, false if actual or potential violation
 *
 * When errorOK is true, we report violation without waiting to see if any
 * concurrent transaction has committed or not; so the violation is only
 * potential, and the caller must recheck sometime later.  This behavior
 * is convenient for deferred exclusion checks; we need not bother queuing
 * a deferred event if there is definitely no conflict at insertion time.
 *
 * When errorOK is false, we'll throw error on violation, so a false result
 * is impossible.
 */
bool
check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
						   ItemPointer tupleid, Datum *values, bool *isnull,
						   EState *estate, bool newIndex, bool errorOK)
{
	Oid		   *constr_procs = indexInfo->ii_ExclusionProcs;
	uint16	   *constr_strats = indexInfo->ii_ExclusionStrats;
	Oid		   *index_collations = index->rd_indcollation;
	int			index_natts = index->rd_index->indnatts;
	IndexScanDesc index_scan;
	HeapTuple	tup;
	ScanKeyData scankeys[INDEX_MAX_KEYS];
	SnapshotData DirtySnapshot;
	int			i;
	bool		conflict;
	bool		found_self;
	ExprContext *econtext;
	TupleTableSlot *existing_slot;
	TupleTableSlot *save_scantuple;

	/*
	 * If any of the input values are NULL, the constraint check is assumed to
	 * pass (i.e., we assume the operators are strict).
	 */
	for (i = 0; i < index_natts; i++)
	{
		if (isnull[i])
			return true;
	}

	/*
	 * Search the tuples that are in the index for any violations, including
	 * tuples that aren't visible yet.
	 */
	InitDirtySnapshot(DirtySnapshot);

	for (i = 0; i < index_natts; i++)
	{
		ScanKeyEntryInitialize(&scankeys[i],
							   0,
							   i + 1,
							   constr_strats[i],
							   InvalidOid,
							   index_collations[i],
							   constr_procs[i],
							   values[i]);
	}

	/*
	 * Need a TupleTableSlot to put existing tuples in.
	 *
	 * To use FormIndexDatum, we have to make the econtext's scantuple point
	 * to this slot.  Be sure to save and restore caller's value for
	 * scantuple.
	 */
	existing_slot = MakeSingleTupleTableSlot(RelationGetDescr(heap));

	econtext = GetPerTupleExprContext(estate);
	save_scantuple = econtext->ecxt_scantuple;
	econtext->ecxt_scantuple = existing_slot;

	/*
	 * May have to restart scan from this point if a potential conflict is
	 * found.
	 */
retry:
	conflict = false;
	found_self = false;
	index_scan = index_beginscan(heap, index, &DirtySnapshot, index_natts, 0);
	index_rescan(index_scan, scankeys, index_natts, NULL, 0);

	while ((tup = index_getnext(index_scan,
								ForwardScanDirection)) != NULL)
	{
		TransactionId xwait;
		Datum		existing_values[INDEX_MAX_KEYS];
		bool		existing_isnull[INDEX_MAX_KEYS];
		char	   *error_new;
		char	   *error_existing;

		/*
		 * Ignore the entry for the tuple we're trying to check.
		 */
		if (ItemPointerEquals(tupleid, &tup->t_self))
		{
			if (found_self)		/* should not happen */
				elog(ERROR, "found self tuple multiple times in index \"%s\"",
					 RelationGetRelationName(index));
			found_self = true;
			continue;
		}

		/*
		 * Extract the index column values and isnull flags from the existing
		 * tuple.
		 */
		ExecStoreHeapTuple(tup, existing_slot, InvalidBuffer, false);
		FormIndexDatum(indexInfo, existing_slot, estate,
					   existing_values, existing_isnull);

		/* If lossy indexscan, must recheck the condition */
		if (index_scan->xs_recheck)
		{
			if (!index_recheck_constraint(index,
										  constr_procs,
										  existing_values,
										  existing_isnull,
										  values))
				continue;		/* tuple doesn't actually match, so no
								 * conflict */
		}

		/*
		 * At this point we have either a conflict or a potential conflict. If
		 * we're not supposed to raise error, just return the fact of the
		 * potential conflict without waiting to see if it's real.
		 */
		if (errorOK)
		{
			conflict = true;
			break;
		}

		/*
		 * If an in-progress transaction is affecting the visibility of this
		 * tuple, we need to wait for it to complete and then recheck.  For
		 * simplicity we do rechecking by just restarting the whole scan ---
		 * this case probably doesn't happen often enough to be worth trying
		 * harder, and anyway we don't want to hold any index internal locks
		 * while waiting.
		 */
		xwait = TransactionIdIsValid(DirtySnapshot.xmin) ?
			DirtySnapshot.xmin : DirtySnapshot.xmax;

		if (TransactionIdIsValid(xwait))
		{
			index_endscan(index_scan);
			XactLockTableWait(xwait);
			goto retry;
		}

		/*
		 * We have a definite conflict.  Report it.
		 */
		error_new = BuildIndexValueDescription(index, values, isnull);
		error_existing = BuildIndexValueDescription(index, existing_values,
													existing_isnull);
		if (newIndex)
			ereport(ERROR,
					(errcode(ERRCODE_EXCLUSION_VIOLATION),
					 errmsg("could not create exclusion constraint \"%s\"",
							RelationGetRelationName(index)),
					 errdetail("Key %s conflicts with key %s.",
							   error_new, error_existing),
					 errtableconstraint(heap,
										RelationGetRelationName(index))));
		else
			ereport(ERROR,
					(errcode(ERRCODE_EXCLUSION_VIOLATION),
					 errmsg("conflicting key value violates exclusion constraint \"%s\"",
							RelationGetRelationName(index)),
					 errdetail("Key %s conflicts with existing key %s.",
							   error_new, error_existing),
					 errtableconstraint(heap,
										RelationGetRelationName(index))));
	}

	index_endscan(index_scan);

	/*
	 * Ordinarily, at this point the search should have found the originally
	 * inserted tuple, unless we exited the loop early because of conflict.
	 * However, it is possible to define exclusion constraints for which that
	 * wouldn't be true --- for instance, if the operator is <>. So we no
	 * longer complain if found_self is still false.
	 */

	econtext->ecxt_scantuple = save_scantuple;

	ExecDropSingleTupleTableSlot(existing_slot);

	return !conflict;
}

/*
 * Check existing tuple's index values to see if it really matches the
 * exclusion condition against the new_values.  Returns true if conflict.
 */
static bool
index_recheck_constraint(Relation index, Oid *constr_procs,
						 Datum *existing_values, bool *existing_isnull,
						 Datum *new_values)
{
	int			index_natts = index->rd_index->indnatts;
	int			i;

	for (i = 0; i < index_natts; i++)
	{
		/* Assume the exclusion operators are strict */
		if (existing_isnull[i])
			return false;

		if (!DatumGetBool(OidFunctionCall2Coll(constr_procs[i],
											   index->rd_indcollation[i],
											   existing_values[i],
											   new_values[i])))
			return false;
	}

	return true;
}
/*
 * ExecUpdateAOtupCount
 *		Update the tuple count on the master for an append only relation segfile.
 */
static void
ExecUpdateAOtupCount(ResultRelInfo *result_rels,
					 Snapshot snapshot,
					 int num_result_rels,
					 EState *estate,
					 uint64 tupadded)
{
	int			i;
	bool		was_delete;

	Assert(Gp_role == GP_ROLE_DISPATCH);

	was_delete = estate && estate->es_plannedstmt &&
		(estate->es_plannedstmt->commandType == CMD_DELETE);

	for (i = num_result_rels; i > 0; i--)
	{
		if (RelationIsAppendOptimized(result_rels->ri_RelationDesc))
		{
			Assert(result_rels->ri_aosegno != InvalidFileSegNumber);

			if (was_delete && tupadded > 0)
			{
				/* Touch the ao seg info */
				UpdateMasterAosegTotals(result_rels->ri_RelationDesc,
									result_rels->ri_aosegno,
									0,
									1);
			}
			else if (!was_delete)
			{
				UpdateMasterAosegTotals(result_rels->ri_RelationDesc,
									result_rels->ri_aosegno,
									tupadded,
									1);
			}
		}

		result_rels++;
	}
}

/*
 * UpdateChangedParamSet
 *		Add changed parameters to a plan node's chgParam set
 */
void
UpdateChangedParamSet(PlanState *node, Bitmapset *newchg)
{
	Bitmapset  *parmset;

	/*
	 * The plan node only depends on params listed in its allParam set. Don't
	 * include anything else into its chgParam set.
	 */
	parmset = bms_intersect(node->plan->allParam, newchg);

	/*
	 * Keep node->chgParam == NULL if there are not actually any members; this
	 * allows the simplest possible tests in executor node files.
	 */
	if (!bms_is_empty(parmset))
		node->chgParam = bms_join(node->chgParam, parmset);
	else
		bms_free(parmset);
}
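
/*
 * Illustrative sketch (not part of the build): rescan logic typically
 * propagates changed parameters to a child and forces it to recompute.
 * A hypothetical minimal example:
 *
 *		UpdateChangedParamSet(outerPlanState(node), node->ps.chgParam);
 *		if (outerPlanState(node)->chgParam != NULL)
 *			ExecReScan(outerPlanState(node));
 */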

/*
 * Register a shutdown callback in an ExprContext.
 *
 * Shutdown callbacks will be called (in reverse order of registration)
 * when the ExprContext is deleted or rescanned.  This provides a hook
 * for functions called in the context to do any cleanup needed --- it's
 * particularly useful for functions returning sets.  Note that the
 * callback will *not* be called in the event that execution is aborted
 * by an error.
 */
void
RegisterExprContextCallback(ExprContext *econtext,
							ExprContextCallbackFunction function,
							Datum arg)
{
	ExprContext_CB *ecxt_callback;

	/* Save the info in appropriate memory context */
	ecxt_callback = (ExprContext_CB *)
		MemoryContextAlloc(econtext->ecxt_per_query_memory,
						   sizeof(ExprContext_CB));

	ecxt_callback->function = function;
	ecxt_callback->arg = arg;

	/* link to front of list for appropriate execution order */
	ecxt_callback->next = econtext->ecxt_callbacks;
	econtext->ecxt_callbacks = ecxt_callback;
}
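
/*
 * Illustrative sketch (not part of the build): a function can arrange for
 * cleanup of a non-memory resource when the calling ExprContext is shut
 * down or rescanned.  "my_cleanup" and "my_resource" are hypothetical.
 *
 *		static void
 *		my_cleanup(Datum arg)
 *		{
 *			release_my_resource(DatumGetPointer(arg));
 *		}
 *
 *		RegisterExprContextCallback(econtext, my_cleanup,
 *									PointerGetDatum(my_resource));
 *
 * Note the callback will not be called if execution is aborted by an error
 * (see comment above).
 */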

/*
 * Deregister a shutdown callback in an ExprContext.
 *
 * Any list entries matching the function and arg will be removed.
 * This can be used if it's no longer necessary to call the callback.
 */
void
UnregisterExprContextCallback(ExprContext *econtext,
							  ExprContextCallbackFunction function,
							  Datum arg)
{
	ExprContext_CB **prev_callback;
	ExprContext_CB *ecxt_callback;

	prev_callback = &econtext->ecxt_callbacks;

	while ((ecxt_callback = *prev_callback) != NULL)
	{
		if (ecxt_callback->function == function && ecxt_callback->arg == arg)
		{
			*prev_callback = ecxt_callback->next;
			pfree(ecxt_callback);
		}
		else
			prev_callback = &ecxt_callback->next;
	}
}

/*
 * Call all the shutdown callbacks registered in an ExprContext.
 *
 * The callback list is emptied (important in case this is only a rescan
 * reset, and not deletion of the ExprContext).
 *
 * If isCommit is false, just clean the callback list but don't call 'em.
 * (See comment for FreeExprContext.)
 */
static void
ShutdownExprContext(ExprContext *econtext, bool isCommit)
{
	ExprContext_CB *ecxt_callback;
	MemoryContext oldcontext;

	/* Fast path in normal case where there's nothing to do. */
	if (econtext->ecxt_callbacks == NULL)
		return;

	/*
	 * Call the callbacks in econtext's per-tuple context.  This ensures that
	 * any memory they might leak will get cleaned up.
	 */
	oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

	/*
	 * Call each callback function in reverse registration order.
	 */
	while ((ecxt_callback = econtext->ecxt_callbacks) != NULL)
	{
		econtext->ecxt_callbacks = ecxt_callback->next;
		if (isCommit)
			(*ecxt_callback->function) (ecxt_callback->arg);
		pfree(ecxt_callback);
	}

	MemoryContextSwitchTo(oldcontext);
}


/* ---------------------------------------------------------------
 * 		Share Input utilities
 * ---------------------------------------------------------------
 */
ShareNodeEntry *
ExecGetShareNodeEntry(EState *estate, int shareidx, bool fCreate)
{
	Assert(shareidx >= 0);
	Assert(estate->es_sharenode != NULL);

	if (!fCreate)
	{
		if (shareidx >= list_length(*estate->es_sharenode))
			return NULL;
	}
	else
	{
		while (list_length(*estate->es_sharenode) <= shareidx)
		{
			ShareNodeEntry *n = makeNode(ShareNodeEntry);
			n->sharePlan = NULL;
			n->shareState = NULL;

			*estate->es_sharenode = lappend(*estate->es_sharenode, n);
		}
	}

	return (ShareNodeEntry *) list_nth(*estate->es_sharenode, shareidx);
}

/* ----------------------------------------------------------------
 *		CDB Slice Table utilities
 * ----------------------------------------------------------------
 */

/* Attach a slice table to the given EState structure.  It should
 * consist of blank slices, one for the root plan, one for each
 * Motion node (which roots a slice with a send node), and one for
 * each subplan (which acts as an initplan root node).
 */
void
InitSliceTable(EState *estate, int nMotions, int nSubplans)
{
	SliceTable *table;
	Slice	   *slice;
	int			i,
				n;
	MemoryContext oldcontext;

	n = 1 + nMotions + nSubplans;

	if (gp_max_slices > 0 && n > gp_max_slices)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("at most %d slices are allowed in a query, current number: %d", gp_max_slices, n),
				 errhint("rewrite your query or adjust GUC gp_max_slices")));

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	table = makeNode(SliceTable);
	table->nMotions = nMotions;
	table->nInitPlans = nSubplans;
	table->slices = NIL;
	table->instrument_options = INSTRUMENT_NONE;

	/* Each slice table has a unique ID. */
	table->ic_instance_id = ++gp_interconnect_id;

	for (i = 0; i < n; i++)
	{
		slice = makeNode(Slice);

		slice->sliceIndex = i;
		slice->rootIndex = (i > 0 && i <= nMotions) ? -1 : i;
		slice->gangType = GANGTYPE_UNALLOCATED;
		slice->gangSize = 0;
		slice->segments = NIL;
		slice->numGangMembersToBeActive = 0;
		slice->directDispatch.isDirectDispatch = false;
		slice->directDispatch.contentIds = NIL;
		slice->primaryGang = NULL;
		slice->parentIndex = -1;
		slice->children = NIL;
		slice->primaryProcesses = NIL;

		table->slices = lappend(table->slices, slice);
	}

	estate->es_sliceTable = table;

	MemoryContextSwitchTo(oldcontext);
}
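
/*
 * Worked example (illustrative only): a plan with nMotions = 2 and
 * nSubplans = 1 gets n = 1 + 2 + 1 = 4 blank slices.  Slice 0 is the root
 * plan's slice; the Motion slices 1 and 2 start with rootIndex = -1 (filled
 * in later); the initplan slice 3 acts as its own root, so its rootIndex
 * is 3.
 */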

/*
 * A forgiving slice table indexer that returns the indexed Slice* or NULL
 */
Slice *
getCurrentSlice(EState *estate, int sliceIndex)
{
	SliceTable *sliceTable = estate->es_sliceTable;

	if (sliceTable &&
		sliceIndex >= 0 &&
		sliceIndex < list_length(sliceTable->slices))
		return (Slice *) list_nth(sliceTable->slices, sliceIndex);

	return NULL;
}

/* Should the slice run on the QD?
 *
 * N.B. Not the same as !sliceRunsOnQE(slice), when slice is NULL.
 */
bool
sliceRunsOnQD(Slice *slice)
{
	return (slice != NULL && slice->gangType == GANGTYPE_UNALLOCATED);
}


/* Should the slice run on a QE?
 *
 * N.B. Not the same as !sliceRunsOnQD(slice), when slice is NULL.
 */
bool
sliceRunsOnQE(Slice *slice)
{
	return (slice != NULL && slice->gangType != GANGTYPE_UNALLOCATED);
}
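
/*
 * N.B. For slice == NULL, both sliceRunsOnQD() and sliceRunsOnQE() return
 * false, so neither predicate is simply the negation of the other.
 */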

/**
 * Calculate the number of sending processes that should be in a slice.
 */
int
sliceCalculateNumSendingProcesses(Slice *slice)
{
	switch(slice->gangType)
	{
		case GANGTYPE_UNALLOCATED:
			return 0; /* does not send */

		case GANGTYPE_ENTRYDB_READER:
			return 1; /* on master */

		case GANGTYPE_SINGLETON_READER:
			return 1; /* on segment */

		case GANGTYPE_PRIMARY_WRITER:
		case GANGTYPE_PRIMARY_READER:
			if (slice->directDispatch.isDirectDispatch)
				return list_length(slice->directDispatch.contentIds);
			else
				return getgpsegmentCount();

		default:
			Insist(false);
			return -1;
	}
}

/* Forward declaration */
static void InventorySliceTree(CdbDispatcherState *ds, List *slices, int sliceIndex);

/*
 * Function AssignGangs runs on the QD and finishes construction of the
 * global slice table for a plan by assigning gangs allocated by the
 * executor factory to the slices of the slice table.
 *
 * On entry, the slice table (at queryDesc->estate->es_sliceTable) has
 * the correct structure (established by InitSliceTable) and has correct
 * gang types (established by function FillSliceTable).
 *
 * Gang assignment involves taking an inventory of the requirements of
 * each slice tree in the slice table, asking the executor factory to
 * allocate a minimal set of gangs that can satisfy any of the slice trees,
 * and associating the allocated gangs with slices in the slice table.
 *
 * On successful exit, the CdbProcess list (primaryProcesses) and the Gang
 * pointer (primaryGang) are set correctly in each slice in the slice
 * table.
 */
void
AssignGangs(CdbDispatcherState *ds, QueryDesc *queryDesc)
{
	SliceTable	*sliceTable;
	ListCell  	*cell;
	Slice		*slice;
	EState		*estate;
	int			rootIdx;

	estate = queryDesc->estate;
	sliceTable = estate->es_sliceTable;
	rootIdx = RootSliceIndex(queryDesc->estate);

	/* Clean up processesMap because initplans and the main plan share the same slice table */
	foreach(cell, sliceTable->slices)
	{
		slice = (Slice *) lfirst(cell);
		slice->processesMap = NULL;
	}

	InventorySliceTree(ds, sliceTable->slices, rootIdx);
}

/*
 * Helper for AssignGangs: takes a simple inventory of the gangs required
 * by a slice tree.  Recursive.  Closely coupled with AssignGangs; not
 * generally useful.
 */
static void
InventorySliceTree(CdbDispatcherState *ds, List *slices, int sliceIndex)
{
	ListCell *cell;
	int childIndex;
	Slice *slice = list_nth(slices, sliceIndex);

	if (slice->gangType == GANGTYPE_UNALLOCATED)
	{
		slice->primaryGang = NULL;
		slice->primaryProcesses = getCdbProcessesForQD(true);
	}
	else
	{
		Assert(slice->segments != NIL);
		slice->primaryGang = AllocateGang(ds, slice->gangType, slice->segments);
		setupCdbProcessList(slice);
	}

	foreach(cell, slice->children)
	{
		childIndex = lfirst_int(cell);
		InventorySliceTree(ds, slices, childIndex);
	}
}

/*
 * Choose the execution identity (who does this executor serve?).
 * There are three types:
 *
 * 1. No-Op (ignore) -- this occurs when the specified direction is
 *	 NoMovementScanDirection or when Gp_role is GP_ROLE_DISPATCH
 *	 and the current slice belongs to a QE.
 *
 * 2. Executor serves a Root Slice -- this occurs when Gp_role is
 *   GP_ROLE_UTILITY or the current slice is a root.  It corresponds
 *   to the "normal" path through the executor in that we enter the plan
 *   at the top and count on the motion nodes at the fringe of the top
 *   slice to return without ever calling nodes below them.
 *
 * 3. Executor serves a Non-Root Slice on a QE -- this occurs when
 *   Gp_role is GP_ROLE_EXECUTE and the current slice is not a root
 *   slice. It corresponds to a QE running a slice with a motion node on
 *	 top.  The call, thus, returns no tuples (since they all go out
 *	 on the interconnect to the receiver version of the motion node),
 *	 but it does execute the indicated slice down to any fringe
 *	 motion nodes (as in case 2).
 */
GpExecIdentity
getGpExecIdentity(QueryDesc *queryDesc,
				  ScanDirection direction,
				  EState	   *estate)
{
	Slice *currentSlice;

	currentSlice = getCurrentSlice(estate, LocallyExecutingSliceIndex(estate));
	if (currentSlice)
	{
		if (Gp_role == GP_ROLE_EXECUTE ||
			sliceRunsOnQD(currentSlice))
			currentSliceId = currentSlice->sliceIndex;
	}

	/* select the strategy */
	if (direction == NoMovementScanDirection)
	{
		return GP_IGNORE;
	}
	else if (Gp_role == GP_ROLE_DISPATCH && sliceRunsOnQE(currentSlice))
	{
		return GP_IGNORE;
	}
	else if (Gp_role == GP_ROLE_EXECUTE && LocallyExecutingSliceIndex(estate) != RootSliceIndex(estate))
	{
		return GP_NON_ROOT_ON_QE;
	}
	else
	{
		return GP_ROOT_SLICE;
	}
}
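
/*
 * Illustrative sketch (not part of the original file): a top-level run
 * routine typically branches on the returned identity along these lines:
 *
 *		GpExecIdentity exec_identity;
 *
 *		exec_identity = getGpExecIdentity(queryDesc, direction, estate);
 *		if (exec_identity == GP_IGNORE)
 *			... do nothing (case 1) ...
 *		else if (exec_identity == GP_NON_ROOT_ON_QE)
 *			... run the slice's sending Motion node (case 3) ...
 *		else if (exec_identity == GP_ROOT_SLICE)
 *			... run the plan from the top (case 2) ...
 *
 * Only GP_ROOT_SLICE returns tuples to the caller; GP_NON_ROOT_ON_QE pushes
 * all of its output into the interconnect.
 */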

/*
 * End the gp-specific part of the executor.
 *
 * In here we collect the dispatch results if there are any, and tear
 * down the interconnect if it is set up.
 */
void mppExecutorFinishup(QueryDesc *queryDesc)
{
	EState	   *estate;
	Slice      *currentSlice;

	/* caller must have switched into per-query memory context already */
	estate = queryDesc->estate;

	currentSlice = getCurrentSlice(estate, LocallyExecutingSliceIndex(estate));

	/*
	 * If QD, wait for QEs to finish and check their results.
	 */
	if (estate->dispatcherState && estate->dispatcherState->primaryResults)
	{
		CdbDispatchResults *pr = NULL;
		CdbDispatcherState *ds = estate->dispatcherState;
		DispatchWaitMode waitMode = DISPATCH_WAIT_NONE;
		ErrorData *qeError = NULL;
		HTAB *aopartcounts = NULL;

		/*
		 * If we are finishing a query before all the tuples of the query
		 * plan were fetched, we must call ExecSquelchNode() before checking
		 * the dispatch results, in order to tell the nodes below that we no
		 * longer need any more tuples.
		 */
		if (!estate->es_got_eos)
		{
			ExecSquelchNode(queryDesc->planstate);
		}

		/*
		 * Wait for completion of all QEs.  We send a "graceful" query
		 * finish, not a cancel signal.  Since the query has succeeded,
		 * don't confuse the QEs by sending an erroneous message.
		 */
		if (estate->cancelUnfinished)
			waitMode = DISPATCH_WAIT_FINISH;

		cdbdisp_checkDispatchResult(ds, waitMode);

		pr = cdbdisp_getDispatchResults(ds, &qeError);

		if (qeError)
		{
			estate->dispatcherState = NULL;
			cdbdisp_destroyDispatcherState(ds);
			ReThrowError(qeError);
		}

		/* Get the number of rows processed by the primary writer slice on the QEs. */
		int			primaryWriterSliceIndex = PrimaryWriterSliceIndex(estate);

		estate->es_processed +=
			cdbdisp_sumCmdTuples(pr, primaryWriterSliceIndex);
		estate->es_lastoid =
			cdbdisp_maxLastOid(pr, primaryWriterSliceIndex);
		aopartcounts = cdbdisp_sumAoPartTupCount(estate->es_result_partitions, pr);

		/* sum up rejected rows if any (single row error handling only) */
		cdbdisp_sumRejectedRows(pr);

		/* sum up inserted rows into any AO relation */
		if (aopartcounts)
		{
			/* counts from a partitioned AO table */

			ListCell *lc;

			foreach(lc, estate->es_result_aosegnos)
			{
				SegfileMapNode *map = lfirst(lc);
				struct {
					Oid relid;
					int64 tupcount;
				} *entry;
				bool found;

				entry = hash_search(aopartcounts,
									&(map->relid),
									HASH_FIND,
									&found);

				/*
				 * Must update the mod count only for segfiles where actual
				 * tuples were touched (added/deleted), based on entry->tupcount.
				 */
				if (found && entry->tupcount)
				{
					bool was_delete = estate->es_plannedstmt && (estate->es_plannedstmt->commandType == CMD_DELETE);

					Relation r = heap_open(map->relid, AccessShareLock);
					if (was_delete)
					{
						UpdateMasterAosegTotals(r, map->segno, 0, 1);
					}
					else
					{
						UpdateMasterAosegTotals(r, map->segno, entry->tupcount, 1);
					}
					heap_close(r, NoLock);
				}
			}
		}
		else
		{
			/* counts from a (non partitioned) AO table */

			ExecUpdateAOtupCount(estate->es_result_relations,
								 estate->es_snapshot,
								 estate->es_num_result_relations,
								 estate,
								 estate->es_processed);
		}

		/*
		 * Check and free the results of all gangs. If any QE had an
		 * error, report it and exit to our error handler via PG_THROW.
		 * NB: This call doesn't wait, because we already waited above.
		 */
		estate->dispatcherState = NULL;
		cdbdisp_destroyDispatcherState(ds);
	}

	/* Teardown the Interconnect */
	if (estate->es_interconnect_is_setup)
	{
		/*
		 * MPP-3413: If we got here during cancellation of a cursor,
		 * we need to set the "forceEos" argument correctly --
		 * otherwise we potentially hang (cursors cancel on the QEs,
		 * mark the estate to "cancelUnfinished" and then try to do a
		 * normal interconnect teardown).
		 */
		TeardownInterconnect(estate->interconnect_context, estate->cancelUnfinished, false);
		estate->es_interconnect_is_setup = false;
	}
}

/*
 * Clean up the gp-specific parts of the query executor.
 *
 * Will normally be called after an error from within a CATCH block.
 */
void mppExecutorCleanup(QueryDesc *queryDesc)
{
	CdbDispatcherState *ds;
	EState	   *estate;

	/* caller must have switched into per-query memory context already */
	estate = queryDesc->estate;
	ds = estate->dispatcherState;

	/* GPDB hook for collecting query info */
	if (query_info_collect_hook && QueryCancelCleanup)
		(*query_info_collect_hook)(METRICS_QUERY_CANCELING, queryDesc);

	/*
	 * If this query is being canceled, record that if gpperfmon
	 * is enabled.
	 */
	if (gp_enable_gpperfmon &&
		Gp_role == GP_ROLE_DISPATCH &&
		queryDesc->gpmon_pkt &&
		QueryCancelCleanup)
	{
		gpmon_qlog_query_canceling(queryDesc->gpmon_pkt);
	}

	/*
	 * Request any commands still executing on qExecs to stop.
	 * Wait for them to finish and clean up the dispatching structures.
	 * Replace current error info with QE error info if more interesting.
	 */
	if (ds)
	{
		/*
		 * If we are finishing a query before all the tuples of the query
		 * plan were fetched, we must call ExecSquelchNode() before checking
		 * the dispatch results, in order to tell the nodes below that we no
		 * longer need any more tuples.
		 */
		if (estate->es_interconnect_is_setup && !estate->es_got_eos)
			ExecSquelchNode(queryDesc->planstate);

		estate->dispatcherState = NULL;
		CdbDispatchHandleError(ds);
	}

	/* Clean up the interconnect. */
	if (estate->es_interconnect_is_setup)
	{
		TeardownInterconnect(estate->interconnect_context, true /* force EOS */, true);
		estate->es_interconnect_is_setup = false;
	}

	/* GPDB hook for collecting query info */
	if (query_info_collect_hook)
		(*query_info_collect_hook)(QueryCancelCleanup ? METRICS_QUERY_CANCELED : METRICS_QUERY_ERROR, queryDesc);
	
	/**
	 * Perfmon-related cleanup.
	 */
	if (gp_enable_gpperfmon 
			&& Gp_role == GP_ROLE_DISPATCH
			&& queryDesc->gpmon_pkt)
	{
		gpmon_qlog_query_error(queryDesc->gpmon_pkt);
		pfree(queryDesc->gpmon_pkt);
		queryDesc->gpmon_pkt = NULL;
	}

	/* Workfile manager per-query resource accounting */
	WorkfileQueryspace_ReleaseEntry();

	ReportOOMConsumption();

	/**
	 * Since there was an error, re-initialize SPI's per-query memory reservation.
	 */
	if (!IsResManagerMemoryPolicyNone())
	{
		SPI_InitMemoryReservation();
	}
}

/*
 * Reset an ExprContext's per-tuple memory context.
 *
 * As an optimization, the reset is skipped until more than 50 kB has
 * accumulated in the context, so that we don't pay the reset cost for
 * every tuple.
 */
void
ResetExprContext(ExprContext *econtext)
{
	MemoryContext memctxt = econtext->ecxt_per_tuple_memory;

	if (memctxt->allBytesAlloc - memctxt->allBytesFreed > 50000)
		MemoryContextReset(memctxt);
}

/**
 * Determine how much memory a specific operator is supposed to use (in kB).
 */
uint64 PlanStateOperatorMemKB(const PlanState *ps)
{
	Assert(ps);
	Assert(ps->plan);
	uint64 result = 0;
	if (ps->plan->operatorMemKB == 0)
	{
		/**
		 * There are some statements that do not go through the resource queue,
		 * and these plans don't get decorated with the operatorMemKB.
		 * Someday, we should fix resource queues.
		 */
		result = work_mem;
	}
	else
	{
		if (IsA(ps, AggState))
		{
			result = ps->plan->operatorMemKB + MemoryAccounting_RequestQuotaIncrease();
		}
		else
			result = ps->plan->operatorMemKB;
	}
	
	return result;
}
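
/*
 * Illustrative sketch (not part of the original file): an operator sizing a
 * workspace from its budget might do (hashstate is hypothetical):
 *
 *		uint64		operatorMemKB = PlanStateOperatorMemKB(&hashstate->ps);
 *		Size		workspace_bytes = (Size) operatorMemKB * 1024;
 *
 * Since plans that bypass resource queues fall back to work_mem, callers
 * always get a usable, non-zero budget.
 */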

/**
 * Machinery to find a MotionState within a PlanState tree, given a motion id
 * (which is the same as the slice index).
 */
typedef struct MotionStateFinderContext
{
	int motionId; /* Input */
	MotionState *motionState; /* Output */
} MotionStateFinderContext;

/**
 * Walker that finds the MotionState node within a PlanState tree.
 */
static CdbVisitOpt
MotionStateFinderWalker(PlanState *node,
				  void *context)
{
	Assert(context);
	MotionStateFinderContext *ctx = (MotionStateFinderContext *) context;

	if (IsA(node, MotionState))
	{
		MotionState *ms = (MotionState *) node;
		Motion *m = (Motion *) ms->ps.plan;
		if (m->motionID == ctx->motionId)
		{
			Assert(ctx->motionState == NULL);
			ctx->motionState = ms;
			return CdbVisit_Skip;	/* don't visit subtree */
		}
	}

	/* Continue walking */
	return CdbVisit_Walk;
}

/**
 * Given a slice index, find the MotionState that corresponds to it.  This
 * walks the PlanState tree to locate the right node.
 */
MotionState *getMotionState(struct PlanState *ps, int sliceIndex)
{
	Assert(ps);
	Assert(sliceIndex > -1);

	MotionStateFinderContext ctx;
	ctx.motionId = sliceIndex;
	ctx.motionState = NULL;
	planstate_walk_node(ps, MotionStateFinderWalker, &ctx);
	Assert(ctx.motionState != NULL);
	return ctx.motionState;
}
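
/*
 * Illustrative sketch (not part of the original file): a QE locating the
 * sending Motion node of the slice it executes might call:
 *
 *		MotionState *ms = getMotionState(queryDesc->planstate,
 *										 LocallyExecutingSliceIndex(estate));
 *
 * The walk asserts that a match exists, so this is only safe when the
 * slice index really denotes a Motion-rooted slice of this plan.
 */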

typedef struct MotionFinderContext
{
	plan_tree_base_prefix base; /* Required prefix for plan_tree_walker/mutator */
	int motionId; /* Input */
	Motion *motion; /* Output */
} MotionFinderContext;

/*
 * Walker to find a motion node that matches a particular motionID
 */
static bool
MotionFinderWalker(Plan *node,
				  void *context)
{
	Assert(context);
	MotionFinderContext *ctx = (MotionFinderContext *) context;

	if (node == NULL)
		return false;

	if (IsA(node, Motion))
	{
		Motion *m = (Motion *) node;
		if (m->motionID == ctx->motionId)
		{
			ctx->motion = m;
			return true;	/* found our node; no more visit */
		}
	}

	/* Continue walking */
	return plan_tree_walker((Node*)node, MotionFinderWalker, ctx);
}

/*
 * Given the Plan and a Slice index, find the motion node that is the root of the slice's subtree.
 */
Motion *findSenderMotion(PlannedStmt *plannedstmt, int sliceIndex)
{
	Assert(sliceIndex > -1);

	Plan *planTree = plannedstmt->planTree;
	MotionFinderContext ctx;
	ctx.base.node = (Node*)plannedstmt;
	ctx.motionId = sliceIndex;
	ctx.motion = NULL;
	MotionFinderWalker(planTree, &ctx);
	return ctx.motion;
}

typedef struct SubPlanFinderContext
{
	plan_tree_base_prefix base; /* Required prefix for plan_tree_walker/mutator */
	Bitmapset *bms_subplans; /* Bitmapset for relevant subplans in current slice */
} SubPlanFinderContext;

/*
 * Walker to find all the subplans in a plan tree between 'node' and the next motion node
 */
static bool
SubPlanFinderWalker(Plan *node,
				  void *context)
{
	Assert(context);
	SubPlanFinderContext *ctx = (SubPlanFinderContext *) context;

	if (node == NULL || IsA(node, Motion))
	{
		return false;	/* don't visit subtree */
	}

	if (IsA(node, SubPlan))
	{
		SubPlan *subplan = (SubPlan *) node;
		int i = subplan->plan_id - 1;
		if (!bms_is_member(i, ctx->bms_subplans))
			ctx->bms_subplans = bms_add_member(ctx->bms_subplans, i);
		else
			return false;
	}

	/* Continue walking */
	return plan_tree_walker((Node*)node, SubPlanFinderWalker, ctx);
}

/*
 * Given a plan and a root motion node, find all the subplans
 * between 'root' and the next motion node in the tree.
 */
Bitmapset *getLocallyExecutableSubplans(PlannedStmt *plannedstmt, Plan *root)
{
	SubPlanFinderContext ctx;
	Plan* root_plan = root;
	if (IsA(root, Motion))
	{
		root_plan = outerPlan(root);
	}
	ctx.base.node = (Node*)plannedstmt;
	ctx.bms_subplans = NULL;
	SubPlanFinderWalker(root_plan, &ctx);
	return ctx.bms_subplans;
}
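
/*
 * Illustrative sketch (not part of the original file): callers typically
 * probe the returned Bitmapset with bms_is_member() using the 0-based
 * subplan position (plan_id - 1), e.g.:
 *
 *		Bitmapset  *subplans = getLocallyExecutableSubplans(stmt, sliceRoot);
 *		int			i;
 *
 *		for (i = 0; i < list_length(stmt->subplans); i++)
 *		{
 *			if (bms_is_member(i, subplans))
 *				... initialize/run the i-th subplan locally ...
 *		}
 *
 * (stmt and sliceRoot are assumed to be the PlannedStmt and the slice's
 * root plan node, respectively.)
 */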

typedef struct ParamExtractorContext
{
	plan_tree_base_prefix base; /* Required prefix for plan_tree_walker/mutator */
	EState *estate;
} ParamExtractorContext;

/*
 * Given a SubPlan, determine whether it is an initplan (subplan->is_initplan);
 * if so, copy its precomputed params from estate->es_param_list_info to
 * estate->es_param_exec_vals.
 */
static void ExtractSubPlanParam(SubPlan *subplan, EState *estate)
{
	/*
	 * If this plan is un-correlated or undirect correlated one and want to
	 * set params for parent plan then mark parameters as needing evaluation.
	 *
	 * Note that in the case of un-correlated subqueries we don't care about
	 * setting parent->chgParam here: indices take care about it, for others -
	 * it doesn't matter...
	 */
	if (subplan->setParam != NIL)
	{
		ListCell   *lst;

		foreach(lst, subplan->setParam)
		{
			int			paramid = lfirst_int(lst);
			ParamExecData *prmExec = &(estate->es_param_exec_vals[paramid]);

			/**
			 * Has this parameter already been evaluated as part of
			 * preprocess_initplan()? If so, we shouldn't re-evaluate it;
			 * we simply substitute the actual value from the external
			 * parameters.
			 */
			if (Gp_role == GP_ROLE_EXECUTE && subplan->is_initplan)
			{
				ParamListInfo paramInfo = estate->es_param_list_info;
				ParamExternData *prmExt = NULL;
				int extParamIndex = -1;

				Assert(paramInfo);
				Assert(paramInfo->numParams > 0);

				/*
				 * To locate the value of this pre-evaluated parameter, we need to find
				 * its location in the external parameter list.
				 */
				extParamIndex = paramInfo->numParams - estate->es_plannedstmt->nParamExec + paramid;
				prmExt = &paramInfo->params[extParamIndex];

				/* Make sure the types are valid */
				if (!OidIsValid(prmExt->ptype))
				{
					prmExec->execPlan = NULL;
					prmExec->isnull = true;
					prmExec->value = (Datum) 0;
				}
				else
				{
					/** Hurray! Copy value from external parameter and don't bother setting up execPlan. */
					prmExec->execPlan = NULL;
					prmExec->isnull = prmExt->isnull;
					prmExec->value = prmExt->value;
				}
			}
		}
	}
}
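
/*
 * Worked example (illustrative only) of the extParamIndex arithmetic in
 * ExtractSubPlanParam(): with numParams = 5 external parameter entries, of
 * which the trailing nParamExec = 2 carry pre-evaluated initplan results,
 * paramid 0 maps to index 5 - 2 + 0 = 3 and paramid 1 maps to index 4,
 * i.e. the last nParamExec slots of the external parameter array.
 */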

/*
 * Walker to extract all the precomputed InitPlan params in a plan tree.
 */
static bool
ParamExtractorWalker(Plan *node,
				  void *context)
{
	Assert(context);
	ParamExtractorContext *ctx = (ParamExtractorContext *) context;

	/* Assuming InitPlan always runs on the master */
	if (node == NULL)
	{
		return false;	/* don't visit subtree */
	}

	if (IsA(node, SubPlan))
	{
		SubPlan *sub_plan = (SubPlan *) node;
		ExtractSubPlanParam(sub_plan, ctx->estate);
	}

	/* Continue walking */
	return plan_tree_walker((Node*)node, ParamExtractorWalker, ctx);
}

/*
 * Find and extract all the InitPlan setParams in a root node's subtree.
 */
void ExtractParamsFromInitPlans(PlannedStmt *plannedstmt, Plan *root, EState *estate)
{
	ParamExtractorContext ctx;
	ctx.base.node = (Node*)plannedstmt;
	ctx.estate = estate;

	/* If a gather motion shows up at the top, we still need to find master-only init plans */
	if (IsA(root, Motion))
	{
		root = outerPlan(root);
	}
	ParamExtractorWalker(root, &ctx);
}

typedef struct MotionAssignerContext
{
	plan_tree_base_prefix base; /* Required prefix for plan_tree_walker/mutator */
	List *motStack; /* Motion Stack */
} MotionAssignerContext;

/*
 * Walker to set plan->motionNode for every Plan node to its corresponding parent
 * motion node.
 *
 * This function maintains a stack of motion nodes. When we encounter a motion node
 * we push it on to the stack, walk its subtree, and then pop it off the stack.
 * When we encounter any plan node (motion nodes included) we assign its plan->motionNode
 * to the top of the stack.
 *
 * NOTE: Motion nodes will have their motionNode value set to the previous motion node
 * we encountered while walking the subtree.
 */
static bool
MotionAssignerWalker(Plan *node,
				  void *context)
{
	if (node == NULL)
		return false;

	Assert(context);
	MotionAssignerContext *ctx = (MotionAssignerContext *) context;

	if (is_plan_node((Node*)node))
	{
		Plan *plan = (Plan *) node;
		/*
		 * TODO: For cached plan we may be assigning multiple times.
		 * The eventual goal is to relocate it to planner. For now,
		 * ignore already assigned nodes.
		 */
		if (NULL != plan->motionNode)
			return true;
		plan->motionNode = ctx->motStack != NIL ? (Plan *) lfirst(list_head(ctx->motStack)) : NULL;
	}

	/*
	 * Subplans get dynamic motion assignment as they can be executed from
	 * arbitrary expressions. So, we don't assign any motion to these nodes.
	 */
	if (IsA(node, SubPlan))
	{
		return false;
	}

	if (IsA(node, Motion))
	{
		ctx->motStack = lcons(node, ctx->motStack);
		plan_tree_walker((Node *)node, MotionAssignerWalker, ctx);
		ctx->motStack = list_delete_first(ctx->motStack);

		return false;
	}

	/* Continue walking */
	return plan_tree_walker((Node*)node, MotionAssignerWalker, ctx);
}

/*
 * Assign every node in plannedstmt->planTree its corresponding
 * parent Motion node, if it has one.
 *
 * NOTE: Some plans may not be rooted by a motion on the segment so
 * this function does not guarantee that every node will have a non-NULL
 * motionNode value.
 */
void AssignParentMotionToPlanNodes(PlannedStmt *plannedstmt)
{
	MotionAssignerContext ctx;
	ctx.base.node = (Node*)plannedstmt;
	ctx.motStack = NIL;

	MotionAssignerWalker(plannedstmt->planTree, &ctx);
	/* The entire motion stack should have been unwound */
	Assert(ctx.motStack == NIL);
}
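
/*
 * Worked example (illustrative only): for a chain
 *
 *		Motion A -> HashJoin -> Motion B -> SeqScan
 *
 * the walker leaves Motion A with motionNode = NULL (the stack is empty
 * when it is visited), assigns Motion A to both the HashJoin and Motion B,
 * and assigns Motion B to the SeqScan: every node gets the innermost
 * enclosing Motion at the time it is visited.
 */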

/**
 * Provide index of locally executing slice
 */
int LocallyExecutingSliceIndex(EState *estate)
{
	Assert(estate);
	return (!estate->es_sliceTable ? 0 : estate->es_sliceTable->localSlice);
}

/**
 * Provide index of slice being executed on the primary writer gang
 */
int PrimaryWriterSliceIndex(EState *estate)
{
	ListCell   *lc;

	Assert(estate);

	if (!estate->es_sliceTable)
		return 0;

	foreach (lc, estate->es_sliceTable->slices)
	{
		Slice	   *slice = (Slice *) lfirst(lc);

		if (slice->gangType == GANGTYPE_PRIMARY_WRITER)
			return slice->sliceIndex;
	}

	return 0;
}

/**
 * Provide the root slice index of the locally executing slice.
 */
int RootSliceIndex(EState *estate)
{
	Assert(estate);
	int result = 0;

	if (estate->es_sliceTable)
	{
		Slice *localSlice = list_nth(estate->es_sliceTable->slices, LocallyExecutingSliceIndex(estate));
		result = localSlice->rootIndex;
	}

	return result;
}


#ifdef USE_ASSERT_CHECKING
/**
 * Assert that the slice table is valid. Must be called after ExecInitMotion, which sets up the slice table.
 */
void AssertSliceTableIsValid(SliceTable *st, struct PlannedStmt *pstmt)
{
	if (!st)
		return;

	Assert(pstmt);

	Assert(pstmt->nMotionNodes == st->nMotions);
	Assert(pstmt->nInitPlans == st->nInitPlans);

	ListCell *lc = NULL;
	int i = 0;

	int maxIndex = st->nMotions + st->nInitPlans + 1;

	Assert(maxIndex == list_length(st->slices));

	foreach_with_count(lc, st->slices, i)
	{
		Slice *s = (Slice *) lfirst(lc);

		/* The n-th slice entry has sliceIndex of n */
		Assert(s->sliceIndex == i && "slice index incorrect");

		/* The root index of a slice is either 0 or is a slice corresponding to an init plan */
		Assert((s->rootIndex == 0) || (s->rootIndex > st->nMotions && s->rootIndex < maxIndex));

		/* Parent slice index */
		if (s->sliceIndex == s->rootIndex)
		{
			/* Current slice is a root slice. It will have parent index -1. */
			Assert(s->parentIndex == -1 && "expecting parent index of -1");
		}
		else
		{
			/* All other slices must have a valid parent index */
			Assert(s->parentIndex >= 0 && s->parentIndex < maxIndex && "slice's parent index out of range");
		}

		/* Current slice's children must consider it the parent */
		ListCell *lc1 = NULL;
		foreach (lc1, s->children)
		{
			int childIndex = lfirst_int(lc1);
			Assert(childIndex >= 0 && childIndex < maxIndex && "invalid child slice");
			Slice *sc = (Slice *) list_nth(st->slices, childIndex);
			Assert(sc->parentIndex == s->sliceIndex && "slice's child does not consider it the parent");
		}

		/* Current slice must be in its parent's children list */
		if (s->parentIndex >= 0)
		{
			Slice *sp = (Slice *) list_nth(st->slices, s->parentIndex);

			bool found = false;
			foreach (lc1, sp->children)
			{
				int childIndex = lfirst_int(lc1);
				Assert(childIndex >= 0 && childIndex < maxIndex && "invalid child slice");
				Slice *sc = (Slice *) list_nth(st->slices, childIndex);

				if (sc->sliceIndex == s->sliceIndex)
				{
					found = true;
					break;
				}
			}

			Assert(found && "slice's parent does not consider it a child");
		}
	}
}
#endif