/*-------------------------------------------------------------------------
 *
 * proc.c
 *	  routines to manage per-process shared memory data structure
 *
 * Portions Copyright (c) 2006-2008, Greenplum inc
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.199 2008/01/26 19:55:08 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * Interface (a):
 *		ProcSleep(), ProcWakeup(),
 *		ProcQueueAlloc() -- create a shm queue for sleeping processes
 *		ProcQueueInit() -- create a queue without allocing memory
 *
 * Waiting for a lock causes the backend to be put to sleep.  Whoever releases
 * the lock wakes the process up again (and gives it an error code so it knows
 * whether it was awoken on an error condition).
 *
 * Interface (b):
 *
 * ProcReleaseLocks -- frees the locks associated with current transaction
 *
 * ProcKill -- destroys the shared memory state (and locks)
 * associated with the process.
 */
#include "postgres.h"

#include <signal.h>
#include <unistd.h>
#include <sys/time.h>

#include "access/transam.h"
#include "access/xact.h"
#include "catalog/namespace.h" /* TempNamespaceOidIsValid */
#include "commands/async.h"
#include "miscadmin.h"
#include "postmaster/autovacuum.h"
#include "replication/syncrep.h"
#include "storage/ipc.h"
#include "storage/spin.h"
#include "storage/sinval.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/pmsignal.h"
#include "executor/execdesc.h"
#include "utils/resscheduler.h"
#include "utils/timestamp.h"
#include "utils/portal.h"

#include "utils/sharedsnapshot.h"  /* SharedLocalSnapshotSlot */

#include "cdb/cdblocaldistribxact.h"
#include "cdb/cdbgang.h"
#include "cdb/cdbvars.h"  /* Gp_is_writer */
#include "port/atomics.h"
#include "utils/session_state.h"

/* GUC variables */
int			DeadlockTimeout = 1000;
int			StatementTimeout = 0;
int			IdleSessionGangTimeout = 18000;
bool		log_lock_waits = false;

/* Pointer to this process's PGPROC struct, if any */
PGPROC	   *MyProc = NULL;

/* Special for MPP reader gangs */
PGPROC	   *lockHolderProcPtr = NULL;

/*
 * This spinlock protects the freelist of recycled PGPROC structures.
 * We cannot use an LWLock because the LWLock manager depends on already
 * having a PGPROC and a wait semaphore!  But these structures are touched
 * relatively infrequently (only at backend startup or shutdown) and not for
 * very long, so a spinlock is okay.
 */
NON_EXEC_STATIC slock_t *ProcStructLock = NULL;

/* Pointers to shared-memory structures */
NON_EXEC_STATIC PROC_HDR *ProcGlobal = NULL;
NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL;

/* If we are waiting for a lock, this points to the associated LOCALLOCK */
static LOCALLOCK *lockAwaited = NULL;

/* Mark these volatile because they can be changed by signal handler */
static volatile bool statement_timeout_active = false;
static volatile bool deadlock_timeout_active = false;
static volatile DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
static volatile sig_atomic_t clientWaitTimeoutInterruptEnabled = 0;
static volatile sig_atomic_t clientWaitTimeoutInterruptOccurred = 0;
volatile bool cancel_from_timeout = false;

/* timeout_start_time is set when log_lock_waits is true */
static TimestampTz timeout_start_time;

/* statement_fin_time is valid only if statement_timeout_active is true */
static TimestampTz statement_fin_time;

static void RemoveProcFromArray(int code, Datum arg);
static void ProcKill(int code, Datum arg);
static void AuxiliaryProcKill(int code, Datum arg);
static bool CheckStatementTimeout(void);
static void ClientWaitTimeoutInterruptHandler(void);
static void ProcessClientWaitTimeout(void);


/*
 * Report shared-memory space needed by InitProcGlobal.
 */
Size
ProcGlobalShmemSize(void)
{
	Size		size = 0;

	/* ProcGlobal */
	size = add_size(size, sizeof(PROC_HDR));
	/* AuxiliaryProcs */
	size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGPROC)));
	/* MyProcs, including autovacuum */
	size = add_size(size, mul_size(MaxBackends, sizeof(PGPROC)));
	/* ProcStructLock */
	size = add_size(size, sizeof(slock_t));

	return size;
}

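/*
 * Illustration (not part of the original source): the total reserved above
 * comes to sizeof(PROC_HDR) + (NUM_AUXILIARY_PROCS + MaxBackends) *
 * sizeof(PGPROC) + sizeof(slock_t); add_size()/mul_size() are the
 * overflow-checked accumulation helpers.
 */
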
/*
 * Report number of semaphores needed by InitProcGlobal.
 */
int
ProcGlobalSemas(void)
{
	/*
	 * We need a sema per backend (including autovacuum), plus one for each
	 * auxiliary process.
	 */
	return MaxBackends + NUM_AUXILIARY_PROCS;
}

/*
 * InitProcGlobal -
 *	  Initialize the global process table during postmaster or standalone
 *	  backend startup.
 *
 *	  We also create all the per-process semaphores we will need to support
 *	  the requested number of backends.  We used to allocate semaphores
 *	  only when backends were actually started up, but that is bad because
 *	  it lets Postgres fail under load --- a lot of Unix systems are
 *	  (mis)configured with small limits on the number of semaphores, and
 *	  running out when trying to start another backend is a common failure.
 *	  So, now we grab enough semaphores to support the desired max number
 *	  of backends immediately at initialization --- if the sysadmin has set
 *	  MaxConnections or autovacuum_max_workers higher than his kernel will
 *	  support, he'll find out sooner rather than later.
 *
 *	  Another reason for creating semaphores here is that the semaphore
 *	  implementation typically requires us to create semaphores in the
 *	  postmaster, not in backends.
 *
 * Note: this is NOT called by individual backends under a postmaster,
 * not even in the EXEC_BACKEND case.  The ProcGlobal and AuxiliaryProcs
 * pointers must be propagated specially for EXEC_BACKEND operation.
 */
void
InitProcGlobal(int mppLocalProcessCounter)
{
	PGPROC	   *procs;
	int			i;
	bool		found;

	/* Create the ProcGlobal shared structure */
	ProcGlobal = (PROC_HDR *)
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
	Assert(!found);

	/*
	 * Create the PGPROC structures for auxiliary (bgwriter) processes, too.
	 * These do not get linked into the freeProcs list.
	 */
	AuxiliaryProcs = (PGPROC *)
		ShmemInitStruct("AuxiliaryProcs", NUM_AUXILIARY_PROCS * sizeof(PGPROC),
						&found);
	Assert(!found);

	/*
	 * Initialize the data structures.
	 */
	ProcGlobal->freeProcs = INVALID_OFFSET;
	ProcGlobal->autovacFreeProcs = INVALID_OFFSET;

	ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY;

	ProcGlobal->mppLocalProcessCounter = mppLocalProcessCounter;

	/*
	 * Pre-create the PGPROC structures and create a semaphore for each.
	 */
	procs = (PGPROC *) ShmemAlloc(MaxConnections * sizeof(PGPROC));
	if (!procs)
		ereport(FATAL,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
	MemSet(procs, 0, MaxConnections * sizeof(PGPROC));
	for (i = 0; i < MaxConnections; i++)
	{
		PGSemaphoreCreate(&(procs[i].sem));
		InitSharedLatch(&(procs[i].procLatch));

		procs[i].links.next = ProcGlobal->freeProcs;
		ProcGlobal->freeProcs = MAKE_OFFSET(&procs[i]);
	}
	ProcGlobal->procs = procs;
	ProcGlobal->numFreeProcs = MaxConnections;

	procs = (PGPROC *) ShmemAlloc(autovacuum_max_workers * sizeof(PGPROC));
	if (!procs)
		ereport(FATAL,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
	MemSet(procs, 0, autovacuum_max_workers * sizeof(PGPROC));
	for (i = 0; i < autovacuum_max_workers; i++)
	{
		PGSemaphoreCreate(&(procs[i].sem));
		InitSharedLatch(&(procs[i].procLatch));
		procs[i].links.next = ProcGlobal->autovacFreeProcs;
		ProcGlobal->autovacFreeProcs = MAKE_OFFSET(&procs[i]);
	}

	MemSet(AuxiliaryProcs, 0, NUM_AUXILIARY_PROCS * sizeof(PGPROC));
	for (i = 0; i < NUM_AUXILIARY_PROCS; i++)
	{
		AuxiliaryProcs[i].pid = 0;		/* marks auxiliary proc as not in use */
		PGSemaphoreCreate(&(AuxiliaryProcs[i].sem));
		InitSharedLatch(&(AuxiliaryProcs[i].procLatch));
	}

	/* Create ProcStructLock spinlock, too */
	ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
	SpinLockInit(ProcStructLock);
}

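/*
 * Illustration (not part of the original source): each loop above pushes
 * onto the head of its freelist, so after initialization freeProcs points
 * at procs[MaxConnections - 1] and the links.next chain runs down to
 * procs[0], which terminates the list with INVALID_OFFSET.
 */
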
/*
 * InitProcess -- initialize a per-process data structure for this backend
 */
void
InitProcess(void)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;
	SHMEM_OFFSET myOffset;
	int			i;

	/*
	 * ProcGlobal should be set up already (if we are a backend, we inherit
	 * this by fork() or EXEC_BACKEND mechanism from the postmaster).
	 */
	if (procglobal == NULL)
		elog(PANIC, "proc header uninitialized");

	if (MyProc != NULL)
		elog(ERROR, "you already exist");

	/*
	 * Initialize process-local latch support.  This could fail if the kernel
	 * is low on resources, and if so we want to exit cleanly before acquiring
	 * any shared-memory resources.
	 */
	InitializeLatchSupport();

	/*
	 * Try to get a proc struct from the free list.  If this fails, we must be
	 * out of PGPROC structures (not to mention semaphores).
	 *
	 * While we are holding the ProcStructLock, also copy the current shared
	 * estimate of spins_per_delay to local storage.
	 */
	SpinLockAcquire(ProcStructLock);

	set_spins_per_delay(procglobal->spins_per_delay);

	if (IsAutoVacuumWorkerProcess())
		myOffset = procglobal->autovacFreeProcs;
	else
		myOffset = procglobal->freeProcs;

	if (myOffset != INVALID_OFFSET)
	{
		MyProc = (PGPROC *) MAKE_PTR(myOffset);
		if (IsAutoVacuumWorkerProcess())
			procglobal->autovacFreeProcs = MyProc->links.next;
		else
			procglobal->freeProcs = MyProc->links.next;

		procglobal->numFreeProcs--;		/* we removed an entry from the list. */
		Assert(procglobal->numFreeProcs >= 0);

		SpinLockRelease(ProcStructLock);
	}
	else
	{
		/*
		 * If we reach here, all the PGPROCs are in use.  This is one of the
		 * possible places to detect "too many backends", so give the standard
		 * error message.  XXX do we need to give a different failure message
		 * in the autovacuum case?
		 */
		SpinLockRelease(ProcStructLock);
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("sorry, too many clients already")));
	}

	if (gp_debug_pgproc)
	{
		elog(LOG, "allocating PGPROC entry for pid %d, freeProcs (prev offset, new offset): (%ld, %ld)",
			 MyProcPid, MAKE_OFFSET(MyProc), MyProc->links.next);
	}

	int mppLocalProcessSerial = pg_atomic_add_fetch_u32((pg_atomic_uint32 *)&procglobal->mppLocalProcessCounter, 1);
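	/*
	 * Note (added for clarity): pg_atomic_add_fetch_u32 returns the value
	 * after the increment, so each backend receives a distinct, increasing
	 * serial without needing to hold ProcStructLock while assigning it.
	 */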

	lockHolderProcPtr = MyProc;

	/* Set the next pointer to INVALID_OFFSET */
	MyProc->links.next = INVALID_OFFSET;

	/*
	 * Now that we have a PGPROC, mark ourselves as an active postmaster
	 * child; this is so that the postmaster can detect it if we exit without
	 * cleaning up.  (XXX autovac launcher currently doesn't participate in
	 * this; it probably should.)
	 *
	 * Ideally, we should create functions similar to IsAutoVacuumWorkerProcess()
	 * for ftsProber, SeqServer, etc., which call InitProcess().
	 * But MyPMChildSlot helps to get away with it.
	 */
	if (IsUnderPostmaster && !IsAutoVacuumWorkerProcess()
		&& MyPMChildSlot > 0)
		MarkPostmasterChildActive();

	/*
	 * Initialize all fields of MyProc, except for the semaphore which was
	 * prepared for us by InitProcGlobal.
	 */
	SHMQueueElemInit(&(MyProc->links));
	MyProc->waitStatus = STATUS_OK;
	MyProc->lxid = InvalidLocalTransactionId;
	MyProc->xid = InvalidTransactionId;
	MyProc->localDistribXactData.state = LOCALDISTRIBXACT_STATE_NONE;
	MyProc->xmin = InvalidTransactionId;
	MyProc->serializableIsoLevel = false;
	MyProc->inDropTransaction = false;
	MyProc->pid = MyProcPid;
	/* backendId, databaseId and roleId will be filled in later */
	MyProc->backendId = InvalidBackendId;
	MyProc->databaseId = InvalidOid;
	MyProc->roleId = InvalidOid;
	MyProc->inCommit = false;
	MyProc->vacuumFlags = 0;
	if (IsAutoVacuumWorkerProcess())
		MyProc->vacuumFlags |= PROC_IS_AUTOVACUUM;
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
	MyProc->waitLock = NULL;
	MyProc->waitProcLock = NULL;
	MyProc->resWaiting = false;
	MyProc->resGranted = false;
	MyProc->resSlotId = -1;
	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
		SHMQueueInit(&(MyProc->myProcLocks[i]));

	/*
	 * mppLocalProcessSerial uniquely identifies this backend process among
	 * all those that our parent postmaster process creates over its lifetime.
	 *
	 * Since we use the process serial number to decide if we should
	 * deliver a response from a server under this spin, we need to
	 * assign it under the spin lock.
	 */
	MyProc->mppLocalProcessSerial = mppLocalProcessSerial;

	/*
	 * A nonzero gp_session_id uniquely identifies an MPP client session
	 * over the lifetime of the entry postmaster process. A qDisp passes
	 * its gp_session_id down to all of its qExecs. If this is a qExec,
	 * we have already received the gp_session_id from the qDisp.
	 */
	if (Gp_role == GP_ROLE_DISPATCH && gp_session_id == -1)
		gp_session_id = mppLocalProcessSerial;
	MyProc->mppSessionId = gp_session_id;

	elog(DEBUG1, "InitProcess(): gp_session_id %d, Gp_role %d", gp_session_id, Gp_role);

	MyProc->mppIsWriter = Gp_is_writer;

	if (Gp_role == GP_ROLE_DISPATCH)
	{
		MyProc->mppIsWriter = true;
	}

	/* Initialize fields for sync rep */
	MyProc->waitLSN.xlogid = 0;
	MyProc->waitLSN.xrecoff = 0;
	MyProc->syncRepState = SYNC_REP_NOT_WAITING;
	SHMQueueElemInit(&(MyProc->syncRepLinks));

	/*
	 * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch.
	 * Note that there's no particular need to do ResetLatch here.
	 */
	OwnLatch(&MyProc->procLatch);

	/*
	 * We might be reusing a semaphore that belonged to a failed process. So
	 * be careful and reinitialize its value here.  (This is not strictly
	 * necessary anymore, but seems like a good idea for cleanliness.)
	 */
	PGSemaphoreReset(&MyProc->sem);

	/* Set wait portal (do not check if resource scheduling is enabled) */
	MyProc->waitPortalId = INVALID_PORTALID;

	MyProc->queryCommandId = -1;

	/*
	 * Arrange to clean up at backend exit.
	 */
	on_shmem_exit(ProcKill, 0);

	/*
	 * Now that we have a PGPROC, we could try to acquire locks, so initialize
	 * the deadlock checker.
	 */
	InitDeadLockChecking();
}

/*
 * InitProcessPhase2 -- make MyProc visible in the shared ProcArray.
 *
 * This is separate from InitProcess because we can't acquire LWLocks until
 * we've created a PGPROC, but in the EXEC_BACKEND case there is a good deal
 * of stuff to be done before this step that will require LWLock access.
 */
void
InitProcessPhase2(void)
{
	Assert(MyProc != NULL);

	/*
	 * Add our PGPROC to the PGPROC array in shared memory.
	 */
	ProcArrayAdd(MyProc);

	/*
	 * Arrange to clean that up at backend exit.
	 */
	on_shmem_exit(RemoveProcFromArray, 0);
}

/*
 * InitAuxiliaryProcess -- create a per-auxiliary-process data structure
 *
 * This is called by bgwriter and similar processes so that they will have a
 * MyProc value that's real enough to let them wait for LWLocks.  The PGPROC
 * and sema that are assigned are one of the extra ones created during
 * InitProcGlobal.
 *
 * Auxiliary processes are presently not expected to wait for real (lockmgr)
 * locks, so we need not set up the deadlock checker.  They are never added
 * to the ProcArray or the sinval messaging mechanism, either.  They also
 * don't get a VXID assigned, since this is only useful when we actually
 * hold lockmgr locks.
 */
void
InitAuxiliaryProcess(void)
{
	PGPROC	   *auxproc;
	int			proctype;
	int			i;

	/*
	 * ProcGlobal should be set up already (if we are a backend, we inherit
	 * this by fork() or EXEC_BACKEND mechanism from the postmaster).
	 */
	if (ProcGlobal == NULL || AuxiliaryProcs == NULL)
		elog(PANIC, "proc header uninitialized");

	if (MyProc != NULL)
		elog(ERROR, "you already exist");

	/*
	 * Initialize process-local latch support.  This could fail if the kernel
	 * is low on resources, and if so we want to exit cleanly before acquiring
	 * any shared-memory resources.
	 */
	InitializeLatchSupport();

	/*
	 * We use the ProcStructLock to protect assignment and releasing of
	 * AuxiliaryProcs entries.
	 *
	 * While we are holding the ProcStructLock, also copy the current shared
	 * estimate of spins_per_delay to local storage.
	 */
	SpinLockAcquire(ProcStructLock);

	set_spins_per_delay(ProcGlobal->spins_per_delay);

	/*
	 * Find a free auxproc ... *big* trouble if there isn't one ...
	 */
	for (proctype = 0; proctype < NUM_AUXILIARY_PROCS; proctype++)
	{
		auxproc = &AuxiliaryProcs[proctype];
		if (auxproc->pid == 0)
			break;
	}
	if (proctype >= NUM_AUXILIARY_PROCS)
	{
		SpinLockRelease(ProcStructLock);
		elog(FATAL, "all AuxiliaryProcs are in use");
	}

	/* Mark auxiliary proc as in use by me */
	/* use volatile pointer to prevent code rearrangement */
	((volatile PGPROC *) auxproc)->pid = MyProcPid;

	MyProc = auxproc;
	lockHolderProcPtr = auxproc;

	SpinLockRelease(ProcStructLock);

	/*
	 * Initialize all fields of MyProc, except for the semaphore which was
	 * prepared for us by InitProcGlobal.
	 */
	SHMQueueElemInit(&(MyProc->links));
	MyProc->waitStatus = STATUS_OK;
	MyProc->lxid = InvalidLocalTransactionId;
	MyProc->xid = InvalidTransactionId;
	MyProc->localDistribXactData.state = LOCALDISTRIBXACT_STATE_NONE;
	MyProc->xmin = InvalidTransactionId;
	MyProc->serializableIsoLevel = false;
	MyProc->inDropTransaction = false;
	MyProc->databaseId = InvalidOid;
	MyProc->roleId = InvalidOid;
	MyProc->mppLocalProcessSerial = 0;
	MyProc->mppSessionId = 0;
	MyProc->mppIsWriter = false;
	MyProc->inCommit = false;
	MyProc->vacuumFlags = 0;
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
	MyProc->waitLock = NULL;
	MyProc->waitProcLock = NULL;
	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
		SHMQueueInit(&(MyProc->myProcLocks[i]));

	/*
	 * Auxiliary processes don't participate in sync rep.  Although they were
	 * originally not supposed to do transaction work, they do in GPDB, so we
	 * mark them explicitly and skip sync rep work.
	 */
	MyProc->syncRepState = SYNC_REP_DISABLED;

	/*
	 * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch.
	 * Note that there's no particular need to do ResetLatch here.
	 */
	OwnLatch(&MyProc->procLatch);

	/*
	 * We might be reusing a semaphore that belonged to a failed process. So
	 * be careful and reinitialize its value here.  (This is not strictly
	 * necessary anymore, but seems like a good idea for cleanliness.)
	 */
	PGSemaphoreReset(&MyProc->sem);

	MyProc->queryCommandId = -1;

	/*
	 * Arrange to clean up at process exit.
	 */
	on_shmem_exit(AuxiliaryProcKill, Int32GetDatum(proctype));
}

/*
 * Check whether there are at least N free PGPROC objects.
 */
bool
HaveNFreeProcs(int n)
{
	Assert(n >= 0);

	return (ProcGlobal->numFreeProcs >= n);
}

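/*
 * Illustrative note (not in the original source): in stock PostgreSQL of
 * this vintage, this check backs the reserved-connection-slot logic, e.g.
 * refusing an ordinary login when fewer than ReservedBackends slots remain.
 */
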
/*
 * Cancel any pending wait for lock, when aborting a transaction.
 *
 * (Normally, this would only happen if we accept a cancel/die
 * interrupt while waiting; but an ereport(ERROR) while waiting is
 * within the realm of possibility, too.)
 */
void
LockWaitCancel(void)
{
	LWLockId	partitionLock;

	/* Nothing to do if we weren't waiting for a lock */
	if (lockAwaited == NULL)
		return;

	/* Don't try to cancel resource locks. */
	if (Gp_role == GP_ROLE_DISPATCH && IsResQueueEnabled() &&
		LOCALLOCK_LOCKMETHOD(*lockAwaited) == RESOURCE_LOCKMETHOD)
		return;

	/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
	disable_sig_alarm(false);

	/* Unlink myself from the wait queue, if on it (might not be anymore!) */
	partitionLock = LockHashPartitionLock(lockAwaited->hashcode);
	LWLockAcquire(partitionLock, LW_EXCLUSIVE);

	if (MyProc->links.next != INVALID_OFFSET)
	{
		/* We could not have been granted the lock yet */
		RemoveFromWaitQueue(MyProc, lockAwaited->hashcode);
	}
	else
	{
		/*
		 * Somebody kicked us off the lock queue already.  Perhaps they
		 * granted us the lock, or perhaps they detected a deadlock. If they
		 * did grant us the lock, we'd better remember it in our local lock
		 * table.
		 */
		if (MyProc->waitStatus == STATUS_OK)
			GrantAwaitedLock();
	}

	lockAwaited = NULL;

	LWLockRelease(partitionLock);

	/*
	 * We used to do PGSemaphoreReset() here to ensure that our proc's wait
	 * semaphore is reset to zero.  This prevented a leftover wakeup signal
	 * from remaining in the semaphore if someone else had granted us the lock
	 * we wanted before we were able to remove ourselves from the wait-list.
	 * However, now that ProcSleep loops until waitStatus changes, a leftover
	 * wakeup signal isn't harmful, and it seems not worth expending cycles to
	 * get rid of a signal that most likely isn't there.
	 */
}


/*
 * ProcReleaseLocks() -- release locks associated with current transaction
 *			at main transaction commit or abort
 *
 * At main transaction commit, we release all locks except session locks.
 * At main transaction abort, we release all locks including session locks;
 * this lets us clean up after a VACUUM FULL failure.
 *
 * At subtransaction commit, we don't release any locks (so this func is not
 * needed at all); we will defer the releasing to the parent transaction.
 * At subtransaction abort, we release all locks held by the subtransaction;
 * this is implemented by retail releasing of the locks under control of
 * the ResourceOwner mechanism.
 *
 * Note that user locks are not released in any case.
 */
void
ProcReleaseLocks(bool isCommit)
{
	if (!MyProc)
		return;
	/* If waiting, get off wait queue (should only be needed after error) */
	LockWaitCancel();
	/* Release locks */
	LockReleaseAll(DEFAULT_LOCKMETHOD, !isCommit);
}


/*
 * RemoveProcFromArray() -- Remove this process from the shared ProcArray.
 */
static void
RemoveProcFromArray(int code, Datum arg)
{
	Assert(MyProc != NULL);
	ProcArrayRemove(MyProc, InvalidTransactionId);
}

/*
 * update_spins_per_delay
 *   Update spins_per_delay value in ProcGlobal.
 */
static void
update_spins_per_delay(void)
{
	volatile PROC_HDR *procglobal = ProcGlobal;
	bool		casResult = false;

	while (!casResult)
	{
		int			old_spins_per_delay = procglobal->spins_per_delay;
		int			new_spins_per_delay = recompute_spins_per_delay(old_spins_per_delay);

		casResult = pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &procglobal->spins_per_delay,
												   (uint32 *) &old_spins_per_delay,
												   new_spins_per_delay);
	}
}

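/*
 * Sketch of the CAS retry above (added for clarity): if another backend
 * changes spins_per_delay between our read and the compare-exchange, the
 * CAS fails, casResult stays false, and the loop re-reads the current value
 * and recomputes from it, so no concurrent update is ever lost.
 */
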
/*
 * ProcKill() -- Destroy the per-proc data structure for
 *		this process. Release any of its held LW locks.
 */
static void
ProcKill(int code, Datum arg)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;
	PGPROC	   *proc;

	Assert(MyProc != NULL);

	/* Make sure we're out of the sync rep lists */
	SyncRepCleanupAtProcExit();

	/*
	 * Cleanup for any resource locks on portals - from holdable cursors or
	 * unclean process abort (assertion failures).
	 */
	if (Gp_role == GP_ROLE_DISPATCH && IsResQueueEnabled())
		AtExitCleanup_ResPortals();

	/*
	 * Remove the shared snapshot slot.
	 */
	if (SharedLocalSnapshotSlot != NULL)
	{
		if (Gp_role == GP_ROLE_DISPATCH)
		{
			SharedSnapshotRemove(SharedLocalSnapshotSlot,
								 "Query Dispatcher");
		}
		else if (Gp_segment == -1 && Gp_role == GP_ROLE_EXECUTE && !Gp_is_writer)
		{
			/*
			 * Entry db singleton QE is a user of the shared snapshot -- not a creator.
			 */
		}
		else if (Gp_role == GP_ROLE_EXECUTE && Gp_is_writer)
		{
			SharedSnapshotRemove(SharedLocalSnapshotSlot,
								 "Writer qExec");
		}
		SharedLocalSnapshotSlot = NULL;
	}

	/*
	 * Release any LW locks I am holding.  There really shouldn't be any, but
	 * it's cheap to check again before we cut the knees off the LWLock
	 * facility by releasing our PGPROC ...
	 */
	LWLockReleaseAll();

	MyProc->localDistribXactData.state = LOCALDISTRIBXACT_STATE_NONE;
	MyProc->mppLocalProcessSerial = 0;
	MyProc->mppSessionId = 0;
	MyProc->mppIsWriter = false;
	MyProc->pid = 0;

	/*
	 * Clear MyProc first; then disown the process latch.  This is so that
	 * signal handlers won't try to clear the process latch after it's no
	 * longer ours.
	 */
	proc = MyProc;
	MyProc = NULL;
	DisownLatch(&proc->procLatch);

	SpinLockAcquire(ProcStructLock);

	/* Return PGPROC structure (and semaphore) to freelist */
	if (IsAutoVacuumWorkerProcess())
	{
		proc->links.next = procglobal->autovacFreeProcs;
		procglobal->autovacFreeProcs = MAKE_OFFSET(proc);
	}
	else
	{
		proc->links.next = procglobal->freeProcs;
		procglobal->freeProcs = MAKE_OFFSET(proc);
	}

	procglobal->numFreeProcs++;	/* we added an entry */

	/* Update shared estimate of spins_per_delay */
	update_spins_per_delay();

	SpinLockRelease(ProcStructLock);

	/*
	 * This process is no longer present in shared memory in any meaningful
	 * way, so tell the postmaster we've cleaned up acceptably well.
	 */
	if (IsUnderPostmaster && !IsAutoVacuumWorkerProcess()
		&& MyPMChildSlot > 0)
		MarkPostmasterChildInactive();

	/* wake autovac launcher if needed -- see comments in FreeWorkerInfo */
	if (AutovacuumLauncherPid != 0)
		kill(AutovacuumLauncherPid, SIGUSR1);
}

/*
 * AuxiliaryProcKill() -- Cut-down version of ProcKill for auxiliary
 *		processes (bgwriter, etc).  The PGPROC and sema are not released, only
 *		marked as not-in-use.
 */
static void
AuxiliaryProcKill(int code, Datum arg)
{
	int			proctype = DatumGetInt32(arg);
	PGPROC	   *auxproc;

	Assert(proctype >= 0 && proctype < NUM_AUXILIARY_PROCS);

	auxproc = &AuxiliaryProcs[proctype];

	Assert(MyProc == auxproc);

	/* Release any LW locks I am holding (see notes above) */
	LWLockReleaseAll();

	/* Release ownership of the process's latch, too */
	DisownLatch(&MyProc->procLatch);

	SpinLockAcquire(ProcStructLock);

	/* Mark auxiliary proc no longer in use */
	MyProc->pid = 0;

	/* Update shared estimate of spins_per_delay */
	update_spins_per_delay();

	SpinLockRelease(ProcStructLock);

	/*
	 * If the parent process of this auxiliary process does not exist, we
	 * have trouble. Besides the obvious case that the postmaster is gone,
	 * this could happen to filerep subprocesses when the filerep main
	 * process dies unexpectedly. The postmaster will receive the SIGCHLD
	 * signal when we exit in that case. Make sure we exit with non-zero (and
	 * not 1 either) exit status, to force the postmaster to reset the system
	 * if that happens.
	 */
	if (!ParentProcIsAlive())
		proc_exit(10);

	/* PGPROC struct isn't mine anymore */
	MyProc = NULL;
	lockHolderProcPtr = NULL;
}


/*
 * ProcQueue package: routines for putting processes to sleep
 *		and waking them up
 */

/*
 * ProcQueueAlloc -- alloc/attach to a shared memory process queue
 *
 * Returns: a pointer to the queue or NULL
 * Side Effects: Initializes the queue if we allocated one
 */
#ifdef NOT_USED
PROC_QUEUE *
ProcQueueAlloc(char *name)
{
	bool		found;
	PROC_QUEUE *queue = (PROC_QUEUE *)
	ShmemInitStruct(name, sizeof(PROC_QUEUE), &found);

	if (!queue)
		return NULL;
	if (!found)
		ProcQueueInit(queue);
	return queue;
}
#endif

/*
 * ProcQueueInit -- initialize a shared memory process queue
 */
void
ProcQueueInit(PROC_QUEUE *queue)
{
	SHMQueueInit(&(queue->links));
	queue->size = 0;
}


/*
 * ProcSleep -- put a process to sleep on the specified lock
 *
 * Caller must have set MyProc->heldLocks to reflect locks already held
 * on the lockable object by this process (under all XIDs).
 *
 * The lock table's partition lock must be held at entry, and will be held
 * at exit.
 *
 * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
 *
 * ASSUME: that no one will fiddle with the queue until after
 *		we release the partition lock.
 *
 * NOTES: The process queue is now a priority queue for locking.
 *
 * P() on the semaphore should put us to sleep.  The process
 * semaphore is normally zero, so when we try to acquire it, we sleep.
 */
int
ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
{
	LOCKMODE	lockmode = locallock->tag.mode;
	LOCK	   *lock = locallock->lock;
	PROCLOCK   *proclock = locallock->proclock;
	uint32		hashcode = locallock->hashcode;
	LWLockId	partitionLock = LockHashPartitionLock(hashcode);
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	LOCKMASK	myHeldLocks = MyProc->heldLocks;
	bool		early_deadlock = false;
	bool		allow_autovacuum_cancel = true;
	int			myWaitStatus;
	PGPROC	   *proc;
	int			i;

	/*
	 * Determine where to add myself in the wait queue.
	 *
	 * Normally I should go at the end of the queue.  However, if I already
	 * hold locks that conflict with the request of any previous waiter, put
	 * myself in the queue just in front of the first such waiter. This is not
	 * a necessary step, since deadlock detection would move me to before that
	 * waiter anyway; but it's relatively cheap to detect such a conflict
	 * immediately, and avoid delaying till deadlock timeout.
	 *
	 * Special case: if I find I should go in front of some waiter, check to
	 * see if I conflict with already-held locks or the requests before that
	 * waiter.  If not, then just grant myself the requested lock immediately.
	 * This is the same as the test for immediate grant in LockAcquire, except
	 * we are only considering the part of the wait queue before my insertion
	 * point.
	 */
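	/*
	 * Worked example (added for illustration; not in the original comment):
	 * if I already hold RowExclusiveLock and a waiter ahead of me requests
	 * AccessExclusiveLock, that waiter must wait for me, so I am inserted
	 * in front of it instead of at the tail -- sparing the deadlock checker
	 * from having to reorder us after DeadlockTimeout expires.
	 */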
	if (myHeldLocks != 0)
	{
		LOCKMASK	aheadRequests = 0;

		proc = (PGPROC *) MAKE_PTR(waitQueue->links.next);
		for (i = 0; i < waitQueue->size; i++)
		{
			/* Must he wait for me? */
			if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks)
			{
				/* Must I wait for him? */
				if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks)
				{
					/*
					 * Yes, so we have a deadlock.  Easiest way to clean up
					 * correctly is to call RemoveFromWaitQueue(), but we
					 * can't do that until we are *on* the wait queue. So, set
					 * a flag to check below, and break out of loop.  Also,
					 * record deadlock info for later message.
					 */
					RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
					early_deadlock = true;
					break;
				}
				/* I must go before this waiter.  Check special case. */
				if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
					LockCheckConflicts(lockMethodTable,
									   lockmode,
									   lock,
									   proclock,
									   MyProc) == STATUS_OK)
				{
					/* Skip the wait and just grant myself the lock. */
					GrantLock(lock, proclock, lockmode);
					GrantAwaitedLock();
					return STATUS_OK;
				}
				/* Break out of loop to put myself before him */
				break;
			}
			/* Nope, so advance to next waiter */
			aheadRequests |= LOCKBIT_ON(proc->waitLockMode);
			proc = (PGPROC *) MAKE_PTR(proc->links.next);
		}

		/*
		 * If we fall out of loop normally, proc points to waitQueue head, so
		 * we will insert at tail of queue as desired.
		 */
	}
	else
	{
		/* I hold no locks, so I can't push in front of anyone. */
		proc = (PGPROC *) &(waitQueue->links);
	}

	/*
	 * Insert self into queue, ahead of the given proc (or at tail of queue).
	 */
	SHMQueueInsertBefore(&(proc->links), &(MyProc->links));
	waitQueue->size++;

	lock->waitMask |= LOCKBIT_ON(lockmode);

	/* Set up wait information in PGPROC object, too */
	MyProc->waitLock = lock;
	MyProc->waitProcLock = proclock;
	MyProc->waitLockMode = lockmode;

	MyProc->waitStatus = STATUS_WAITING;

	/*
	 * If we detected deadlock, give up without waiting.  This must agree with
	 * CheckDeadLock's recovery code, except that we shouldn't release the
	 * semaphore since we haven't tried to lock it yet.
	 */
	if (early_deadlock)
	{
		RemoveFromWaitQueue(MyProc, hashcode);
		return STATUS_ERROR;
	}

	/* mark that we are waiting for a lock */
	lockAwaited = locallock;

	/*
	 * Release the lock table's partition lock.
	 *
	 * NOTE: this may also cause us to exit critical-section state, possibly
	 * allowing a cancel/die interrupt to be accepted. This is OK because we
	 * have recorded the fact that we are waiting for a lock, and so
	 * LockWaitCancel will clean up if cancel/die happens.
	 */
	LWLockRelease(partitionLock);

	/* Reset deadlock_state before enabling the signal handler */
	deadlock_state = DS_NOT_YET_CHECKED;

	/*
	 * Set timer so we can wake up after awhile and check for a deadlock. If a
	 * deadlock is detected, the handler releases the process's semaphore and
	 * sets MyProc->waitStatus = STATUS_ERROR, allowing us to know that we
	 * must report failure rather than success.
	 *
	 * By delaying the check until we've waited for a bit, we can avoid
	 * running the rather expensive deadlock-check code in most cases.
	 */
	if (!enable_sig_alarm(DeadlockTimeout, false))
		elog(FATAL, "could not set timer for process wakeup");

	/*
	 * If someone wakes us between LWLockRelease and PGSemaphoreLock,
	 * PGSemaphoreLock will not block.  The wakeup is "saved" by the semaphore
	 * implementation.  While this is normally good, there are cases where a
	 * saved wakeup might be leftover from a previous operation (for example,
	 * we aborted ProcWaitForSignal just before someone did ProcSendSignal).
	 * So, loop to wait again if the waitStatus shows we haven't been granted
	 * nor denied the lock yet.
	 *
	 * We pass interruptOK = true, which eliminates a window in which
	 * cancel/die interrupts would be held off undesirably.  This is a promise
	 * that we don't mind losing control to a cancel/die interrupt here.  We
	 * don't, because we have no shared-state-change work to do after being
	 * granted the lock (the grantor did it all).  We do have to worry about
	 * updating the locallock table, but if we lose control to an error,
	 * LockWaitCancel will fix that up.
	 */
	do
	{
		PGSemaphoreLock(&MyProc->sem, true);

		/*
		 * waitStatus could change from STATUS_WAITING to something else
		 * asynchronously.  Read it just once per loop to prevent surprising
		 * behavior (such as missing log messages).
		 */
		myWaitStatus = MyProc->waitStatus;

		/*
		 * If we are not deadlocked, but are waiting on an autovacuum-induced
		 * task, send a signal to interrupt it.
		 */
		if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel)
		{
			PGPROC	   *autovac = GetBlockingAutoVacuumPgproc();

			LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

			/*
			 * Only do it if the worker is not working to protect against Xid
			 * wraparound.
			 */
			if ((autovac != NULL) &&
				(autovac->vacuumFlags & PROC_IS_AUTOVACUUM) &&
				!(autovac->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND))
			{
				int			pid = autovac->pid;
				StringInfoData locktagbuf;
				StringInfoData logbuf;		/* errdetail for server log */

				initStringInfo(&locktagbuf);
				initStringInfo(&logbuf);
				DescribeLockTag(&locktagbuf, &lock->tag);
				appendStringInfo(&logbuf,
								 _("Process %d waits for %s on %s."),
								 MyProcPid,
								 GetLockmodeName(lock->tag.locktag_lockmethodid,
												 lockmode),
								 locktagbuf.data);

				/* release lock as quickly as possible */
				LWLockRelease(ProcArrayLock);

				ereport(LOG,
						(errmsg("sending cancel to blocking autovacuum PID %d",
								pid),
						 errdetail("%s", logbuf.data)));

				pfree(logbuf.data);
				pfree(locktagbuf.data);

				/* send the autovacuum worker Back to Old Kent Road */
				if (kill(pid, SIGINT) < 0)
				{
					/* Just a warning to allow multiple callers */
					ereport(WARNING,
							(errmsg("could not send signal to process %d: %m",
									pid)));
				}
			}
			else
				LWLockRelease(ProcArrayLock);

			/* prevent signal from being resent more than once */
			allow_autovacuum_cancel = false;
		}

		/*
		 * If awoken after the deadlock check interrupt has run, and
		 * log_lock_waits is on, then report about the wait.
		 */
		if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED)
		{
			StringInfoData buf;
			const char *modename;
			long		secs;
			int			usecs;
			long		msecs;

			initStringInfo(&buf);
			DescribeLockTag(&buf, &locallock->tag.lock);
			modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid,
									   lockmode);
			TimestampDifference(timeout_start_time, GetCurrentTimestamp(),
								&secs, &usecs);
			msecs = secs * 1000 + usecs / 1000;
			usecs = usecs % 1000;

			if (deadlock_state == DS_SOFT_DEADLOCK)
				ereport(LOG,
						(errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs)));
			else if (deadlock_state == DS_HARD_DEADLOCK)
			{
				/*
				 * This message is a bit redundant with the error that will be
				 * reported subsequently, but in some cases the error report
				 * might not make it to the log (eg, if it's caught by an
				 * exception handler), and we want to ensure all long-wait
				 * events get logged.
				 */
				ereport(LOG,
						(errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs)));
			}

			if (myWaitStatus == STATUS_WAITING)
				ereport(LOG,
						(errmsg("process %d still waiting for %s on %s after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs)));
			else if (myWaitStatus == STATUS_OK)
				ereport(LOG,
						(errmsg("process %d acquired %s on %s after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs)));
			else
			{
				Assert(myWaitStatus == STATUS_ERROR);

				/*
				 * Currently, the deadlock checker always kicks its own
				 * process, which means that we'll only see STATUS_ERROR when
				 * deadlock_state == DS_HARD_DEADLOCK, and there's no need to
				 * print redundant messages.  But for completeness and
				 * future-proofing, print a message if it looks like someone
				 * else kicked us off the lock.
				 */
				if (deadlock_state != DS_HARD_DEADLOCK)
					ereport(LOG,
							(errmsg("process %d failed to acquire %s on %s after %ld.%03d ms",
									MyProcPid, modename, buf.data, msecs, usecs)));
			}

			/*
			 * At this point we might still need to wait for the lock. Reset
			 * state so we don't print the above messages again.
			 */
			deadlock_state = DS_NO_DEADLOCK;

			pfree(buf.data);
		}
	} while (myWaitStatus == STATUS_WAITING);

	/*
	 * Disable the timer, if it's still running
	 */
	if (!disable_sig_alarm(false))
		elog(FATAL, "could not disable timer for process wakeup");

	/*
	 * Re-acquire the lock table's partition lock.  We have to do this to hold
	 * off cancel/die interrupts before we can mess with lockAwaited (else we
	 * might have a missed or duplicated locallock update).
	 */
	LWLockAcquire(partitionLock, LW_EXCLUSIVE);

	/*
	 * We no longer want LockWaitCancel to do anything.
	 */
	lockAwaited = NULL;

	/*
	 * If we got the lock, be sure to remember it in the locallock table.
	 */
	if (MyProc->waitStatus == STATUS_OK)
		GrantAwaitedLock();

	/*
	 * We don't have to do anything else, because the awaker did all the
	 * necessary update of the lock table and MyProc.
	 */
	return MyProc->waitStatus;
}


/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
 *	 Also remove the process from the wait queue and set its links invalid.
 *	 RETURN: the next process in the wait queue.
 *
 * The appropriate lock partition lock must be held by caller.
 *
 * XXX: presently, this code is only used for the "success" case, and only
 * works correctly for that case.  To clean up in failure case, would need
 * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
 * Hence, in practice the waitStatus parameter must be STATUS_OK.
 */
PGPROC *
ProcWakeup(PGPROC *proc, int waitStatus)
{
	PGPROC	   *retProc;

	/* Proc should be sleeping ... */
	if (proc->links.prev == INVALID_OFFSET ||
		proc->links.next == INVALID_OFFSET)
		return NULL;
	Assert(proc->waitStatus == STATUS_WAITING);

	/* Save next process before we zap the list link */
	retProc = (PGPROC *) MAKE_PTR(proc->links.next);

	/* Remove process from wait queue */
	SHMQueueDelete(&(proc->links));
	(proc->waitLock->waitProcs.size)--;

	/* Clean up process' state and pass it the ok/fail signal */
	proc->waitLock = NULL;
	proc->waitProcLock = NULL;
	proc->waitStatus = waitStatus;

	/* And awaken it */
	PGSemaphoreUnlock(&proc->sem);

	return retProc;
}

/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
 *		released (or a prior waiter is aborted).  Scan all waiters
 *		for lock, waken any that are no longer blocked.
 *
 * The appropriate lock partition lock must be held by caller.
 */
void
ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
{
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			queue_size = waitQueue->size;
	PGPROC	   *proc;
	LOCKMASK	aheadRequests = 0;

	Assert(queue_size >= 0);

	if (queue_size == 0)
		return;

	proc = (PGPROC *) MAKE_PTR(waitQueue->links.next);

	while (queue_size-- > 0)
	{
		LOCKMODE	lockmode = proc->waitLockMode;

		/*
		 * Waken if (a) doesn't conflict with requests of earlier waiters, and
		 * (b) doesn't conflict with already-held locks.
		 */
		if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
			LockCheckConflicts(lockMethodTable,
							   lockmode,
							   lock,
							   proc->waitProcLock,
							   proc) == STATUS_OK)
		{
			/* OK to waken */
			GrantLock(lock, proc->waitProcLock, lockmode);
			proc = ProcWakeup(proc, STATUS_OK);

			/*
			 * ProcWakeup removes proc from the lock's waiting process queue
			 * and returns the next proc in chain; don't use proc's next-link,
			 * because it's been cleared.
			 */
		}
		else
		{
			/*
			 * Cannot wake this guy. Remember his request for later checks.
			 */
			aheadRequests |= LOCKBIT_ON(lockmode);
			proc = (PGPROC *) MAKE_PTR(proc->links.next);
		}
	}

	Assert(waitQueue->size >= 0);
}

/*
 * CheckDeadLock
 *
 * We only get to this routine if we got SIGALRM after DeadlockTimeout
 * while waiting for a lock to be released by some other process.  Look
 * to see if there's a deadlock; if not, just return and continue waiting.
 * (But signal ProcSleep to log a message, if log_lock_waits is true.)
 * If we have a real deadlock, remove ourselves from the lock's wait queue
 * and signal an error to ProcSleep.
 *
 * NB: this is run inside a signal handler, so be very wary about what is done
 * here or in called routines.
 */
static void
CheckDeadLock(void)
{
	int			i;

	/*
	 * Acquire exclusive lock on the entire shared lock data structures. Must
	 * grab LWLocks in partition-number order to avoid LWLock deadlock.
	 *
	 * Note that the deadlock check interrupt had better not be enabled
	 * anywhere that this process itself holds lock partition locks, else this
	 * will wait forever.  Also note that LWLockAcquire creates a critical
	 * section, so that this routine cannot be interrupted by cancel/die
	 * interrupts.
	 */
	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
		LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE);

	/*
	 * Check to see if we've been awoken by anyone in the interim.
	 *
	 * If we have, we can return and resume our transaction -- happy day.
	 * Before we are awoken the process releasing the lock grants it to us
	 * so we know that we don't have to wait anymore.
	 *
	 * We check by looking to see if we've been unlinked from the wait queue.
	 * This is quicker than checking our semaphore's state, since no kernel
	 * call is needed, and it is safe because we hold the lock partition lock.
	 */
	if (MyProc->links.prev == INVALID_OFFSET ||
		MyProc->links.next == INVALID_OFFSET)
		goto check_done;

#ifdef LOCK_DEBUG
	if (Debug_deadlocks)
		DumpAllLocks();
#endif

	/* Run the deadlock check, and set deadlock_state for use by ProcSleep */
	deadlock_state = DeadLockCheck(MyProc);

	if (deadlock_state == DS_HARD_DEADLOCK)
	{
		/*
		 * Oops.  We have a deadlock.
		 *
		 * Get this process out of wait state.  (Note: we could do this more
		 * efficiently by relying on lockAwaited, but use this coding to
		 * preserve the flexibility to kill some other transaction than the
		 * one detecting the deadlock.)
		 *
		 * RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
		 * ProcSleep will report an error after we return from the signal
		 * handler.
		 */
		Assert(MyProc->waitLock != NULL);
		if (Gp_role == GP_ROLE_DISPATCH && IsResQueueEnabled() &&
			LOCK_LOCKMETHOD(*(MyProc->waitLock)) == RESOURCE_LOCKMETHOD)
		{
			ResRemoveFromWaitQueue(MyProc,
								   LockTagHashCode(&(MyProc->waitLock->tag)));
		}
		else
		{
			RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
		}

		/*
		 * Unlock my semaphore so that the interrupted ProcSleep() call can
		 * finish.
		 */
		PGSemaphoreUnlock(&MyProc->sem);

		/*
		 * We're done here.  Transaction abort caused by the error that
		 * ProcSleep will raise will cause any other locks we hold to be
		 * released, thus allowing other processes to wake up; we don't need
		 * to do that here.  NOTE: an exception is that releasing locks we
		 * hold doesn't consider the possibility of waiters that were blocked
		 * behind us on the lock we just failed to get, and might now be
		 * wakable because we're not in front of them anymore.  However,
		 * RemoveFromWaitQueue took care of waking up any such processes.
		 */
	}
	else if (log_lock_waits || deadlock_state == DS_BLOCKED_BY_AUTOVACUUM)
	{
		/*
		 * Unlock my semaphore so that the interrupted ProcSleep() call can
		 * print the log message (we daren't do it here because we are inside
		 * a signal handler).  It will then sleep again until someone releases
		 * the lock.
		 *
		 * If blocked by autovacuum, this wakeup will enable ProcSleep to send
		 * the cancelling signal to the autovacuum worker.
		 */
		PGSemaphoreUnlock(&MyProc->sem);
	}

	/*
	 * And release locks.  We do this in reverse order for two reasons: (1)
	 * Anyone else who needs more than one of the locks will be trying to lock
	 * them in increasing order; we don't want to release the other process
	 * until it can get all the locks it needs. (2) This avoids O(N^2)
	 * behavior inside LWLockRelease.
	 */
check_done:
	for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
		LWLockRelease(FirstLockMgrLock + i);
}

1498 1499 1500 1501 1502 1503
/*
 * ProcWaitForSignal - wait for a signal from another backend.
 *
 * This can share the semaphore normally used for waiting for locks,
 * since a backend could never be waiting for a lock and a signal at
 * the same time.  As with locks, it's OK if the signal arrives just
B
Bruce Momjian 已提交
1504
 * before we actually reach the waiting state.	Also as with locks,
1505 1506
 * it's necessary that the caller be robust against bogus wakeups:
 * always check that the desired state has occurred, and wait again
B
Bruce Momjian 已提交
1507
 * if not.	This copes with possible "leftover" wakeups.
1508 1509 1510 1511
 */
void
ProcWaitForSignal(void)
{
1512
	PGSemaphoreLock(&MyProc->sem, true);
1513 1514 1515
}

/*
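/*
 * Typical usage pattern (illustrative, not from the original source):
 *
 *		while (!desired_condition_reached())
 *			ProcWaitForSignal();
 *
 * The re-check loop is what makes callers robust against the "leftover"
 * wakeups described above; desired_condition_reached() stands in for a
 * hypothetical caller-supplied predicate.
 */
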
1516
 * ProcSendSignal - send a signal to a backend identified by PID
1517 1518
 */
void
1519
ProcSendSignal(int pid)
1520
{
1521
	PGPROC	   *proc = BackendPidGetProc(pid);
1522 1523

	if (proc != NULL)
1524
		PGSemaphoreUnlock(&proc->sem);
1525 1526 1527
}


1528 1529 1530 1531 1532 1533 1534 1535 1536
/*****************************************************************************
 * SIGALRM interrupt support
 *
 * Maybe these should be in pqsignal.c?
 *****************************************************************************/

/*
 * Enable the SIGALRM interrupt to fire after the specified delay
 *
1537
 * Delay is given in milliseconds.	Caller should be sure a SIGALRM
1538 1539
 * signal handler is installed before this is called.
 *
1540 1541
 * This code properly handles nesting of deadlock timeout alarms within
 * statement timeout alarms.
1542
 *
1543 1544 1545
 * Returns TRUE if okay, FALSE on failure.
 */
bool
1546
enable_sig_alarm(int delayms, bool is_statement_timeout)
1547
{
1548
	TimestampTz fin_time;
1549
	struct itimerval timeval;
1550

1551 1552
	if (is_statement_timeout)
	{
1553 1554 1555 1556 1557 1558 1559 1560 1561 1562
		/*
		 * Begin statement-level timeout
		 *
		 * Note that we compute statement_fin_time with reference to the
		 * statement_timestamp, but apply the specified delay without any
		 * correction; that is, we ignore whatever time has elapsed since
		 * statement_timestamp was set.  In the normal case only a small
		 * interval will have elapsed and so this doesn't matter, but there
		 * are corner cases (involving multi-statement query strings with
		 * embedded COMMIT or ROLLBACK) where we might re-initialize the
B
Bruce Momjian 已提交
1563 1564 1565 1566
		 * statement timeout long after initial receipt of the message. In
		 * such cases the enforcement of the statement timeout will be a bit
		 * inconsistent.  This annoyance is judged not worth the cost of
		 * performing an additional gettimeofday() here.
1567
		 */
1568
		Assert(!deadlock_timeout_active);
1569 1570
		fin_time = GetCurrentStatementStartTimestamp();
		fin_time = TimestampTzPlusMilliseconds(fin_time, delayms);
1571
		statement_fin_time = fin_time;
1572
		cancel_from_timeout = false;
1573
		statement_timeout_active = true;
1574 1575 1576 1577 1578 1579
	}
	else if (statement_timeout_active)
	{
		/*
		 * Begin deadlock timeout with statement-level timeout active
		 *
1580 1581 1582 1583
		 * Here, we want to interrupt at the closer of the two timeout times.
		 * If fin_time >= statement_fin_time then we need not touch the
		 * existing timer setting; else set up to interrupt at the deadlock
		 * timeout time.
1584 1585 1586
		 *
		 * NOTE: in this case it is possible that this routine will be
		 * interrupted by the previously-set timer alarm.  This is okay
B
Bruce Momjian 已提交
1587 1588 1589
		 * because the signal handler will do only what it should do according
		 * to the state variables.	The deadlock checker may get run earlier
		 * than normal, but that does no harm.
1590
		 */
1591 1592
		timeout_start_time = GetCurrentTimestamp();
		fin_time = TimestampTzPlusMilliseconds(timeout_start_time, delayms);
1593
		deadlock_timeout_active = true;
1594
		if (fin_time >= statement_fin_time)
1595 1596 1597 1598 1599 1600
			return true;
	}
	else
	{
		/* Begin deadlock timeout with no statement-level timeout */
		deadlock_timeout_active = true;
1601 1602 1603
		/* GetCurrentTimestamp can be expensive, so only do it if we must */
		if (log_lock_waits)
			timeout_start_time = GetCurrentTimestamp();
1604
	}
1605

1606
	/* If we reach here, okay to set the timer interrupt */
1607
	MemSet(&timeval, 0, sizeof(struct itimerval));
1608 1609
	timeval.it_value.tv_sec = delayms / 1000;
	timeval.it_value.tv_usec = (delayms % 1000) * 1000;
1610
	if (setitimer(ITIMER_REAL, &timeval, NULL))
1611
		return false;
1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632
	return true;
}
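
/*
 * A sketch of the typical enable/disable pairing (assuming a SIGALRM
 * handler such as handle_sig_alarm below is already installed); this is
 * the same shape ResProcSleep uses further down for resource locks:
 *
 *		if (!enable_sig_alarm(DeadlockTimeout, false))
 *			elog(FATAL, "could not set timer for process wakeup");
 *		PGSemaphoreLock(&MyProc->sem, true);
 *		if (!disable_sig_alarm(false))
 *			elog(FATAL, "could not disable timer for process wakeup");
 */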

/*
 * Cancel the SIGALRM timer, either for a deadlock timeout or a statement
 * timeout.  If a deadlock timeout is canceled, any active statement timeout
 * remains in force.
 *
 * Returns TRUE if okay, FALSE on failure.
 */
bool
disable_sig_alarm(bool is_statement_timeout)
{
	/*
	 * Always disable the interrupt if it is active; this avoids being
	 * interrupted by the signal handler and thereby possibly getting
	 * confused.
	 *
	 * We will re-enable the interrupt if necessary in CheckStatementTimeout.
	 */
	if (statement_timeout_active || deadlock_timeout_active)
	{
		struct itimerval timeval;

		MemSet(&timeval, 0, sizeof(struct itimerval));
		if (setitimer(ITIMER_REAL, &timeval, NULL))
		{
			statement_timeout_active = false;
			cancel_from_timeout = false;
			deadlock_timeout_active = false;
			return false;
		}
	}

	/* Always cancel deadlock timeout, in case this is error cleanup */
	deadlock_timeout_active = false;

	/* Cancel or reschedule statement timeout */
	if (is_statement_timeout)
	{
		statement_timeout_active = false;
		cancel_from_timeout = false;
	}
	else if (statement_timeout_active)
	{
		if (!CheckStatementTimeout())
			return false;
	}
	return true;
}

/*
 * We get here when a session has been idle for a while (waiting for the
 * client to send us SQL to execute).  The idea is to consume fewer
 * resources while sitting idle, so we can support more sessions being
 * logged on.
 *
 * The expectation is that if the session is logged on, but nobody is
 * sending us work to do, we want to free up whatever resources we can.
 * Usually it means there is a human being at the other end of the
 * connection, and that person has walked away from their terminal, or
 * just hasn't decided what to do next.  We could be idle for a very
 * long time (many hours).
 *
 * Of course, freeing gangs means that the next time the user does send
 * in an SQL statement, we need to allocate gangs (at least the writer
 * gang) to do anything.  This entails extra work, so we don't want to
 * do it unless we think the session has really gone idle.
 *
 * P.S.: Is there anything we can free up on the master (QD) side?  I
 * can't think of anything.
 */
static void
HandleClientWaitTimeout(void)
{
	elog(DEBUG2, "HandleClientWaitTimeout");

	/*
	 * Cancel the timer, as there is no reason we need it to go off again.
	 */
	disable_sig_alarm(false);

	/*
	 * Free gangs to free up resources on the segDBs.
	 */
	if (GangsExist())
	{
		if (IsTransactionOrTransactionBlock() || TempNamespaceOidIsValid())
		{
			/*
			 * If we are in a transaction, we can't release the writer gang,
			 * as this will abort the transaction.
			 *
			 * If we have a TempNamespace, we can't release the writer gang,
			 * as this would drop any temp tables we own.
			 *
			 * Since we are idle, any reader gangs will be available but not
			 * allocated.
			 */
			disconnectAndDestroyIdleReaderGangs();
		}
		else
		{
			/*
			 * Get rid of ALL gangs... readers and primary writer.  After
			 * this, we have no resources being consumed on the segDBs at
			 * all.
			 *
			 * Our session wasn't destroyed due to a fatal error or FTS
			 * action, so we don't need to do anything special.
			 * Specifically, we DON'T want to act like we are now in a new
			 * session, since that would be confusing in the log.
			 */
			DisconnectAndDestroyAllGangs(false);
		}
	}
}
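
/*
 * Summary of the branch above: inside a transaction, or while we hold a
 * temp namespace, only idle reader gangs may be released (the writer
 * gang pins the transaction state and any temp tables); a fully idle
 * session can instead give back every gang and consume no segDB
 * resources at all.
 */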

/*
 * Check for statement timeout.  If the timeout time has come,
 * trigger a query-cancel interrupt; if not, reschedule the SIGALRM
 * interrupt to occur at the right time.
 *
 * Returns true if okay, false if failed to set the interrupt.
 */
static bool
CheckStatementTimeout(void)
{
	TimestampTz now;

	if (!statement_timeout_active)
		return true;			/* do nothing if not active */

	/* QD takes care of timeouts for QE. */
	if (Gp_role == GP_ROLE_EXECUTE)
		return true;

	now = GetCurrentTimestamp();

	if (now >= statement_fin_time)
	{
		/* Time to die */
		statement_timeout_active = false;
		cancel_from_timeout = true;
#ifdef HAVE_SETSID
		/* try to signal whole process group */
		kill(-MyProcPid, SIGINT);
#endif
		kill(MyProcPid, SIGINT);
	}
	else
	{
		/* Not time yet, so (re)schedule the interrupt */
		long		secs;
		int			usecs;
		struct itimerval timeval;

		TimestampDifference(now, statement_fin_time,
							&secs, &usecs);

		/*
		 * It's possible that the difference is less than a microsecond;
		 * ensure we don't cancel, rather than set, the interrupt.
		 */
		if (secs == 0 && usecs == 0)
			usecs = 1;

		MemSet(&timeval, 0, sizeof(struct itimerval));
		timeval.it_value.tv_sec = secs;
		timeval.it_value.tv_usec = usecs;
		if (setitimer(ITIMER_REAL, &timeval, NULL))
			return false;
	}

	return true;
}
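
/*
 * Worked example of the rescheduling arithmetic above: if
 * statement_fin_time is 2.5 seconds away, TimestampDifference yields
 * secs = 2 and usecs = 500000, so the timer is re-armed with
 *
 *		timeval.it_value.tv_sec = 2;
 *		timeval.it_value.tv_usec = 500000;
 *
 * The clamp to usecs = 1 matters only when the deadline is under a
 * microsecond away: a zeroed itimerval would cancel the timer rather
 * than arm it, and the timeout would never fire.
 */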

/*
 * We need DoingCommandRead to be extern so we can test it here.
 * (Would it be better to have some routine to call to get the
 * value of the bool?  This is simpler.)
 */
extern bool DoingCommandRead;

/*
 * Signal handler for SIGALRM
 *
 * Process deadlock check and/or statement timeout check, as needed.
 * To avoid various edge cases, we must be careful to do nothing
 * when there is nothing to be done.  We also need to be able to
 * reschedule the timer interrupt if called before end of statement.
 */
void
handle_sig_alarm(SIGNAL_ARGS)
{
	int			save_errno = errno;

	/* SIGALRM is cause for waking anything waiting on the process latch */
	if (MyProc)
		SetLatch(&MyProc->procLatch);

	/* don't joggle the elbow of proc_exit */
	if (!proc_exit_inprogress)
	{
		/*
		 * Idle session timeout shares the timer with the deadlock timeout.
		 * If DoingCommandRead is true, we are deciding that the session is
		 * idle.  In that case, we can't possibly be in a deadlock, so there
		 * is no point in running the deadlock detection.
		 */
		if (deadlock_timeout_active && !DoingCommandRead)
		{
			deadlock_timeout_active = false;
			CheckDeadLock();
		}

		if (statement_timeout_active)
			(void) CheckStatementTimeout();

		/*
		 * If we are DoingCommandRead, it means we are sitting idle waiting
		 * for the user to send us some SQL.
		 */
		if (DoingCommandRead)
		{
			(void) ClientWaitTimeoutInterruptHandler();
			deadlock_timeout_active = false;
		}
	}

	errno = save_errno;
}
static void
ClientWaitTimeoutInterruptHandler(void)
{
	int save_errno = errno;

	/* Don't joggle the elbow of proc_exit */
	if (proc_exit_inprogress)
		return;

	if (clientWaitTimeoutInterruptEnabled)
	{
		bool save_ImmediateInterruptOK = ImmediateInterruptOK;

		/*
		 * We may be called while ImmediateInterruptOK is true; turn it off
		 * while messing with the client wait timeout state.
		 */
		ImmediateInterruptOK = false;

		/*
		 * I'm not sure whether some flavors of Unix might allow another
		 * SIGALRM occurrence to recursively interrupt this routine. To cope
		 * with the possibility, we do the same sort of dance that
		 * EnableNotifyInterrupt must do -- see that routine for comments.
		 */
		clientWaitTimeoutInterruptEnabled = 0; /* disable any recursive signal */
		clientWaitTimeoutInterruptOccurred = 1; /* do at least one iteration */
		for (;;)
		{
			clientWaitTimeoutInterruptEnabled = 1;
			if (!clientWaitTimeoutInterruptOccurred)
				break;
			clientWaitTimeoutInterruptEnabled = 0;
			if (clientWaitTimeoutInterruptOccurred)
			{
				ProcessClientWaitTimeout();
			}
		}

		/*
		 * Restore ImmediateInterruptOK, and check for interrupts if needed.
		 */
		ImmediateInterruptOK = save_ImmediateInterruptOK;
		if (save_ImmediateInterruptOK)
			CHECK_FOR_INTERRUPTS();
	}
	else
	{
		/*
		 * In this path it is NOT SAFE to do much of anything, except this:
		 */
		clientWaitTimeoutInterruptOccurred = 1;
	}

	errno = save_errno;
}
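
/*
 * Note on the flag protocol above: setting
 * clientWaitTimeoutInterruptEnabled before re-testing
 * clientWaitTimeoutInterruptOccurred closes the race in which a SIGALRM
 * arriving between the two statements would otherwise be lost -- any
 * handler invocation that sees "enabled" set either processes the
 * timeout itself or records "occurred", which the re-test then picks
 * up.  EnableClientWaitTimeoutInterrupt below repeats the same loop.
 */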

void
EnableClientWaitTimeoutInterrupt(void)
{
	for (;;)
	{
		clientWaitTimeoutInterruptEnabled = 1;
		if (!clientWaitTimeoutInterruptOccurred)
			break;
		clientWaitTimeoutInterruptEnabled = 0;
		if (clientWaitTimeoutInterruptOccurred)
		{
			ProcessClientWaitTimeout();
		}
	}
}

bool
DisableClientWaitTimeoutInterrupt(void)
{
	bool result = (clientWaitTimeoutInterruptEnabled != 0);

	clientWaitTimeoutInterruptEnabled = 0;

	return result;
}

static void
ProcessClientWaitTimeout(void)
{
	bool notify_enabled;
	bool catchup_enabled;

	/* Must prevent SIGUSR1 and SIGUSR2 interrupt while I am running */
	notify_enabled = DisableNotifyInterrupt();
	catchup_enabled = DisableCatchupInterrupt();

	clientWaitTimeoutInterruptOccurred = 0;

	HandleClientWaitTimeout();

	if (notify_enabled)
		EnableNotifyInterrupt();
	if (catchup_enabled)
		EnableCatchupInterrupt();
}

/*
 * ResProcSleep -- put a process to sleep (that is waiting for a resource lock).
 *
 * Notes:
 * 	Locktable's masterLock must be held at entry, and will be held
 * 	at exit.
 *
 *	This is merely a version of ProcSleep modified for resource locks.
 *	The logic here could have been merged into ProcSleep, however it was
 *	requested to keep as much as possible of this resource lock code
 *	separate from its standard lock relatives - in the interest of not
 *	introducing new bugs or performance regressions into the lock code.
 */
int
ResProcSleep(LOCKMODE lockmode, LOCALLOCK *locallock, void *incrementSet)
{
	LOCK	   *lock = locallock->lock;
	PROCLOCK   *proclock = locallock->proclock;
	PROC_QUEUE	*waitQueue = &(lock->waitProcs);
	PGPROC		*proc;
	uint32		hashcode = locallock->hashcode;
	LWLockId	partitionLock = LockHashPartitionLock(hashcode);

	bool		selflock = true;		/* initialize result for error */

	/*
	 * Don't check my held locks, as we just add at the end of the queue.
	 */
	proc = (PGPROC *) &(waitQueue->links);
	SHMQueueInsertBefore(&(proc->links), &(MyProc->links));
	waitQueue->size++;

	lock->waitMask |= LOCKBIT_ON(lockmode);

	/*
	 * Reflect this in the PGPROC object, too.
	 */
	MyProc->waitLock = lock;
	MyProc->waitProcLock = (PROCLOCK *) proclock;
	MyProc->waitLockMode = lockmode;

	MyProc->waitStatus = STATUS_ERROR;	/* initialize result for error */

	/* Now check the status of the self lock footgun. */
	selflock = ResCheckSelfDeadLock(lock, proclock, incrementSet);
	if (selflock)
	{
		LWLockRelease(partitionLock);
		ereport(ERROR,
				(errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
				 errmsg("deadlock detected, locking against self")));
	}

	/* Mark that we are waiting for a lock */
	lockAwaited = locallock;

	/* Ok to wait. */
	LWLockRelease(partitionLock);

	if (!enable_sig_alarm(DeadlockTimeout, false))
		elog(FATAL, "could not set timer for (resource lock) process wakeup");

	/*
	 * Sleep on the semaphore.
	 */
	PGSemaphoreLock(&MyProc->sem, true);

	if (!disable_sig_alarm(false))
		elog(FATAL, "could not disable timer for (resource lock) process wakeup");

	/*
	 * Have been awakened, so continue.
	 */
	LWLockAcquire(partitionLock, LW_EXCLUSIVE);

	/*
	 * We no longer want (Res)LockWaitCancel to do anything.
	 */
	lockAwaited = NULL;

	return MyProc->waitStatus;
}


/*
 * ResLockWaitCancel -- Cancel any pending wait for a resource lock, when
 *	aborting a transaction.
 */
void
ResLockWaitCancel(void)
{
	LWLockId	partitionLock;

	if (lockAwaited != NULL)
	{
		/* Unlink myself from the wait queue, if on it */
		partitionLock = LockHashPartitionLock(lockAwaited->hashcode);
		LWLockAcquire(partitionLock, LW_EXCLUSIVE);

		if (MyProc->links.next != INVALID_OFFSET)
		{
			/* We could not have been granted the lock yet */
			Assert(MyProc->waitStatus == STATUS_ERROR);

			/* We should only be trying to cancel resource locks. */
			Assert(LOCALLOCK_LOCKMETHOD(*lockAwaited) == RESOURCE_LOCKMETHOD);

			ResRemoveFromWaitQueue(MyProc, lockAwaited->hashcode);
		}

		lockAwaited = NULL;

		LWLockRelease(partitionLock);
	}

	/*
	 * Reset the proc wait semaphore to zero. This is necessary in the
	 * scenario where someone else granted us the lock we wanted before we
	 * were able to remove ourselves from the wait-list.
	 */
	PGSemaphoreReset(&MyProc->sem);

	return;
}

bool ProcGetMppLocalProcessCounter(int *mppLocalProcessCounter)
{
	Assert(mppLocalProcessCounter != NULL);

	if (ProcGlobal == NULL)
		return false;

	*mppLocalProcessCounter = ProcGlobal->mppLocalProcessCounter;

	return true;
}

bool ProcCanSetMppSessionId(void)
{
	if (ProcGlobal == NULL || MyProc == NULL)
		return false;

	return true;
}

void ProcNewMppSessionId(int *newSessionId)
{
	Assert(newSessionId != NULL);

    *newSessionId = MyProc->mppSessionId =
		pg_atomic_add_fetch_u32((pg_atomic_uint32 *)&ProcGlobal->mppLocalProcessCounter, 1);

    /*
     * Make sure that our SessionState entry correctly records our
     * new session id.
     */
    if (NULL != MySessionState)
    {
    	/* This should not happen outside of dispatcher on the master */
    	Assert(GpIdentity.segindex == MASTER_CONTENT_ID && Gp_role == GP_ROLE_DISPATCH);

    	ereport(gp_sessionstate_loglevel, (errmsg("ProcNewMppSessionId: changing session id (old: %d, new: %d), pinCount: %d, activeProcessCount: %d",
    			MySessionState->sessionId, *newSessionId, MySessionState->pinCount, MySessionState->activeProcessCount), errprintstack(true)));

#ifdef USE_ASSERT_CHECKING
    	MySessionState->isModifiedSessionId = true;
#endif

    	MySessionState->sessionId = *newSessionId;
    }
}
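
/*
 * Illustrative only: the atomic increment above is the lock-free
 * equivalent of serializing on a spinlock, e.g. (hypothetical code,
 * not what this file does):
 *
 *		SpinLockAcquire(ProcStructLock);
 *		*newSessionId = ++ProcGlobal->mppLocalProcessCounter;
 *		SpinLockRelease(ProcStructLock);
 *
 * pg_atomic_add_fetch_u32 returns the post-increment value, so
 * concurrent backends can never be handed the same session id.
 */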