YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/postgres/src/backend/storage/ipc/procarray.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * procarray.c
4
 *    POSTGRES process array code.
5
 *
6
 *
7
 * This module maintains arrays of the PGPROC and PGXACT structures for all
8
 * active backends.  Although there are several uses for this, the principal
9
 * one is as a means of determining the set of currently running transactions.
10
 *
11
 * Because of various subtle race conditions it is critical that a backend
12
 * hold the correct locks while setting or clearing its MyPgXact->xid field.
13
 * See notes in src/backend/access/transam/README.
14
 *
15
 * The process arrays now also include structures representing prepared
16
 * transactions.  The xid and subxids fields of these are valid, as are the
17
 * myProcLocks lists.  They can be distinguished from regular backend PGPROCs
18
 * at need by checking for pid == 0.
19
 *
20
 * During hot standby, we also keep a list of XIDs representing transactions
21
 * that are known to be running in the master (or more precisely, were running
22
 * as of the current point in the WAL stream).  This list is kept in the
23
 * KnownAssignedXids array, and is updated by watching the sequence of
24
 * arriving XIDs.  This is necessary because if we leave those XIDs out of
25
 * snapshots taken for standby queries, then they will appear to be already
26
 * complete, leading to MVCC failures.  Note that in hot standby, the PGPROC
27
 * array represents standby processes, which by definition are not running
28
 * transactions that have XIDs.
29
 *
30
 * It is perhaps possible for a backend on the master to terminate without
31
 * writing an abort record for its transaction.  While that shouldn't really
32
 * happen, it would tie up KnownAssignedXids indefinitely, so we protect
33
 * ourselves by pruning the array when a valid list of running XIDs arrives.
34
 *
35
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
36
 * Portions Copyright (c) 1994, Regents of the University of California
37
 *
38
 *
39
 * IDENTIFICATION
40
 *    src/backend/storage/ipc/procarray.c
41
 *
42
 *-------------------------------------------------------------------------
43
 */
44
#include "postgres.h"
45
46
#include <signal.h>
47
48
#include "access/clog.h"
49
#include "access/subtrans.h"
50
#include "access/transam.h"
51
#include "access/twophase.h"
52
#include "access/xact.h"
53
#include "access/xlog.h"
54
#include "catalog/catalog.h"
55
#include "catalog/pg_authid.h"
56
#include "commands/dbcommands.h"
57
#include "miscadmin.h"
58
#include "pgstat.h"
59
#include "storage/proc.h"
60
#include "storage/procarray.h"
61
#include "storage/spin.h"
62
#include "utils/builtins.h"
63
#include "utils/rel.h"
64
#include "utils/snapmgr.h"
65
66
#include "pg_yb_utils.h"
67
68
/* Our shared memory area */
69
typedef struct ProcArrayStruct
70
{
71
  int     numProcs;   /* number of valid procs entries */
72
  int     maxProcs;   /* allocated size of procs array */
73
74
  /*
75
   * Known assigned XIDs handling
76
   */
77
  int     maxKnownAssignedXids; /* allocated size of array */
78
  int     numKnownAssignedXids; /* current # of valid entries */
79
  int     tailKnownAssignedXids;  /* index of oldest valid element */
80
  int     headKnownAssignedXids;  /* index of newest element, + 1 */
81
  slock_t   known_assigned_xids_lck;  /* protects head/tail pointers */
82
83
  /*
84
   * Highest subxid that has been removed from KnownAssignedXids array to
85
   * prevent overflow; or InvalidTransactionId if none.  We track this for
86
   * similar reasons to tracking overflowing cached subxids in PGXACT
87
   * entries.  Must hold exclusive ProcArrayLock to change this, and shared
88
   * lock to read it.
89
   */
90
  TransactionId lastOverflowedXid;
91
92
  /* oldest xmin of any replication slot */
93
  TransactionId replication_slot_xmin;
94
  /* oldest catalog xmin of any replication slot */
95
  TransactionId replication_slot_catalog_xmin;
96
97
  /* indexes into allPgXact[], has PROCARRAY_MAXPROCS entries */
98
  int     pgprocnos[FLEXIBLE_ARRAY_MEMBER];
99
} ProcArrayStruct;
100
101
static ProcArrayStruct *procArray;
102
103
static PGPROC *allProcs;
104
static PGXACT *allPgXact;
105
106
/*
107
 * Bookkeeping for tracking emulated transactions in recovery
108
 */
109
static TransactionId *KnownAssignedXids;
110
static bool *KnownAssignedXidsValid;
111
static TransactionId latestObservedXid = InvalidTransactionId;
112
113
/*
114
 * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
115
 * the highest xid that might still be running that we don't have in
116
 * KnownAssignedXids.
117
 */
118
static TransactionId standbySnapshotPendingXmin;
119
120
#ifdef XIDCACHE_DEBUG

/* Counters used for XidCache measurement */
static long xc_by_recent_xmin = 0;
static long xc_by_known_xact = 0;
static long xc_by_my_xact = 0;
static long xc_by_latest_xid = 0;
static long xc_by_main_xid = 0;
static long xc_by_child_xid = 0;
static long xc_by_known_assigned = 0;
static long xc_no_overflow = 0;
static long xc_slow_answer = 0;

static void DisplayXidCache(void);

/* Measurement enabled: bumping a counter is a plain post-increment */
#define XC_COUNTER_BUMP(counter)  ((counter)++)

#else             /* !XIDCACHE_DEBUG */

/*
 * Measurement disabled: the counters do not exist, so a bump expands to a
 * no-op expression that never mentions its argument.
 */
#define XC_COUNTER_BUMP(counter)  ((void) 0)

#endif              /* XIDCACHE_DEBUG */

/* One increment macro per counter; each is a no-op unless XIDCACHE_DEBUG */
#define xc_by_recent_xmin_inc()   XC_COUNTER_BUMP(xc_by_recent_xmin)
#define xc_by_known_xact_inc()    XC_COUNTER_BUMP(xc_by_known_xact)
#define xc_by_my_xact_inc()     XC_COUNTER_BUMP(xc_by_my_xact)
#define xc_by_latest_xid_inc()    XC_COUNTER_BUMP(xc_by_latest_xid)
#define xc_by_main_xid_inc()    XC_COUNTER_BUMP(xc_by_main_xid)
#define xc_by_child_xid_inc()   XC_COUNTER_BUMP(xc_by_child_xid)
#define xc_by_known_assigned_inc()  XC_COUNTER_BUMP(xc_by_known_assigned)
#define xc_no_overflow_inc()    XC_COUNTER_BUMP(xc_no_overflow)
#define xc_slow_answer_inc()    XC_COUNTER_BUMP(xc_slow_answer)
156
157
/* Primitives for KnownAssignedXids array handling for standby */
158
static void KnownAssignedXidsCompress(bool force);
159
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
160
           bool exclusive_lock);
161
static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
162
static bool KnownAssignedXidExists(TransactionId xid);
163
static void KnownAssignedXidsRemove(TransactionId xid);
164
static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
165
              TransactionId *subxids);
166
static void KnownAssignedXidsRemovePreceding(TransactionId xid);
167
static int  KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
168
static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray,
169
                 TransactionId *xmin,
170
                 TransactionId xmax);
171
static TransactionId KnownAssignedXidsGetOldestXmin(void);
172
static void KnownAssignedXidsDisplay(int trace_level);
173
static void KnownAssignedXidsReset(void);
174
static inline void ProcArrayEndTransactionInternal(PGPROC *proc,
175
                PGXACT *pgxact, TransactionId latestXid);
176
static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
177
178
/*
179
 * Report shared-memory space needed by CreateSharedProcArray.
180
 */
181
Size
182
ProcArrayShmemSize(void)
183
3.61k
{
184
3.61k
  Size    size;
185
186
  /* Size of the ProcArray structure itself */
187
32.2k
#define PROCARRAY_MAXPROCS  (MaxBackends + max_prepared_xacts)
188
189
3.61k
  size = offsetof(ProcArrayStruct, pgprocnos);
190
3.61k
  size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
191
192
  /*
193
   * During Hot Standby processing we have a data structure called
194
   * KnownAssignedXids, created in shared memory. Local data structures are
195
   * also created in various backends during GetSnapshotData(),
196
   * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
197
   * main structures created in those functions must be identically sized,
198
   * since we may at times copy the whole of the data structures around. We
199
   * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
200
   *
201
   * Ideally we'd only create this structure if we were actually doing hot
202
   * standby in the current run, but we don't know that yet at the time
203
   * shared memory is being set up.
204
   */
205
3.61k
#define TOTAL_MAX_CACHED_SUBXIDS \
206
21.3k
  ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
207
208
3.61k
  if (EnableHotStandby)
209
3.61k
  {
210
3.61k
    size = add_size(size,
211
3.61k
            mul_size(sizeof(TransactionId),
212
3.61k
                 TOTAL_MAX_CACHED_SUBXIDS));
213
3.61k
    size = add_size(size,
214
3.61k
            mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
215
3.61k
  }
216
217
3.61k
  return size;
218
3.61k
}
219
220
/*
221
 * Initialize the shared PGPROC array during postmaster startup.
222
 */
223
void
224
CreateSharedProcArray(void)
225
3.61k
{
226
3.61k
  bool    found;
227
228
  /* Create or attach to the ProcArray shared structure */
229
3.61k
  procArray = (ProcArrayStruct *)
230
3.61k
    ShmemInitStruct("Proc Array",
231
3.61k
            add_size(offsetof(ProcArrayStruct, pgprocnos),
232
3.61k
                 mul_size(sizeof(int),
233
3.61k
                      PROCARRAY_MAXPROCS)),
234
3.61k
            &found);
235
236
3.61k
  if (!found)
237
3.61k
  {
238
    /*
239
     * We're the first - initialize.
240
     */
241
3.61k
    procArray->numProcs = 0;
242
3.61k
    procArray->maxProcs = PROCARRAY_MAXPROCS;
243
3.61k
    procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS;
244
3.61k
    procArray->numKnownAssignedXids = 0;
245
3.61k
    procArray->tailKnownAssignedXids = 0;
246
3.61k
    procArray->headKnownAssignedXids = 0;
247
3.61k
    SpinLockInit(&procArray->known_assigned_xids_lck);
248
3.61k
    procArray->lastOverflowedXid = InvalidTransactionId;
249
3.61k
    procArray->replication_slot_xmin = InvalidTransactionId;
250
3.61k
    procArray->replication_slot_catalog_xmin = InvalidTransactionId;
251
3.61k
  }
252
253
3.61k
  allProcs = ProcGlobal->allProcs;
254
3.61k
  allPgXact = ProcGlobal->allPgXact;
255
256
  /* Create or attach to the KnownAssignedXids arrays too, if needed */
257
3.61k
  if (EnableHotStandby)
258
3.61k
  {
259
3.61k
    KnownAssignedXids = (TransactionId *)
260
3.61k
      ShmemInitStruct("KnownAssignedXids",
261
3.61k
              mul_size(sizeof(TransactionId),
262
3.61k
                   TOTAL_MAX_CACHED_SUBXIDS),
263
3.61k
              &found);
264
3.61k
    KnownAssignedXidsValid = (bool *)
265
3.61k
      ShmemInitStruct("KnownAssignedXidsValid",
266
3.61k
              mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
267
3.61k
              &found);
268
3.61k
  }
269
270
  /* Register and initialize fields of ProcLWLockTranche */
271
3.61k
  LWLockRegisterTranche(LWTRANCHE_PROC, "proc");
272
3.61k
}
273
274
/*
275
 * Add the specified PGPROC to the shared array.
276
 */
277
void
278
ProcArrayAdd(PGPROC *proc)
279
2.55k
{
280
2.55k
  ProcArrayStruct *arrayP = procArray;
281
2.55k
  int     index;
282
283
2.55k
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
284
285
2.55k
  if (arrayP->numProcs >= arrayP->maxProcs)
286
0
  {
287
    /*
288
     * Oops, no room.  (This really shouldn't happen, since there is a
289
     * fixed supply of PGPROC structs too, and so we should have failed
290
     * earlier.)
291
     */
292
0
    LWLockRelease(ProcArrayLock);
293
0
    ereport(FATAL,
294
0
        (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
295
0
         errmsg("sorry, too many clients already")));
296
0
  }
297
298
  /*
299
   * Keep the procs array sorted by (PGPROC *) so that we can utilize
300
   * locality of references much better. This is useful while traversing the
301
   * ProcArray because there is an increased likelihood of finding the next
302
   * PGPROC structure in the cache.
303
   *
304
   * Since the occurrence of adding/removing a proc is much lower than the
305
   * access to the ProcArray itself, the overhead should be marginal
306
   */
307
2.64k
  for (index = 0; index < arrayP->numProcs; index++)
308
888
  {
309
    /*
310
     * If we are the first PGPROC or if we have found our right position
311
     * in the array, break
312
     */
313
888
    if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno))
314
798
      break;
315
888
  }
316
317
2.55k
  memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index],
318
2.55k
      (arrayP->numProcs - index) * sizeof(int));
319
2.55k
  arrayP->pgprocnos[index] = proc->pgprocno;
320
2.55k
  arrayP->numProcs++;
321
322
2.55k
  LWLockRelease(ProcArrayLock);
323
2.55k
}
324
325
/*
326
 * Remove the specified PGPROC from the shared array.
327
 *
328
 * When latestXid is a valid XID, we are removing a live 2PC gxact from the
329
 * array, and thus causing it to appear as "not running" anymore.  In this
330
 * case we must advance latestCompletedXid.  (This is essentially the same
331
 * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
332
 * the ProcArrayLock only once, and don't damage the content of the PGPROC;
333
 * twophase.c depends on the latter.)
334
 */
335
void
336
ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
337
2.55k
{
338
2.55k
  ProcArrayStruct *arrayP = procArray;
339
2.55k
  int     index;
340
341
#ifdef XIDCACHE_DEBUG
342
  /* dump stats at backend shutdown, but not prepared-xact end */
343
  if (proc->pid != 0)
344
    DisplayXidCache();
345
#endif
346
347
2.55k
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
348
349
  /*
350
   * Postgres transaction related code-paths are disabled for YB.
351
   */
352
353
2.55k
  if (!IsYugaByteEnabled()) {
354
904
    if (TransactionIdIsValid(latestXid))
355
0
    {
356
0
      Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
357
358
      /* Advance global latestCompletedXid while holding the lock */
359
0
      if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
360
0
                                latestXid))
361
0
        ShmemVariableCache->latestCompletedXid = latestXid;
362
0
    }
363
904
    else
364
904
    {
365
      /* Shouldn't be trying to remove a live transaction here */
366
904
      Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
367
904
    }
368
904
  }
369
370
4.05k
  for (index = 0; index < arrayP->numProcs; index++)
371
4.04k
  {
372
4.04k
    if (arrayP->pgprocnos[index] == proc->pgprocno)
373
2.55k
    {
374
      /* Keep the PGPROC array sorted. See notes above */
375
2.55k
      memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1],
376
2.55k
          (arrayP->numProcs - index - 1) * sizeof(int));
377
2.55k
      arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
378
2.55k
      arrayP->numProcs--;
379
2.55k
      LWLockRelease(ProcArrayLock);
380
2.55k
      return;
381
2.55k
    }
382
4.04k
  }
383
384
  /* Oops */
385
1
  LWLockRelease(ProcArrayLock);
386
387
1
  elog(LOG, "failed to find proc %p in ProcArray", proc);
388
1
}
389
390
391
/*
392
 * ProcArrayEndTransaction -- mark a transaction as no longer running
393
 *
394
 * This is used interchangeably for commit and abort cases.  The transaction
395
 * commit/abort must already be reported to WAL and pg_xact.
396
 *
397
 * proc is currently always MyProc, but we pass it explicitly for flexibility.
398
 * latestXid is the latest Xid among the transaction's main XID and
399
 * subtransactions, or InvalidTransactionId if it has no XID.  (We must ask
400
 * the caller to pass latestXid, instead of computing it from the PGPROC's
401
 * contents, because the subxid information in the PGPROC might be
402
 * incomplete.)
403
 */
404
void
405
ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
406
154k
{
407
154k
  PGXACT     *pgxact = &allPgXact[proc->pgprocno];
408
409
154k
  if (TransactionIdIsValid(latestXid))
410
111
  {
411
    /*
412
     * We must lock ProcArrayLock while clearing our advertised XID, so
413
     * that we do not exit the set of "running" transactions while someone
414
     * else is taking a snapshot.  See discussion in
415
     * src/backend/access/transam/README.
416
     */
417
111
    Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
418
419
    /*
420
     * If we can immediately acquire ProcArrayLock, we clear our own XID
421
     * and release the lock.  If not, use group XID clearing to improve
422
     * efficiency.
423
     */
424
111
    if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
425
111
    {
426
111
      if (IsCurrentTxnWithPGRel())
427
111
        ProcArrayEndTransactionInternal(proc, pgxact, latestXid);
428
111
      LWLockRelease(ProcArrayLock);
429
111
    }
430
0
    else if (IsCurrentTxnWithPGRel())
431
0
      ProcArrayGroupClearXid(proc, latestXid);
432
111
  }
433
154k
  else
434
154k
  {
435
    /*
436
     * If we have no XID, we don't need to lock, since we won't affect
437
     * anyone else's calculation of a snapshot.  We might change their
438
     * estimate of global xmin, but that's OK.
439
     */
440
154k
    Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
441
442
154k
    proc->lxid = InvalidLocalTransactionId;
443
154k
    pgxact->xmin = InvalidTransactionId;
444
    /* must be cleared with xid/xmin: */
445
154k
    pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
446
154k
    pgxact->delayChkpt = false; /* be sure this is cleared in abort */
447
154k
    proc->recoveryConflictPending = false;
448
449
154k
    Assert(pgxact->nxids == 0);
450
154k
    Assert(pgxact->overflowed == false);
451
154k
  }
452
154k
}
453
454
/*
455
 * Mark a write transaction as no longer running.
456
 *
457
 * We don't do any locking here; caller must handle that.
458
 */
459
static inline void
460
ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
461
                TransactionId latestXid)
462
111
{
463
111
  pgxact->xid = InvalidTransactionId;
464
111
  proc->lxid = InvalidLocalTransactionId;
465
111
  pgxact->xmin = InvalidTransactionId;
466
  /* must be cleared with xid/xmin: */
467
111
  pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
468
111
  pgxact->delayChkpt = false; /* be sure this is cleared in abort */
469
111
  proc->recoveryConflictPending = false;
470
471
  /* Clear the subtransaction-XID cache too while holding the lock */
472
111
  pgxact->nxids = 0;
473
111
  pgxact->overflowed = false;
474
475
  /* Also advance global latestCompletedXid while holding the lock */
476
111
  if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
477
111
                latestXid))
478
110
    ShmemVariableCache->latestCompletedXid = latestXid;
479
111
}
480
481
/*
482
 * ProcArrayGroupClearXid -- group XID clearing
483
 *
484
 * When we cannot immediately acquire ProcArrayLock in exclusive mode at
485
 * commit time, add ourselves to a list of processes that need their XIDs
486
 * cleared.  The first process to add itself to the list will acquire
487
 * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
488
 * on behalf of all group members.  This avoids a great deal of contention
489
 * around ProcArrayLock when many processes are trying to commit at once,
490
 * since the lock need not be repeatedly handed off from one committing
491
 * process to the next.
492
 */
493
static void
494
ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
495
0
{
496
0
  volatile PROC_HDR *procglobal = ProcGlobal;
497
0
  uint32    nextidx;
498
0
  uint32    wakeidx;
499
500
  /* We should definitely have an XID to clear. */
501
0
  Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
502
503
  /* Add ourselves to the list of processes needing a group XID clear. */
504
0
  proc->procArrayGroupMember = true;
505
0
  proc->procArrayGroupMemberXid = latestXid;
506
0
  while (true)
507
0
  {
508
0
    nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
509
0
    pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx);
510
511
0
    if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst,
512
0
                       &nextidx,
513
0
                       (uint32) proc->pgprocno))
514
0
      break;
515
0
  }
516
517
  /*
518
   * If the list was not empty, the leader will clear our XID.  It is
519
   * impossible to have followers without a leader because the first process
520
   * that has added itself to the list will always have nextidx as
521
   * INVALID_PGPROCNO.
522
   */
523
0
  if (nextidx != INVALID_PGPROCNO)
524
0
  {
525
0
    int     extraWaits = 0;
526
527
    /* Sleep until the leader clears our XID. */
528
0
    pgstat_report_wait_start(WAIT_EVENT_PROCARRAY_GROUP_UPDATE);
529
0
    for (;;)
530
0
    {
531
      /* acts as a read barrier */
532
0
      PGSemaphoreLock(proc->sem);
533
0
      if (!proc->procArrayGroupMember)
534
0
        break;
535
0
      extraWaits++;
536
0
    }
537
0
    pgstat_report_wait_end();
538
539
0
    Assert(pg_atomic_read_u32(&proc->procArrayGroupNext) == INVALID_PGPROCNO);
540
541
    /* Fix semaphore count for any absorbed wakeups */
542
0
    while (extraWaits-- > 0)
543
0
      PGSemaphoreUnlock(proc->sem);
544
0
    return;
545
0
  }
546
547
  /* We are the leader.  Acquire the lock on behalf of everyone. */
548
0
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
549
550
  /*
551
   * Now that we've got the lock, clear the list of processes waiting for
552
   * group XID clearing, saving a pointer to the head of the list.  Trying
553
   * to pop elements one at a time could lead to an ABA problem.
554
   */
555
0
  while (true)
556
0
  {
557
0
    nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
558
0
    if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst,
559
0
                       &nextidx,
560
0
                       INVALID_PGPROCNO))
561
0
      break;
562
0
  }
563
564
  /* Remember head of list so we can perform wakeups after dropping lock. */
565
0
  wakeidx = nextidx;
566
567
  /* Walk the list and clear all XIDs. */
568
0
  while (nextidx != INVALID_PGPROCNO)
569
0
  {
570
0
    PGPROC     *proc = &allProcs[nextidx];
571
0
    PGXACT     *pgxact = &allPgXact[nextidx];
572
573
0
    ProcArrayEndTransactionInternal(proc, pgxact, proc->procArrayGroupMemberXid);
574
575
    /* Move to next proc in list. */
576
0
    nextidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
577
0
  }
578
579
  /* We're done with the lock now. */
580
0
  LWLockRelease(ProcArrayLock);
581
582
  /*
583
   * Now that we've released the lock, go back and wake everybody up.  We
584
   * don't do this under the lock so as to keep lock hold times to a
585
   * minimum.  The system calls we need to perform to wake other processes
586
   * up are probably much slower than the simple memory writes we did while
587
   * holding the lock.
588
   */
589
0
  while (wakeidx != INVALID_PGPROCNO)
590
0
  {
591
0
    PGPROC     *proc = &allProcs[wakeidx];
592
593
0
    wakeidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
594
0
    pg_atomic_write_u32(&proc->procArrayGroupNext, INVALID_PGPROCNO);
595
596
    /* ensure all previous writes are visible before follower continues. */
597
0
    pg_write_barrier();
598
599
0
    proc->procArrayGroupMember = false;
600
601
0
    if (proc != MyProc)
602
0
      PGSemaphoreUnlock(proc->sem);
603
0
  }
604
0
}
605
606
/*
607
 * ProcArrayClearTransaction -- clear the transaction fields
608
 *
609
 * This is used after successfully preparing a 2-phase transaction.  We are
610
 * not actually reporting the transaction's XID as no longer running --- it
611
 * will still appear as running because the 2PC's gxact is in the ProcArray
612
 * too.  We just have to clear out our own PGXACT.
613
 */
614
void
615
ProcArrayClearTransaction(PGPROC *proc)
616
0
{
617
0
  PGXACT     *pgxact = &allPgXact[proc->pgprocno];
618
619
0
  if (IsYugaByteEnabled()) {
620
0
    return;
621
0
  }
622
623
  /*
624
   * We can skip locking ProcArrayLock here, because this action does not
625
   * actually change anyone's view of the set of running XIDs: our entry is
626
   * duplicate with the gxact that has already been inserted into the
627
   * ProcArray.
628
   */
629
0
  pgxact->xid = InvalidTransactionId;
630
0
  proc->lxid = InvalidLocalTransactionId;
631
0
  pgxact->xmin = InvalidTransactionId;
632
0
  proc->recoveryConflictPending = false;
633
634
  /* redundant, but just in case */
635
0
  pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
636
0
  pgxact->delayChkpt = false;
637
638
  /* Clear the subtransaction-XID cache too */
639
0
  pgxact->nxids = 0;
640
0
  pgxact->overflowed = false;
641
0
}
642
643
/*
644
 * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
645
 *
646
 * Remember up to where the startup process initialized the CLOG and subtrans
647
 * so we can ensure it's initialized gaplessly up to the point where necessary
648
 * while in recovery.
649
 */
650
void
651
ProcArrayInitRecovery(TransactionId initializedUptoXID)
652
0
{
653
0
  Assert(standbyState == STANDBY_INITIALIZED);
654
0
  Assert(TransactionIdIsNormal(initializedUptoXID));
655
656
0
  if (IsYugaByteEnabled()) {
657
0
    return;
658
0
  }
659
660
  /*
661
   * we set latestObservedXid to the xid SUBTRANS has been initialized up
662
   * to, so we can extend it from that point onwards in
663
   * RecordKnownAssignedTransactionIds, and when we get consistent in
664
   * ProcArrayApplyRecoveryInfo().
665
   */
666
0
  latestObservedXid = initializedUptoXID;
667
0
  TransactionIdRetreat(latestObservedXid);
668
0
}
669
670
/*
671
 * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
672
 *
673
 * Takes us through 3 states: Initialized, Pending and Ready.
674
 * Normal case is to go all the way to Ready straight away, though there
675
 * are atypical cases where we need to take it in steps.
676
 *
677
 * Use the data about running transactions on master to create the initial
678
 * state of KnownAssignedXids. We also use these records to regularly prune
679
 * KnownAssignedXids because we know it is possible that some transactions
680
 * with FATAL errors fail to write abort records, which could cause eventual
681
 * overflow.
682
 *
683
 * See comments for LogStandbySnapshot().
684
 */
685
void
686
ProcArrayApplyRecoveryInfo(RunningTransactions running)
687
0
{
688
0
  TransactionId *xids;
689
0
  int     nxids;
690
0
  TransactionId nextXid;
691
0
  int     i;
692
693
0
  if (IsYugaByteEnabled()) {
694
0
    return;
695
0
  }
696
697
0
  Assert(standbyState >= STANDBY_INITIALIZED);
698
0
  Assert(TransactionIdIsValid(running->nextXid));
699
0
  Assert(TransactionIdIsValid(running->oldestRunningXid));
700
0
  Assert(TransactionIdIsNormal(running->latestCompletedXid));
701
702
  /*
703
   * Remove stale transactions, if any.
704
   */
705
0
  ExpireOldKnownAssignedTransactionIds(running->oldestRunningXid);
706
707
  /*
708
   * Remove stale locks, if any.
709
   */
710
0
  StandbyReleaseOldLocks(running->oldestRunningXid);
711
712
  /*
713
   * If our snapshot is already valid, nothing else to do...
714
   */
715
0
  if (standbyState == STANDBY_SNAPSHOT_READY)
716
0
    return;
717
718
  /*
719
   * If our initial RunningTransactionsData had an overflowed snapshot then
720
   * we knew we were missing some subxids from our snapshot. If we continue
721
   * to see overflowed snapshots then we might never be able to start up, so
722
   * we make another test to see if our snapshot is now valid. We know that
723
   * the missing subxids are equal to or earlier than nextXid. After we
724
   * initialise we continue to apply changes during recovery, so once the
725
   * oldestRunningXid is later than the nextXid from the initial snapshot we
726
   * know that we no longer have missing information and can mark the
727
   * snapshot as valid.
728
   */
729
0
  if (standbyState == STANDBY_SNAPSHOT_PENDING)
730
0
  {
731
    /*
732
     * If the snapshot isn't overflowed or if its empty we can reset our
733
     * pending state and use this snapshot instead.
734
     */
735
0
    if (!running->subxid_overflow || running->xcnt == 0)
736
0
    {
737
      /*
738
       * If we have already collected known assigned xids, we need to
739
       * throw them away before we apply the recovery snapshot.
740
       */
741
0
      KnownAssignedXidsReset();
742
0
      standbyState = STANDBY_INITIALIZED;
743
0
    }
744
0
    else
745
0
    {
746
0
      if (TransactionIdPrecedes(standbySnapshotPendingXmin,
747
0
                    running->oldestRunningXid))
748
0
      {
749
0
        standbyState = STANDBY_SNAPSHOT_READY;
750
0
        elog(trace_recovery(DEBUG1),
751
0
           "recovery snapshots are now enabled");
752
0
      }
753
0
      else
754
0
        elog(trace_recovery(DEBUG1),
755
0
           "recovery snapshot waiting for non-overflowed snapshot or "
756
0
           "until oldest active xid on standby is at least %u (now %u)",
757
0
           standbySnapshotPendingXmin,
758
0
           running->oldestRunningXid);
759
0
      return;
760
0
    }
761
0
  }
762
763
0
  Assert(standbyState == STANDBY_INITIALIZED);
764
765
  /*
766
   * OK, we need to initialise from the RunningTransactionsData record.
767
   *
768
   * NB: this can be reached at least twice, so make sure new code can deal
769
   * with that.
770
   */
771
772
  /*
773
   * Nobody else is running yet, but take locks anyhow
774
   */
775
0
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
776
777
  /*
778
   * KnownAssignedXids is sorted so we cannot just add the xids, we have to
779
   * sort them first.
780
   *
781
   * Some of the new xids are top-level xids and some are subtransactions.
782
   * We don't call SubtransSetParent because it doesn't matter yet. If we
783
   * aren't overflowed then all xids will fit in snapshot and so we don't
784
   * need subtrans. If we later overflow, an xid assignment record will add
785
   * xids to subtrans. If RunningXacts is overflowed then we don't have
786
   * enough information to correctly update subtrans anyway.
787
   */
788
789
  /*
790
   * Allocate a temporary array to avoid modifying the array passed as
791
   * argument.
792
   */
793
0
  xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt));
794
795
  /*
796
   * Add to the temp array any xids which have not already completed.
797
   */
798
0
  nxids = 0;
799
0
  for (i = 0; i < running->xcnt + running->subxcnt; i++)
800
0
  {
801
0
    TransactionId xid = running->xids[i];
802
803
    /*
804
     * The running-xacts snapshot can contain xids that were still visible
805
     * in the procarray when the snapshot was taken, but were already
806
     * WAL-logged as completed. They're not running anymore, so ignore
807
     * them.
808
     */
809
0
    if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
810
0
      continue;
811
812
0
    xids[nxids++] = xid;
813
0
  }
814
815
0
  if (nxids > 0)
816
0
  {
817
0
    if (procArray->numKnownAssignedXids != 0)
818
0
    {
819
0
      LWLockRelease(ProcArrayLock);
820
0
      elog(ERROR, "KnownAssignedXids is not empty");
821
0
    }
822
823
    /*
824
     * Sort the array so that we can add them safely into
825
     * KnownAssignedXids.
826
     */
827
0
    qsort(xids, nxids, sizeof(TransactionId), xidComparator);
828
829
    /*
830
     * Add the sorted snapshot into KnownAssignedXids.  The running-xacts
831
     * snapshot may include duplicated xids because of prepared
832
     * transactions, so ignore them.
833
     */
834
0
    for (i = 0; i < nxids; i++)
835
0
    {
836
0
      if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i]))
837
0
      {
838
0
        elog(DEBUG1,
839
0
           "found duplicated transaction %u for KnownAssignedXids insertion",
840
0
           xids[i]);
841
0
        continue;
842
0
      }
843
0
      KnownAssignedXidsAdd(xids[i], xids[i], true);
844
0
    }
845
846
0
    KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
847
0
  }
848
849
0
  pfree(xids);
850
851
  /*
852
   * latestObservedXid is at least set to the point where SUBTRANS was
853
   * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid
854
   * RecordKnownAssignedTransactionIds() was called for.  Initialize
855
   * subtrans from thereon, up to nextXid - 1.
856
   *
857
   * We need to duplicate parts of RecordKnownAssignedTransactionId() here,
858
   * because we've just added xids to the known assigned xids machinery that
859
   * haven't gone through RecordKnownAssignedTransactionId().
860
   */
861
0
  Assert(TransactionIdIsNormal(latestObservedXid));
862
0
  TransactionIdAdvance(latestObservedXid);
863
0
  while (TransactionIdPrecedes(latestObservedXid, running->nextXid))
864
0
  {
865
0
    ExtendSUBTRANS(latestObservedXid);
866
0
    TransactionIdAdvance(latestObservedXid);
867
0
  }
868
0
  TransactionIdRetreat(latestObservedXid);  /* = running->nextXid - 1 */
869
870
  /* ----------
871
   * Now we've got the running xids we need to set the global values that
872
   * are used to track snapshots as they evolve further.
873
   *
874
   * - latestCompletedXid which will be the xmax for snapshots
875
   * - lastOverflowedXid which shows whether snapshots overflow
876
   * - nextXid
877
   *
878
   * If the snapshot overflowed, then we still initialise with what we know,
879
   * but the recovery snapshot isn't fully valid yet because we know there
880
   * are some subxids missing. We don't know the specific subxids that are
881
   * missing, so conservatively assume the last one is latestObservedXid.
882
   * ----------
883
   */
884
0
  if (running->subxid_overflow)
885
0
  {
886
0
    standbyState = STANDBY_SNAPSHOT_PENDING;
887
888
0
    standbySnapshotPendingXmin = latestObservedXid;
889
0
    procArray->lastOverflowedXid = latestObservedXid;
890
0
  }
891
0
  else
892
0
  {
893
0
    standbyState = STANDBY_SNAPSHOT_READY;
894
895
0
    standbySnapshotPendingXmin = InvalidTransactionId;
896
0
  }
897
898
  /*
899
   * If a transaction wrote a commit record in the gap between taking and
900
   * logging the snapshot then latestCompletedXid may already be higher than
901
   * the value from the snapshot, so check before we use the incoming value.
902
   */
903
0
  if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
904
0
                running->latestCompletedXid))
905
0
    ShmemVariableCache->latestCompletedXid = running->latestCompletedXid;
906
907
0
  Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid));
908
909
0
  LWLockRelease(ProcArrayLock);
910
911
  /*
912
   * ShmemVariableCache->nextXid must be beyond any observed xid.
913
   *
914
   * We don't expect anyone else to modify nextXid, hence we don't need to
915
   * hold a lock while examining it.  We still acquire the lock to modify
916
   * it, though.
917
   */
918
0
  nextXid = latestObservedXid;
919
0
  TransactionIdAdvance(nextXid);
920
0
  if (TransactionIdFollows(nextXid, ShmemVariableCache->nextXid))
921
0
  {
922
0
    LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
923
0
    ShmemVariableCache->nextXid = nextXid;
924
0
    LWLockRelease(XidGenLock);
925
0
  }
926
927
0
  Assert(TransactionIdIsValid(ShmemVariableCache->nextXid));
928
929
0
  KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
930
0
  if (standbyState == STANDBY_SNAPSHOT_READY)
931
0
    elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled");
932
0
  else
933
0
    elog(trace_recovery(DEBUG1),
934
0
       "recovery snapshot waiting for non-overflowed snapshot or "
935
0
       "until oldest active xid on standby is at least %u (now %u)",
936
0
       standbySnapshotPendingXmin,
937
0
       running->oldestRunningXid);
938
0
}
939
940
/*
941
 * ProcArrayApplyXidAssignment
942
 *    Process an XLOG_XACT_ASSIGNMENT WAL record
943
 */
944
void
945
ProcArrayApplyXidAssignment(TransactionId topxid,
946
              int nsubxids, TransactionId *subxids)
947
0
{
948
0
  TransactionId max_xid;
949
0
  int     i;
950
951
0
  if (IsYugaByteEnabled()) {
952
0
    return;
953
0
  }
954
955
0
  Assert(standbyState >= STANDBY_INITIALIZED);
956
957
0
  max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
958
959
  /*
960
   * Mark all the subtransactions as observed.
961
   *
962
   * NOTE: This will fail if the subxid contains too many previously
963
   * unobserved xids to fit into known-assigned-xids. That shouldn't happen
964
   * as the code stands, because xid-assignment records should never contain
965
   * more than PGPROC_MAX_CACHED_SUBXIDS entries.
966
   */
967
0
  RecordKnownAssignedTransactionIds(max_xid);
968
969
  /*
970
   * Notice that we update pg_subtrans with the top-level xid, rather than
971
   * the parent xid. This is a difference between normal processing and
972
   * recovery, yet is still correct in all cases. The reason is that
973
   * subtransaction commit is not marked in clog until commit processing, so
974
   * all aborted subtransactions have already been clearly marked in clog.
975
   * As a result we are able to refer directly to the top-level
976
   * transaction's state rather than skipping through all the intermediate
977
   * states in the subtransaction tree. This should be the first time we
978
   * have attempted to SubTransSetParent().
979
   */
980
0
  for (i = 0; i < nsubxids; i++)
981
0
    SubTransSetParent(subxids[i], topxid);
982
983
  /* KnownAssignedXids isn't maintained yet, so we're done for now */
984
0
  if (standbyState == STANDBY_INITIALIZED)
985
0
    return;
986
987
  /*
988
   * Uses same locking as transaction commit
989
   */
990
0
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
991
992
  /*
993
   * Remove subxids from known-assigned-xacts.
994
   */
995
0
  KnownAssignedXidsRemoveTree(InvalidTransactionId, nsubxids, subxids);
996
997
  /*
998
   * Advance lastOverflowedXid to be at least the last of these subxids.
999
   */
1000
0
  if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
1001
0
    procArray->lastOverflowedXid = max_xid;
1002
1003
0
  LWLockRelease(ProcArrayLock);
1004
0
}
1005
1006
/*
1007
 * TransactionIdIsInProgress -- is given transaction running in some backend
1008
 *
1009
 * Aside from some shortcuts such as checking RecentXmin and our own Xid,
1010
 * there are four possibilities for finding a running transaction:
1011
 *
1012
 * 1. The given Xid is a main transaction Id.  We will find this out cheaply
1013
 * by looking at the PGXACT struct for each backend.
1014
 *
1015
 * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
1016
 * We can find this out cheaply too.
1017
 *
1018
 * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
1019
 * if the Xid is running on the master.
1020
 *
1021
 * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
1022
 * if that is running according to PGXACT or KnownAssignedXids.  This is the
1023
 * slowest way, but sadly it has to be done always if the others failed,
1024
 * unless we see that the cached subxact sets are complete (none have
1025
 * overflowed).
1026
 *
1027
 * ProcArrayLock has to be held while we do 1, 2, 3.  If we save the top Xids
1028
 * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
1029
 * This buys back some concurrency (and we can't retrieve the main Xids from
1030
 * PGXACT again anyway; see GetNewTransactionId).
1031
 */
1032
bool
1033
TransactionIdIsInProgress(TransactionId xid)
1034
12.6k
{
1035
12.6k
  static TransactionId *xids = NULL;
1036
12.6k
  int     nxids = 0;
1037
12.6k
  ProcArrayStruct *arrayP = procArray;
1038
12.6k
  TransactionId topxid;
1039
12.6k
  int     i,
1040
12.6k
        j;
1041
1042
12.6k
  if (IsYugaByteEnabled()) {
1043
12.6k
    return false;
1044
12.6k
  }
1045
1046
  /*
1047
   * Don't bother checking a transaction older than RecentXmin; it could not
1048
   * possibly still be running.  (Note: in particular, this guarantees that
1049
   * we reject InvalidTransactionId, FrozenTransactionId, etc as not
1050
   * running.)
1051
   */
1052
0
  if (TransactionIdPrecedes(xid, RecentXmin))
1053
0
  {
1054
0
    xc_by_recent_xmin_inc();
1055
0
    return false;
1056
0
  }
1057
1058
  /*
1059
   * We may have just checked the status of this transaction, so if it is
1060
   * already known to be completed, we can fall out without any access to
1061
   * shared memory.
1062
   */
1063
0
  if (TransactionIdIsKnownCompleted(xid))
1064
0
  {
1065
0
    xc_by_known_xact_inc();
1066
0
    return false;
1067
0
  }
1068
1069
  /*
1070
   * Also, we can handle our own transaction (and subtransactions) without
1071
   * any access to shared memory.
1072
   */
1073
0
  if (TransactionIdIsCurrentTransactionId(xid))
1074
0
  {
1075
0
    xc_by_my_xact_inc();
1076
0
    return true;
1077
0
  }
1078
1079
  /*
1080
   * If first time through, get workspace to remember main XIDs in. We
1081
   * malloc it permanently to avoid repeated palloc/pfree overhead.
1082
   */
1083
0
  if (xids == NULL)
1084
0
  {
1085
    /*
1086
     * In hot standby mode, reserve enough space to hold all xids in the
1087
     * known-assigned list. If we later finish recovery, we no longer need
1088
     * the bigger array, but we don't bother to shrink it.
1089
     */
1090
0
    int     maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
1091
1092
0
    xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
1093
0
    if (xids == NULL)
1094
0
      ereport(ERROR,
1095
0
          (errcode(ERRCODE_OUT_OF_MEMORY),
1096
0
           errmsg("out of memory")));
1097
0
  }
1098
1099
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
1100
1101
  /*
1102
   * Now that we have the lock, we can check latestCompletedXid; if the
1103
   * target Xid is after that, it's surely still running.
1104
   */
1105
0
  if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xid))
1106
0
  {
1107
0
    LWLockRelease(ProcArrayLock);
1108
0
    xc_by_latest_xid_inc();
1109
0
    return true;
1110
0
  }
1111
1112
  /* No shortcuts, gotta grovel through the array */
1113
0
  for (i = 0; i < arrayP->numProcs; i++)
1114
0
  {
1115
0
    int     pgprocno = arrayP->pgprocnos[i];
1116
0
    volatile PGPROC *proc = &allProcs[pgprocno];
1117
0
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
1118
0
    TransactionId pxid;
1119
1120
    /* Ignore my own proc --- dealt with it above */
1121
0
    if (proc == MyProc)
1122
0
      continue;
1123
1124
    /* Fetch xid just once - see GetNewTransactionId */
1125
0
    pxid = pgxact->xid;
1126
1127
0
    if (!TransactionIdIsValid(pxid))
1128
0
      continue;
1129
1130
    /*
1131
     * Step 1: check the main Xid
1132
     */
1133
0
    if (TransactionIdEquals(pxid, xid))
1134
0
    {
1135
0
      LWLockRelease(ProcArrayLock);
1136
0
      xc_by_main_xid_inc();
1137
0
      return true;
1138
0
    }
1139
1140
    /*
1141
     * We can ignore main Xids that are younger than the target Xid, since
1142
     * the target could not possibly be their child.
1143
     */
1144
0
    if (TransactionIdPrecedes(xid, pxid))
1145
0
      continue;
1146
1147
    /*
1148
     * Step 2: check the cached child-Xids arrays
1149
     */
1150
0
    for (j = pgxact->nxids - 1; j >= 0; j--)
1151
0
    {
1152
      /* Fetch xid just once - see GetNewTransactionId */
1153
0
      TransactionId cxid = proc->subxids.xids[j];
1154
1155
0
      if (TransactionIdEquals(cxid, xid))
1156
0
      {
1157
0
        LWLockRelease(ProcArrayLock);
1158
0
        xc_by_child_xid_inc();
1159
0
        return true;
1160
0
      }
1161
0
    }
1162
1163
    /*
1164
     * Save the main Xid for step 4.  We only need to remember main Xids
1165
     * that have uncached children.  (Note: there is no race condition
1166
     * here because the overflowed flag cannot be cleared, only set, while
1167
     * we hold ProcArrayLock.  So we can't miss an Xid that we need to
1168
     * worry about.)
1169
     */
1170
0
    if (pgxact->overflowed)
1171
0
      xids[nxids++] = pxid;
1172
0
  }
1173
1174
  /*
1175
   * Step 3: in hot standby mode, check the known-assigned-xids list.  XIDs
1176
   * in the list must be treated as running.
1177
   */
1178
0
  if (RecoveryInProgress())
1179
0
  {
1180
    /* none of the PGXACT entries should have XIDs in hot standby mode */
1181
0
    Assert(nxids == 0);
1182
1183
0
    if (KnownAssignedXidExists(xid))
1184
0
    {
1185
0
      LWLockRelease(ProcArrayLock);
1186
0
      xc_by_known_assigned_inc();
1187
0
      return true;
1188
0
    }
1189
1190
    /*
1191
     * If the KnownAssignedXids overflowed, we have to check pg_subtrans
1192
     * too.  Fetch all xids from KnownAssignedXids that are lower than
1193
     * xid, since if xid is a subtransaction its parent will always have a
1194
     * lower value.  Note we will collect both main and subXIDs here, but
1195
     * there's no help for it.
1196
     */
1197
0
    if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid))
1198
0
      nxids = KnownAssignedXidsGet(xids, xid);
1199
0
  }
1200
1201
0
  LWLockRelease(ProcArrayLock);
1202
1203
  /*
1204
   * If none of the relevant caches overflowed, we know the Xid is not
1205
   * running without even looking at pg_subtrans.
1206
   */
1207
0
  if (nxids == 0)
1208
0
  {
1209
0
    xc_no_overflow_inc();
1210
0
    return false;
1211
0
  }
1212
1213
  /*
1214
   * Step 4: have to check pg_subtrans.
1215
   *
1216
   * At this point, we know it's either a subtransaction of one of the Xids
1217
   * in xids[], or it's not running.  If it's an already-failed
1218
   * subtransaction, we want to say "not running" even though its parent may
1219
   * still be running.  So first, check pg_xact to see if it's been aborted.
1220
   */
1221
0
  xc_slow_answer_inc();
1222
1223
0
  if (TransactionIdDidAbort(xid))
1224
0
    return false;
1225
1226
  /*
1227
   * It isn't aborted, so check whether the transaction tree it belongs to
1228
   * is still running (or, more precisely, whether it was running when we
1229
   * held ProcArrayLock).
1230
   */
1231
0
  topxid = SubTransGetTopmostTransaction(xid);
1232
0
  Assert(TransactionIdIsValid(topxid));
1233
0
  if (!TransactionIdEquals(topxid, xid))
1234
0
  {
1235
0
    for (i = 0; i < nxids; i++)
1236
0
    {
1237
0
      if (TransactionIdEquals(xids[i], topxid))
1238
0
        return true;
1239
0
    }
1240
0
  }
1241
1242
0
  return false;
1243
0
}
1244
1245
/*
1246
 * TransactionIdIsActive -- is xid the top-level XID of an active backend?
1247
 *
1248
 * This differs from TransactionIdIsInProgress in that it ignores prepared
1249
 * transactions, as well as transactions running on the master if we're in
1250
 * hot standby.  Also, we ignore subtransactions since that's not needed
1251
 * for current uses.
1252
 */
1253
bool
1254
TransactionIdIsActive(TransactionId xid)
1255
0
{
1256
0
  bool    result = false;
1257
0
  ProcArrayStruct *arrayP = procArray;
1258
0
  int     i;
1259
1260
0
  if (IsYugaByteEnabled()) {
1261
0
    return false;
1262
0
  }
1263
1264
  /*
1265
   * Don't bother checking a transaction older than RecentXmin; it could not
1266
   * possibly still be running.
1267
   */
1268
0
  if (TransactionIdPrecedes(xid, RecentXmin))
1269
0
    return false;
1270
1271
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
1272
1273
0
  for (i = 0; i < arrayP->numProcs; i++)
1274
0
  {
1275
0
    int     pgprocno = arrayP->pgprocnos[i];
1276
0
    volatile PGPROC *proc = &allProcs[pgprocno];
1277
0
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
1278
0
    TransactionId pxid;
1279
1280
    /* Fetch xid just once - see GetNewTransactionId */
1281
0
    pxid = pgxact->xid;
1282
1283
0
    if (!TransactionIdIsValid(pxid))
1284
0
      continue;
1285
1286
0
    if (proc->pid == 0)
1287
0
      continue;     /* ignore prepared transactions */
1288
1289
0
    if (TransactionIdEquals(pxid, xid))
1290
0
    {
1291
0
      result = true;
1292
0
      break;
1293
0
    }
1294
0
  }
1295
1296
0
  LWLockRelease(ProcArrayLock);
1297
1298
0
  return result;
1299
0
}
1300
1301
1302
/*
1303
 * GetOldestXmin -- returns oldest transaction that was running
1304
 *          when any current transaction was started.
1305
 *
1306
 * If rel is NULL or a shared relation, all backends are considered, otherwise
1307
 * only backends running in this database are considered.
1308
 *
1309
 * The flags are used to ignore the backends in calculation when any of the
1310
 * corresponding flags is set. Typically, if you want to ignore ones with
1311
 * PROC_IN_VACUUM flag, you can use PROCARRAY_FLAGS_VACUUM.
1312
 *
1313
 * PROCARRAY_SLOTS_XMIN causes GetOldestXmin to ignore the xmin and
1314
 * catalog_xmin of any replication slots that exist in the system when
1315
 * calculating the oldest xmin.
1316
 *
1317
 * This is used by VACUUM to decide which deleted tuples must be preserved in
1318
 * the passed in table. For shared relations backends in all databases must be
1319
 * considered, but for non-shared relations that's not required, since only
1320
 * backends in my own database could ever see the tuples in them. Also, we can
1321
 * ignore concurrently running lazy VACUUMs because (a) they must be working
1322
 * on other tables, and (b) they don't need to do snapshot-based lookups.
1323
 *
1324
 * This is also used to determine where to truncate pg_subtrans.  For that
1325
 * backends in all databases have to be considered, so rel = NULL has to be
1326
 * passed in.
1327
 *
1328
 * Note: we include all currently running xids in the set of considered xids.
1329
 * This ensures that if a just-started xact has not yet set its snapshot,
1330
 * when it does set the snapshot it cannot set xmin less than what we compute.
1331
 * See notes in src/backend/access/transam/README.
1332
 *
1333
 * Note: despite the above, it's possible for the calculated value to move
1334
 * backwards on repeated calls. The calculated value is conservative, so that
1335
 * anything older is definitely not considered as running by anyone anymore,
1336
 * but the exact value calculated depends on a number of things. For example,
1337
 * if rel = NULL and there are no transactions running in the current
1338
 * database, GetOldestXmin() returns latestCompletedXid. If a transaction
1339
 * begins after that, its xmin will include in-progress transactions in other
1340
 * databases that started earlier, so another call will return a lower value.
1341
 * Nonetheless it is safe to vacuum a table in the current database with the
1342
 * first result.  There are also replication-related effects: a walsender
1343
 * process can set its xmin based on transactions that are no longer running
1344
 * in the master but are still being replayed on the standby, thus possibly
1345
 * making the GetOldestXmin reading go backwards.  In this case there is a
1346
 * possibility that we lose data that the standby would like to have, but
1347
 * unless the standby uses a replication slot to make its xmin persistent
1348
 * there is little we can do about that --- data is only protected if the
1349
 * walsender runs continuously while queries are executed on the standby.
1350
 * (The Hot Standby code deals with such cases by failing standby queries
1351
 * that needed to access already-removed data, so there's no integrity bug.)
1352
 * The return value is also adjusted with vacuum_defer_cleanup_age, so
1353
 * increasing that setting on the fly is another easy way to make
1354
 * GetOldestXmin() move backwards, with no consequences for data integrity.
1355
 */
1356
TransactionId
1357
GetOldestXmin(Relation rel, int flags)
1358
1.06k
{
1359
1.06k
  ProcArrayStruct *arrayP = procArray;
1360
1.06k
  TransactionId result;
1361
1.06k
  int     index;
1362
1.06k
  bool    allDbs;
1363
1364
1.06k
  if (IsYugaByteEnabled()) {
1365
93
    return InvalidTransactionId;
1366
93
  }
1367
1368
969
  volatile TransactionId replication_slot_xmin = InvalidTransactionId;
1369
969
  volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
1370
1371
  /*
1372
   * If we're not computing a relation specific limit, or if a shared
1373
   * relation has been passed in, backends in all databases have to be
1374
   * considered.
1375
   */
1376
969
  allDbs = rel == NULL || rel->rd_rel->relisshared;
1377
1378
  /* Cannot look for individual databases during recovery */
1379
969
  Assert(allDbs || !RecoveryInProgress());
1380
1381
969
  LWLockAcquire(ProcArrayLock, LW_SHARED);
1382
1383
  /*
1384
   * We initialize the MIN() calculation with latestCompletedXid + 1. This
1385
   * is a lower bound for the XIDs that might appear in the ProcArray later,
1386
   * and so protects us against overestimating the result due to future
1387
   * additions.
1388
   */
1389
969
  result = ShmemVariableCache->latestCompletedXid;
1390
969
  Assert(TransactionIdIsNormal(result));
1391
969
  TransactionIdAdvance(result);
1392
1393
1.95k
  for (index = 0; index < arrayP->numProcs; index++)
1394
989
  {
1395
989
    int     pgprocno = arrayP->pgprocnos[index];
1396
989
    volatile PGPROC *proc = &allProcs[pgprocno];
1397
989
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
1398
1399
989
    if (pgxact->vacuumFlags & (flags & PROCARRAY_PROC_FLAGS_MASK))
1400
0
      continue;
1401
1402
989
    if (allDbs ||
1403
0
      proc->databaseId == MyDatabaseId ||
1404
0
      proc->databaseId == 0) /* always include WalSender */
1405
989
    {
1406
      /* Fetch xid just once - see GetNewTransactionId */
1407
989
      TransactionId xid = pgxact->xid;
1408
1409
      /* First consider the transaction's own Xid, if any */
1410
989
      if (TransactionIdIsNormal(xid) &&
1411
0
        TransactionIdPrecedes(xid, result))
1412
0
        result = xid;
1413
1414
      /*
1415
       * Also consider the transaction's Xmin, if set.
1416
       *
1417
       * We must check both Xid and Xmin because a transaction might
1418
       * have an Xmin but not (yet) an Xid; conversely, if it has an
1419
       * Xid, that could determine some not-yet-set Xmin.
1420
       */
1421
989
      xid = pgxact->xmin; /* Fetch just once */
1422
989
      if (TransactionIdIsNormal(xid) &&
1423
63
        TransactionIdPrecedes(xid, result))
1424
0
        result = xid;
1425
989
    }
1426
989
  }
1427
1428
  /* fetch into volatile var while ProcArrayLock is held */
1429
969
  replication_slot_xmin = procArray->replication_slot_xmin;
1430
969
  replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
1431
1432
969
  if (RecoveryInProgress())
1433
0
  {
1434
    /*
1435
     * Check to see whether KnownAssignedXids contains an xid value older
1436
     * than the main procarray.
1437
     */
1438
0
    TransactionId kaxmin = KnownAssignedXidsGetOldestXmin();
1439
1440
0
    LWLockRelease(ProcArrayLock);
1441
1442
0
    if (TransactionIdIsNormal(kaxmin) &&
1443
0
      TransactionIdPrecedes(kaxmin, result))
1444
0
      result = kaxmin;
1445
0
  }
1446
969
  else
1447
969
  {
1448
    /*
1449
     * No other information needed, so release the lock immediately.
1450
     */
1451
969
    LWLockRelease(ProcArrayLock);
1452
1453
    /*
1454
     * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age,
1455
     * being careful not to generate a "permanent" XID.
1456
     *
1457
     * vacuum_defer_cleanup_age provides some additional "slop" for the
1458
     * benefit of hot standby queries on standby servers.  This is quick
1459
     * and dirty, and perhaps not all that useful unless the master has a
1460
     * predictable transaction rate, but it offers some protection when
1461
     * there's no walsender connection.  Note that we are assuming
1462
     * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
1463
     * so guc.c should limit it to no more than the xidStopLimit threshold
1464
     * in varsup.c.  Also note that we intentionally don't apply
1465
     * vacuum_defer_cleanup_age on standby servers.
1466
     */
1467
969
    result -= vacuum_defer_cleanup_age;
1468
969
    if (!TransactionIdIsNormal(result))
1469
0
      result = FirstNormalTransactionId;
1470
969
  }
1471
1472
  /*
1473
   * Check whether there are replication slots requiring an older xmin.
1474
   */
1475
969
  if (!(flags & PROCARRAY_SLOTS_XMIN) &&
1476
969
    TransactionIdIsValid(replication_slot_xmin) &&
1477
0
    NormalTransactionIdPrecedes(replication_slot_xmin, result))
1478
0
    result = replication_slot_xmin;
1479
1480
  /*
1481
   * After locks have been released and defer_cleanup_age has been applied,
1482
   * check whether we need to back up further to make logical decoding
1483
   * possible. We need to do so if we're computing the global limit (rel =
1484
   * NULL) or if the passed relation is a catalog relation of some kind.
1485
   */
1486
969
  if (!(flags & PROCARRAY_SLOTS_XMIN) &&
1487
969
    (rel == NULL ||
1488
0
     RelationIsAccessibleInLogicalDecoding(rel)) &&
1489
969
    TransactionIdIsValid(replication_slot_catalog_xmin) &&
1490
0
    NormalTransactionIdPrecedes(replication_slot_catalog_xmin, result))
1491
0
    result = replication_slot_catalog_xmin;
1492
1493
969
  return result;
1494
969
}
1495
1496
/*
1497
 * GetMaxSnapshotXidCount -- get max size for snapshot XID array
1498
 *
1499
 * We have to export this for use by snapmgr.c.
1500
 */
1501
int
1502
GetMaxSnapshotXidCount(void)
1503
3.29k
{
1504
3.29k
  return procArray->maxProcs;
1505
3.29k
}
1506
1507
/*
1508
 * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
1509
 *
1510
 * We have to export this for use by snapmgr.c.
1511
 */
1512
int
1513
GetMaxSnapshotSubxidCount(void)
1514
3.29k
{
1515
3.29k
  return TOTAL_MAX_CACHED_SUBXIDS;
1516
3.29k
}
1517
1518
/*
1519
 * GetSnapshotData -- returns information about running transactions.
1520
 *
1521
 * The returned snapshot includes xmin (lowest still-running xact ID),
1522
 * xmax (highest completed xact ID + 1), and a list of running xact IDs
1523
 * in the range xmin <= xid < xmax.  It is used as follows:
1524
 *    All xact IDs < xmin are considered finished.
1525
 *    All xact IDs >= xmax are considered still running.
1526
 *    For an xact ID xmin <= xid < xmax, consult list to see whether
1527
 *    it is considered running or not.
1528
 * This ensures that the set of transactions seen as "running" by the
1529
 * current xact will not change after it takes the snapshot.
1530
 *
1531
 * All running top-level XIDs are included in the snapshot, except for lazy
1532
 * VACUUM processes.  We also try to include running subtransaction XIDs,
1533
 * but since PGPROC has only a limited cache area for subxact XIDs, full
1534
 * information may not be available.  If we find any overflowed subxid arrays,
1535
 * we have to mark the snapshot's subxid data as overflowed, and extra work
1536
 * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
1537
 * in tqual.c).
1538
 *
1539
 * We also update the following backend-global variables:
1540
 *    TransactionXmin: the oldest xmin of any snapshot in use in the
1541
 *      current transaction (this is the same as MyPgXact->xmin).
1542
 *    RecentXmin: the xmin computed for the most recent snapshot.  XIDs
1543
 *      older than this are known not running any more.
1544
 *    RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
1545
 *      running transactions, except those running LAZY VACUUM).  This is
1546
 *      the same computation done by
1547
 *      GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM).
1548
 *    RecentGlobalDataXmin: the global xmin for non-catalog tables
1549
 *      >= RecentGlobalXmin
1550
 *
1551
 * Note: this function should probably not be called with an argument that's
1552
 * not statically allocated (see xip allocation below).
1553
 */
1554
Snapshot
1555
GetSnapshotData(Snapshot snapshot)
1556
244k
{
1557
244k
  ProcArrayStruct *arrayP = procArray;
1558
244k
  TransactionId xmin;
1559
244k
  TransactionId xmax;
1560
244k
  TransactionId globalxmin;
1561
244k
  int     index;
1562
244k
  int     count = 0;
1563
244k
  int     subcount = 0;
1564
244k
  bool    suboverflowed = false;
1565
244k
  volatile TransactionId replication_slot_xmin = InvalidTransactionId;
1566
244k
  volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
1567
1568
244k
  Assert(snapshot != NULL);
1569
1570
  /*
1571
   * Allocating space for maxProcs xids is usually overkill; numProcs would
1572
   * be sufficient.  But it seems better to do the malloc while not holding
1573
   * the lock, so we can't look at numProcs.  Likewise, we allocate much
1574
   * more subxip storage than is probably needed.
1575
   *
1576
   * This does open a possibility for avoiding repeated malloc/free: since
1577
   * maxProcs does not change at runtime, we can simply reuse the previous
1578
   * xip arrays if any.  (This relies on the fact that all callers pass
1579
   * static SnapshotData structs.)
1580
   */
1581
244k
  if (snapshot->xip == NULL)
1582
3.30k
  {
1583
    /*
1584
     * First call for this snapshot. Snapshot is same size whether or not
1585
     * we are in recovery, see later comments.
1586
     */
1587
3.30k
    snapshot->xip = (TransactionId *)
1588
3.30k
      malloc(GetMaxSnapshotXidCount() * sizeof(TransactionId));
1589
3.30k
    if (snapshot->xip == NULL)
1590
3.30k
      ereport(ERROR,
1591
3.30k
          (errcode(ERRCODE_OUT_OF_MEMORY),
1592
3.30k
           errmsg("out of memory")));
1593
3.30k
    Assert(snapshot->subxip == NULL);
1594
3.30k
    snapshot->subxip = (TransactionId *)
1595
3.30k
      malloc(GetMaxSnapshotSubxidCount() * sizeof(TransactionId));
1596
3.30k
    if (snapshot->subxip == NULL)
1597
3.30k
      ereport(ERROR,
1598
3.30k
          (errcode(ERRCODE_OUT_OF_MEMORY),
1599
3.30k
           errmsg("out of memory")));
1600
3.30k
  }
1601
1602
  /*
1603
   * It is sufficient to get shared lock on ProcArrayLock, even if we are
1604
   * going to set MyPgXact->xmin.
1605
   */
1606
244k
  LWLockAcquire(ProcArrayLock, LW_SHARED);
1607
1608
  /* xmax is always latestCompletedXid + 1 */
1609
244k
  xmax = ShmemVariableCache->latestCompletedXid;
1610
244k
  Assert(TransactionIdIsNormal(xmax));
1611
244k
  TransactionIdAdvance(xmax);
1612
1613
  /* initialize xmin calculation with xmax */
1614
244k
  globalxmin = xmin = xmax;
1615
1616
244k
  snapshot->takenDuringRecovery = RecoveryInProgress();
1617
1618
244k
  if (!snapshot->takenDuringRecovery)
1619
244k
  {
1620
244k
    int      *pgprocnos = arrayP->pgprocnos;
1621
244k
    int     numProcs;
1622
1623
    /*
1624
     * Spin over procArray checking xid, xmin, and subxids.  The goal is
1625
     * to gather all active xids, find the lowest xmin, and try to record
1626
     * subxids.
1627
     */
1628
244k
    numProcs = arrayP->numProcs;
1629
952k
    for (index = 0; index < numProcs; index++)
1630
707k
    {
1631
707k
      int     pgprocno = pgprocnos[index];
1632
707k
      volatile PGXACT *pgxact = &allPgXact[pgprocno];
1633
707k
      TransactionId xid;
1634
1635
      /*
1636
       * Backend is doing logical decoding which manages xmin
1637
       * separately, check below.
1638
       */
1639
707k
      if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
1640
0
        continue;
1641
1642
      /* Ignore procs running LAZY VACUUM */
1643
707k
      if (pgxact->vacuumFlags & PROC_IN_VACUUM)
1644
0
        continue;
1645
1646
      /* Update globalxmin to be the smallest valid xmin */
1647
707k
      xid = pgxact->xmin; /* fetch just once */
1648
707k
      if (TransactionIdIsNormal(xid) &&
1649
196k
        NormalTransactionIdPrecedes(xid, globalxmin))
1650
1
        globalxmin = xid;
1651
1652
      /* Fetch xid just once - see GetNewTransactionId */
1653
707k
      xid = pgxact->xid;
1654
1655
      /*
1656
       * If the transaction has no XID assigned, we can skip it; it
1657
       * won't have sub-XIDs either.  If the XID is >= xmax, we can also
1658
       * skip it; such transactions will be treated as running anyway
1659
       * (and any sub-XIDs will also be >= xmax).
1660
       */
1661
707k
      if (!TransactionIdIsNormal(xid)
1662
91
        || !NormalTransactionIdPrecedes(xid, xmax))
1663
707k
        continue;
1664
1665
      /*
1666
       * We don't include our own XIDs (if any) in the snapshot, but we
1667
       * must include them in xmin.
1668
       */
1669
18.4E
      if (NormalTransactionIdPrecedes(xid, xmin))
1670
2
        xmin = xid;
1671
18.4E
      if (pgxact == MyPgXact)
1672
2
        continue;
1673
1674
      /* Add XID to snapshot. */
1675
18.4E
      snapshot->xip[count++] = xid;
1676
1677
      /*
1678
       * Save subtransaction XIDs if possible (if we've already
1679
       * overflowed, there's no point).  Note that the subxact XIDs must
1680
       * be later than their parent, so no need to check them against
1681
       * xmin.  We could filter against xmax, but it seems better not to
1682
       * do that much work while holding the ProcArrayLock.
1683
       *
1684
       * The other backend can add more subxids concurrently, but cannot
1685
       * remove any.  Hence it's important to fetch nxids just once.
1686
       * Should be safe to use memcpy, though.  (We needn't worry about
1687
       * missing any xids added concurrently, because they must postdate
1688
       * xmax.)
1689
       *
1690
       * Again, our own XIDs are not included in the snapshot.
1691
       */
1692
18.4E
      if (!suboverflowed)
1693
0
      {
1694
0
        if (pgxact->overflowed)
1695
0
          suboverflowed = true;
1696
0
        else
1697
0
        {
1698
0
          int     nxids = pgxact->nxids;
1699
1700
0
          if (nxids > 0)
1701
0
          {
1702
0
            volatile PGPROC *proc = &allProcs[pgprocno];
1703
1704
0
            memcpy(snapshot->subxip + subcount,
1705
0
                 (void *) proc->subxids.xids,
1706
0
                 nxids * sizeof(TransactionId));
1707
0
            subcount += nxids;
1708
0
          }
1709
0
        }
1710
0
      }
1711
18.4E
    }
1712
244k
  }
1713
3
  else
1714
3
  {
1715
    /*
1716
     * We're in hot standby, so get XIDs from KnownAssignedXids.
1717
     *
1718
     * We store all xids directly into subxip[]. Here's why:
1719
     *
1720
     * In recovery we don't know which xids are top-level and which are
1721
     * subxacts, a design choice that greatly simplifies xid processing.
1722
     *
1723
     * It seems like we would want to try to put xids into xip[] only, but
1724
     * that is fairly small. We would either need to make that bigger or
1725
     * to increase the rate at which we WAL-log xid assignment; neither is
1726
     * an appealing choice.
1727
     *
1728
     * We could try to store xids into xip[] first and then into subxip[]
1729
     * if there are too many xids. That only works if the snapshot doesn't
1730
     * overflow because we do not search subxip[] in that case. A simpler
1731
     * way is to just store all xids in the subxact array because this is
1732
     * by far the bigger array. We just leave the xip array empty.
1733
     *
1734
     * Either way we need to change the way XidInMVCCSnapshot() works
1735
     * depending upon when the snapshot was taken, or change normal
1736
     * snapshot processing so it matches.
1737
     *
1738
     * Note: It is possible for recovery to end before we finish taking
1739
     * the snapshot, and for newly assigned transaction ids to be added to
1740
     * the ProcArray.  xmax cannot change while we hold ProcArrayLock, so
1741
     * those newly added transaction ids would be filtered away, so we
1742
     * need not be concerned about them.
1743
     */
1744
3
    subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
1745
3
                          xmax);
1746
1747
3
    if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid))
1748
0
      suboverflowed = true;
1749
3
  }
1750
1751
1752
  /* fetch into volatile var while ProcArrayLock is held */
1753
244k
  replication_slot_xmin = procArray->replication_slot_xmin;
1754
244k
  replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
1755
1756
244k
  if (!TransactionIdIsValid(MyPgXact->xmin))
1757
208k
    MyPgXact->xmin = TransactionXmin = xmin;
1758
1759
244k
  LWLockRelease(ProcArrayLock);
1760
1761
  /*
1762
   * Update globalxmin to include actual process xids.  This is a slightly
1763
   * different way of computing it than GetOldestXmin uses, but should give
1764
   * the same result.
1765
   */
1766
244k
  if (TransactionIdPrecedes(xmin, globalxmin))
1767
1
    globalxmin = xmin;
1768
1769
  /* Update global variables too */
1770
244k
  RecentGlobalXmin = globalxmin - vacuum_defer_cleanup_age;
1771
244k
  if (!TransactionIdIsNormal(RecentGlobalXmin))
1772
0
    RecentGlobalXmin = FirstNormalTransactionId;
1773
1774
  /* Check whether there's a replication slot requiring an older xmin. */
1775
244k
  if (TransactionIdIsValid(replication_slot_xmin) &&
1776
0
    NormalTransactionIdPrecedes(replication_slot_xmin, RecentGlobalXmin))
1777
0
    RecentGlobalXmin = replication_slot_xmin;
1778
1779
  /* Non-catalog tables can be vacuumed if older than this xid */
1780
244k
  RecentGlobalDataXmin = RecentGlobalXmin;
1781
1782
  /*
1783
   * Check whether there's a replication slot requiring an older catalog
1784
   * xmin.
1785
   */
1786
244k
  if (TransactionIdIsNormal(replication_slot_catalog_xmin) &&
1787
0
    NormalTransactionIdPrecedes(replication_slot_catalog_xmin, RecentGlobalXmin))
1788
0
    RecentGlobalXmin = replication_slot_catalog_xmin;
1789
1790
244k
  RecentXmin = xmin;
1791
1792
244k
  snapshot->xmin = xmin;
1793
244k
  snapshot->xmax = xmax;
1794
244k
  snapshot->xcnt = count;
1795
244k
  snapshot->subxcnt = subcount;
1796
244k
  snapshot->suboverflowed = suboverflowed;
1797
1798
244k
  snapshot->curcid = GetCurrentCommandId(false);
1799
1800
  /*
1801
   * This is a new snapshot, so set both refcounts to zero, and mark it as
1802
   * not copied in persistent memory.
1803
   */
1804
244k
  snapshot->active_count = 0;
1805
244k
  snapshot->regd_count = 0;
1806
244k
  snapshot->copied = false;
1807
1808
244k
  if (old_snapshot_threshold < 0)
1809
244k
  {
1810
    /*
1811
     * If not using "snapshot too old" feature, fill related fields with
1812
     * dummy values that don't require any locking.
1813
     */
1814
244k
    snapshot->lsn = InvalidXLogRecPtr;
1815
244k
    snapshot->whenTaken = 0;
1816
244k
  }
1817
8
  else
1818
8
  {
1819
    /*
1820
     * Capture the current time and WAL stream location in case this
1821
     * snapshot becomes old enough to need to fall back on the special
1822
     * "old snapshot" logic.
1823
     */
1824
8
    snapshot->lsn = GetXLogInsertRecPtr();
1825
8
    snapshot->whenTaken = GetSnapshotCurrentTimestamp();
1826
8
    MaintainOldSnapshotTimeMapping(snapshot->whenTaken, xmin);
1827
8
  }
1828
1829
244k
  return snapshot;
1830
244k
}
1831
1832
/*
1833
 * ProcArrayInstallImportedXmin -- install imported xmin into MyPgXact->xmin
1834
 *
1835
 * This is called when installing a snapshot imported from another
1836
 * transaction.  To ensure that OldestXmin doesn't go backwards, we must
1837
 * check that the source transaction is still running, and we'd better do
1838
 * that atomically with installing the new xmin.
1839
 *
1840
 * Returns true if successful, false if source xact is no longer running.
1841
 */
1842
bool
1843
ProcArrayInstallImportedXmin(TransactionId xmin,
1844
               VirtualTransactionId *sourcevxid)
1845
0
{
1846
0
  bool    result = false;
1847
0
  ProcArrayStruct *arrayP = procArray;
1848
0
  int     index;
1849
1850
0
  Assert(TransactionIdIsNormal(xmin));
1851
0
  if (!sourcevxid)
1852
0
    return false;
1853
1854
  /* Get lock so source xact can't end while we're doing this */
1855
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
1856
1857
0
  for (index = 0; index < arrayP->numProcs; index++)
1858
0
  {
1859
0
    int     pgprocno = arrayP->pgprocnos[index];
1860
0
    volatile PGPROC *proc = &allProcs[pgprocno];
1861
0
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
1862
0
    TransactionId xid;
1863
1864
    /* Ignore procs running LAZY VACUUM */
1865
0
    if (pgxact->vacuumFlags & PROC_IN_VACUUM)
1866
0
      continue;
1867
1868
    /* We are only interested in the specific virtual transaction. */
1869
0
    if (proc->backendId != sourcevxid->backendId)
1870
0
      continue;
1871
0
    if (proc->lxid != sourcevxid->localTransactionId)
1872
0
      continue;
1873
1874
    /*
1875
     * We check the transaction's database ID for paranoia's sake: if it's
1876
     * in another DB then its xmin does not cover us.  Caller should have
1877
     * detected this already, so we just treat any funny cases as
1878
     * "transaction not found".
1879
     */
1880
0
    if (proc->databaseId != MyDatabaseId)
1881
0
      continue;
1882
1883
    /*
1884
     * Likewise, let's just make real sure its xmin does cover us.
1885
     */
1886
0
    xid = pgxact->xmin;   /* fetch just once */
1887
0
    if (!TransactionIdIsNormal(xid) ||
1888
0
      !TransactionIdPrecedesOrEquals(xid, xmin))
1889
0
      continue;
1890
1891
    /*
1892
     * We're good.  Install the new xmin.  As in GetSnapshotData, set
1893
     * TransactionXmin too.  (Note that because snapmgr.c called
1894
     * GetSnapshotData first, we'll be overwriting a valid xmin here, so
1895
     * we don't check that.)
1896
     */
1897
0
    MyPgXact->xmin = TransactionXmin = xmin;
1898
1899
0
    result = true;
1900
0
    break;
1901
0
  }
1902
1903
0
  LWLockRelease(ProcArrayLock);
1904
1905
0
  return result;
1906
0
}
1907
1908
/*
1909
 * ProcArrayInstallRestoredXmin -- install restored xmin into MyPgXact->xmin
1910
 *
1911
 * This is like ProcArrayInstallImportedXmin, but we have a pointer to the
1912
 * PGPROC of the transaction from which we imported the snapshot, rather than
1913
 * an XID.
1914
 *
1915
 * Returns true if successful, false if source xact is no longer running.
1916
 */
1917
bool
1918
ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
1919
0
{
1920
0
  bool    result = false;
1921
0
  TransactionId xid;
1922
0
  volatile PGXACT *pgxact;
1923
1924
0
  Assert(TransactionIdIsNormal(xmin));
1925
0
  Assert(proc != NULL);
1926
1927
  /* Get lock so source xact can't end while we're doing this */
1928
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
1929
1930
0
  pgxact = &allPgXact[proc->pgprocno];
1931
1932
  /*
1933
   * Be certain that the referenced PGPROC has an advertised xmin which is
1934
   * no later than the one we're installing, so that the system-wide xmin
1935
   * can't go backwards.  Also, make sure it's running in the same database,
1936
   * so that the per-database xmin cannot go backwards.
1937
   */
1938
0
  xid = pgxact->xmin;     /* fetch just once */
1939
0
  if (proc->databaseId == MyDatabaseId &&
1940
0
    TransactionIdIsNormal(xid) &&
1941
0
    TransactionIdPrecedesOrEquals(xid, xmin))
1942
0
  {
1943
0
    MyPgXact->xmin = TransactionXmin = xmin;
1944
0
    result = true;
1945
0
  }
1946
1947
0
  LWLockRelease(ProcArrayLock);
1948
1949
0
  return result;
1950
0
}
1951
1952
/*
1953
 * GetRunningTransactionData -- returns information about running transactions.
1954
 *
1955
 * Similar to GetSnapshotData but returns more information. We include
1956
 * all PGXACTs with an assigned TransactionId, even VACUUM processes and
1957
 * prepared transactions.
1958
 *
1959
 * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
1960
 * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
1961
 * array until the caller has WAL-logged this snapshot, and releases the
1962
 * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
1963
 * lock is released.
1964
 *
1965
 * The returned data structure is statically allocated; caller should not
1966
 * modify it, and must not assume it is valid past the next call.
1967
 *
1968
 * This is never executed during recovery so there is no need to look at
1969
 * KnownAssignedXids.
1970
 *
1971
 * Dummy PGXACTs from prepared transaction are included, meaning that this
1972
 * may return entries with duplicated TransactionId values coming from
1973
 * transaction finishing to prepare.  Nothing is done about duplicated
1974
 * entries here to not hold on ProcArrayLock more than necessary.
1975
 *
1976
 * We don't worry about updating other counters, we want to keep this as
1977
 * simple as possible and leave GetSnapshotData() as the primary code for
1978
 * that bookkeeping.
1979
 *
1980
 * Note that if any transaction has overflowed its cached subtransactions
1981
 * then there is no real need include any subtransactions.
1982
 */
1983
RunningTransactions
1984
GetRunningTransactionData(void)
1985
65
{
1986
  /* result workspace */
1987
65
  static RunningTransactionsData CurrentRunningXactsData;
1988
1989
65
  ProcArrayStruct *arrayP = procArray;
1990
65
  RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
1991
65
  TransactionId latestCompletedXid;
1992
65
  TransactionId oldestRunningXid;
1993
65
  TransactionId *xids;
1994
65
  int     index;
1995
65
  int     count;
1996
65
  int     subcount;
1997
65
  bool    suboverflowed;
1998
1999
65
  Assert(!RecoveryInProgress());
2000
2001
  /*
2002
   * Allocating space for maxProcs xids is usually overkill; numProcs would
2003
   * be sufficient.  But it seems better to do the malloc while not holding
2004
   * the lock, so we can't look at numProcs.  Likewise, we allocate much
2005
   * more subxip storage than is probably needed.
2006
   *
2007
   * Should only be allocated in bgwriter, since only ever executed during
2008
   * checkpoints.
2009
   */
2010
65
  if (CurrentRunningXacts->xids == NULL)
2011
22
  {
2012
    /*
2013
     * First call
2014
     */
2015
22
    CurrentRunningXacts->xids = (TransactionId *)
2016
22
      malloc(TOTAL_MAX_CACHED_SUBXIDS * sizeof(TransactionId));
2017
22
    if (CurrentRunningXacts->xids == NULL)
2018
22
      ereport(ERROR,
2019
22
          (errcode(ERRCODE_OUT_OF_MEMORY),
2020
22
           errmsg("out of memory")));
2021
22
  }
2022
2023
65
  xids = CurrentRunningXacts->xids;
2024
2025
65
  count = subcount = 0;
2026
65
  suboverflowed = false;
2027
2028
  /*
2029
   * Ensure that no xids enter or leave the procarray while we obtain
2030
   * snapshot.
2031
   */
2032
65
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2033
65
  LWLockAcquire(XidGenLock, LW_SHARED);
2034
2035
65
  latestCompletedXid = ShmemVariableCache->latestCompletedXid;
2036
2037
65
  oldestRunningXid = ShmemVariableCache->nextXid;
2038
2039
  /*
2040
   * Spin over procArray collecting all xids
2041
   */
2042
150
  for (index = 0; index < arrayP->numProcs; index++)
2043
85
  {
2044
85
    int     pgprocno = arrayP->pgprocnos[index];
2045
85
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
2046
85
    TransactionId xid;
2047
2048
    /* Fetch xid just once - see GetNewTransactionId */
2049
85
    xid = pgxact->xid;
2050
2051
    /*
2052
     * We don't need to store transactions that don't have a TransactionId
2053
     * yet because they will not show as running on a standby server.
2054
     */
2055
85
    if (!TransactionIdIsValid(xid))
2056
85
      continue;
2057
2058
    /*
2059
     * Be careful not to exclude any xids before calculating the values of
2060
     * oldestRunningXid and suboverflowed, since these are used to clean
2061
     * up transaction information held on standbys.
2062
     */
2063
0
    if (TransactionIdPrecedes(xid, oldestRunningXid))
2064
0
      oldestRunningXid = xid;
2065
2066
0
    if (pgxact->overflowed)
2067
0
      suboverflowed = true;
2068
2069
    /*
2070
     * If we wished to exclude xids this would be the right place for it.
2071
     * Procs with the PROC_IN_VACUUM flag set don't usually assign xids,
2072
     * but they do during truncation at the end when they get the lock and
2073
     * truncate, so it is not much of a problem to include them if they
2074
     * are seen and it is cleaner to include them.
2075
     */
2076
2077
0
    xids[count++] = xid;
2078
0
  }
2079
2080
  /*
2081
   * Spin over procArray collecting all subxids, but only if there hasn't
2082
   * been a suboverflow.
2083
   */
2084
65
  if (!suboverflowed)
2085
65
  {
2086
150
    for (index = 0; index < arrayP->numProcs; index++)
2087
85
    {
2088
85
      int     pgprocno = arrayP->pgprocnos[index];
2089
85
      volatile PGPROC *proc = &allProcs[pgprocno];
2090
85
      volatile PGXACT *pgxact = &allPgXact[pgprocno];
2091
85
      int     nxids;
2092
2093
      /*
2094
       * Save subtransaction XIDs. Other backends can't add or remove
2095
       * entries while we're holding XidGenLock.
2096
       */
2097
85
      nxids = pgxact->nxids;
2098
85
      if (nxids > 0)
2099
0
      {
2100
0
        memcpy(&xids[count], (void *) proc->subxids.xids,
2101
0
             nxids * sizeof(TransactionId));
2102
0
        count += nxids;
2103
0
        subcount += nxids;
2104
2105
        /*
2106
         * Top-level XID of a transaction is always less than any of
2107
         * its subxids, so we don't need to check if any of the
2108
         * subxids are smaller than oldestRunningXid
2109
         */
2110
0
      }
2111
85
    }
2112
65
  }
2113
2114
  /*
2115
   * It's important *not* to include the limits set by slots here because
2116
   * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
2117
   * were to be included here the initial value could never increase because
2118
   * of a circular dependency where slots only increase their limits when
2119
   * running xacts increases oldestRunningXid and running xacts only
2120
   * increases if slots do.
2121
   */
2122
2123
65
  CurrentRunningXacts->xcnt = count - subcount;
2124
65
  CurrentRunningXacts->subxcnt = subcount;
2125
65
  CurrentRunningXacts->subxid_overflow = suboverflowed;
2126
65
  CurrentRunningXacts->nextXid = ShmemVariableCache->nextXid;
2127
65
  CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
2128
65
  CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
2129
2130
65
  Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
2131
65
  Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
2132
65
  Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
2133
2134
  /* We don't release the locks here, the caller is responsible for that */
2135
2136
65
  return CurrentRunningXacts;
2137
65
}
2138
2139
/*
2140
 * GetOldestActiveTransactionId()
2141
 *
2142
 * Similar to GetSnapshotData but returns just oldestActiveXid. We include
2143
 * all PGXACTs with an assigned TransactionId, even VACUUM processes.
2144
 * We look at all databases, though there is no need to include WALSender
2145
 * since this has no effect on hot standby conflicts.
2146
 *
2147
 * This is never executed during recovery so there is no need to look at
2148
 * KnownAssignedXids.
2149
 *
2150
 * We don't worry about updating other counters, we want to keep this as
2151
 * simple as possible and leave GetSnapshotData() as the primary code for
2152
 * that bookkeeping.
2153
 */
2154
TransactionId
2155
GetOldestActiveTransactionId(void)
2156
83
{
2157
83
  ProcArrayStruct *arrayP = procArray;
2158
83
  TransactionId oldestRunningXid;
2159
83
  int     index;
2160
2161
83
  Assert(!RecoveryInProgress());
2162
2163
  /*
2164
   * Read nextXid, as the upper bound of what's still active.
2165
   *
2166
   * Reading a TransactionId is atomic, but we must grab the lock to make
2167
   * sure that all XIDs < nextXid are already present in the proc array (or
2168
   * have already completed), when we spin over it.
2169
   */
2170
83
  LWLockAcquire(XidGenLock, LW_SHARED);
2171
83
  oldestRunningXid = ShmemVariableCache->nextXid;
2172
83
  LWLockRelease(XidGenLock);
2173
2174
  /*
2175
   * Spin over procArray collecting all xids and subxids.
2176
   */
2177
83
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2178
168
  for (index = 0; index < arrayP->numProcs; index++)
2179
85
  {
2180
85
    int     pgprocno = arrayP->pgprocnos[index];
2181
85
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
2182
85
    TransactionId xid;
2183
2184
    /* Fetch xid just once - see GetNewTransactionId */
2185
85
    xid = pgxact->xid;
2186
2187
85
    if (!TransactionIdIsNormal(xid))
2188
85
      continue;
2189
2190
0
    if (TransactionIdPrecedes(xid, oldestRunningXid))
2191
0
      oldestRunningXid = xid;
2192
2193
    /*
2194
     * Top-level XID of a transaction is always less than any of its
2195
     * subxids, so we don't need to check if any of the subxids are
2196
     * smaller than oldestRunningXid
2197
     */
2198
0
  }
2199
83
  LWLockRelease(ProcArrayLock);
2200
2201
83
  return oldestRunningXid;
2202
83
}
2203
2204
/*
2205
 * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
2206
 *
2207
 * Returns the oldest xid that we can guarantee not to have been affected by
2208
 * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
2209
 * transaction aborted. Note that the value can (and most of the time will) be
2210
 * much more conservative than what really has been affected by vacuum, but we
2211
 * currently don't have better data available.
2212
 *
2213
 * This is useful to initialize the cutoff xid after which a new changeset
2214
 * extraction replication slot can start decoding changes.
2215
 *
2216
 * Must be called with ProcArrayLock held either shared or exclusively,
2217
 * although most callers will want to use exclusive mode since it is expected
2218
 * that the caller will immediately use the xid to peg the xmin horizon.
2219
 */
2220
TransactionId
2221
GetOldestSafeDecodingTransactionId(bool catalogOnly)
2222
0
{
2223
0
  ProcArrayStruct *arrayP = procArray;
2224
0
  TransactionId oldestSafeXid;
2225
0
  int     index;
2226
0
  bool    recovery_in_progress = RecoveryInProgress();
2227
2228
0
  Assert(LWLockHeldByMe(ProcArrayLock));
2229
2230
  /*
2231
   * Acquire XidGenLock, so no transactions can acquire an xid while we're
2232
   * running. If no transaction with xid were running concurrently a new xid
2233
   * could influence the RecentXmin et al.
2234
   *
2235
   * We initialize the computation to nextXid since that's guaranteed to be
2236
   * a safe, albeit pessimal, value.
2237
   */
2238
0
  LWLockAcquire(XidGenLock, LW_SHARED);
2239
0
  oldestSafeXid = ShmemVariableCache->nextXid;
2240
2241
  /*
2242
   * If there's already a slot pegging the xmin horizon, we can start with
2243
   * that value, it's guaranteed to be safe since it's computed by this
2244
   * routine initially and has been enforced since.  We can always use the
2245
   * slot's general xmin horizon, but the catalog horizon is only usable
2246
   * when only catalog data is going to be looked at.
2247
   */
2248
0
  if (TransactionIdIsValid(procArray->replication_slot_xmin) &&
2249
0
    TransactionIdPrecedes(procArray->replication_slot_xmin,
2250
0
                oldestSafeXid))
2251
0
    oldestSafeXid = procArray->replication_slot_xmin;
2252
2253
0
  if (catalogOnly &&
2254
0
    TransactionIdIsValid(procArray->replication_slot_catalog_xmin) &&
2255
0
    TransactionIdPrecedes(procArray->replication_slot_catalog_xmin,
2256
0
                oldestSafeXid))
2257
0
    oldestSafeXid = procArray->replication_slot_catalog_xmin;
2258
2259
  /*
2260
   * If we're not in recovery, we walk over the procarray and collect the
2261
   * lowest xid. Since we're called with ProcArrayLock held and have
2262
   * acquired XidGenLock, no entries can vanish concurrently, since
2263
   * PGXACT->xid is only set with XidGenLock held and only cleared with
2264
   * ProcArrayLock held.
2265
   *
2266
   * In recovery we can't lower the safe value besides what we've computed
2267
   * above, so we'll have to wait a bit longer there. We unfortunately can
2268
   * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
2269
   * machinery can miss values and return an older value than is safe.
2270
   */
2271
0
  if (!recovery_in_progress)
2272
0
  {
2273
    /*
2274
     * Spin over procArray collecting all min(PGXACT->xid)
2275
     */
2276
0
    for (index = 0; index < arrayP->numProcs; index++)
2277
0
    {
2278
0
      int     pgprocno = arrayP->pgprocnos[index];
2279
0
      volatile PGXACT *pgxact = &allPgXact[pgprocno];
2280
0
      TransactionId xid;
2281
2282
      /* Fetch xid just once - see GetNewTransactionId */
2283
0
      xid = pgxact->xid;
2284
2285
0
      if (!TransactionIdIsNormal(xid))
2286
0
        continue;
2287
2288
0
      if (TransactionIdPrecedes(xid, oldestSafeXid))
2289
0
        oldestSafeXid = xid;
2290
0
    }
2291
0
  }
2292
2293
0
  LWLockRelease(XidGenLock);
2294
2295
0
  return oldestSafeXid;
2296
0
}
2297
2298
/*
2299
 * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
2300
 * delaying checkpoint because they have critical actions in progress.
2301
 *
2302
 * Constructs an array of VXIDs of transactions that are currently in commit
2303
 * critical sections, as shown by having delayChkpt set in their PGXACT.
2304
 *
2305
 * Returns a palloc'd array that should be freed by the caller.
2306
 * *nvxids is the number of valid entries.
2307
 *
2308
 * Note that because backends set or clear delayChkpt without holding any lock,
2309
 * the result is somewhat indeterminate, but we don't really care.  Even in
2310
 * a multiprocessor with delayed writes to shared memory, it should be certain
2311
 * that setting of delayChkpt will propagate to shared memory when the backend
2312
 * takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
2313
 * it's already inserted its commit record.  Whether it takes a little while
2314
 * for clearing of delayChkpt to propagate is unimportant for correctness.
2315
 */
2316
VirtualTransactionId *
2317
GetVirtualXIDsDelayingChkpt(int *nvxids)
2318
987
{
2319
987
  VirtualTransactionId *vxids;
2320
987
  ProcArrayStruct *arrayP = procArray;
2321
987
  int     count = 0;
2322
987
  int     index;
2323
2324
  /* allocate what's certainly enough result space */
2325
987
  vxids = (VirtualTransactionId *)
2326
987
    palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2327
2328
987
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2329
2330
1.97k
  for (index = 0; index < arrayP->numProcs; index++)
2331
989
  {
2332
989
    int     pgprocno = arrayP->pgprocnos[index];
2333
989
    volatile PGPROC *proc = &allProcs[pgprocno];
2334
989
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
2335
2336
989
    if (pgxact->delayChkpt)
2337
0
    {
2338
0
      VirtualTransactionId vxid;
2339
2340
0
      GET_VXID_FROM_PGPROC(vxid, *proc);
2341
0
      if (VirtualTransactionIdIsValid(vxid))
2342
0
        vxids[count++] = vxid;
2343
0
    }
2344
989
  }
2345
2346
987
  LWLockRelease(ProcArrayLock);
2347
2348
987
  *nvxids = count;
2349
987
  return vxids;
2350
987
}
2351
2352
/*
2353
 * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
2354
 *
2355
 * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
2356
 * of the specified VXIDs are still in critical sections of code.
2357
 *
2358
 * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
2359
 * those numbers should be small enough for it not to be a problem.
2360
 */
2361
bool
2362
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
2363
0
{
2364
0
  bool    result = false;
2365
0
  ProcArrayStruct *arrayP = procArray;
2366
0
  int     index;
2367
2368
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2369
2370
0
  for (index = 0; index < arrayP->numProcs; index++)
2371
0
  {
2372
0
    int     pgprocno = arrayP->pgprocnos[index];
2373
0
    volatile PGPROC *proc = &allProcs[pgprocno];
2374
0
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
2375
0
    VirtualTransactionId vxid;
2376
2377
0
    GET_VXID_FROM_PGPROC(vxid, *proc);
2378
2379
0
    if (pgxact->delayChkpt && VirtualTransactionIdIsValid(vxid))
2380
0
    {
2381
0
      int     i;
2382
2383
0
      for (i = 0; i < nvxids; i++)
2384
0
      {
2385
0
        if (VirtualTransactionIdEquals(vxid, vxids[i]))
2386
0
        {
2387
0
          result = true;
2388
0
          break;
2389
0
        }
2390
0
      }
2391
0
      if (result)
2392
0
        break;
2393
0
    }
2394
0
  }
2395
2396
0
  LWLockRelease(ProcArrayLock);
2397
2398
0
  return result;
2399
0
}
2400
2401
/*
2402
 * BackendPidGetProc -- get a backend's PGPROC given its PID
2403
 *
2404
 * Returns NULL if not found.  Note that it is up to the caller to be
2405
 * sure that the question remains meaningful for long enough for the
2406
 * answer to be used ...
2407
 */
2408
PGPROC *
2409
BackendPidGetProc(int pid)
2410
11
{
2411
11
  PGPROC     *result;
2412
2413
11
  if (pid == 0)        /* never match dummy PGPROCs */
2414
0
    return NULL;
2415
2416
11
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2417
2418
11
  result = BackendPidGetProcWithLock(pid);
2419
2420
11
  LWLockRelease(ProcArrayLock);
2421
2422
11
  return result;
2423
11
}
2424
2425
/*
2426
 * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID
2427
 *
2428
 * Same as above, except caller must be holding ProcArrayLock.  The found
2429
 * entry, if any, can be assumed to be valid as long as the lock remains held.
2430
 */
2431
PGPROC *
2432
BackendPidGetProcWithLock(int pid)
2433
6.72k
{
2434
6.72k
  PGPROC     *result = NULL;
2435
6.72k
  ProcArrayStruct *arrayP = procArray;
2436
6.72k
  int     index;
2437
2438
6.72k
  if (pid == 0)        /* never match dummy PGPROCs */
2439
0
    return NULL;
2440
2441
11.7k
  for (index = 0; index < arrayP->numProcs; index++)
2442
11.7k
  {
2443
11.7k
    PGPROC     *proc = &allProcs[arrayP->pgprocnos[index]];
2444
2445
11.7k
    if (proc->pid == pid)
2446
6.72k
    {
2447
6.72k
      result = proc;
2448
6.72k
      break;
2449
6.72k
    }
2450
11.7k
  }
2451
2452
6.72k
  return result;
2453
6.72k
}
2454
2455
/*
2456
 * BackendXidGetPid -- get a backend's pid given its XID
2457
 *
2458
 * Returns 0 if not found or it's a prepared transaction.  Note that
2459
 * it is up to the caller to be sure that the question remains
2460
 * meaningful for long enough for the answer to be used ...
2461
 *
2462
 * Only main transaction Ids are considered.  This function is mainly
2463
 * useful for determining what backend owns a lock.
2464
 *
2465
 * Beware that not every xact has an XID assigned.  However, as long as you
2466
 * only call this using an XID found on disk, you're safe.
2467
 */
2468
int
2469
BackendXidGetPid(TransactionId xid)
2470
0
{
2471
0
  int     result = 0;
2472
0
  ProcArrayStruct *arrayP = procArray;
2473
0
  int     index;
2474
2475
0
  if (xid == InvalidTransactionId) /* never match invalid xid */
2476
0
    return 0;
2477
2478
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2479
2480
0
  for (index = 0; index < arrayP->numProcs; index++)
2481
0
  {
2482
0
    int     pgprocno = arrayP->pgprocnos[index];
2483
0
    volatile PGPROC *proc = &allProcs[pgprocno];
2484
0
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
2485
2486
0
    if (pgxact->xid == xid)
2487
0
    {
2488
0
      result = proc->pid;
2489
0
      break;
2490
0
    }
2491
0
  }
2492
2493
0
  LWLockRelease(ProcArrayLock);
2494
2495
0
  return result;
2496
0
}
2497
2498
/*
2499
 * IsBackendPid -- is a given pid a running backend
2500
 *
2501
 * This is not called by the backend, but is called by external modules.
2502
 */
2503
bool
IsBackendPid(int pid)
{
  /* The pid counts as a backend iff BackendPidGetProc() returns non-NULL */
  if (BackendPidGetProc(pid) == NULL)
    return false;

  return true;
}
2508
2509
2510
/*
2511
 * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
2512
 *
2513
 * The array is palloc'd. The number of valid entries is returned into *nvxids.
2514
 *
2515
 * The arguments allow filtering the set of VXIDs returned.  Our own process
2516
 * is always skipped.  In addition:
2517
 *  If limitXmin is not InvalidTransactionId, skip processes with
2518
 *    xmin > limitXmin.
2519
 *  If excludeXmin0 is true, skip processes with xmin = 0.
2520
 *  If allDbs is false, skip processes attached to other databases.
2521
 *  If excludeVacuum isn't zero, skip processes for which
2522
 *    (vacuumFlags & excludeVacuum) is not zero.
2523
 *
2524
 * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
2525
 * allow skipping backends whose oldest live snapshot is no older than
2526
 * some snapshot we have.  Since we examine the procarray with only shared
2527
 * lock, there are race conditions: a backend could set its xmin just after
2528
 * we look.  Indeed, on multiprocessors with weak memory ordering, the
2529
 * other backend could have set its xmin *before* we look.  We know however
2530
 * that such a backend must have held shared ProcArrayLock overlapping our
2531
 * own hold of ProcArrayLock, else we would see its xmin update.  Therefore,
2532
 * any snapshot the other backend is taking concurrently with our scan cannot
2533
 * consider any transactions as still running that we think are committed
2534
 * (since backends must hold ProcArrayLock exclusive to commit).
2535
 */
2536
VirtualTransactionId *
2537
GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
2538
            bool allDbs, int excludeVacuum,
2539
            int *nvxids)
2540
0
{
2541
0
  VirtualTransactionId *vxids;
2542
0
  ProcArrayStruct *arrayP = procArray;
2543
0
  int     count = 0;
2544
0
  int     index;
2545
2546
  /* allocate what's certainly enough result space */
2547
0
  vxids = (VirtualTransactionId *)
2548
0
    palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2549
2550
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2551
2552
0
  for (index = 0; index < arrayP->numProcs; index++)
2553
0
  {
2554
0
    int     pgprocno = arrayP->pgprocnos[index];
2555
0
    volatile PGPROC *proc = &allProcs[pgprocno];
2556
0
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
2557
2558
0
    if (proc == MyProc)
2559
0
      continue;
2560
2561
0
    if (excludeVacuum & pgxact->vacuumFlags)
2562
0
      continue;
2563
2564
0
    if (allDbs || proc->databaseId == MyDatabaseId)
2565
0
    {
2566
      /* Fetch xmin just once - might change on us */
2567
0
      TransactionId pxmin = pgxact->xmin;
2568
2569
0
      if (excludeXmin0 && !TransactionIdIsValid(pxmin))
2570
0
        continue;
2571
2572
      /*
2573
       * InvalidTransactionId precedes all other XIDs, so a proc that
2574
       * hasn't set xmin yet will not be rejected by this test.
2575
       */
2576
0
      if (!TransactionIdIsValid(limitXmin) ||
2577
0
        TransactionIdPrecedesOrEquals(pxmin, limitXmin))
2578
0
      {
2579
0
        VirtualTransactionId vxid;
2580
2581
0
        GET_VXID_FROM_PGPROC(vxid, *proc);
2582
0
        if (VirtualTransactionIdIsValid(vxid))
2583
0
          vxids[count++] = vxid;
2584
0
      }
2585
0
    }
2586
0
  }
2587
2588
0
  LWLockRelease(ProcArrayLock);
2589
2590
0
  *nvxids = count;
2591
0
  return vxids;
2592
0
}
2593
2594
/*
2595
 * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
2596
 *
2597
 * Usage is limited to conflict resolution during recovery on standby servers.
2598
 * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId
2599
 * in cases where we cannot accurately determine a value for latestRemovedXid.
2600
 *
2601
 * If limitXmin is InvalidTransactionId then we want to kill everybody,
2602
 * so we're not worried if they have a snapshot or not, nor does it really
2603
 * matter what type of lock we hold.
2604
 *
2605
 * All callers that are checking xmins always now supply a valid and useful
2606
 * value for limitXmin. The limitXmin is always lower than the lowest
2607
 * numbered KnownAssignedXid that is not already a FATAL error. This is
2608
 * because we only care about cleanup records that are cleaning up tuple
2609
 * versions from committed transactions. In that case they will only occur
2610
 * at the point where the record is less than the lowest running xid. That
2611
 * allows us to say that if any backend takes a snapshot concurrently with
2612
 * us then the conflict assessment made here would never include the snapshot
2613
 * that is being derived. So we take LW_SHARED on the ProcArray and allow
2614
 * concurrent snapshots when limitXmin is valid. We might think about adding
2615
 *   Assert(limitXmin < lowest(KnownAssignedXids))
2616
 * but that would not be true in the case of FATAL errors lagging in array,
2617
 * but we already know those are bogus anyway, so we skip that test.
2618
 *
2619
 * If dbOid is valid we skip backends attached to other databases.
2620
 *
2621
 * Be careful to *not* pfree the result from this function. We reuse
2622
 * this array sufficiently often that we use malloc for the result.
2623
 */
2624
VirtualTransactionId *
2625
GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
2626
0
{
2627
0
  static VirtualTransactionId *vxids;
2628
0
  ProcArrayStruct *arrayP = procArray;
2629
0
  int     count = 0;
2630
0
  int     index;
2631
2632
  /*
2633
   * If first time through, get workspace to remember main XIDs in. We
2634
   * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
2635
   * result space, remembering room for a terminator.
2636
   */
2637
0
  if (vxids == NULL)
2638
0
  {
2639
0
    vxids = (VirtualTransactionId *)
2640
0
      malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
2641
0
    if (vxids == NULL)
2642
0
      ereport(ERROR,
2643
0
          (errcode(ERRCODE_OUT_OF_MEMORY),
2644
0
           errmsg("out of memory")));
2645
0
  }
2646
2647
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2648
2649
0
  for (index = 0; index < arrayP->numProcs; index++)
2650
0
  {
2651
0
    int     pgprocno = arrayP->pgprocnos[index];
2652
0
    volatile PGPROC *proc = &allProcs[pgprocno];
2653
0
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
2654
2655
    /* Exclude prepared transactions */
2656
0
    if (proc->pid == 0)
2657
0
      continue;
2658
2659
0
    if (!OidIsValid(dbOid) ||
2660
0
      proc->databaseId == dbOid)
2661
0
    {
2662
      /* Fetch xmin just once - can't change on us, but good coding */
2663
0
      TransactionId pxmin = pgxact->xmin;
2664
2665
      /*
2666
       * We ignore an invalid pxmin because this means that backend has
2667
       * no snapshot currently. We hold a Share lock to avoid contention
2668
       * with users taking snapshots.  That is not a problem because the
2669
       * current xmin is always at least one higher than the latest
2670
       * removed xid, so any new snapshot would never conflict with the
2671
       * test here.
2672
       */
2673
0
      if (!TransactionIdIsValid(limitXmin) ||
2674
0
        (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
2675
0
      {
2676
0
        VirtualTransactionId vxid;
2677
2678
0
        GET_VXID_FROM_PGPROC(vxid, *proc);
2679
0
        if (VirtualTransactionIdIsValid(vxid))
2680
0
          vxids[count++] = vxid;
2681
0
      }
2682
0
    }
2683
0
  }
2684
2685
0
  LWLockRelease(ProcArrayLock);
2686
2687
  /* add the terminator */
2688
0
  vxids[count].backendId = InvalidBackendId;
2689
0
  vxids[count].localTransactionId = InvalidLocalTransactionId;
2690
2691
0
  return vxids;
2692
0
}
2693
2694
/*
2695
 * CancelVirtualTransaction - used in recovery conflict processing
2696
 *
2697
 * Returns pid of the process signaled, or 0 if not found.
2698
 */
2699
pid_t
2700
CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
2701
0
{
2702
0
  ProcArrayStruct *arrayP = procArray;
2703
0
  int     index;
2704
0
  pid_t   pid = 0;
2705
2706
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2707
2708
0
  for (index = 0; index < arrayP->numProcs; index++)
2709
0
  {
2710
0
    int     pgprocno = arrayP->pgprocnos[index];
2711
0
    volatile PGPROC *proc = &allProcs[pgprocno];
2712
0
    VirtualTransactionId procvxid;
2713
2714
0
    GET_VXID_FROM_PGPROC(procvxid, *proc);
2715
2716
0
    if (procvxid.backendId == vxid.backendId &&
2717
0
      procvxid.localTransactionId == vxid.localTransactionId)
2718
0
    {
2719
0
      proc->recoveryConflictPending = true;
2720
0
      pid = proc->pid;
2721
0
      if (pid != 0)
2722
0
      {
2723
        /*
2724
         * Kill the pid if it's still here. If not, that's what we
2725
         * wanted so ignore any errors.
2726
         */
2727
0
        (void) SendProcSignal(pid, sigmode, vxid.backendId);
2728
0
      }
2729
0
      break;
2730
0
    }
2731
0
  }
2732
2733
0
  LWLockRelease(ProcArrayLock);
2734
2735
0
  return pid;
2736
0
}
2737
2738
/*
2739
 * MinimumActiveBackends --- count backends (other than myself) that are
2740
 *    in active transactions.  Return true if the count reaches the
2741
 *    minimum threshold passed.  This is used as a heuristic to decide if
2742
 *    a pre-XLOG-flush delay is worthwhile during commit.
2743
 *
2744
 * Do not count backends that are blocked waiting for locks, since they are
2745
 * not going to get to run until someone else commits.
2746
 */
2747
bool
2748
MinimumActiveBackends(int min)
2749
0
{
2750
0
  ProcArrayStruct *arrayP = procArray;
2751
0
  int     count = 0;
2752
0
  int     index;
2753
2754
  /* Quick short-circuit if no minimum is specified */
2755
0
  if (min == 0)
2756
0
    return true;
2757
2758
  /*
2759
   * Note: for speed, we don't acquire ProcArrayLock.  This is a little bit
2760
   * bogus, but since we are only testing fields for zero or nonzero, it
2761
   * should be OK.  The result is only used for heuristic purposes anyway...
2762
   */
2763
0
  for (index = 0; index < arrayP->numProcs; index++)
2764
0
  {
2765
0
    int     pgprocno = arrayP->pgprocnos[index];
2766
0
    volatile PGPROC *proc = &allProcs[pgprocno];
2767
0
    volatile PGXACT *pgxact = &allPgXact[pgprocno];
2768
2769
    /*
2770
     * Since we're not holding a lock, need to be prepared to deal with
2771
     * garbage, as someone could have incremented numProcs but not yet
2772
     * filled the structure.
2773
     *
2774
     * If someone just decremented numProcs, 'proc' could also point to a
2775
     * PGPROC entry that's no longer in the array. It still points to a
2776
     * PGPROC struct, though, because freed PGPROC entries just go to the
2777
     * free list and are recycled. Its contents are nonsense in that case,
2778
     * but that's acceptable for this function.
2779
     */
2780
0
    if (pgprocno == -1)
2781
0
      continue;     /* do not count deleted entries */
2782
0
    if (proc == MyProc)
2783
0
      continue;     /* do not count myself */
2784
0
    if (pgxact->xid == InvalidTransactionId)
2785
0
      continue;     /* do not count if no XID assigned */
2786
0
    if (proc->pid == 0)
2787
0
      continue;     /* do not count prepared xacts */
2788
0
    if (proc->waitLock != NULL)
2789
0
      continue;     /* do not count if blocked on a lock */
2790
0
    count++;
2791
0
    if (count >= min)
2792
0
      break;
2793
0
  }
2794
2795
0
  return count >= min;
2796
0
}
2797
2798
/*
2799
 * CountDBBackends --- count backends that are using specified database
2800
 */
2801
int
2802
CountDBBackends(Oid databaseid)
2803
0
{
2804
0
  ProcArrayStruct *arrayP = procArray;
2805
0
  int     count = 0;
2806
0
  int     index;
2807
2808
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2809
2810
0
  for (index = 0; index < arrayP->numProcs; index++)
2811
0
  {
2812
0
    int     pgprocno = arrayP->pgprocnos[index];
2813
0
    volatile PGPROC *proc = &allProcs[pgprocno];
2814
2815
0
    if (proc->pid == 0)
2816
0
      continue;     /* do not count prepared xacts */
2817
0
    if (!OidIsValid(databaseid) ||
2818
0
      proc->databaseId == databaseid)
2819
0
      count++;
2820
0
  }
2821
2822
0
  LWLockRelease(ProcArrayLock);
2823
2824
0
  return count;
2825
0
}
2826
2827
/*
2828
 * CountDBConnections --- counts database backends ignoring any background
2829
 *    worker processes
2830
 */
2831
int
2832
CountDBConnections(Oid databaseid)
2833
0
{
2834
0
  ProcArrayStruct *arrayP = procArray;
2835
0
  int     count = 0;
2836
0
  int     index;
2837
2838
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2839
2840
0
  for (index = 0; index < arrayP->numProcs; index++)
2841
0
  {
2842
0
    int     pgprocno = arrayP->pgprocnos[index];
2843
0
    volatile PGPROC *proc = &allProcs[pgprocno];
2844
2845
0
    if (proc->pid == 0)
2846
0
      continue;     /* do not count prepared xacts */
2847
0
    if (proc->isBackgroundWorker)
2848
0
      continue;     /* do not count background workers */
2849
0
    if (!OidIsValid(databaseid) ||
2850
0
      proc->databaseId == databaseid)
2851
0
      count++;
2852
0
  }
2853
2854
0
  LWLockRelease(ProcArrayLock);
2855
2856
0
  return count;
2857
0
}
2858
2859
/*
2860
 * CancelDBBackends --- cancel backends that are using specified database
2861
 */
2862
void
2863
CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
2864
0
{
2865
0
  ProcArrayStruct *arrayP = procArray;
2866
0
  int     index;
2867
0
  pid_t   pid = 0;
2868
2869
  /* tell all backends to die */
2870
0
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2871
2872
0
  for (index = 0; index < arrayP->numProcs; index++)
2873
0
  {
2874
0
    int     pgprocno = arrayP->pgprocnos[index];
2875
0
    volatile PGPROC *proc = &allProcs[pgprocno];
2876
2877
0
    if (databaseid == InvalidOid || proc->databaseId == databaseid)
2878
0
    {
2879
0
      VirtualTransactionId procvxid;
2880
2881
0
      GET_VXID_FROM_PGPROC(procvxid, *proc);
2882
2883
0
      proc->recoveryConflictPending = conflictPending;
2884
0
      pid = proc->pid;
2885
0
      if (pid != 0)
2886
0
      {
2887
        /*
2888
         * Kill the pid if it's still here. If not, that's what we
2889
         * wanted so ignore any errors.
2890
         */
2891
0
        (void) SendProcSignal(pid, sigmode, procvxid.backendId);
2892
0
      }
2893
0
    }
2894
0
  }
2895
2896
0
  LWLockRelease(ProcArrayLock);
2897
0
}
2898
2899
/*
2900
 * CountUserBackends --- count backends that are used by specified user
2901
 */
2902
int
2903
CountUserBackends(Oid roleid)
2904
6
{
2905
6
  ProcArrayStruct *arrayP = procArray;
2906
6
  int     count = 0;
2907
6
  int     index;
2908
2909
6
  LWLockAcquire(ProcArrayLock, LW_SHARED);
2910
2911
22
  for (index = 0; index < arrayP->numProcs; index++)
2912
16
  {
2913
16
    int     pgprocno = arrayP->pgprocnos[index];
2914
16
    volatile PGPROC *proc = &allProcs[pgprocno];
2915
2916
16
    if (proc->pid == 0)
2917
0
      continue;     /* do not count prepared xacts */
2918
16
    if (proc->isBackgroundWorker)
2919
0
      continue;     /* do not count background workers */
2920
16
    if (proc->roleId == roleid)
2921
10
      count++;
2922
16
  }
2923
2924
6
  LWLockRelease(ProcArrayLock);
2925
2926
6
  return count;
2927
6
}
2928
2929
/*
2930
 * CountOtherDBBackends -- check for other backends running in the given DB
2931
 *
2932
 * If there are other backends in the DB, we will wait a maximum of 5 seconds
2933
 * for them to exit.  Autovacuum backends are encouraged to exit early by
2934
 * sending them SIGTERM, but normal user backends are just waited for.
2935
 *
2936
 * The current backend is always ignored; it is caller's responsibility to
2937
 * check whether the current backend uses the given DB, if it's important.
2938
 *
2939
 * Returns true if there are (still) other backends in the DB, false if not.
2940
 * Also, *nbackends and *nprepared are set to the number of other backends
2941
 * and prepared transactions in the DB, respectively.
2942
 *
2943
 * This function is used to interlock DROP DATABASE and related commands
2944
 * against there being any active backends in the target DB --- dropping the
2945
 * DB while active backends remain would be a Bad Thing.  Note that we cannot
2946
 * detect here the possibility of a newly-started backend that is trying to
2947
 * connect to the doomed database, so additional interlocking is needed during
2948
 * backend startup.  The caller should normally hold an exclusive lock on the
2949
 * target DB before calling this, which is one reason we mustn't wait
2950
 * indefinitely.
2951
 */
2952
bool
2953
CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
2954
43
{
2955
43
  ProcArrayStruct *arrayP = procArray;
2956
2957
0
#define MAXAUTOVACPIDS  10    /* max autovacs to SIGTERM per iteration */
2958
43
  int     autovac_pids[MAXAUTOVACPIDS];
2959
43
  int     tries;
2960
2961
  /* 50 tries with 100ms sleep between tries makes 5 sec total wait */
2962
43
  for (tries = 0; tries < 50; tries++)
2963
43
  {
2964
43
    int     nautovacs = 0;
2965
43
    bool    found = false;
2966
43
    int     index;
2967
2968
43
    CHECK_FOR_INTERRUPTS();
2969
2970
43
    *nbackends = *nprepared = 0;
2971
2972
43
    LWLockAcquire(ProcArrayLock, LW_SHARED);
2973
2974
101
    for (index = 0; index < arrayP->numProcs; index++)
2975
58
    {
2976
58
      int     pgprocno = arrayP->pgprocnos[index];
2977
58
      volatile PGPROC *proc = &allProcs[pgprocno];
2978
58
      volatile PGXACT *pgxact = &allPgXact[pgprocno];
2979
2980
58
      if (proc->databaseId != databaseId)
2981
58
        continue;
2982
0
      if (proc == MyProc)
2983
0
        continue;
2984
2985
0
      found = true;
2986
2987
0
      if (proc->pid == 0)
2988
0
        (*nprepared)++;
2989
0
      else
2990
0
      {
2991
0
        (*nbackends)++;
2992
0
        if ((pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) &&
2993
0
          nautovacs < MAXAUTOVACPIDS)
2994
0
          autovac_pids[nautovacs++] = proc->pid;
2995
0
      }
2996
0
    }
2997
2998
43
    LWLockRelease(ProcArrayLock);
2999
3000
43
    if (!found)
3001
43
      return false;   /* no conflicting backends, so done */
3002
3003
    /*
3004
     * Send SIGTERM to any conflicting autovacuums before sleeping. We
3005
     * postpone this step until after the loop because we don't want to
3006
     * hold ProcArrayLock while issuing kill(). We have no idea what might
3007
     * block kill() inside the kernel...
3008
     */
3009
0
    for (index = 0; index < nautovacs; index++)
3010
0
      (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */
3011
3012
    /* sleep, then try again */
3013
0
    pg_usleep(100 * 1000L); /* 100ms */
3014
0
  }
3015
3016
0
  return true;       /* timed out, still conflicts */
3017
43
}
3018
3019
/*
3020
 * Terminate existing connections to the specified database. This routine
3021
 * is used by the DROP DATABASE command when user has asked to forcefully
3022
 * drop the database.
3023
 *
3024
 * The current backend is always ignored; it is caller's responsibility to
3025
 * check whether the current backend uses the given DB, if it's important.
3026
 *
3027
 * No connection is terminated if even one prepared transaction exists in
3028
 * the target database; an error is raised instead.
3029
 */
3030
void
3031
TerminateOtherDBBackends(Oid databaseId)
3032
0
{
3033
0
  ProcArrayStruct *arrayP = procArray;
3034
0
  List     *pids = NIL;
3035
0
  int     nprepared = 0;
3036
0
  int     i;
3037
3038
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
3039
3040
0
  for (i = 0; i < procArray->numProcs; i++)
3041
0
  {
3042
0
    int     pgprocno = arrayP->pgprocnos[i];
3043
0
    PGPROC     *proc = &allProcs[pgprocno];
3044
3045
0
    if (proc->databaseId != databaseId)
3046
0
      continue;
3047
0
    if (proc == MyProc)
3048
0
      continue;
3049
3050
0
    if (proc->pid != 0)
3051
0
      pids = lappend_int(pids, proc->pid);
3052
0
    else
3053
0
      nprepared++;
3054
0
  }
3055
3056
0
  LWLockRelease(ProcArrayLock);
3057
3058
0
  if (nprepared > 0)
3059
0
    ereport(ERROR,
3060
0
        (errcode(ERRCODE_OBJECT_IN_USE),
3061
0
         errmsg("database \"%s\" is being used by prepared transaction",
3062
0
            get_database_name(databaseId)),
3063
0
         errdetail_plural("There is %d prepared transaction using the database.",
3064
0
                  "There are %d prepared transactions using the database.",
3065
0
                  nprepared,
3066
0
                  nprepared)));
3067
3068
0
  if (pids)
3069
0
  {
3070
0
    ListCell   *lc;
3071
3072
    /*
3073
     * Check whether we have the necessary rights to terminate other
3074
     * sessions.  We don't terminate any session untill we ensure that we
3075
     * have rights on all the sessions to be terminated.  These checks are
3076
     * the same as we do in pg_terminate_backend.
3077
     *
3078
     * In this case we don't raise some warnings - like "PID %d is not a
3079
     * PostgreSQL server process", because for us already finished session
3080
     * is not a problem.
3081
     */
3082
0
    foreach(lc, pids)
3083
0
    {
3084
0
      int     pid = lfirst_int(lc);
3085
0
      PGPROC     *proc = BackendPidGetProc(pid);
3086
3087
0
      if (proc != NULL)
3088
0
      {
3089
        /* Only allow superusers to signal superuser-owned backends. */
3090
0
        if (superuser_arg(proc->roleId) && !superuser())
3091
0
          ereport(ERROR,
3092
0
              (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3093
0
               (errmsg("must be a superuser to terminate superuser process"))));
3094
3095
        /* Users can signal backends they have role membership in. */
3096
0
        if (!has_privs_of_role(GetUserId(), proc->roleId) &&
3097
0
          !has_privs_of_role(GetUserId(), DEFAULT_ROLE_SIGNAL_BACKENDID))
3098
0
          ereport(ERROR,
3099
0
              (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
3100
0
               (errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend"))));
3101
0
      }
3102
0
    }
3103
3104
    /*
3105
     * There's a race condition here: once we release the ProcArrayLock,
3106
     * it's possible for the session to exit before we issue kill.  That
3107
     * race condition possibility seems too unlikely to worry about.  See
3108
     * pg_signal_backend.
3109
     */
3110
0
    foreach(lc, pids)
3111
0
    {
3112
0
      int     pid = lfirst_int(lc);
3113
0
      PGPROC     *proc = BackendPidGetProc(pid);
3114
3115
0
      if (proc != NULL)
3116
0
      {
3117
        /*
3118
         * If we have setsid(), signal the backend's whole process
3119
         * group
3120
         */
3121
0
#ifdef HAVE_SETSID
3122
0
        (void) kill(-pid, SIGTERM);
3123
#else
3124
        (void) kill(pid, SIGTERM);
3125
#endif
3126
0
      }
3127
0
    }
3128
0
  }
3129
0
}
3130
3131
/*
3132
 * ProcArraySetReplicationSlotXmin
3133
 *
3134
 * Install limits to future computations of the xmin horizon to prevent vacuum
3135
 * and HOT pruning from removing affected rows still needed by clients with
3136
 * replication slots.
3137
 */
3138
void
3139
ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin,
3140
                bool already_locked)
3141
1.80k
{
3142
1.80k
  Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
3143
3144
1.80k
  if (!already_locked)
3145
1.80k
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3146
3147
1.80k
  procArray->replication_slot_xmin = xmin;
3148
1.80k
  procArray->replication_slot_catalog_xmin = catalog_xmin;
3149
3150
1.80k
  if (!already_locked)
3151
1.80k
    LWLockRelease(ProcArrayLock);
3152
1.80k
}
3153
3154
/*
3155
 * ProcArrayGetReplicationSlotXmin
3156
 *
3157
 * Return the current slot xmin limits. That's useful to be able to remove
3158
 * data that's older than those limits.
3159
 */
3160
void
3161
ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
3162
                TransactionId *catalog_xmin)
3163
0
{
3164
0
  LWLockAcquire(ProcArrayLock, LW_SHARED);
3165
3166
0
  if (xmin != NULL)
3167
0
    *xmin = procArray->replication_slot_xmin;
3168
3169
0
  if (catalog_xmin != NULL)
3170
0
    *catalog_xmin = procArray->replication_slot_catalog_xmin;
3171
3172
0
  LWLockRelease(ProcArrayLock);
3173
0
}
3174
3175
3176
/*
 * XidCacheRemove -- drop entry i from my backend's cached subxid array.
 *
 * The last cached entry is copied over slot i and the entry count in
 * MyPgXact is then decremented, so the array stays dense (its order is not
 * preserved).
 */
#define XidCacheRemove(i) \
  do \
  { \
    MyProc->subxids.xids[i] = MyProc->subxids.xids[MyPgXact->nxids - 1]; \
    MyPgXact->nxids--; \
  } while (0)
3181
3182
/*
3183
 * XidCacheRemoveRunningXids
3184
 *
3185
 * Remove a bunch of TransactionIds from the list of known-running
3186
 * subtransactions for my backend.  Both the specified xid and those in
3187
 * the xids[] array (of length nxids) are removed from the subxids cache.
3188
 * latestXid must be the latest XID among the group.
3189
 */
3190
void
3191
XidCacheRemoveRunningXids(TransactionId xid,
3192
              int nxids, const TransactionId *xids,
3193
              TransactionId latestXid)
3194
1
{
3195
1
  int     i,
3196
1
        j;
3197
3198
1
  Assert(TransactionIdIsValid(xid));
3199
3200
  /*
3201
   * We must hold ProcArrayLock exclusively in order to remove transactions
3202
   * from the PGPROC array.  (See src/backend/access/transam/README.)  It's
3203
   * possible this could be relaxed since we know this routine is only used
3204
   * to abort subtransactions, but pending closer analysis we'd best be
3205
   * conservative.
3206
   */
3207
1
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3208
3209
  /*
3210
   * Under normal circumstances xid and xids[] will be in increasing order,
3211
   * as will be the entries in subxids.  Scan backwards to avoid O(N^2)
3212
   * behavior when removing a lot of xids.
3213
   */
3214
1
  for (i = nxids - 1; i >= 0; i--)
3215
0
  {
3216
0
    TransactionId anxid = xids[i];
3217
3218
0
    for (j = MyPgXact->nxids - 1; j >= 0; j--)
3219
0
    {
3220
0
      if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
3221
0
      {
3222
0
        XidCacheRemove(j);
3223
0
        break;
3224
0
      }
3225
0
    }
3226
3227
    /*
3228
     * Ordinarily we should have found it, unless the cache has
3229
     * overflowed. However it's also possible for this routine to be
3230
     * invoked multiple times for the same subtransaction, in case of an
3231
     * error during AbortSubTransaction.  So instead of Assert, emit a
3232
     * debug warning.
3233
     */
3234
0
    if (j < 0 && !MyPgXact->overflowed)
3235
0
      elog(WARNING, "did not find subXID %u in MyProc", anxid);
3236
0
  }
3237
3238
1
  for (j = MyPgXact->nxids - 1; j >= 0; j--)
3239
1
  {
3240
1
    if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
3241
1
    {
3242
1
      XidCacheRemove(j);
3243
1
      break;
3244
1
    }
3245
1
  }
3246
  /* Ordinarily we should have found it, unless the cache has overflowed */
3247
1
  if (j < 0 && !MyPgXact->overflowed)
3248
0
    elog(WARNING, "did not find subXID %u in MyProc", xid);
3249
3250
  /* Also advance global latestCompletedXid while holding the lock */
3251
1
  if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
3252
1
                latestXid))
3253
1
    ShmemVariableCache->latestCompletedXid = latestXid;
3254
3255
1
  LWLockRelease(ProcArrayLock);
3256
1
}
3257
3258
#ifdef XIDCACHE_DEBUG
3259
3260
/*
3261
 * Print stats about effectiveness of XID cache
3262
 */
3263
static void
3264
DisplayXidCache(void)
3265
{
3266
  fprintf(stderr,
3267
      "XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
3268
      xc_by_recent_xmin,
3269
      xc_by_known_xact,
3270
      xc_by_my_xact,
3271
      xc_by_latest_xid,
3272
      xc_by_main_xid,
3273
      xc_by_child_xid,
3274
      xc_by_known_assigned,
3275
      xc_no_overflow,
3276
      xc_slow_answer);
3277
}
3278
#endif              /* XIDCACHE_DEBUG */
3279
3280
3281
/* ----------------------------------------------
3282
 *    KnownAssignedTransactionIds sub-module
3283
 * ----------------------------------------------
3284
 */
3285
3286
/*
3287
 * In Hot Standby mode, we maintain a list of transactions that are (or were)
3288
 * running in the master at the current point in WAL.  These XIDs must be
3289
 * treated as running by standby transactions, even though they are not in
3290
 * the standby server's PGXACT array.
3291
 *
3292
 * We record all XIDs that we know have been assigned.  That includes all the
3293
 * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
3294
 * been assigned.  We can deduce the existence of unobserved XIDs because we
3295
 * know XIDs are assigned in sequence, with no gaps.  The KnownAssignedXids
3296
 * list expands as new XIDs are observed or inferred, and contracts when
3297
 * transaction completion records arrive.
3298
 *
3299
 * During hot standby we do not fret too much about the distinction between
3300
 * top-level XIDs and subtransaction XIDs. We store both together in the
3301
 * KnownAssignedXids list.  In backends, this is copied into snapshots in
3302
 * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
3303
 * doesn't care about the distinction either.  Subtransaction XIDs are
3304
 * effectively treated as top-level XIDs and in the typical case pg_subtrans
3305
 * links are *not* maintained (which does not affect visibility).
3306
 *
3307
 * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
3308
 * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every master transaction must
3309
 * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
3310
 * least every PGPROC_MAX_CACHED_SUBXIDS.  When we receive one of these
3311
 * records, we mark the subXIDs as children of the top XID in pg_subtrans,
3312
 * and then remove them from KnownAssignedXids.  This prevents overflow of
3313
 * KnownAssignedXids and snapshots, at the cost that status checks for these
3314
 * subXIDs will take a slower path through TransactionIdIsInProgress().
3315
 * This means that KnownAssignedXids is not necessarily complete for subXIDs,
3316
 * though it should be complete for top-level XIDs; this is the same situation
3317
 * that holds with respect to the PGPROC entries in normal running.
3318
 *
3319
 * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
3320
 * that, similarly to tracking overflow of a PGPROC's subxids array.  We do
3321
 * that by remembering the lastOverflowedXID, ie the last thrown-away subXID.
3322
 * As long as that is within the range of interesting XIDs, we have to assume
3323
 * that subXIDs are missing from snapshots.  (Note that subXID overflow occurs
3324
 * on primary when 65th subXID arrives, whereas on standby it occurs when 64th
3325
 * subXID arrives - that is not an error.)
3326
 *
3327
 * Should a backend on primary somehow disappear before it can write an abort
3328
 * record, then we just leave those XIDs in KnownAssignedXids. They actually
3329
 * aborted but we think they were running; the distinction is irrelevant
3330
 * because either way any changes done by the transaction are not visible to
3331
 * backends in the standby.  We prune KnownAssignedXids when
3332
 * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
3333
 * array due to such dead XIDs.
3334
 */
3335
3336
/*
3337
 * RecordKnownAssignedTransactionIds
3338
 *    Record the given XID in KnownAssignedXids, as well as any preceding
3339
 *    unobserved XIDs.
3340
 *
3341
 * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
3342
 * associated with a transaction. Must be called for each record after we
3343
 * have executed StartupCLOG() et al, since we must ExtendCLOG() etc..
3344
 *
3345
 * Called during recovery in analogy with and in place of GetNewTransactionId()
3346
 */
3347
void
3348
RecordKnownAssignedTransactionIds(TransactionId xid)
3349
0
{
3350
0
  Assert(standbyState >= STANDBY_INITIALIZED);
3351
0
  Assert(TransactionIdIsValid(xid));
3352
0
  Assert(TransactionIdIsValid(latestObservedXid));
3353
3354
0
  elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
3355
0
     xid, latestObservedXid);
3356
3357
  /*
3358
   * When a newly observed xid arrives, it is frequently the case that it is
3359
   * *not* the next xid in sequence. When this occurs, we must treat the
3360
   * intervening xids as running also.
3361
   */
3362
0
  if (TransactionIdFollows(xid, latestObservedXid))
3363
0
  {
3364
0
    TransactionId next_expected_xid;
3365
3366
    /*
3367
     * Extend subtrans like we do in GetNewTransactionId() during normal
3368
     * operation using individual extend steps. Note that we do not need
3369
     * to extend clog since its extensions are WAL logged.
3370
     *
3371
     * This part has to be done regardless of standbyState since we
3372
     * immediately start assigning subtransactions to their toplevel
3373
     * transactions.
3374
     */
3375
0
    next_expected_xid = latestObservedXid;
3376
0
    while (TransactionIdPrecedes(next_expected_xid, xid))
3377
0
    {
3378
0
      TransactionIdAdvance(next_expected_xid);
3379
0
      ExtendSUBTRANS(next_expected_xid);
3380
0
    }
3381
0
    Assert(next_expected_xid == xid);
3382
3383
    /*
3384
     * If the KnownAssignedXids machinery isn't up yet, there's nothing
3385
     * more to do since we don't track assigned xids yet.
3386
     */
3387
0
    if (standbyState <= STANDBY_INITIALIZED)
3388
0
    {
3389
0
      latestObservedXid = xid;
3390
0
      return;
3391
0
    }
3392
3393
    /*
3394
     * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
3395
     */
3396
0
    next_expected_xid = latestObservedXid;
3397
0
    TransactionIdAdvance(next_expected_xid);
3398
0
    KnownAssignedXidsAdd(next_expected_xid, xid, false);
3399
3400
    /*
3401
     * Now we can advance latestObservedXid
3402
     */
3403
0
    latestObservedXid = xid;
3404
3405
    /* ShmemVariableCache->nextXid must be beyond any observed xid */
3406
0
    next_expected_xid = latestObservedXid;
3407
0
    TransactionIdAdvance(next_expected_xid);
3408
0
    LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
3409
0
    ShmemVariableCache->nextXid = next_expected_xid;
3410
0
    LWLockRelease(XidGenLock);
3411
0
  }
3412
0
}
3413
3414
/*
3415
 * ExpireTreeKnownAssignedTransactionIds
3416
 *    Remove the given XIDs from KnownAssignedXids.
3417
 *
3418
 * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
3419
 */
3420
void
3421
ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids,
3422
                    TransactionId *subxids, TransactionId max_xid)
3423
0
{
3424
0
  Assert(standbyState >= STANDBY_INITIALIZED);
3425
3426
  /*
3427
   * Uses same locking as transaction commit
3428
   */
3429
0
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3430
3431
0
  KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
3432
3433
  /* As in ProcArrayEndTransaction, advance latestCompletedXid */
3434
0
  if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
3435
0
                max_xid))
3436
0
    ShmemVariableCache->latestCompletedXid = max_xid;
3437
3438
0
  LWLockRelease(ProcArrayLock);
3439
0
}
3440
3441
/*
3442
 * ExpireAllKnownAssignedTransactionIds
3443
 *    Remove all entries in KnownAssignedXids
3444
 */
3445
void
3446
ExpireAllKnownAssignedTransactionIds(void)
3447
0
{
3448
0
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3449
0
  KnownAssignedXidsRemovePreceding(InvalidTransactionId);
3450
0
  LWLockRelease(ProcArrayLock);
3451
0
}
3452
3453
/*
3454
 * ExpireOldKnownAssignedTransactionIds
3455
 *    Remove KnownAssignedXids entries preceding the given XID
3456
 */
3457
void
3458
ExpireOldKnownAssignedTransactionIds(TransactionId xid)
3459
0
{
3460
0
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3461
0
  KnownAssignedXidsRemovePreceding(xid);
3462
0
  LWLockRelease(ProcArrayLock);
3463
0
}
3464
3465
3466
/*
3467
 * Private module functions to manipulate KnownAssignedXids
3468
 *
3469
 * There are 5 main uses of the KnownAssignedXids data structure:
3470
 *
3471
 *  * backends taking snapshots - all valid XIDs need to be copied out
3472
 *  * backends seeking to determine presence of a specific XID
3473
 *  * startup process adding new known-assigned XIDs
3474
 *  * startup process removing specific XIDs as transactions end
3475
 *  * startup process pruning array when special WAL records arrive
3476
 *
3477
 * This data structure is known to be a hot spot during Hot Standby, so we
3478
 * go to some lengths to make these operations as efficient and as concurrent
3479
 * as possible.
3480
 *
3481
 * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
3482
 * order, to be exact --- to allow binary search for specific XIDs.  Note:
3483
 * in general TransactionIdPrecedes would not provide a total order, but
3484
 * we know that the entries present at any instant should not extend across
3485
 * a large enough fraction of XID space to wrap around (the master would
3486
 * shut down for fear of XID wrap long before that happens).  So it's OK to
3487
 * use TransactionIdPrecedes as a binary-search comparator.
3488
 *
3489
 * It's cheap to maintain the sortedness during insertions, since new known
3490
 * XIDs are always reported in XID order; we just append them at the right.
3491
 *
3492
 * To keep individual deletions cheap, we need to allow gaps in the array.
3493
 * This is implemented by marking array elements as valid or invalid using
3494
 * the parallel boolean array KnownAssignedXidsValid[].  A deletion is done
3495
 * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
3496
 * XID entry itself.  This preserves the property that the XID entries are
3497
 * sorted, so we can do binary searches easily.  Periodically we compress
3498
 * out the unused entries; that's much cheaper than having to compress the
3499
 * array immediately on every deletion.
3500
 *
3501
 * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
3502
 * are those with indexes tail <= i < head; items outside this subscript range
3503
 * have unspecified contents.  When head reaches the end of the array, we
3504
 * force compression of unused entries rather than wrapping around, since
3505
 * allowing wraparound would greatly complicate the search logic.  We maintain
3506
 * an explicit tail pointer so that pruning of old XIDs can be done without
3507
 * immediately moving the array contents.  In most cases only a small fraction
3508
 * of the array contains valid entries at any instant.
3509
 *
3510
 * Although only the startup process can ever change the KnownAssignedXids
3511
 * data structure, we still need interlocking so that standby backends will
3512
 * not observe invalid intermediate states.  The convention is that backends
3513
 * must hold shared ProcArrayLock to examine the array.  To remove XIDs from
3514
 * the array, the startup process must hold ProcArrayLock exclusively, for
3515
 * the usual transactional reasons (compare commit/abort of a transaction
3516
 * during normal running).  Compressing unused entries out of the array
3517
 * likewise requires exclusive lock.  To add XIDs to the array, we just insert
3518
 * them into slots to the right of the head pointer and then advance the head
3519
 * pointer.  This wouldn't require any lock at all, except that on machines
3520
 * with weak memory ordering we need to be careful that other processors
3521
 * see the array element changes before they see the head pointer change.
3522
 * We handle this by using a spinlock to protect reads and writes of the
3523
 * head/tail pointers.  (We could dispense with the spinlock if we were to
3524
 * create suitable memory access barrier primitives and use those instead.)
3525
 * The spinlock must be taken to read or write the head/tail pointers unless
3526
 * the caller holds ProcArrayLock exclusively.
3527
 *
3528
 * Algorithmic analysis:
3529
 *
3530
 * If we have a maximum of M slots, with N XIDs currently spread across
3531
 * S elements then we have N <= S <= M always.
3532
 *
3533
 *  * Adding a new XID is O(1) and needs little locking (unless compression
3534
 *    must happen)
3535
 *  * Compressing the array is O(S) and requires exclusive lock
3536
 *  * Removing an XID is O(logS) and requires exclusive lock
3537
 *  * Taking a snapshot is O(S) and requires shared lock
3538
 *  * Checking for an XID is O(logS) and requires shared lock
3539
 *
3540
 * In comparison, using a hash table for KnownAssignedXids would mean that
3541
 * taking snapshots would be O(M). If we can maintain S << M then the
3542
 * sorted array technique will deliver significantly faster snapshots.
3543
 * If we try to keep S too small then we will spend too much time compressing,
3544
 * so there is an optimal point for any workload mix. We use a heuristic to
3545
 * decide when to compress the array, though trimming also helps reduce
3546
 * frequency of compressing. The heuristic requires us to track the number of
3547
 * currently valid XIDs in the array.
3548
 */
3549
3550
3551
/*
3552
 * Compress KnownAssignedXids by shifting valid data down to the start of the
3553
 * array, removing any gaps.
3554
 *
3555
 * A compression step is forced if "force" is true, otherwise we do it
3556
 * only if a heuristic indicates it's a good time to do it.
3557
 *
3558
 * Caller must hold ProcArrayLock in exclusive mode.
3559
 */
3560
static void
3561
KnownAssignedXidsCompress(bool force)
3562
0
{
3563
  /* use volatile pointer to prevent code rearrangement */
3564
0
  volatile ProcArrayStruct *pArray = procArray;
3565
0
  int     head,
3566
0
        tail;
3567
0
  int     compress_index;
3568
0
  int     i;
3569
3570
  /* no spinlock required since we hold ProcArrayLock exclusively */
3571
0
  head = pArray->headKnownAssignedXids;
3572
0
  tail = pArray->tailKnownAssignedXids;
3573
3574
0
  if (!force)
3575
0
  {
3576
    /*
3577
     * If we can choose how much to compress, use a heuristic to avoid
3578
     * compressing too often or not often enough.
3579
     *
3580
     * Heuristic is if we have a large enough current spread and less than
3581
     * 50% of the elements are currently in use, then compress. This
3582
     * should ensure we compress fairly infrequently. We could compress
3583
     * less often though the virtual array would spread out more and
3584
     * snapshots would become more expensive.
3585
     */
3586
0
    int     nelements = head - tail;
3587
3588
0
    if (nelements < 4 * PROCARRAY_MAXPROCS ||
3589
0
      nelements < 2 * pArray->numKnownAssignedXids)
3590
0
      return;
3591
0
  }
3592
3593
  /*
3594
   * We compress the array by reading the valid values from tail to head,
3595
   * re-aligning data to 0th element.
3596
   */
3597
0
  compress_index = 0;
3598
0
  for (i = tail; i < head; i++)
3599
0
  {
3600
0
    if (KnownAssignedXidsValid[i])
3601
0
    {
3602
0
      KnownAssignedXids[compress_index] = KnownAssignedXids[i];
3603
0
      KnownAssignedXidsValid[compress_index] = true;
3604
0
      compress_index++;
3605
0
    }
3606
0
  }
3607
3608
0
  pArray->tailKnownAssignedXids = 0;
3609
0
  pArray->headKnownAssignedXids = compress_index;
3610
0
}
3611
3612
/*
3613
 * Add xids into KnownAssignedXids at the head of the array.
3614
 *
3615
 * xids from from_xid to to_xid, inclusive, are added to the array.
3616
 *
3617
 * If exclusive_lock is true then caller already holds ProcArrayLock in
3618
 * exclusive mode, so we need no extra locking here.  Else caller holds no
3619
 * lock, so we need to be sure we maintain sufficient interlocks against
3620
 * concurrent readers.  (Only the startup process ever calls this, so no need
3621
 * to worry about concurrent writers.)
3622
 */
3623
static void
3624
KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
3625
           bool exclusive_lock)
3626
0
{
3627
  /* use volatile pointer to prevent code rearrangement */
3628
0
  volatile ProcArrayStruct *pArray = procArray;
3629
0
  TransactionId next_xid;
3630
0
  int     head,
3631
0
        tail;
3632
0
  int     nxids;
3633
0
  int     i;
3634
3635
0
  Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
3636
3637
  /*
3638
   * Calculate how many array slots we'll need.  Normally this is cheap; in
3639
   * the unusual case where the XIDs cross the wrap point, we do it the hard
3640
   * way.
3641
   */
3642
0
  if (to_xid >= from_xid)
3643
0
    nxids = to_xid - from_xid + 1;
3644
0
  else
3645
0
  {
3646
0
    nxids = 1;
3647
0
    next_xid = from_xid;
3648
0
    while (TransactionIdPrecedes(next_xid, to_xid))
3649
0
    {
3650
0
      nxids++;
3651
0
      TransactionIdAdvance(next_xid);
3652
0
    }
3653
0
  }
3654
3655
  /*
3656
   * Since only the startup process modifies the head/tail pointers, we
3657
   * don't need a lock to read them here.
3658
   */
3659
0
  head = pArray->headKnownAssignedXids;
3660
0
  tail = pArray->tailKnownAssignedXids;
3661
3662
0
  Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
3663
0
  Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
3664
3665
  /*
3666
   * Verify that insertions occur in TransactionId sequence.  Note that even
3667
   * if the last existing element is marked invalid, it must still have a
3668
   * correctly sequenced XID value.
3669
   */
3670
0
  if (head > tail &&
3671
0
    TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
3672
0
  {
3673
0
    KnownAssignedXidsDisplay(LOG);
3674
0
    elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
3675
0
  }
3676
3677
  /*
3678
   * If our xids won't fit in the remaining space, compress out free space
3679
   */
3680
0
  if (head + nxids > pArray->maxKnownAssignedXids)
3681
0
  {
3682
    /* must hold lock to compress */
3683
0
    if (!exclusive_lock)
3684
0
      LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3685
3686
0
    KnownAssignedXidsCompress(true);
3687
3688
0
    head = pArray->headKnownAssignedXids;
3689
    /* note: we no longer care about the tail pointer */
3690
3691
0
    if (!exclusive_lock)
3692
0
      LWLockRelease(ProcArrayLock);
3693
3694
    /*
3695
     * If it still won't fit then we're out of memory
3696
     */
3697
0
    if (head + nxids > pArray->maxKnownAssignedXids)
3698
0
      elog(ERROR, "too many KnownAssignedXids");
3699
0
  }
3700
3701
  /* Now we can insert the xids into the space starting at head */
3702
0
  next_xid = from_xid;
3703
0
  for (i = 0; i < nxids; i++)
3704
0
  {
3705
0
    KnownAssignedXids[head] = next_xid;
3706
0
    KnownAssignedXidsValid[head] = true;
3707
0
    TransactionIdAdvance(next_xid);
3708
0
    head++;
3709
0
  }
3710
3711
  /* Adjust count of number of valid entries */
3712
0
  pArray->numKnownAssignedXids += nxids;
3713
3714
  /*
3715
   * Now update the head pointer.  We use a spinlock to protect this
3716
   * pointer, not because the update is likely to be non-atomic, but to
3717
   * ensure that other processors see the above array updates before they
3718
   * see the head pointer change.
3719
   *
3720
   * If we're holding ProcArrayLock exclusively, there's no need to take the
3721
   * spinlock.
3722
   */
3723
0
  if (exclusive_lock)
3724
0
    pArray->headKnownAssignedXids = head;
3725
0
  else
3726
0
  {
3727
0
    SpinLockAcquire(&pArray->known_assigned_xids_lck);
3728
0
    pArray->headKnownAssignedXids = head;
3729
0
    SpinLockRelease(&pArray->known_assigned_xids_lck);
3730
0
  }
3731
0
}
3732
3733
/*
3734
 * KnownAssignedXidsSearch
3735
 *
3736
 * Searches KnownAssignedXids for a specific xid and optionally removes it.
3737
 * Returns true if it was found, false if not.
3738
 *
3739
 * Caller must hold ProcArrayLock in shared or exclusive mode.
3740
 * Exclusive lock must be held for remove = true.
3741
 */
3742
static bool
3743
KnownAssignedXidsSearch(TransactionId xid, bool remove)
3744
0
{
3745
  /* use volatile pointer to prevent code rearrangement */
3746
0
  volatile ProcArrayStruct *pArray = procArray;
3747
0
  int     first,
3748
0
        last;
3749
0
  int     head;
3750
0
  int     tail;
3751
0
  int     result_index = -1;
3752
3753
0
  if (remove)
3754
0
  {
3755
    /* we hold ProcArrayLock exclusively, so no need for spinlock */
3756
0
    tail = pArray->tailKnownAssignedXids;
3757
0
    head = pArray->headKnownAssignedXids;
3758
0
  }
3759
0
  else
3760
0
  {
3761
    /* take spinlock to ensure we see up-to-date array contents */
3762
0
    SpinLockAcquire(&pArray->known_assigned_xids_lck);
3763
0
    tail = pArray->tailKnownAssignedXids;
3764
0
    head = pArray->headKnownAssignedXids;
3765
0
    SpinLockRelease(&pArray->known_assigned_xids_lck);
3766
0
  }
3767
3768
  /*
3769
   * Standard binary search.  Note we can ignore the KnownAssignedXidsValid
3770
   * array here, since even invalid entries will contain sorted XIDs.
3771
   */
3772
0
  first = tail;
3773
0
  last = head - 1;
3774
0
  while (first <= last)
3775
0
  {
3776
0
    int     mid_index;
3777
0
    TransactionId mid_xid;
3778
3779
0
    mid_index = (first + last) / 2;
3780
0
    mid_xid = KnownAssignedXids[mid_index];
3781
3782
0
    if (xid == mid_xid)
3783
0
    {
3784
0
      result_index = mid_index;
3785
0
      break;
3786
0
    }
3787
0
    else if (TransactionIdPrecedes(xid, mid_xid))
3788
0
      last = mid_index - 1;
3789
0
    else
3790
0
      first = mid_index + 1;
3791
0
  }
3792
3793
0
  if (result_index < 0)
3794
0
    return false;     /* not in array */
3795
3796
0
  if (!KnownAssignedXidsValid[result_index])
3797
0
    return false;     /* in array, but invalid */
3798
3799
0
  if (remove)
3800
0
  {
3801
0
    KnownAssignedXidsValid[result_index] = false;
3802
3803
0
    pArray->numKnownAssignedXids--;
3804
0
    Assert(pArray->numKnownAssignedXids >= 0);
3805
3806
    /*
3807
     * If we're removing the tail element then advance tail pointer over
3808
     * any invalid elements.  This will speed future searches.
3809
     */
3810
0
    if (result_index == tail)
3811
0
    {
3812
0
      tail++;
3813
0
      while (tail < head && !KnownAssignedXidsValid[tail])
3814
0
        tail++;
3815
0
      if (tail >= head)
3816
0
      {
3817
        /* Array is empty, so we can reset both pointers */
3818
0
        pArray->headKnownAssignedXids = 0;
3819
0
        pArray->tailKnownAssignedXids = 0;
3820
0
      }
3821
0
      else
3822
0
      {
3823
0
        pArray->tailKnownAssignedXids = tail;
3824
0
      }
3825
0
    }
3826
0
  }
3827
3828
0
  return true;
3829
0
}
3830
3831
/*
3832
 * Is the specified XID present in KnownAssignedXids[]?
3833
 *
3834
 * Caller must hold ProcArrayLock in shared or exclusive mode.
3835
 */
3836
static bool
3837
KnownAssignedXidExists(TransactionId xid)
3838
0
{
3839
0
  Assert(TransactionIdIsValid(xid));
3840
3841
0
  return KnownAssignedXidsSearch(xid, false);
3842
0
}
3843
3844
/*
3845
 * Remove the specified XID from KnownAssignedXids[].
3846
 *
3847
 * Caller must hold ProcArrayLock in exclusive mode.
3848
 */
3849
static void
3850
KnownAssignedXidsRemove(TransactionId xid)
3851
0
{
3852
0
  Assert(TransactionIdIsValid(xid));
3853
3854
0
  elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
3855
3856
  /*
3857
   * Note: we cannot consider it an error to remove an XID that's not
3858
   * present.  We intentionally remove subxact IDs while processing
3859
   * XLOG_XACT_ASSIGNMENT, to avoid array overflow.  Then those XIDs will be
3860
   * removed again when the top-level xact commits or aborts.
3861
   *
3862
   * It might be possible to track such XIDs to distinguish this case from
3863
   * actual errors, but it would be complicated and probably not worth it.
3864
   * So, just ignore the search result.
3865
   */
3866
0
  (void) KnownAssignedXidsSearch(xid, true);
3867
0
}
3868
3869
/*
3870
 * KnownAssignedXidsRemoveTree
3871
 *    Remove xid (if it's not InvalidTransactionId) and all the subxids.
3872
 *
3873
 * Caller must hold ProcArrayLock in exclusive mode.
3874
 */
3875
static void
3876
KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
3877
              TransactionId *subxids)
3878
0
{
3879
0
  int     i;
3880
3881
0
  if (TransactionIdIsValid(xid))
3882
0
    KnownAssignedXidsRemove(xid);
3883
3884
0
  for (i = 0; i < nsubxids; i++)
3885
0
    KnownAssignedXidsRemove(subxids[i]);
3886
3887
  /* Opportunistically compress the array */
3888
0
  KnownAssignedXidsCompress(false);
3889
0
}
3890
3891
/*
3892
 * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
3893
 * then clear the whole table.
3894
 *
3895
 * Caller must hold ProcArrayLock in exclusive mode.
3896
 */
3897
static void
3898
KnownAssignedXidsRemovePreceding(TransactionId removeXid)
3899
0
{
3900
  /* use volatile pointer to prevent code rearrangement */
3901
0
  volatile ProcArrayStruct *pArray = procArray;
3902
0
  int     count = 0;
3903
0
  int     head,
3904
0
        tail,
3905
0
        i;
3906
3907
0
  if (!TransactionIdIsValid(removeXid))
3908
0
  {
3909
0
    elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
3910
0
    pArray->numKnownAssignedXids = 0;
3911
0
    pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
3912
0
    return;
3913
0
  }
3914
3915
0
  elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
3916
3917
  /*
3918
   * Mark entries invalid starting at the tail.  Since array is sorted, we
3919
   * can stop as soon as we reach an entry >= removeXid.
3920
   */
3921
0
  tail = pArray->tailKnownAssignedXids;
3922
0
  head = pArray->headKnownAssignedXids;
3923
3924
0
  for (i = tail; i < head; i++)
3925
0
  {
3926
0
    if (KnownAssignedXidsValid[i])
3927
0
    {
3928
0
      TransactionId knownXid = KnownAssignedXids[i];
3929
3930
0
      if (TransactionIdFollowsOrEquals(knownXid, removeXid))
3931
0
        break;
3932
3933
0
      if (!StandbyTransactionIdIsPrepared(knownXid))
3934
0
      {
3935
0
        KnownAssignedXidsValid[i] = false;
3936
0
        count++;
3937
0
      }
3938
0
    }
3939
0
  }
3940
3941
0
  pArray->numKnownAssignedXids -= count;
3942
0
  Assert(pArray->numKnownAssignedXids >= 0);
3943
3944
  /*
3945
   * Advance the tail pointer if we've marked the tail item invalid.
3946
   */
3947
0
  for (i = tail; i < head; i++)
3948
0
  {
3949
0
    if (KnownAssignedXidsValid[i])
3950
0
      break;
3951
0
  }
3952
0
  if (i >= head)
3953
0
  {
3954
    /* Array is empty, so we can reset both pointers */
3955
0
    pArray->headKnownAssignedXids = 0;
3956
0
    pArray->tailKnownAssignedXids = 0;
3957
0
  }
3958
0
  else
3959
0
  {
3960
0
    pArray->tailKnownAssignedXids = i;
3961
0
  }
3962
3963
  /* Opportunistically compress the array */
3964
0
  KnownAssignedXidsCompress(false);
3965
0
}
3966
3967
/*
3968
 * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
3969
 * We filter out anything >= xmax.
3970
 *
3971
 * Returns the number of XIDs stored into xarray[].  Caller is responsible
3972
 * that array is large enough.
3973
 *
3974
 * Caller must hold ProcArrayLock in (at least) shared mode.
3975
 */
3976
static int
3977
KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
3978
0
{
3979
0
  TransactionId xtmp = InvalidTransactionId;
3980
3981
0
  return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
3982
0
}
3983
3984
/*
3985
 * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
3986
 * we reduce *xmin to the lowest xid value seen if not already lower.
3987
 *
3988
 * Caller must hold ProcArrayLock in (at least) shared mode.
3989
 */
3990
static int
3991
KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
3992
                 TransactionId xmax)
3993
0
{
3994
0
  int     count = 0;
3995
0
  int     head,
3996
0
        tail;
3997
0
  int     i;
3998
3999
  /*
4000
   * Fetch head just once, since it may change while we loop. We can stop
4001
   * once we reach the initially seen head, since we are certain that an xid
4002
   * cannot enter and then leave the array while we hold ProcArrayLock.  We
4003
   * might miss newly-added xids, but they should be >= xmax so irrelevant
4004
   * anyway.
4005
   *
4006
   * Must take spinlock to ensure we see up-to-date array contents.
4007
   */
4008
0
  SpinLockAcquire(&procArray->known_assigned_xids_lck);
4009
0
  tail = procArray->tailKnownAssignedXids;
4010
0
  head = procArray->headKnownAssignedXids;
4011
0
  SpinLockRelease(&procArray->known_assigned_xids_lck);
4012
4013
0
  for (i = tail; i < head; i++)
4014
0
  {
4015
    /* Skip any gaps in the array */
4016
0
    if (KnownAssignedXidsValid[i])
4017
0
    {
4018
0
      TransactionId knownXid = KnownAssignedXids[i];
4019
4020
      /*
4021
       * Update xmin if required.  Only the first XID need be checked,
4022
       * since the array is sorted.
4023
       */
4024
0
      if (count == 0 &&
4025
0
        TransactionIdPrecedes(knownXid, *xmin))
4026
0
        *xmin = knownXid;
4027
4028
      /*
4029
       * Filter out anything >= xmax, again relying on sorted property
4030
       * of array.
4031
       */
4032
0
      if (TransactionIdIsValid(xmax) &&
4033
0
        TransactionIdFollowsOrEquals(knownXid, xmax))
4034
0
        break;
4035
4036
      /* Add knownXid into output array */
4037
0
      xarray[count++] = knownXid;
4038
0
    }
4039
0
  }
4040
4041
0
  return count;
4042
0
}
4043
4044
/*
4045
 * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
4046
 * if nothing there.
4047
 */
4048
static TransactionId
4049
KnownAssignedXidsGetOldestXmin(void)
4050
0
{
4051
0
  int     head,
4052
0
        tail;
4053
0
  int     i;
4054
4055
  /*
4056
   * Fetch head just once, since it may change while we loop.
4057
   */
4058
0
  SpinLockAcquire(&procArray->known_assigned_xids_lck);
4059
0
  tail = procArray->tailKnownAssignedXids;
4060
0
  head = procArray->headKnownAssignedXids;
4061
0
  SpinLockRelease(&procArray->known_assigned_xids_lck);
4062
4063
0
  for (i = tail; i < head; i++)
4064
0
  {
4065
    /* Skip any gaps in the array */
4066
0
    if (KnownAssignedXidsValid[i])
4067
0
      return KnownAssignedXids[i];
4068
0
  }
4069
4070
0
  return InvalidTransactionId;
4071
0
}
4072
4073
/*
4074
 * Display KnownAssignedXids to provide debug trail
4075
 *
4076
 * Currently this is only called within startup process, so we need no
4077
 * special locking.
4078
 *
4079
 * Note this is pretty expensive, and much of the expense will be incurred
4080
 * even if the elog message will get discarded.  It's not currently called
4081
 * in any performance-critical places, however, so no need to be tenser.
4082
 */
4083
static void
4084
KnownAssignedXidsDisplay(int trace_level)
4085
0
{
4086
  /* use volatile pointer to prevent code rearrangement */
4087
0
  volatile ProcArrayStruct *pArray = procArray;
4088
0
  StringInfoData buf;
4089
0
  int     head,
4090
0
        tail,
4091
0
        i;
4092
0
  int     nxids = 0;
4093
4094
0
  tail = pArray->tailKnownAssignedXids;
4095
0
  head = pArray->headKnownAssignedXids;
4096
4097
0
  initStringInfo(&buf);
4098
4099
0
  for (i = tail; i < head; i++)
4100
0
  {
4101
0
    if (KnownAssignedXidsValid[i])
4102
0
    {
4103
0
      nxids++;
4104
0
      appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
4105
0
    }
4106
0
  }
4107
4108
0
  elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
4109
0
     nxids,
4110
0
     pArray->numKnownAssignedXids,
4111
0
     pArray->tailKnownAssignedXids,
4112
0
     pArray->headKnownAssignedXids,
4113
0
     buf.data);
4114
4115
0
  pfree(buf.data);
4116
0
}
4117
4118
/*
4119
 * KnownAssignedXidsReset
4120
 *    Resets KnownAssignedXids to be empty
4121
 */
4122
static void
4123
KnownAssignedXidsReset(void)
4124
0
{
4125
  /* use volatile pointer to prevent code rearrangement */
4126
0
  volatile ProcArrayStruct *pArray = procArray;
4127
4128
0
  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4129
4130
0
  pArray->numKnownAssignedXids = 0;
4131
0
  pArray->tailKnownAssignedXids = 0;
4132
0
  pArray->headKnownAssignedXids = 0;
4133
4134
0
  LWLockRelease(ProcArrayLock);
4135
0
}