/Users/deen/code/yugabyte-db/src/postgres/src/backend/storage/ipc/procarray.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * procarray.c |
4 | | * POSTGRES process array code. |
5 | | * |
6 | | * |
7 | | * This module maintains arrays of the PGPROC and PGXACT structures for all |
8 | | * active backends. Although there are several uses for this, the principal |
9 | | * one is as a means of determining the set of currently running transactions. |
10 | | * |
11 | | * Because of various subtle race conditions it is critical that a backend |
12 | | * hold the correct locks while setting or clearing its MyPgXact->xid field. |
13 | | * See notes in src/backend/access/transam/README. |
14 | | * |
15 | | * The process arrays now also include structures representing prepared |
16 | | * transactions. The xid and subxids fields of these are valid, as are the |
17 | | * myProcLocks lists. They can be distinguished from regular backend PGPROCs |
18 | | * at need by checking for pid == 0. |
19 | | * |
20 | | * During hot standby, we also keep a list of XIDs representing transactions |
21 | | * that are known to be running in the master (or more precisely, were running |
22 | | * as of the current point in the WAL stream). This list is kept in the |
23 | | * KnownAssignedXids array, and is updated by watching the sequence of |
24 | | * arriving XIDs. This is necessary because if we leave those XIDs out of |
25 | | * snapshots taken for standby queries, then they will appear to be already |
26 | | * complete, leading to MVCC failures. Note that in hot standby, the PGPROC |
27 | | * array represents standby processes, which by definition are not running |
28 | | * transactions that have XIDs. |
29 | | * |
30 | | * It is perhaps possible for a backend on the master to terminate without |
31 | | * writing an abort record for its transaction. While that shouldn't really |
32 | | * happen, it would tie up KnownAssignedXids indefinitely, so we protect |
33 | | * ourselves by pruning the array when a valid list of running XIDs arrives. |
34 | | * |
35 | | * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group |
36 | | * Portions Copyright (c) 1994, Regents of the University of California |
37 | | * |
38 | | * |
39 | | * IDENTIFICATION |
40 | | * src/backend/storage/ipc/procarray.c |
41 | | * |
42 | | *------------------------------------------------------------------------- |
43 | | */ |
44 | | #include "postgres.h" |
45 | | |
46 | | #include <signal.h> |
47 | | |
48 | | #include "access/clog.h" |
49 | | #include "access/subtrans.h" |
50 | | #include "access/transam.h" |
51 | | #include "access/twophase.h" |
52 | | #include "access/xact.h" |
53 | | #include "access/xlog.h" |
54 | | #include "catalog/catalog.h" |
55 | | #include "catalog/pg_authid.h" |
56 | | #include "commands/dbcommands.h" |
57 | | #include "miscadmin.h" |
58 | | #include "pgstat.h" |
59 | | #include "storage/proc.h" |
60 | | #include "storage/procarray.h" |
61 | | #include "storage/spin.h" |
62 | | #include "utils/builtins.h" |
63 | | #include "utils/rel.h" |
64 | | #include "utils/snapmgr.h" |
65 | | |
66 | | #include "pg_yb_utils.h" |
67 | | |
/*
 * Our shared memory area: one instance lives in shared memory and is
 * attached to by every backend in CreateSharedProcArray().
 */
typedef struct ProcArrayStruct
{
	int			numProcs;		/* number of valid procs entries */
	int			maxProcs;		/* allocated size of procs array */

	/*
	 * Known assigned XIDs handling (used only during Hot Standby; see the
	 * KnownAssignedXids* routines declared below).  The array is managed as
	 * a circular buffer via the tail/head indexes.
	 */
	int			maxKnownAssignedXids;	/* allocated size of array */
	int			numKnownAssignedXids;	/* current # of valid entries */
	int			tailKnownAssignedXids;	/* index of oldest valid element */
	int			headKnownAssignedXids;	/* index of newest element, + 1 */
	slock_t		known_assigned_xids_lck;	/* protects head/tail pointers */

	/*
	 * Highest subxid that has been removed from KnownAssignedXids array to
	 * prevent overflow; or InvalidTransactionId if none.  We track this for
	 * similar reasons to tracking overflowing cached subxids in PGXACT
	 * entries.  Must hold exclusive ProcArrayLock to change this, and shared
	 * lock to read it.
	 */
	TransactionId lastOverflowedXid;

	/* oldest xmin of any replication slot */
	TransactionId replication_slot_xmin;
	/* oldest catalog xmin of any replication slot */
	TransactionId replication_slot_catalog_xmin;

	/*
	 * indexes into allPgXact[], has PROCARRAY_MAXPROCS entries; kept sorted
	 * by pgprocno (see ProcArrayAdd) for cache locality during traversal
	 */
	int			pgprocnos[FLEXIBLE_ARRAY_MEMBER];
} ProcArrayStruct;
100 | | |
/* Pointer to the shared ProcArrayStruct; set in CreateSharedProcArray() */
static ProcArrayStruct *procArray;

/* Local copies of ProcGlobal->allProcs / allPgXact, cached at attach time */
static PGPROC *allProcs;
static PGXACT *allPgXact;

/*
 * Bookkeeping for tracking emulated transactions in recovery
 * (KnownAssignedXids/KnownAssignedXidsValid live in shared memory; see
 * CreateSharedProcArray)
 */
static TransactionId *KnownAssignedXids;
static bool *KnownAssignedXidsValid;
static TransactionId latestObservedXid = InvalidTransactionId;

/*
 * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
 * the highest xid that might still be running that we don't have in
 * KnownAssignedXids.
 */
static TransactionId standbySnapshotPendingXmin;

#ifdef XIDCACHE_DEBUG

/* counters for XidCache measurement */
static long xc_by_recent_xmin = 0;
static long xc_by_known_xact = 0;
static long xc_by_my_xact = 0;
static long xc_by_latest_xid = 0;
static long xc_by_main_xid = 0;
static long xc_by_child_xid = 0;
static long xc_by_known_assigned = 0;
static long xc_no_overflow = 0;
static long xc_slow_answer = 0;

#define xc_by_recent_xmin_inc()		(xc_by_recent_xmin++)
#define xc_by_known_xact_inc()		(xc_by_known_xact++)
#define xc_by_my_xact_inc()			(xc_by_my_xact++)
#define xc_by_latest_xid_inc()		(xc_by_latest_xid++)
#define xc_by_main_xid_inc()		(xc_by_main_xid++)
#define xc_by_child_xid_inc()		(xc_by_child_xid++)
#define xc_by_known_assigned_inc()	(xc_by_known_assigned++)
#define xc_no_overflow_inc()		(xc_no_overflow++)
#define xc_slow_answer_inc()		(xc_slow_answer++)

static void DisplayXidCache(void);
#else							/* !XIDCACHE_DEBUG */

/* In non-debug builds the counters compile away to no-ops */
#define xc_by_recent_xmin_inc()		((void) 0)
#define xc_by_known_xact_inc()		((void) 0)
#define xc_by_my_xact_inc()			((void) 0)
#define xc_by_latest_xid_inc()		((void) 0)
#define xc_by_main_xid_inc()		((void) 0)
#define xc_by_child_xid_inc()		((void) 0)
#define xc_by_known_assigned_inc()	((void) 0)
#define xc_no_overflow_inc()		((void) 0)
#define xc_slow_answer_inc()		((void) 0)
#endif							/* XIDCACHE_DEBUG */

/* Primitives for KnownAssignedXids array handling for standby */
static void KnownAssignedXidsCompress(bool force);
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
					 bool exclusive_lock);
static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
static bool KnownAssignedXidExists(TransactionId xid);
static void KnownAssignedXidsRemove(TransactionId xid);
static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
							TransactionId *subxids);
static void KnownAssignedXidsRemovePreceding(TransactionId xid);
static int	KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray,
							   TransactionId *xmin,
							   TransactionId xmax);
static TransactionId KnownAssignedXidsGetOldestXmin(void);
static void KnownAssignedXidsDisplay(int trace_level);
static void KnownAssignedXidsReset(void);
static inline void ProcArrayEndTransactionInternal(PGPROC *proc,
								PGXACT *pgxact, TransactionId latestXid);
static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
177 | | |
178 | | /* |
179 | | * Report shared-memory space needed by CreateSharedProcArray. |
180 | | */ |
181 | | Size |
182 | | ProcArrayShmemSize(void) |
183 | 3.61k | { |
184 | 3.61k | Size size; |
185 | | |
186 | | /* Size of the ProcArray structure itself */ |
187 | 32.2k | #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts) |
188 | | |
189 | 3.61k | size = offsetof(ProcArrayStruct, pgprocnos); |
190 | 3.61k | size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS)); |
191 | | |
192 | | /* |
193 | | * During Hot Standby processing we have a data structure called |
194 | | * KnownAssignedXids, created in shared memory. Local data structures are |
195 | | * also created in various backends during GetSnapshotData(), |
196 | | * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the |
197 | | * main structures created in those functions must be identically sized, |
198 | | * since we may at times copy the whole of the data structures around. We |
199 | | * refer to this size as TOTAL_MAX_CACHED_SUBXIDS. |
200 | | * |
201 | | * Ideally we'd only create this structure if we were actually doing hot |
202 | | * standby in the current run, but we don't know that yet at the time |
203 | | * shared memory is being set up. |
204 | | */ |
205 | 3.61k | #define TOTAL_MAX_CACHED_SUBXIDS \ |
206 | 21.3k | ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS) |
207 | | |
208 | 3.61k | if (EnableHotStandby) |
209 | 3.61k | { |
210 | 3.61k | size = add_size(size, |
211 | 3.61k | mul_size(sizeof(TransactionId), |
212 | 3.61k | TOTAL_MAX_CACHED_SUBXIDS)); |
213 | 3.61k | size = add_size(size, |
214 | 3.61k | mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS)); |
215 | 3.61k | } |
216 | | |
217 | 3.61k | return size; |
218 | 3.61k | } |
219 | | |
220 | | /* |
221 | | * Initialize the shared PGPROC array during postmaster startup. |
222 | | */ |
223 | | void |
224 | | CreateSharedProcArray(void) |
225 | 3.61k | { |
226 | 3.61k | bool found; |
227 | | |
228 | | /* Create or attach to the ProcArray shared structure */ |
229 | 3.61k | procArray = (ProcArrayStruct *) |
230 | 3.61k | ShmemInitStruct("Proc Array", |
231 | 3.61k | add_size(offsetof(ProcArrayStruct, pgprocnos), |
232 | 3.61k | mul_size(sizeof(int), |
233 | 3.61k | PROCARRAY_MAXPROCS)), |
234 | 3.61k | &found); |
235 | | |
236 | 3.61k | if (!found) |
237 | 3.61k | { |
238 | | /* |
239 | | * We're the first - initialize. |
240 | | */ |
241 | 3.61k | procArray->numProcs = 0; |
242 | 3.61k | procArray->maxProcs = PROCARRAY_MAXPROCS; |
243 | 3.61k | procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS; |
244 | 3.61k | procArray->numKnownAssignedXids = 0; |
245 | 3.61k | procArray->tailKnownAssignedXids = 0; |
246 | 3.61k | procArray->headKnownAssignedXids = 0; |
247 | 3.61k | SpinLockInit(&procArray->known_assigned_xids_lck); |
248 | 3.61k | procArray->lastOverflowedXid = InvalidTransactionId; |
249 | 3.61k | procArray->replication_slot_xmin = InvalidTransactionId; |
250 | 3.61k | procArray->replication_slot_catalog_xmin = InvalidTransactionId; |
251 | 3.61k | } |
252 | | |
253 | 3.61k | allProcs = ProcGlobal->allProcs; |
254 | 3.61k | allPgXact = ProcGlobal->allPgXact; |
255 | | |
256 | | /* Create or attach to the KnownAssignedXids arrays too, if needed */ |
257 | 3.61k | if (EnableHotStandby) |
258 | 3.61k | { |
259 | 3.61k | KnownAssignedXids = (TransactionId *) |
260 | 3.61k | ShmemInitStruct("KnownAssignedXids", |
261 | 3.61k | mul_size(sizeof(TransactionId), |
262 | 3.61k | TOTAL_MAX_CACHED_SUBXIDS), |
263 | 3.61k | &found); |
264 | 3.61k | KnownAssignedXidsValid = (bool *) |
265 | 3.61k | ShmemInitStruct("KnownAssignedXidsValid", |
266 | 3.61k | mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS), |
267 | 3.61k | &found); |
268 | 3.61k | } |
269 | | |
270 | | /* Register and initialize fields of ProcLWLockTranche */ |
271 | 3.61k | LWLockRegisterTranche(LWTRANCHE_PROC, "proc"); |
272 | 3.61k | } |
273 | | |
274 | | /* |
275 | | * Add the specified PGPROC to the shared array. |
276 | | */ |
277 | | void |
278 | | ProcArrayAdd(PGPROC *proc) |
279 | 2.55k | { |
280 | 2.55k | ProcArrayStruct *arrayP = procArray; |
281 | 2.55k | int index; |
282 | | |
283 | 2.55k | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
284 | | |
285 | 2.55k | if (arrayP->numProcs >= arrayP->maxProcs) |
286 | 0 | { |
287 | | /* |
288 | | * Oops, no room. (This really shouldn't happen, since there is a |
289 | | * fixed supply of PGPROC structs too, and so we should have failed |
290 | | * earlier.) |
291 | | */ |
292 | 0 | LWLockRelease(ProcArrayLock); |
293 | 0 | ereport(FATAL, |
294 | 0 | (errcode(ERRCODE_TOO_MANY_CONNECTIONS), |
295 | 0 | errmsg("sorry, too many clients already"))); |
296 | 0 | } |
297 | | |
298 | | /* |
299 | | * Keep the procs array sorted by (PGPROC *) so that we can utilize |
300 | | * locality of references much better. This is useful while traversing the |
301 | | * ProcArray because there is an increased likelihood of finding the next |
302 | | * PGPROC structure in the cache. |
303 | | * |
304 | | * Since the occurrence of adding/removing a proc is much lower than the |
305 | | * access to the ProcArray itself, the overhead should be marginal |
306 | | */ |
307 | 2.64k | for (index = 0; index < arrayP->numProcs; index++) |
308 | 888 | { |
309 | | /* |
310 | | * If we are the first PGPROC or if we have found our right position |
311 | | * in the array, break |
312 | | */ |
313 | 888 | if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno)) |
314 | 798 | break; |
315 | 888 | } |
316 | | |
317 | 2.55k | memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index], |
318 | 2.55k | (arrayP->numProcs - index) * sizeof(int)); |
319 | 2.55k | arrayP->pgprocnos[index] = proc->pgprocno; |
320 | 2.55k | arrayP->numProcs++; |
321 | | |
322 | 2.55k | LWLockRelease(ProcArrayLock); |
323 | 2.55k | } |
324 | | |
325 | | /* |
326 | | * Remove the specified PGPROC from the shared array. |
327 | | * |
328 | | * When latestXid is a valid XID, we are removing a live 2PC gxact from the |
329 | | * array, and thus causing it to appear as "not running" anymore. In this |
330 | | * case we must advance latestCompletedXid. (This is essentially the same |
331 | | * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take |
332 | | * the ProcArrayLock only once, and don't damage the content of the PGPROC; |
333 | | * twophase.c depends on the latter.) |
334 | | */ |
335 | | void |
336 | | ProcArrayRemove(PGPROC *proc, TransactionId latestXid) |
337 | 2.55k | { |
338 | 2.55k | ProcArrayStruct *arrayP = procArray; |
339 | 2.55k | int index; |
340 | | |
341 | | #ifdef XIDCACHE_DEBUG |
342 | | /* dump stats at backend shutdown, but not prepared-xact end */ |
343 | | if (proc->pid != 0) |
344 | | DisplayXidCache(); |
345 | | #endif |
346 | | |
347 | 2.55k | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
348 | | |
349 | | /* |
350 | | * Postgres transaction related code-paths are disabled for YB. |
351 | | */ |
352 | | |
353 | 2.55k | if (!IsYugaByteEnabled()) { |
354 | 904 | if (TransactionIdIsValid(latestXid)) |
355 | 0 | { |
356 | 0 | Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid)); |
357 | | |
358 | | /* Advance global latestCompletedXid while holding the lock */ |
359 | 0 | if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, |
360 | 0 | latestXid)) |
361 | 0 | ShmemVariableCache->latestCompletedXid = latestXid; |
362 | 0 | } |
363 | 904 | else |
364 | 904 | { |
365 | | /* Shouldn't be trying to remove a live transaction here */ |
366 | 904 | Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid)); |
367 | 904 | } |
368 | 904 | } |
369 | | |
370 | 4.05k | for (index = 0; index < arrayP->numProcs; index++) |
371 | 4.04k | { |
372 | 4.04k | if (arrayP->pgprocnos[index] == proc->pgprocno) |
373 | 2.55k | { |
374 | | /* Keep the PGPROC array sorted. See notes above */ |
375 | 2.55k | memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1], |
376 | 2.55k | (arrayP->numProcs - index - 1) * sizeof(int)); |
377 | 2.55k | arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */ |
378 | 2.55k | arrayP->numProcs--; |
379 | 2.55k | LWLockRelease(ProcArrayLock); |
380 | 2.55k | return; |
381 | 2.55k | } |
382 | 4.04k | } |
383 | | |
384 | | /* Oops */ |
385 | 1 | LWLockRelease(ProcArrayLock); |
386 | | |
387 | 1 | elog(LOG, "failed to find proc %p in ProcArray", proc); |
388 | 1 | } |
389 | | |
390 | | |
/*
 * ProcArrayEndTransaction -- mark a transaction as no longer running
 *
 * This is used interchangeably for commit and abort cases.  The transaction
 * commit/abort must already be reported to WAL and pg_xact.
 *
 * proc is currently always MyProc, but we pass it explicitly for flexibility.
 * latestXid is the latest Xid among the transaction's main XID and
 * subtransactions, or InvalidTransactionId if it has no XID.  (We must ask
 * the caller to pass latestXid, instead of computing it from the PGPROC's
 * contents, because the subxid information in the PGPROC might be
 * incomplete.)
 */
void
ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
{
	PGXACT	   *pgxact = &allPgXact[proc->pgprocno];

	if (TransactionIdIsValid(latestXid))
	{
		/*
		 * We must lock ProcArrayLock while clearing our advertised XID, so
		 * that we do not exit the set of "running" transactions while someone
		 * else is taking a snapshot.  See discussion in
		 * src/backend/access/transam/README.
		 */
		Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));

		/*
		 * If we can immediately acquire ProcArrayLock, we clear our own XID
		 * and release the lock.  If not, use group XID clearing to improve
		 * efficiency.
		 *
		 * YB: the IsCurrentTxnWithPGRel() guards skip the actual XID clearing
		 * unless the transaction touched a PostgreSQL-managed relation --
		 * NOTE(review): confirm exact semantics of IsCurrentTxnWithPGRel()
		 * against pg_yb_utils.
		 */
		if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
		{
			if (IsCurrentTxnWithPGRel())
				ProcArrayEndTransactionInternal(proc, pgxact, latestXid);
			LWLockRelease(ProcArrayLock);
		}
		else if (IsCurrentTxnWithPGRel())
			ProcArrayGroupClearXid(proc, latestXid);
	}
	else
	{
		/*
		 * If we have no XID, we don't need to lock, since we won't affect
		 * anyone else's calculation of a snapshot.  We might change their
		 * estimate of global xmin, but that's OK.
		 */
		Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));

		proc->lxid = InvalidLocalTransactionId;
		pgxact->xmin = InvalidTransactionId;
		/* must be cleared with xid/xmin: */
		pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
		pgxact->delayChkpt = false; /* be sure this is cleared in abort */
		proc->recoveryConflictPending = false;

		/* With no XID there can be no cached subxids either. */
		Assert(pgxact->nxids == 0);
		Assert(pgxact->overflowed == false);
	}
}
453 | | |
/*
 * Mark a write transaction as no longer running.
 *
 * Clears the xid/xmin advertisement and the cached-subxid state in the
 * shared PGXACT, resets per-proc transaction fields, and advances the
 * global latestCompletedXid if our latestXid is newer.
 *
 * We don't do any locking here; caller must hold ProcArrayLock exclusively
 * (see ProcArrayEndTransaction / ProcArrayGroupClearXid).
 */
static inline void
ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
								TransactionId latestXid)
{
	pgxact->xid = InvalidTransactionId;
	proc->lxid = InvalidLocalTransactionId;
	pgxact->xmin = InvalidTransactionId;
	/* must be cleared with xid/xmin: */
	pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
	pgxact->delayChkpt = false; /* be sure this is cleared in abort */
	proc->recoveryConflictPending = false;

	/* Clear the subtransaction-XID cache too while holding the lock */
	pgxact->nxids = 0;
	pgxact->overflowed = false;

	/* Also advance global latestCompletedXid while holding the lock */
	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
							  latestXid))
		ShmemVariableCache->latestCompletedXid = latestXid;
}
480 | | |
/*
 * ProcArrayGroupClearXid -- group XID clearing
 *
 * When we cannot immediately acquire ProcArrayLock in exclusive mode at
 * commit time, add ourselves to a list of processes that need their XIDs
 * cleared.  The first process to add itself to the list will acquire
 * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
 * on behalf of all group members.  This avoids a great deal of contention
 * around ProcArrayLock when many processes are trying to commit at once,
 * since the lock need not be repeatedly handed off from one committing
 * process to the next.
 *
 * The pending-member list is a lock-free stack threaded through
 * procArrayGroupNext, with procArrayGroupFirst as the head; pushes and the
 * whole-list pop below use compare-and-exchange loops.
 */
static void
ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
{
	volatile PROC_HDR *procglobal = ProcGlobal;
	uint32		nextidx;
	uint32		wakeidx;

	/* We should definitely have an XID to clear. */
	Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));

	/* Add ourselves to the list of processes needing a group XID clear. */
	proc->procArrayGroupMember = true;
	proc->procArrayGroupMemberXid = latestXid;
	while (true)
	{
		/* CAS-push: link to current head, then try to become the head. */
		nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
		pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx);

		if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst,
										   &nextidx,
										   (uint32) proc->pgprocno))
			break;
	}

	/*
	 * If the list was not empty, the leader will clear our XID.  It is
	 * impossible to have followers without a leader because the first process
	 * that has added itself to the list will always have nextidx as
	 * INVALID_PGPROCNO.
	 */
	if (nextidx != INVALID_PGPROCNO)
	{
		int			extraWaits = 0;

		/* Sleep until the leader clears our XID. */
		pgstat_report_wait_start(WAIT_EVENT_PROCARRAY_GROUP_UPDATE);
		for (;;)
		{
			/* acts as a read barrier */
			PGSemaphoreLock(proc->sem);
			if (!proc->procArrayGroupMember)
				break;
			/* spurious wakeup; remember it so we can rebalance the sema */
			extraWaits++;
		}
		pgstat_report_wait_end();

		Assert(pg_atomic_read_u32(&proc->procArrayGroupNext) == INVALID_PGPROCNO);

		/* Fix semaphore count for any absorbed wakeups */
		while (extraWaits-- > 0)
			PGSemaphoreUnlock(proc->sem);
		return;
	}

	/* We are the leader.  Acquire the lock on behalf of everyone. */
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

	/*
	 * Now that we've got the lock, clear the list of processes waiting for
	 * group XID clearing, saving a pointer to the head of the list.  Trying
	 * to pop elements one at a time could lead to an ABA problem.
	 */
	while (true)
	{
		nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
		if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst,
										   &nextidx,
										   INVALID_PGPROCNO))
			break;
	}

	/* Remember head of list so we can perform wakeups after dropping lock. */
	wakeidx = nextidx;

	/* Walk the list and clear all XIDs. */
	while (nextidx != INVALID_PGPROCNO)
	{
		PGPROC	   *proc = &allProcs[nextidx];
		PGXACT	   *pgxact = &allPgXact[nextidx];

		ProcArrayEndTransactionInternal(proc, pgxact, proc->procArrayGroupMemberXid);

		/* Move to next proc in list. */
		nextidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
	}

	/* We're done with the lock now. */
	LWLockRelease(ProcArrayLock);

	/*
	 * Now that we've released the lock, go back and wake everybody up.  We
	 * don't do this under the lock so as to keep lock hold times to a
	 * minimum.  The system calls we need to perform to wake other processes
	 * up are probably much slower than the simple memory writes we did while
	 * holding the lock.
	 */
	while (wakeidx != INVALID_PGPROCNO)
	{
		PGPROC	   *proc = &allProcs[wakeidx];

		wakeidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
		pg_atomic_write_u32(&proc->procArrayGroupNext, INVALID_PGPROCNO);

		/* ensure all previous writes are visible before follower continues. */
		pg_write_barrier();

		proc->procArrayGroupMember = false;

		if (proc != MyProc)
			PGSemaphoreUnlock(proc->sem);
	}
}
605 | | |
606 | | /* |
607 | | * ProcArrayClearTransaction -- clear the transaction fields |
608 | | * |
609 | | * This is used after successfully preparing a 2-phase transaction. We are |
610 | | * not actually reporting the transaction's XID as no longer running --- it |
611 | | * will still appear as running because the 2PC's gxact is in the ProcArray |
612 | | * too. We just have to clear out our own PGXACT. |
613 | | */ |
614 | | void |
615 | | ProcArrayClearTransaction(PGPROC *proc) |
616 | 0 | { |
617 | 0 | PGXACT *pgxact = &allPgXact[proc->pgprocno]; |
618 | |
|
619 | 0 | if (IsYugaByteEnabled()) { |
620 | 0 | return; |
621 | 0 | } |
622 | | |
623 | | /* |
624 | | * We can skip locking ProcArrayLock here, because this action does not |
625 | | * actually change anyone's view of the set of running XIDs: our entry is |
626 | | * duplicate with the gxact that has already been inserted into the |
627 | | * ProcArray. |
628 | | */ |
629 | 0 | pgxact->xid = InvalidTransactionId; |
630 | 0 | proc->lxid = InvalidLocalTransactionId; |
631 | 0 | pgxact->xmin = InvalidTransactionId; |
632 | 0 | proc->recoveryConflictPending = false; |
633 | | |
634 | | /* redundant, but just in case */ |
635 | 0 | pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; |
636 | 0 | pgxact->delayChkpt = false; |
637 | | |
638 | | /* Clear the subtransaction-XID cache too */ |
639 | 0 | pgxact->nxids = 0; |
640 | 0 | pgxact->overflowed = false; |
641 | 0 | } |
642 | | |
643 | | /* |
644 | | * ProcArrayInitRecovery -- initialize recovery xid mgmt environment |
645 | | * |
646 | | * Remember up to where the startup process initialized the CLOG and subtrans |
647 | | * so we can ensure it's initialized gaplessly up to the point where necessary |
648 | | * while in recovery. |
649 | | */ |
650 | | void |
651 | | ProcArrayInitRecovery(TransactionId initializedUptoXID) |
652 | 0 | { |
653 | 0 | Assert(standbyState == STANDBY_INITIALIZED); |
654 | 0 | Assert(TransactionIdIsNormal(initializedUptoXID)); |
655 | |
|
656 | 0 | if (IsYugaByteEnabled()) { |
657 | 0 | return; |
658 | 0 | } |
659 | | |
660 | | /* |
661 | | * we set latestObservedXid to the xid SUBTRANS has been initialized up |
662 | | * to, so we can extend it from that point onwards in |
663 | | * RecordKnownAssignedTransactionIds, and when we get consistent in |
664 | | * ProcArrayApplyRecoveryInfo(). |
665 | | */ |
666 | 0 | latestObservedXid = initializedUptoXID; |
667 | 0 | TransactionIdRetreat(latestObservedXid); |
668 | 0 | } |
669 | | |
670 | | /* |
671 | | * ProcArrayApplyRecoveryInfo -- apply recovery info about xids |
672 | | * |
673 | | * Takes us through 3 states: Initialized, Pending and Ready. |
674 | | * Normal case is to go all the way to Ready straight away, though there |
675 | | * are atypical cases where we need to take it in steps. |
676 | | * |
677 | | * Use the data about running transactions on master to create the initial |
678 | | * state of KnownAssignedXids. We also use these records to regularly prune |
679 | | * KnownAssignedXids because we know it is possible that some transactions |
680 | | * with FATAL errors fail to write abort records, which could cause eventual |
681 | | * overflow. |
682 | | * |
683 | | * See comments for LogStandbySnapshot(). |
684 | | */ |
685 | | void |
686 | | ProcArrayApplyRecoveryInfo(RunningTransactions running) |
687 | 0 | { |
688 | 0 | TransactionId *xids; |
689 | 0 | int nxids; |
690 | 0 | TransactionId nextXid; |
691 | 0 | int i; |
692 | |
|
693 | 0 | if (IsYugaByteEnabled()) { |
694 | 0 | return; |
695 | 0 | } |
696 | | |
697 | 0 | Assert(standbyState >= STANDBY_INITIALIZED); |
698 | 0 | Assert(TransactionIdIsValid(running->nextXid)); |
699 | 0 | Assert(TransactionIdIsValid(running->oldestRunningXid)); |
700 | 0 | Assert(TransactionIdIsNormal(running->latestCompletedXid)); |
701 | | |
702 | | /* |
703 | | * Remove stale transactions, if any. |
704 | | */ |
705 | 0 | ExpireOldKnownAssignedTransactionIds(running->oldestRunningXid); |
706 | | |
707 | | /* |
708 | | * Remove stale locks, if any. |
709 | | */ |
710 | 0 | StandbyReleaseOldLocks(running->oldestRunningXid); |
711 | | |
712 | | /* |
713 | | * If our snapshot is already valid, nothing else to do... |
714 | | */ |
715 | 0 | if (standbyState == STANDBY_SNAPSHOT_READY) |
716 | 0 | return; |
717 | | |
718 | | /* |
719 | | * If our initial RunningTransactionsData had an overflowed snapshot then |
720 | | * we knew we were missing some subxids from our snapshot. If we continue |
721 | | * to see overflowed snapshots then we might never be able to start up, so |
722 | | * we make another test to see if our snapshot is now valid. We know that |
723 | | * the missing subxids are equal to or earlier than nextXid. After we |
724 | | * initialise we continue to apply changes during recovery, so once the |
725 | | * oldestRunningXid is later than the nextXid from the initial snapshot we |
726 | | * know that we no longer have missing information and can mark the |
727 | | * snapshot as valid. |
728 | | */ |
729 | 0 | if (standbyState == STANDBY_SNAPSHOT_PENDING) |
730 | 0 | { |
731 | | /* |
732 | | * If the snapshot isn't overflowed or if its empty we can reset our |
733 | | * pending state and use this snapshot instead. |
734 | | */ |
735 | 0 | if (!running->subxid_overflow || running->xcnt == 0) |
736 | 0 | { |
737 | | /* |
738 | | * If we have already collected known assigned xids, we need to |
739 | | * throw them away before we apply the recovery snapshot. |
740 | | */ |
741 | 0 | KnownAssignedXidsReset(); |
742 | 0 | standbyState = STANDBY_INITIALIZED; |
743 | 0 | } |
744 | 0 | else |
745 | 0 | { |
746 | 0 | if (TransactionIdPrecedes(standbySnapshotPendingXmin, |
747 | 0 | running->oldestRunningXid)) |
748 | 0 | { |
749 | 0 | standbyState = STANDBY_SNAPSHOT_READY; |
750 | 0 | elog(trace_recovery(DEBUG1), |
751 | 0 | "recovery snapshots are now enabled"); |
752 | 0 | } |
753 | 0 | else |
754 | 0 | elog(trace_recovery(DEBUG1), |
755 | 0 | "recovery snapshot waiting for non-overflowed snapshot or " |
756 | 0 | "until oldest active xid on standby is at least %u (now %u)", |
757 | 0 | standbySnapshotPendingXmin, |
758 | 0 | running->oldestRunningXid); |
759 | 0 | return; |
760 | 0 | } |
761 | 0 | } |
762 | | |
763 | 0 | Assert(standbyState == STANDBY_INITIALIZED); |
764 | | |
765 | | /* |
766 | | * OK, we need to initialise from the RunningTransactionsData record. |
767 | | * |
768 | | * NB: this can be reached at least twice, so make sure new code can deal |
769 | | * with that. |
770 | | */ |
771 | | |
772 | | /* |
773 | | * Nobody else is running yet, but take locks anyhow |
774 | | */ |
775 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
776 | | |
777 | | /* |
778 | | * KnownAssignedXids is sorted so we cannot just add the xids, we have to |
779 | | * sort them first. |
780 | | * |
781 | | * Some of the new xids are top-level xids and some are subtransactions. |
782 | | * We don't call SubtransSetParent because it doesn't matter yet. If we |
783 | | * aren't overflowed then all xids will fit in snapshot and so we don't |
784 | | * need subtrans. If we later overflow, an xid assignment record will add |
785 | | * xids to subtrans. If RunningXacts is overflowed then we don't have |
786 | | * enough information to correctly update subtrans anyway. |
787 | | */ |
788 | | |
789 | | /* |
790 | | * Allocate a temporary array to avoid modifying the array passed as |
791 | | * argument. |
792 | | */ |
793 | 0 | xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt)); |
794 | | |
795 | | /* |
796 | | * Add to the temp array any xids which have not already completed. |
797 | | */ |
798 | 0 | nxids = 0; |
799 | 0 | for (i = 0; i < running->xcnt + running->subxcnt; i++) |
800 | 0 | { |
801 | 0 | TransactionId xid = running->xids[i]; |
802 | | |
803 | | /* |
804 | | * The running-xacts snapshot can contain xids that were still visible |
805 | | * in the procarray when the snapshot was taken, but were already |
806 | | * WAL-logged as completed. They're not running anymore, so ignore |
807 | | * them. |
808 | | */ |
809 | 0 | if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) |
810 | 0 | continue; |
811 | | |
812 | 0 | xids[nxids++] = xid; |
813 | 0 | } |
814 | |
|
815 | 0 | if (nxids > 0) |
816 | 0 | { |
817 | 0 | if (procArray->numKnownAssignedXids != 0) |
818 | 0 | { |
819 | 0 | LWLockRelease(ProcArrayLock); |
820 | 0 | elog(ERROR, "KnownAssignedXids is not empty"); |
821 | 0 | } |
822 | | |
823 | | /* |
824 | | * Sort the array so that we can add them safely into |
825 | | * KnownAssignedXids. |
826 | | */ |
827 | 0 | qsort(xids, nxids, sizeof(TransactionId), xidComparator); |
828 | | |
829 | | /* |
830 | | * Add the sorted snapshot into KnownAssignedXids. The running-xacts |
831 | | * snapshot may include duplicated xids because of prepared |
832 | | * transactions, so ignore them. |
833 | | */ |
834 | 0 | for (i = 0; i < nxids; i++) |
835 | 0 | { |
836 | 0 | if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i])) |
837 | 0 | { |
838 | 0 | elog(DEBUG1, |
839 | 0 | "found duplicated transaction %u for KnownAssignedXids insertion", |
840 | 0 | xids[i]); |
841 | 0 | continue; |
842 | 0 | } |
843 | 0 | KnownAssignedXidsAdd(xids[i], xids[i], true); |
844 | 0 | } |
845 | |
|
846 | 0 | KnownAssignedXidsDisplay(trace_recovery(DEBUG3)); |
847 | 0 | } |
848 | |
|
849 | 0 | pfree(xids); |
850 | | |
851 | | /* |
852 | | * latestObservedXid is at least set to the point where SUBTRANS was |
853 | | * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid |
854 | | * RecordKnownAssignedTransactionIds() was called for. Initialize |
855 | | * subtrans from thereon, up to nextXid - 1. |
856 | | * |
857 | | * We need to duplicate parts of RecordKnownAssignedTransactionId() here, |
858 | | * because we've just added xids to the known assigned xids machinery that |
859 | | * haven't gone through RecordKnownAssignedTransactionId(). |
860 | | */ |
861 | 0 | Assert(TransactionIdIsNormal(latestObservedXid)); |
862 | 0 | TransactionIdAdvance(latestObservedXid); |
863 | 0 | while (TransactionIdPrecedes(latestObservedXid, running->nextXid)) |
864 | 0 | { |
865 | 0 | ExtendSUBTRANS(latestObservedXid); |
866 | 0 | TransactionIdAdvance(latestObservedXid); |
867 | 0 | } |
868 | 0 | TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */ |
869 | | |
870 | | /* ---------- |
871 | | * Now we've got the running xids we need to set the global values that |
872 | | * are used to track snapshots as they evolve further. |
873 | | * |
874 | | * - latestCompletedXid which will be the xmax for snapshots |
875 | | * - lastOverflowedXid which shows whether snapshots overflow |
876 | | * - nextXid |
877 | | * |
878 | | * If the snapshot overflowed, then we still initialise with what we know, |
879 | | * but the recovery snapshot isn't fully valid yet because we know there |
880 | | * are some subxids missing. We don't know the specific subxids that are |
881 | | * missing, so conservatively assume the last one is latestObservedXid. |
882 | | * ---------- |
883 | | */ |
884 | 0 | if (running->subxid_overflow) |
885 | 0 | { |
886 | 0 | standbyState = STANDBY_SNAPSHOT_PENDING; |
887 | |
|
888 | 0 | standbySnapshotPendingXmin = latestObservedXid; |
889 | 0 | procArray->lastOverflowedXid = latestObservedXid; |
890 | 0 | } |
891 | 0 | else |
892 | 0 | { |
893 | 0 | standbyState = STANDBY_SNAPSHOT_READY; |
894 | |
|
895 | 0 | standbySnapshotPendingXmin = InvalidTransactionId; |
896 | 0 | } |
897 | | |
898 | | /* |
899 | | * If a transaction wrote a commit record in the gap between taking and |
900 | | * logging the snapshot then latestCompletedXid may already be higher than |
901 | | * the value from the snapshot, so check before we use the incoming value. |
902 | | */ |
903 | 0 | if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, |
904 | 0 | running->latestCompletedXid)) |
905 | 0 | ShmemVariableCache->latestCompletedXid = running->latestCompletedXid; |
906 | |
|
907 | 0 | Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid)); |
908 | |
|
909 | 0 | LWLockRelease(ProcArrayLock); |
910 | | |
911 | | /* |
912 | | * ShmemVariableCache->nextXid must be beyond any observed xid. |
913 | | * |
914 | | * We don't expect anyone else to modify nextXid, hence we don't need to |
915 | | * hold a lock while examining it. We still acquire the lock to modify |
916 | | * it, though. |
917 | | */ |
918 | 0 | nextXid = latestObservedXid; |
919 | 0 | TransactionIdAdvance(nextXid); |
920 | 0 | if (TransactionIdFollows(nextXid, ShmemVariableCache->nextXid)) |
921 | 0 | { |
922 | 0 | LWLockAcquire(XidGenLock, LW_EXCLUSIVE); |
923 | 0 | ShmemVariableCache->nextXid = nextXid; |
924 | 0 | LWLockRelease(XidGenLock); |
925 | 0 | } |
926 | |
|
927 | 0 | Assert(TransactionIdIsValid(ShmemVariableCache->nextXid)); |
928 | |
|
929 | 0 | KnownAssignedXidsDisplay(trace_recovery(DEBUG3)); |
930 | 0 | if (standbyState == STANDBY_SNAPSHOT_READY) |
931 | 0 | elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled"); |
932 | 0 | else |
933 | 0 | elog(trace_recovery(DEBUG1), |
934 | 0 | "recovery snapshot waiting for non-overflowed snapshot or " |
935 | 0 | "until oldest active xid on standby is at least %u (now %u)", |
936 | 0 | standbySnapshotPendingXmin, |
937 | 0 | running->oldestRunningXid); |
938 | 0 | } |
939 | | |
/*
 * ProcArrayApplyXidAssignment
 *		Process an XLOG_XACT_ASSIGNMENT WAL record
 *
 * Called during recovery on a standby.  Marks topxid and its subxids as
 * observed in the known-assigned-xids machinery, records each subxid's
 * top-level parent in pg_subtrans, and then drops the subxids from
 * KnownAssignedXids (advancing lastOverflowedXid so snapshots know those
 * subxids may be missing from the array).
 *
 * topxid	- top-level transaction id owning the subtransactions
 * nsubxids	- number of entries in subxids[]
 * subxids	- array of subtransaction ids being assigned to topxid
 */
void
ProcArrayApplyXidAssignment(TransactionId topxid,
							int nsubxids, TransactionId *subxids)
{
	TransactionId max_xid;
	int			i;

	/*
	 * YugaByte: transaction state is tracked in DocDB rather than via the
	 * procarray/KnownAssignedXids machinery, so this record is a no-op.
	 * NOTE(review): presumed from the early return -- confirm.
	 */
	if (IsYugaByteEnabled()) {
		return;
	}

	/* Only meaningful once hot-standby tracking has been initialized. */
	Assert(standbyState >= STANDBY_INITIALIZED);

	max_xid = TransactionIdLatest(topxid, nsubxids, subxids);

	/*
	 * Mark all the subtransactions as observed.
	 *
	 * NOTE: This will fail if the subxid contains too many previously
	 * unobserved xids to fit into known-assigned-xids. That shouldn't happen
	 * as the code stands, because xid-assignment records should never contain
	 * more than PGPROC_MAX_CACHED_SUBXIDS entries.
	 */
	RecordKnownAssignedTransactionIds(max_xid);

	/*
	 * Notice that we update pg_subtrans with the top-level xid, rather than
	 * the parent xid. This is a difference between normal processing and
	 * recovery, yet is still correct in all cases. The reason is that
	 * subtransaction commit is not marked in clog until commit processing, so
	 * all aborted subtransactions have already been clearly marked in clog.
	 * As a result we are able to refer directly to the top-level
	 * transaction's state rather than skipping through all the intermediate
	 * states in the subtransaction tree. This should be the first time we
	 * have attempted to SubTransSetParent().
	 */
	for (i = 0; i < nsubxids; i++)
		SubTransSetParent(subxids[i], topxid);

	/* KnownAssignedXids isn't maintained yet, so we're done for now */
	if (standbyState == STANDBY_INITIALIZED)
		return;

	/*
	 * Uses same locking as transaction commit
	 */
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

	/*
	 * Remove subxids from known-assigned-xacts.
	 */
	KnownAssignedXidsRemoveTree(InvalidTransactionId, nsubxids, subxids);

	/*
	 * Advance lastOverflowedXid to be at least the last of these subxids.
	 */
	if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
		procArray->lastOverflowedXid = max_xid;

	LWLockRelease(ProcArrayLock);
}
1005 | | |
/*
 * TransactionIdIsInProgress -- is given transaction running in some backend
 *
 * Aside from some shortcuts such as checking RecentXmin and our own Xid,
 * there are four possibilities for finding a running transaction:
 *
 * 1. The given Xid is a main transaction Id.  We will find this out cheaply
 * by looking at the PGXACT struct for each backend.
 *
 * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
 * We can find this out cheaply too.
 *
 * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
 * if the Xid is running on the master.
 *
 * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
 * if that is running according to PGXACT or KnownAssignedXids.  This is the
 * slowest way, but sadly it has to be done always if the others failed,
 * unless we see that the cached subxact sets are complete (none have
 * overflowed).
 *
 * ProcArrayLock has to be held while we do 1, 2, 3.  If we save the top Xids
 * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
 * This buys back some concurrency (and we can't retrieve the main Xids from
 * PGXACT again anyway; see GetNewTransactionId).
 */
bool
TransactionIdIsInProgress(TransactionId xid)
{
	/*
	 * Workspace for step 4; malloc'd once per backend and deliberately never
	 * freed (see allocation comment below).
	 */
	static TransactionId *xids = NULL;
	int			nxids = 0;
	ProcArrayStruct *arrayP = procArray;
	TransactionId topxid;
	int			i,
				j;

	/*
	 * YugaByte: PG-side procarray state does not track running transactions,
	 * so every xid is reported as not in progress.  NOTE(review): presumed
	 * from the early return -- confirm against DocDB transaction tracking.
	 */
	if (IsYugaByteEnabled()) {
		return false;
	}

	/*
	 * Don't bother checking a transaction older than RecentXmin; it could not
	 * possibly still be running.  (Note: in particular, this guarantees that
	 * we reject InvalidTransactionId, FrozenTransactionId, etc as not
	 * running.)
	 */
	if (TransactionIdPrecedes(xid, RecentXmin))
	{
		xc_by_recent_xmin_inc();
		return false;
	}

	/*
	 * We may have just checked the status of this transaction, so if it is
	 * already known to be completed, we can fall out without any access to
	 * shared memory.
	 */
	if (TransactionIdIsKnownCompleted(xid))
	{
		xc_by_known_xact_inc();
		return false;
	}

	/*
	 * Also, we can handle our own transaction (and subtransactions) without
	 * any access to shared memory.
	 */
	if (TransactionIdIsCurrentTransactionId(xid))
	{
		xc_by_my_xact_inc();
		return true;
	}

	/*
	 * If first time through, get workspace to remember main XIDs in. We
	 * malloc it permanently to avoid repeated palloc/pfree overhead.
	 */
	if (xids == NULL)
	{
		/*
		 * In hot standby mode, reserve enough space to hold all xids in the
		 * known-assigned list. If we later finish recovery, we no longer need
		 * the bigger array, but we don't bother to shrink it.
		 */
		int			maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;

		xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
		if (xids == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
	}

	LWLockAcquire(ProcArrayLock, LW_SHARED);

	/*
	 * Now that we have the lock, we can check latestCompletedXid; if the
	 * target Xid is after that, it's surely still running.
	 */
	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xid))
	{
		LWLockRelease(ProcArrayLock);
		xc_by_latest_xid_inc();
		return true;
	}

	/* No shortcuts, gotta grovel through the array */
	for (i = 0; i < arrayP->numProcs; i++)
	{
		int			pgprocno = arrayP->pgprocnos[i];
		volatile PGPROC *proc = &allProcs[pgprocno];
		volatile PGXACT *pgxact = &allPgXact[pgprocno];
		TransactionId pxid;

		/* Ignore my own proc --- dealt with it above */
		if (proc == MyProc)
			continue;

		/* Fetch xid just once - see GetNewTransactionId */
		pxid = pgxact->xid;

		if (!TransactionIdIsValid(pxid))
			continue;

		/*
		 * Step 1: check the main Xid
		 */
		if (TransactionIdEquals(pxid, xid))
		{
			LWLockRelease(ProcArrayLock);
			xc_by_main_xid_inc();
			return true;
		}

		/*
		 * We can ignore main Xids that are younger than the target Xid, since
		 * the target could not possibly be their child.
		 */
		if (TransactionIdPrecedes(xid, pxid))
			continue;

		/*
		 * Step 2: check the cached child-Xids arrays
		 */
		for (j = pgxact->nxids - 1; j >= 0; j--)
		{
			/* Fetch xid just once - see GetNewTransactionId */
			TransactionId cxid = proc->subxids.xids[j];

			if (TransactionIdEquals(cxid, xid))
			{
				LWLockRelease(ProcArrayLock);
				xc_by_child_xid_inc();
				return true;
			}
		}

		/*
		 * Save the main Xid for step 4.  We only need to remember main Xids
		 * that have uncached children.  (Note: there is no race condition
		 * here because the overflowed flag cannot be cleared, only set, while
		 * we hold ProcArrayLock.  So we can't miss an Xid that we need to
		 * worry about.)
		 */
		if (pgxact->overflowed)
			xids[nxids++] = pxid;
	}

	/*
	 * Step 3: in hot standby mode, check the known-assigned-xids list.  XIDs
	 * in the list must be treated as running.
	 */
	if (RecoveryInProgress())
	{
		/* none of the PGXACT entries should have XIDs in hot standby mode */
		Assert(nxids == 0);

		if (KnownAssignedXidExists(xid))
		{
			LWLockRelease(ProcArrayLock);
			xc_by_known_assigned_inc();
			return true;
		}

		/*
		 * If the KnownAssignedXids overflowed, we have to check pg_subtrans
		 * too.  Fetch all xids from KnownAssignedXids that are lower than
		 * xid, since if xid is a subtransaction its parent will always have a
		 * lower value.  Note we will collect both main and subXIDs here, but
		 * there's no help for it.
		 */
		if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid))
			nxids = KnownAssignedXidsGet(xids, xid);
	}

	LWLockRelease(ProcArrayLock);

	/*
	 * If none of the relevant caches overflowed, we know the Xid is not
	 * running without even looking at pg_subtrans.
	 */
	if (nxids == 0)
	{
		xc_no_overflow_inc();
		return false;
	}

	/*
	 * Step 4: have to check pg_subtrans.
	 *
	 * At this point, we know it's either a subtransaction of one of the Xids
	 * in xids[], or it's not running.  If it's an already-failed
	 * subtransaction, we want to say "not running" even though its parent may
	 * still be running.  So first, check pg_xact to see if it's been aborted.
	 */
	xc_slow_answer_inc();

	if (TransactionIdDidAbort(xid))
		return false;

	/*
	 * It isn't aborted, so check whether the transaction tree it belongs to
	 * is still running (or, more precisely, whether it was running when we
	 * held ProcArrayLock).
	 */
	topxid = SubTransGetTopmostTransaction(xid);
	Assert(TransactionIdIsValid(topxid));
	if (!TransactionIdEquals(topxid, xid))
	{
		for (i = 0; i < nxids; i++)
		{
			if (TransactionIdEquals(xids[i], topxid))
				return true;
		}
	}

	return false;
}
1244 | | |
1245 | | /* |
1246 | | * TransactionIdIsActive -- is xid the top-level XID of an active backend? |
1247 | | * |
1248 | | * This differs from TransactionIdIsInProgress in that it ignores prepared |
1249 | | * transactions, as well as transactions running on the master if we're in |
1250 | | * hot standby. Also, we ignore subtransactions since that's not needed |
1251 | | * for current uses. |
1252 | | */ |
1253 | | bool |
1254 | | TransactionIdIsActive(TransactionId xid) |
1255 | 0 | { |
1256 | 0 | bool result = false; |
1257 | 0 | ProcArrayStruct *arrayP = procArray; |
1258 | 0 | int i; |
1259 | |
|
1260 | 0 | if (IsYugaByteEnabled()) { |
1261 | 0 | return false; |
1262 | 0 | } |
1263 | | |
1264 | | /* |
1265 | | * Don't bother checking a transaction older than RecentXmin; it could not |
1266 | | * possibly still be running. |
1267 | | */ |
1268 | 0 | if (TransactionIdPrecedes(xid, RecentXmin)) |
1269 | 0 | return false; |
1270 | | |
1271 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
1272 | |
|
1273 | 0 | for (i = 0; i < arrayP->numProcs; i++) |
1274 | 0 | { |
1275 | 0 | int pgprocno = arrayP->pgprocnos[i]; |
1276 | 0 | volatile PGPROC *proc = &allProcs[pgprocno]; |
1277 | 0 | volatile PGXACT *pgxact = &allPgXact[pgprocno]; |
1278 | 0 | TransactionId pxid; |
1279 | | |
1280 | | /* Fetch xid just once - see GetNewTransactionId */ |
1281 | 0 | pxid = pgxact->xid; |
1282 | |
|
1283 | 0 | if (!TransactionIdIsValid(pxid)) |
1284 | 0 | continue; |
1285 | | |
1286 | 0 | if (proc->pid == 0) |
1287 | 0 | continue; /* ignore prepared transactions */ |
1288 | | |
1289 | 0 | if (TransactionIdEquals(pxid, xid)) |
1290 | 0 | { |
1291 | 0 | result = true; |
1292 | 0 | break; |
1293 | 0 | } |
1294 | 0 | } |
1295 | |
|
1296 | 0 | LWLockRelease(ProcArrayLock); |
1297 | |
|
1298 | 0 | return result; |
1299 | 0 | } |
1300 | | |
1301 | | |
/*
 * GetOldestXmin -- returns oldest transaction that was running
 *					when any current transaction was started.
 *
 * If rel is NULL or a shared relation, all backends are considered, otherwise
 * only backends running in this database are considered.
 *
 * The flags are used to ignore the backends in calculation when any of the
 * corresponding flags is set.  Typically, if you want to ignore ones with
 * PROC_IN_VACUUM flag, you can use PROCARRAY_FLAGS_VACUUM.
 *
 * PROCARRAY_SLOTS_XMIN causes GetOldestXmin to ignore the xmin and
 * catalog_xmin of any replication slots that exist in the system when
 * calculating the oldest xmin.
 *
 * This is used by VACUUM to decide which deleted tuples must be preserved in
 * the passed in table. For shared relations backends in all databases must be
 * considered, but for non-shared relations that's not required, since only
 * backends in my own database could ever see the tuples in them. Also, we can
 * ignore concurrently running lazy VACUUMs because (a) they must be working
 * on other tables, and (b) they don't need to do snapshot-based lookups.
 *
 * This is also used to determine where to truncate pg_subtrans.  For that
 * backends in all databases have to be considered, so rel = NULL has to be
 * passed in.
 *
 * Note: we include all currently running xids in the set of considered xids.
 * This ensures that if a just-started xact has not yet set its snapshot,
 * when it does set the snapshot it cannot set xmin less than what we compute.
 * See notes in src/backend/access/transam/README.
 *
 * Note: despite the above, it's possible for the calculated value to move
 * backwards on repeated calls. The calculated value is conservative, so that
 * anything older is definitely not considered as running by anyone anymore,
 * but the exact value calculated depends on a number of things. For example,
 * if rel = NULL and there are no transactions running in the current
 * database, GetOldestXmin() returns latestCompletedXid. If a transaction
 * begins after that, its xmin will include in-progress transactions in other
 * databases that started earlier, so another call will return a lower value.
 * Nonetheless it is safe to vacuum a table in the current database with the
 * first result.  There are also replication-related effects: a walsender
 * process can set its xmin based on transactions that are no longer running
 * in the master but are still being replayed on the standby, thus possibly
 * making the GetOldestXmin reading go backwards.  In this case there is a
 * possibility that we lose data that the standby would like to have, but
 * unless the standby uses a replication slot to make its xmin persistent
 * there is little we can do about that --- data is only protected if the
 * walsender runs continuously while queries are executed on the standby.
 * (The Hot Standby code deals with such cases by failing standby queries
 * that needed to access already-removed data, so there's no integrity bug.)
 * The return value is also adjusted with vacuum_defer_cleanup_age, so
 * increasing that setting on the fly is another easy way to make
 * GetOldestXmin() move backwards, with no consequences for data integrity.
 */
TransactionId
GetOldestXmin(Relation rel, int flags)
{
	ProcArrayStruct *arrayP = procArray;
	TransactionId result;
	int			index;
	bool		allDbs;

	/*
	 * YugaByte: there is no procarray-based xmin horizon; callers receive
	 * InvalidTransactionId.  NOTE(review): presumed from the early return --
	 * confirm all callers treat this as "no limit".
	 */
	if (IsYugaByteEnabled()) {
		return InvalidTransactionId;
	}

	/* Copied out of shared memory under ProcArrayLock (see fetch below). */
	volatile TransactionId replication_slot_xmin = InvalidTransactionId;
	volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;

	/*
	 * If we're not computing a relation specific limit, or if a shared
	 * relation has been passed in, backends in all databases have to be
	 * considered.
	 */
	allDbs = rel == NULL || rel->rd_rel->relisshared;

	/* Cannot look for individual databases during recovery */
	Assert(allDbs || !RecoveryInProgress());

	LWLockAcquire(ProcArrayLock, LW_SHARED);

	/*
	 * We initialize the MIN() calculation with latestCompletedXid + 1. This
	 * is a lower bound for the XIDs that might appear in the ProcArray later,
	 * and so protects us against overestimating the result due to future
	 * additions.
	 */
	result = ShmemVariableCache->latestCompletedXid;
	Assert(TransactionIdIsNormal(result));
	TransactionIdAdvance(result);

	for (index = 0; index < arrayP->numProcs; index++)
	{
		int			pgprocno = arrayP->pgprocnos[index];
		volatile PGPROC *proc = &allProcs[pgprocno];
		volatile PGXACT *pgxact = &allPgXact[pgprocno];

		/* Skip backends excluded by the caller's flags (e.g. lazy VACUUMs). */
		if (pgxact->vacuumFlags & (flags & PROCARRAY_PROC_FLAGS_MASK))
			continue;

		if (allDbs ||
			proc->databaseId == MyDatabaseId ||
			proc->databaseId == 0)	/* always include WalSender */
		{
			/* Fetch xid just once - see GetNewTransactionId */
			TransactionId xid = pgxact->xid;

			/* First consider the transaction's own Xid, if any */
			if (TransactionIdIsNormal(xid) &&
				TransactionIdPrecedes(xid, result))
				result = xid;

			/*
			 * Also consider the transaction's Xmin, if set.
			 *
			 * We must check both Xid and Xmin because a transaction might
			 * have an Xmin but not (yet) an Xid; conversely, if it has an
			 * Xid, that could determine some not-yet-set Xmin.
			 */
			xid = pgxact->xmin; /* Fetch just once */
			if (TransactionIdIsNormal(xid) &&
				TransactionIdPrecedes(xid, result))
				result = xid;
		}
	}

	/* fetch into volatile var while ProcArrayLock is held */
	replication_slot_xmin = procArray->replication_slot_xmin;
	replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;

	if (RecoveryInProgress())
	{
		/*
		 * Check to see whether KnownAssignedXids contains an xid value older
		 * than the main procarray.
		 */
		TransactionId kaxmin = KnownAssignedXidsGetOldestXmin();

		LWLockRelease(ProcArrayLock);

		if (TransactionIdIsNormal(kaxmin) &&
			TransactionIdPrecedes(kaxmin, result))
			result = kaxmin;
	}
	else
	{
		/*
		 * No other information needed, so release the lock immediately.
		 */
		LWLockRelease(ProcArrayLock);

		/*
		 * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age,
		 * being careful not to generate a "permanent" XID.
		 *
		 * vacuum_defer_cleanup_age provides some additional "slop" for the
		 * benefit of hot standby queries on standby servers.  This is quick
		 * and dirty, and perhaps not all that useful unless the master has a
		 * predictable transaction rate, but it offers some protection when
		 * there's no walsender connection.  Note that we are assuming
		 * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
		 * so guc.c should limit it to no more than the xidStopLimit threshold
		 * in varsup.c.  Also note that we intentionally don't apply
		 * vacuum_defer_cleanup_age on standby servers.
		 */
		result -= vacuum_defer_cleanup_age;
		if (!TransactionIdIsNormal(result))
			result = FirstNormalTransactionId;
	}

	/*
	 * Check whether there are replication slots requiring an older xmin.
	 */
	if (!(flags & PROCARRAY_SLOTS_XMIN) &&
		TransactionIdIsValid(replication_slot_xmin) &&
		NormalTransactionIdPrecedes(replication_slot_xmin, result))
		result = replication_slot_xmin;

	/*
	 * After locks have been released and defer_cleanup_age has been applied,
	 * check whether we need to back up further to make logical decoding
	 * possible.  We need to do so if we're computing the global limit (rel =
	 * NULL) or if the passed relation is a catalog relation of some kind.
	 */
	if (!(flags & PROCARRAY_SLOTS_XMIN) &&
		(rel == NULL ||
		 RelationIsAccessibleInLogicalDecoding(rel)) &&
		TransactionIdIsValid(replication_slot_catalog_xmin) &&
		NormalTransactionIdPrecedes(replication_slot_catalog_xmin, result))
		result = replication_slot_catalog_xmin;

	return result;
}
1495 | | |
1496 | | /* |
1497 | | * GetMaxSnapshotXidCount -- get max size for snapshot XID array |
1498 | | * |
1499 | | * We have to export this for use by snapmgr.c. |
1500 | | */ |
1501 | | int |
1502 | | GetMaxSnapshotXidCount(void) |
1503 | 3.29k | { |
1504 | 3.29k | return procArray->maxProcs; |
1505 | 3.29k | } |
1506 | | |
1507 | | /* |
1508 | | * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array |
1509 | | * |
1510 | | * We have to export this for use by snapmgr.c. |
1511 | | */ |
1512 | | int |
1513 | | GetMaxSnapshotSubxidCount(void) |
1514 | 3.29k | { |
1515 | 3.29k | return TOTAL_MAX_CACHED_SUBXIDS; |
1516 | 3.29k | } |
1517 | | |
/*
 * GetSnapshotData -- returns information about running transactions.
 *
 * The returned snapshot includes xmin (lowest still-running xact ID),
 * xmax (highest completed xact ID + 1), and a list of running xact IDs
 * in the range xmin <= xid < xmax.  It is used as follows:
 *		All xact IDs < xmin are considered finished.
 *		All xact IDs >= xmax are considered still running.
 *		For an xact ID xmin <= xid < xmax, consult list to see whether
 *		it is considered running or not.
 * This ensures that the set of transactions seen as "running" by the
 * current xact will not change after it takes the snapshot.
 *
 * All running top-level XIDs are included in the snapshot, except for lazy
 * VACUUM processes.  We also try to include running subtransaction XIDs,
 * but since PGPROC has only a limited cache area for subxact XIDs, full
 * information may not be available.  If we find any overflowed subxid arrays,
 * we have to mark the snapshot's subxid data as overflowed, and extra work
 * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
 * in tqual.c).
 *
 * We also update the following backend-global variables:
 *		TransactionXmin: the oldest xmin of any snapshot in use in the
 *			current transaction (this is the same as MyPgXact->xmin).
 *		RecentXmin: the xmin computed for the most recent snapshot.  XIDs
 *			older than this are known not running any more.
 *		RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
 *			running transactions, except those running LAZY VACUUM).  This is
 *			the same computation done by
 *			GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM).
 *		RecentGlobalDataXmin: the global xmin for non-catalog tables
 *			>= RecentGlobalXmin
 *
 * Note: this function should probably not be called with an argument that's
 * not statically allocated (see xip allocation below).
 */
Snapshot
GetSnapshotData(Snapshot snapshot)
{
	ProcArrayStruct *arrayP = procArray;
	TransactionId xmin;
	TransactionId xmax;
	TransactionId globalxmin;
	int			index;
	int			count = 0;
	int			subcount = 0;
	bool		suboverflowed = false;
	volatile TransactionId replication_slot_xmin = InvalidTransactionId;
	volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;

	Assert(snapshot != NULL);

	/*
	 * Allocating space for maxProcs xids is usually overkill; numProcs would
	 * be sufficient.  But it seems better to do the malloc while not holding
	 * the lock, so we can't look at numProcs.  Likewise, we allocate much
	 * more subxip storage than is probably needed.
	 *
	 * This does open a possibility for avoiding repeated malloc/free: since
	 * maxProcs does not change at runtime, we can simply reuse the previous
	 * xip arrays if any.  (This relies on the fact that all callers pass
	 * static SnapshotData structs.)
	 */
	if (snapshot->xip == NULL)
	{
		/*
		 * First call for this snapshot.  Snapshot is same size whether or not
		 * we are in recovery, see later comments.
		 */
		snapshot->xip = (TransactionId *)
			malloc(GetMaxSnapshotXidCount() * sizeof(TransactionId));
		if (snapshot->xip == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
		Assert(snapshot->subxip == NULL);
		snapshot->subxip = (TransactionId *)
			malloc(GetMaxSnapshotSubxidCount() * sizeof(TransactionId));
		if (snapshot->subxip == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
	}

	/*
	 * It is sufficient to get shared lock on ProcArrayLock, even if we are
	 * going to set MyPgXact->xmin.
	 */
	LWLockAcquire(ProcArrayLock, LW_SHARED);

	/* xmax is always latestCompletedXid + 1 */
	xmax = ShmemVariableCache->latestCompletedXid;
	Assert(TransactionIdIsNormal(xmax));
	TransactionIdAdvance(xmax);

	/* initialize xmin calculation with xmax */
	globalxmin = xmin = xmax;

	snapshot->takenDuringRecovery = RecoveryInProgress();

	if (!snapshot->takenDuringRecovery)
	{
		int		   *pgprocnos = arrayP->pgprocnos;
		int			numProcs;

		/*
		 * Spin over procArray checking xid, xmin, and subxids.  The goal is
		 * to gather all active xids, find the lowest xmin, and try to record
		 * subxids.
		 */
		numProcs = arrayP->numProcs;
		for (index = 0; index < numProcs; index++)
		{
			int			pgprocno = pgprocnos[index];
			volatile PGXACT *pgxact = &allPgXact[pgprocno];
			TransactionId xid;

			/*
			 * Backend is doing logical decoding which manages xmin
			 * separately, check below.
			 */
			if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
				continue;

			/* Ignore procs running LAZY VACUUM */
			if (pgxact->vacuumFlags & PROC_IN_VACUUM)
				continue;

			/* Update globalxmin to be the smallest valid xmin */
			xid = pgxact->xmin; /* fetch just once */
			if (TransactionIdIsNormal(xid) &&
				NormalTransactionIdPrecedes(xid, globalxmin))
				globalxmin = xid;

			/* Fetch xid just once - see GetNewTransactionId */
			xid = pgxact->xid;

			/*
			 * If the transaction has no XID assigned, we can skip it; it
			 * won't have sub-XIDs either.  If the XID is >= xmax, we can also
			 * skip it; such transactions will be treated as running anyway
			 * (and any sub-XIDs will also be >= xmax).
			 */
			if (!TransactionIdIsNormal(xid)
				|| !NormalTransactionIdPrecedes(xid, xmax))
				continue;

			/*
			 * We don't include our own XIDs (if any) in the snapshot, but we
			 * must include them in xmin.
			 */
			if (NormalTransactionIdPrecedes(xid, xmin))
				xmin = xid;
			if (pgxact == MyPgXact)
				continue;

			/* Add XID to snapshot. */
			snapshot->xip[count++] = xid;

			/*
			 * Save subtransaction XIDs if possible (if we've already
			 * overflowed, there's no point).  Note that the subxact XIDs must
			 * be later than their parent, so no need to check them against
			 * xmin.  We could filter against xmax, but it seems better not to
			 * do that much work while holding the ProcArrayLock.
			 *
			 * The other backend can add more subxids concurrently, but cannot
			 * remove any.  Hence it's important to fetch nxids just once.
			 * Should be safe to use memcpy, though.  (We needn't worry about
			 * missing any xids added concurrently, because they must postdate
			 * xmax.)
			 *
			 * Again, our own XIDs are not included in the snapshot.
			 */
			if (!suboverflowed)
			{
				if (pgxact->overflowed)
					suboverflowed = true;
				else
				{
					int			nxids = pgxact->nxids;

					if (nxids > 0)
					{
						volatile PGPROC *proc = &allProcs[pgprocno];

						memcpy(snapshot->subxip + subcount,
							   (void *) proc->subxids.xids,
							   nxids * sizeof(TransactionId));
						subcount += nxids;
					}
				}
			}
		}
	}
	else
	{
		/*
		 * We're in hot standby, so get XIDs from KnownAssignedXids.
		 *
		 * We store all xids directly into subxip[].  Here's why:
		 *
		 * In recovery we don't know which xids are top-level and which are
		 * subxacts, a design choice that greatly simplifies xid processing.
		 *
		 * It seems like we would want to try to put xids into xip[] only, but
		 * that is fairly small.  We would either need to make that bigger or
		 * to increase the rate at which we WAL-log xid assignment; neither is
		 * an appealing choice.
		 *
		 * We could try to store xids into xip[] first and then into subxip[]
		 * if there are too many xids.  That only works if the snapshot doesn't
		 * overflow because we do not search subxip[] in that case.  A simpler
		 * way is to just store all xids in the subxact array because this is
		 * by far the bigger array.  We just leave the xip array empty.
		 *
		 * Either way we need to change the way XidInMVCCSnapshot() works
		 * depending upon when the snapshot was taken, or change normal
		 * snapshot processing so it matches.
		 *
		 * Note: It is possible for recovery to end before we finish taking
		 * the snapshot, and for newly assigned transaction ids to be added to
		 * the ProcArray.  xmax cannot change while we hold ProcArrayLock, so
		 * those newly added transaction ids would be filtered away, so we
		 * need not be concerned about them.
		 */
		subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
												  xmax);

		if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid))
			suboverflowed = true;
	}


	/* fetch into volatile var while ProcArrayLock is held */
	replication_slot_xmin = procArray->replication_slot_xmin;
	replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;

	/* Advertise our xmin if this is the first snapshot of the transaction */
	if (!TransactionIdIsValid(MyPgXact->xmin))
		MyPgXact->xmin = TransactionXmin = xmin;

	LWLockRelease(ProcArrayLock);

	/*
	 * Update globalxmin to include actual process xids.  This is a slightly
	 * different way of computing it than GetOldestXmin uses, but should give
	 * the same result.
	 */
	if (TransactionIdPrecedes(xmin, globalxmin))
		globalxmin = xmin;

	/* Update global variables too */
	RecentGlobalXmin = globalxmin - vacuum_defer_cleanup_age;
	if (!TransactionIdIsNormal(RecentGlobalXmin))
		RecentGlobalXmin = FirstNormalTransactionId;

	/* Check whether there's a replication slot requiring an older xmin. */
	if (TransactionIdIsValid(replication_slot_xmin) &&
		NormalTransactionIdPrecedes(replication_slot_xmin, RecentGlobalXmin))
		RecentGlobalXmin = replication_slot_xmin;

	/* Non-catalog tables can be vacuumed if older than this xid */
	RecentGlobalDataXmin = RecentGlobalXmin;

	/*
	 * Check whether there's a replication slot requiring an older catalog
	 * xmin.  (Done after setting RecentGlobalDataXmin, so the catalog xmin
	 * only constrains RecentGlobalXmin, not the data horizon.)
	 */
	if (TransactionIdIsNormal(replication_slot_catalog_xmin) &&
		NormalTransactionIdPrecedes(replication_slot_catalog_xmin, RecentGlobalXmin))
		RecentGlobalXmin = replication_slot_catalog_xmin;

	RecentXmin = xmin;

	snapshot->xmin = xmin;
	snapshot->xmax = xmax;
	snapshot->xcnt = count;
	snapshot->subxcnt = subcount;
	snapshot->suboverflowed = suboverflowed;

	snapshot->curcid = GetCurrentCommandId(false);

	/*
	 * This is a new snapshot, so set both refcounts to zero, and mark it as
	 * not copied in persistent memory.
	 */
	snapshot->active_count = 0;
	snapshot->regd_count = 0;
	snapshot->copied = false;

	if (old_snapshot_threshold < 0)
	{
		/*
		 * If not using "snapshot too old" feature, fill related fields with
		 * dummy values that don't require any locking.
		 */
		snapshot->lsn = InvalidXLogRecPtr;
		snapshot->whenTaken = 0;
	}
	else
	{
		/*
		 * Capture the current time and WAL stream location in case this
		 * snapshot becomes old enough to need to fall back on the special
		 * "old snapshot" logic.
		 */
		snapshot->lsn = GetXLogInsertRecPtr();
		snapshot->whenTaken = GetSnapshotCurrentTimestamp();
		MaintainOldSnapshotTimeMapping(snapshot->whenTaken, xmin);
	}

	return snapshot;
}
1831 | | |
1832 | | /* |
1833 | | * ProcArrayInstallImportedXmin -- install imported xmin into MyPgXact->xmin |
1834 | | * |
1835 | | * This is called when installing a snapshot imported from another |
1836 | | * transaction. To ensure that OldestXmin doesn't go backwards, we must |
1837 | | * check that the source transaction is still running, and we'd better do |
1838 | | * that atomically with installing the new xmin. |
1839 | | * |
1840 | | * Returns true if successful, false if source xact is no longer running. |
1841 | | */ |
1842 | | bool |
1843 | | ProcArrayInstallImportedXmin(TransactionId xmin, |
1844 | | VirtualTransactionId *sourcevxid) |
1845 | 0 | { |
1846 | 0 | bool result = false; |
1847 | 0 | ProcArrayStruct *arrayP = procArray; |
1848 | 0 | int index; |
1849 | |
|
1850 | 0 | Assert(TransactionIdIsNormal(xmin)); |
1851 | 0 | if (!sourcevxid) |
1852 | 0 | return false; |
1853 | | |
1854 | | /* Get lock so source xact can't end while we're doing this */ |
1855 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
1856 | |
|
1857 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
1858 | 0 | { |
1859 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
1860 | 0 | volatile PGPROC *proc = &allProcs[pgprocno]; |
1861 | 0 | volatile PGXACT *pgxact = &allPgXact[pgprocno]; |
1862 | 0 | TransactionId xid; |
1863 | | |
1864 | | /* Ignore procs running LAZY VACUUM */ |
1865 | 0 | if (pgxact->vacuumFlags & PROC_IN_VACUUM) |
1866 | 0 | continue; |
1867 | | |
1868 | | /* We are only interested in the specific virtual transaction. */ |
1869 | 0 | if (proc->backendId != sourcevxid->backendId) |
1870 | 0 | continue; |
1871 | 0 | if (proc->lxid != sourcevxid->localTransactionId) |
1872 | 0 | continue; |
1873 | | |
1874 | | /* |
1875 | | * We check the transaction's database ID for paranoia's sake: if it's |
1876 | | * in another DB then its xmin does not cover us. Caller should have |
1877 | | * detected this already, so we just treat any funny cases as |
1878 | | * "transaction not found". |
1879 | | */ |
1880 | 0 | if (proc->databaseId != MyDatabaseId) |
1881 | 0 | continue; |
1882 | | |
1883 | | /* |
1884 | | * Likewise, let's just make real sure its xmin does cover us. |
1885 | | */ |
1886 | 0 | xid = pgxact->xmin; /* fetch just once */ |
1887 | 0 | if (!TransactionIdIsNormal(xid) || |
1888 | 0 | !TransactionIdPrecedesOrEquals(xid, xmin)) |
1889 | 0 | continue; |
1890 | | |
1891 | | /* |
1892 | | * We're good. Install the new xmin. As in GetSnapshotData, set |
1893 | | * TransactionXmin too. (Note that because snapmgr.c called |
1894 | | * GetSnapshotData first, we'll be overwriting a valid xmin here, so |
1895 | | * we don't check that.) |
1896 | | */ |
1897 | 0 | MyPgXact->xmin = TransactionXmin = xmin; |
1898 | |
|
1899 | 0 | result = true; |
1900 | 0 | break; |
1901 | 0 | } |
1902 | |
|
1903 | 0 | LWLockRelease(ProcArrayLock); |
1904 | |
|
1905 | 0 | return result; |
1906 | 0 | } |
1907 | | |
1908 | | /* |
1909 | | * ProcArrayInstallRestoredXmin -- install restored xmin into MyPgXact->xmin |
1910 | | * |
1911 | | * This is like ProcArrayInstallImportedXmin, but we have a pointer to the |
1912 | | * PGPROC of the transaction from which we imported the snapshot, rather than |
1913 | | * an XID. |
1914 | | * |
1915 | | * Returns true if successful, false if source xact is no longer running. |
1916 | | */ |
1917 | | bool |
1918 | | ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) |
1919 | 0 | { |
1920 | 0 | bool result = false; |
1921 | 0 | TransactionId xid; |
1922 | 0 | volatile PGXACT *pgxact; |
1923 | |
|
1924 | 0 | Assert(TransactionIdIsNormal(xmin)); |
1925 | 0 | Assert(proc != NULL); |
1926 | | |
1927 | | /* Get lock so source xact can't end while we're doing this */ |
1928 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
1929 | |
|
1930 | 0 | pgxact = &allPgXact[proc->pgprocno]; |
1931 | | |
1932 | | /* |
1933 | | * Be certain that the referenced PGPROC has an advertised xmin which is |
1934 | | * no later than the one we're installing, so that the system-wide xmin |
1935 | | * can't go backwards. Also, make sure it's running in the same database, |
1936 | | * so that the per-database xmin cannot go backwards. |
1937 | | */ |
1938 | 0 | xid = pgxact->xmin; /* fetch just once */ |
1939 | 0 | if (proc->databaseId == MyDatabaseId && |
1940 | 0 | TransactionIdIsNormal(xid) && |
1941 | 0 | TransactionIdPrecedesOrEquals(xid, xmin)) |
1942 | 0 | { |
1943 | 0 | MyPgXact->xmin = TransactionXmin = xmin; |
1944 | 0 | result = true; |
1945 | 0 | } |
1946 | |
|
1947 | 0 | LWLockRelease(ProcArrayLock); |
1948 | |
|
1949 | 0 | return result; |
1950 | 0 | } |
1951 | | |
/*
 * GetRunningTransactionData -- returns information about running transactions.
 *
 * Similar to GetSnapshotData but returns more information.  We include
 * all PGXACTs with an assigned TransactionId, even VACUUM processes and
 * prepared transactions.
 *
 * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
 * releasing them.  Acquiring XidGenLock ensures that no new XIDs enter the proc
 * array until the caller has WAL-logged this snapshot, and releases the
 * lock.  Acquiring ProcArrayLock ensures that no transactions commit until the
 * lock is released.
 *
 * The returned data structure is statically allocated; caller should not
 * modify it, and must not assume it is valid past the next call.
 *
 * This is never executed during recovery so there is no need to look at
 * KnownAssignedXids.
 *
 * Dummy PGXACTs from prepared transaction are included, meaning that this
 * may return entries with duplicated TransactionId values coming from
 * transaction finishing to prepare.  Nothing is done about duplicated
 * entries here to not hold on ProcArrayLock more than necessary.
 *
 * We don't worry about updating other counters, we want to keep this as
 * simple as possible and leave GetSnapshotData() as the primary code for
 * that bookkeeping.
 *
 * Note that if any transaction has overflowed its cached subtransactions
 * then there is no real need to include any subtransactions.
 */
RunningTransactions
GetRunningTransactionData(void)
{
	/* result workspace */
	static RunningTransactionsData CurrentRunningXactsData;

	ProcArrayStruct *arrayP = procArray;
	RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
	TransactionId latestCompletedXid;
	TransactionId oldestRunningXid;
	TransactionId *xids;
	int			index;
	int			count;
	int			subcount;
	bool		suboverflowed;

	Assert(!RecoveryInProgress());

	/*
	 * Allocating space for maxProcs xids is usually overkill; numProcs would
	 * be sufficient.  But it seems better to do the malloc while not holding
	 * the lock, so we can't look at numProcs.  Likewise, we allocate much
	 * more subxip storage than is probably needed.
	 *
	 * Should only be allocated in bgwriter, since only ever executed during
	 * checkpoints.
	 */
	if (CurrentRunningXacts->xids == NULL)
	{
		/*
		 * First call
		 */
		CurrentRunningXacts->xids = (TransactionId *)
			malloc(TOTAL_MAX_CACHED_SUBXIDS * sizeof(TransactionId));
		if (CurrentRunningXacts->xids == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
	}

	xids = CurrentRunningXacts->xids;

	count = subcount = 0;
	suboverflowed = false;

	/*
	 * Ensure that no xids enter or leave the procarray while we obtain
	 * snapshot.  (The caller releases both locks.)
	 */
	LWLockAcquire(ProcArrayLock, LW_SHARED);
	LWLockAcquire(XidGenLock, LW_SHARED);

	latestCompletedXid = ShmemVariableCache->latestCompletedXid;

	/* Seed the minimum with nextXid, a safe upper bound */
	oldestRunningXid = ShmemVariableCache->nextXid;

	/*
	 * Spin over procArray collecting all xids
	 */
	for (index = 0; index < arrayP->numProcs; index++)
	{
		int			pgprocno = arrayP->pgprocnos[index];
		volatile PGXACT *pgxact = &allPgXact[pgprocno];
		TransactionId xid;

		/* Fetch xid just once - see GetNewTransactionId */
		xid = pgxact->xid;

		/*
		 * We don't need to store transactions that don't have a TransactionId
		 * yet because they will not show as running on a standby server.
		 */
		if (!TransactionIdIsValid(xid))
			continue;

		/*
		 * Be careful not to exclude any xids before calculating the values of
		 * oldestRunningXid and suboverflowed, since these are used to clean
		 * up transaction information held on standbys.
		 */
		if (TransactionIdPrecedes(xid, oldestRunningXid))
			oldestRunningXid = xid;

		if (pgxact->overflowed)
			suboverflowed = true;

		/*
		 * If we wished to exclude xids this would be the right place for it.
		 * Procs with the PROC_IN_VACUUM flag set don't usually assign xids,
		 * but they do during truncation at the end when they get the lock and
		 * truncate, so it is not much of a problem to include them if they
		 * are seen and it is cleaner to include them.
		 */

		xids[count++] = xid;
	}

	/*
	 * Spin over procArray collecting all subxids, but only if there hasn't
	 * been a suboverflow.
	 */
	if (!suboverflowed)
	{
		for (index = 0; index < arrayP->numProcs; index++)
		{
			int			pgprocno = arrayP->pgprocnos[index];
			volatile PGPROC *proc = &allProcs[pgprocno];
			volatile PGXACT *pgxact = &allPgXact[pgprocno];
			int			nxids;

			/*
			 * Save subtransaction XIDs.  Other backends can't add or remove
			 * entries while we're holding XidGenLock.
			 */
			nxids = pgxact->nxids;
			if (nxids > 0)
			{
				memcpy(&xids[count], (void *) proc->subxids.xids,
					   nxids * sizeof(TransactionId));
				count += nxids;
				subcount += nxids;

				/*
				 * Top-level XID of a transaction is always less than any of
				 * its subxids, so we don't need to check if any of the
				 * subxids are smaller than oldestRunningXid
				 */
			}
		}
	}

	/*
	 * It's important *not* to include the limits set by slots here because
	 * snapbuild.c uses oldestRunningXid to manage its xmin horizon.  If those
	 * were to be included here the initial value could never increase because
	 * of a circular dependency where slots only increase their limits when
	 * running xacts increases oldestRunningXid and running xacts only
	 * increases if slots do.
	 */

	CurrentRunningXacts->xcnt = count - subcount;
	CurrentRunningXacts->subxcnt = subcount;
	CurrentRunningXacts->subxid_overflow = suboverflowed;
	CurrentRunningXacts->nextXid = ShmemVariableCache->nextXid;
	CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
	CurrentRunningXacts->latestCompletedXid = latestCompletedXid;

	Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
	Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
	Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));

	/* We don't release the locks here, the caller is responsible for that */

	return CurrentRunningXacts;
}
2138 | | |
2139 | | /* |
2140 | | * GetOldestActiveTransactionId() |
2141 | | * |
2142 | | * Similar to GetSnapshotData but returns just oldestActiveXid. We include |
2143 | | * all PGXACTs with an assigned TransactionId, even VACUUM processes. |
2144 | | * We look at all databases, though there is no need to include WALSender |
2145 | | * since this has no effect on hot standby conflicts. |
2146 | | * |
2147 | | * This is never executed during recovery so there is no need to look at |
2148 | | * KnownAssignedXids. |
2149 | | * |
2150 | | * We don't worry about updating other counters, we want to keep this as |
2151 | | * simple as possible and leave GetSnapshotData() as the primary code for |
2152 | | * that bookkeeping. |
2153 | | */ |
2154 | | TransactionId |
2155 | | GetOldestActiveTransactionId(void) |
2156 | 83 | { |
2157 | 83 | ProcArrayStruct *arrayP = procArray; |
2158 | 83 | TransactionId oldestRunningXid; |
2159 | 83 | int index; |
2160 | | |
2161 | 83 | Assert(!RecoveryInProgress()); |
2162 | | |
2163 | | /* |
2164 | | * Read nextXid, as the upper bound of what's still active. |
2165 | | * |
2166 | | * Reading a TransactionId is atomic, but we must grab the lock to make |
2167 | | * sure that all XIDs < nextXid are already present in the proc array (or |
2168 | | * have already completed), when we spin over it. |
2169 | | */ |
2170 | 83 | LWLockAcquire(XidGenLock, LW_SHARED); |
2171 | 83 | oldestRunningXid = ShmemVariableCache->nextXid; |
2172 | 83 | LWLockRelease(XidGenLock); |
2173 | | |
2174 | | /* |
2175 | | * Spin over procArray collecting all xids and subxids. |
2176 | | */ |
2177 | 83 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2178 | 168 | for (index = 0; index < arrayP->numProcs; index++) |
2179 | 85 | { |
2180 | 85 | int pgprocno = arrayP->pgprocnos[index]; |
2181 | 85 | volatile PGXACT *pgxact = &allPgXact[pgprocno]; |
2182 | 85 | TransactionId xid; |
2183 | | |
2184 | | /* Fetch xid just once - see GetNewTransactionId */ |
2185 | 85 | xid = pgxact->xid; |
2186 | | |
2187 | 85 | if (!TransactionIdIsNormal(xid)) |
2188 | 85 | continue; |
2189 | | |
2190 | 0 | if (TransactionIdPrecedes(xid, oldestRunningXid)) |
2191 | 0 | oldestRunningXid = xid; |
2192 | | |
2193 | | /* |
2194 | | * Top-level XID of a transaction is always less than any of its |
2195 | | * subxids, so we don't need to check if any of the subxids are |
2196 | | * smaller than oldestRunningXid |
2197 | | */ |
2198 | 0 | } |
2199 | 83 | LWLockRelease(ProcArrayLock); |
2200 | | |
2201 | 83 | return oldestRunningXid; |
2202 | 83 | } |
2203 | | |
2204 | | /* |
2205 | | * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum |
2206 | | * |
2207 | | * Returns the oldest xid that we can guarantee not to have been affected by |
2208 | | * vacuum, i.e. no rows >= that xid have been vacuumed away unless the |
2209 | | * transaction aborted. Note that the value can (and most of the time will) be |
2210 | | * much more conservative than what really has been affected by vacuum, but we |
2211 | | * currently don't have better data available. |
2212 | | * |
2213 | | * This is useful to initialize the cutoff xid after which a new changeset |
2214 | | * extraction replication slot can start decoding changes. |
2215 | | * |
2216 | | * Must be called with ProcArrayLock held either shared or exclusively, |
2217 | | * although most callers will want to use exclusive mode since it is expected |
2218 | | * that the caller will immediately use the xid to peg the xmin horizon. |
2219 | | */ |
2220 | | TransactionId |
2221 | | GetOldestSafeDecodingTransactionId(bool catalogOnly) |
2222 | 0 | { |
2223 | 0 | ProcArrayStruct *arrayP = procArray; |
2224 | 0 | TransactionId oldestSafeXid; |
2225 | 0 | int index; |
2226 | 0 | bool recovery_in_progress = RecoveryInProgress(); |
2227 | |
|
2228 | 0 | Assert(LWLockHeldByMe(ProcArrayLock)); |
2229 | | |
2230 | | /* |
2231 | | * Acquire XidGenLock, so no transactions can acquire an xid while we're |
2232 | | * running. If no transaction with xid were running concurrently a new xid |
2233 | | * could influence the RecentXmin et al. |
2234 | | * |
2235 | | * We initialize the computation to nextXid since that's guaranteed to be |
2236 | | * a safe, albeit pessimal, value. |
2237 | | */ |
2238 | 0 | LWLockAcquire(XidGenLock, LW_SHARED); |
2239 | 0 | oldestSafeXid = ShmemVariableCache->nextXid; |
2240 | | |
2241 | | /* |
2242 | | * If there's already a slot pegging the xmin horizon, we can start with |
2243 | | * that value, it's guaranteed to be safe since it's computed by this |
2244 | | * routine initially and has been enforced since. We can always use the |
2245 | | * slot's general xmin horizon, but the catalog horizon is only usable |
2246 | | * when only catalog data is going to be looked at. |
2247 | | */ |
2248 | 0 | if (TransactionIdIsValid(procArray->replication_slot_xmin) && |
2249 | 0 | TransactionIdPrecedes(procArray->replication_slot_xmin, |
2250 | 0 | oldestSafeXid)) |
2251 | 0 | oldestSafeXid = procArray->replication_slot_xmin; |
2252 | |
|
2253 | 0 | if (catalogOnly && |
2254 | 0 | TransactionIdIsValid(procArray->replication_slot_catalog_xmin) && |
2255 | 0 | TransactionIdPrecedes(procArray->replication_slot_catalog_xmin, |
2256 | 0 | oldestSafeXid)) |
2257 | 0 | oldestSafeXid = procArray->replication_slot_catalog_xmin; |
2258 | | |
2259 | | /* |
2260 | | * If we're not in recovery, we walk over the procarray and collect the |
2261 | | * lowest xid. Since we're called with ProcArrayLock held and have |
2262 | | * acquired XidGenLock, no entries can vanish concurrently, since |
2263 | | * PGXACT->xid is only set with XidGenLock held and only cleared with |
2264 | | * ProcArrayLock held. |
2265 | | * |
2266 | | * In recovery we can't lower the safe value besides what we've computed |
2267 | | * above, so we'll have to wait a bit longer there. We unfortunately can |
2268 | | * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids |
2269 | | * machinery can miss values and return an older value than is safe. |
2270 | | */ |
2271 | 0 | if (!recovery_in_progress) |
2272 | 0 | { |
2273 | | /* |
2274 | | * Spin over procArray collecting all min(PGXACT->xid) |
2275 | | */ |
2276 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2277 | 0 | { |
2278 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2279 | 0 | volatile PGXACT *pgxact = &allPgXact[pgprocno]; |
2280 | 0 | TransactionId xid; |
2281 | | |
2282 | | /* Fetch xid just once - see GetNewTransactionId */ |
2283 | 0 | xid = pgxact->xid; |
2284 | |
|
2285 | 0 | if (!TransactionIdIsNormal(xid)) |
2286 | 0 | continue; |
2287 | | |
2288 | 0 | if (TransactionIdPrecedes(xid, oldestSafeXid)) |
2289 | 0 | oldestSafeXid = xid; |
2290 | 0 | } |
2291 | 0 | } |
2292 | |
|
2293 | 0 | LWLockRelease(XidGenLock); |
2294 | |
|
2295 | 0 | return oldestSafeXid; |
2296 | 0 | } |
2297 | | |
2298 | | /* |
2299 | | * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are |
2300 | | * delaying checkpoint because they have critical actions in progress. |
2301 | | * |
2302 | | * Constructs an array of VXIDs of transactions that are currently in commit |
2303 | | * critical sections, as shown by having delayChkpt set in their PGXACT. |
2304 | | * |
2305 | | * Returns a palloc'd array that should be freed by the caller. |
2306 | | * *nvxids is the number of valid entries. |
2307 | | * |
2308 | | * Note that because backends set or clear delayChkpt without holding any lock, |
2309 | | * the result is somewhat indeterminate, but we don't really care. Even in |
2310 | | * a multiprocessor with delayed writes to shared memory, it should be certain |
2311 | | * that setting of delayChkpt will propagate to shared memory when the backend |
2312 | | * takes a lock, so we cannot fail to see a virtual xact as delayChkpt if |
2313 | | * it's already inserted its commit record. Whether it takes a little while |
2314 | | * for clearing of delayChkpt to propagate is unimportant for correctness. |
2315 | | */ |
2316 | | VirtualTransactionId * |
2317 | | GetVirtualXIDsDelayingChkpt(int *nvxids) |
2318 | 987 | { |
2319 | 987 | VirtualTransactionId *vxids; |
2320 | 987 | ProcArrayStruct *arrayP = procArray; |
2321 | 987 | int count = 0; |
2322 | 987 | int index; |
2323 | | |
2324 | | /* allocate what's certainly enough result space */ |
2325 | 987 | vxids = (VirtualTransactionId *) |
2326 | 987 | palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs); |
2327 | | |
2328 | 987 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2329 | | |
2330 | 1.97k | for (index = 0; index < arrayP->numProcs; index++) |
2331 | 989 | { |
2332 | 989 | int pgprocno = arrayP->pgprocnos[index]; |
2333 | 989 | volatile PGPROC *proc = &allProcs[pgprocno]; |
2334 | 989 | volatile PGXACT *pgxact = &allPgXact[pgprocno]; |
2335 | | |
2336 | 989 | if (pgxact->delayChkpt) |
2337 | 0 | { |
2338 | 0 | VirtualTransactionId vxid; |
2339 | |
|
2340 | 0 | GET_VXID_FROM_PGPROC(vxid, *proc); |
2341 | 0 | if (VirtualTransactionIdIsValid(vxid)) |
2342 | 0 | vxids[count++] = vxid; |
2343 | 0 | } |
2344 | 989 | } |
2345 | | |
2346 | 987 | LWLockRelease(ProcArrayLock); |
2347 | | |
2348 | 987 | *nvxids = count; |
2349 | 987 | return vxids; |
2350 | 987 | } |
2351 | | |
2352 | | /* |
2353 | | * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying? |
2354 | | * |
2355 | | * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any |
2356 | | * of the specified VXIDs are still in critical sections of code. |
2357 | | * |
2358 | | * Note: this is O(N^2) in the number of vxacts that are/were delaying, but |
2359 | | * those numbers should be small enough for it not to be a problem. |
2360 | | */ |
2361 | | bool |
2362 | | HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids) |
2363 | 0 | { |
2364 | 0 | bool result = false; |
2365 | 0 | ProcArrayStruct *arrayP = procArray; |
2366 | 0 | int index; |
2367 | |
|
2368 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2369 | |
|
2370 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2371 | 0 | { |
2372 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2373 | 0 | volatile PGPROC *proc = &allProcs[pgprocno]; |
2374 | 0 | volatile PGXACT *pgxact = &allPgXact[pgprocno]; |
2375 | 0 | VirtualTransactionId vxid; |
2376 | |
|
2377 | 0 | GET_VXID_FROM_PGPROC(vxid, *proc); |
2378 | |
|
2379 | 0 | if (pgxact->delayChkpt && VirtualTransactionIdIsValid(vxid)) |
2380 | 0 | { |
2381 | 0 | int i; |
2382 | |
|
2383 | 0 | for (i = 0; i < nvxids; i++) |
2384 | 0 | { |
2385 | 0 | if (VirtualTransactionIdEquals(vxid, vxids[i])) |
2386 | 0 | { |
2387 | 0 | result = true; |
2388 | 0 | break; |
2389 | 0 | } |
2390 | 0 | } |
2391 | 0 | if (result) |
2392 | 0 | break; |
2393 | 0 | } |
2394 | 0 | } |
2395 | |
|
2396 | 0 | LWLockRelease(ProcArrayLock); |
2397 | |
|
2398 | 0 | return result; |
2399 | 0 | } |
2400 | | |
2401 | | /* |
2402 | | * BackendPidGetProc -- get a backend's PGPROC given its PID |
2403 | | * |
2404 | | * Returns NULL if not found. Note that it is up to the caller to be |
2405 | | * sure that the question remains meaningful for long enough for the |
2406 | | * answer to be used ... |
2407 | | */ |
2408 | | PGPROC * |
2409 | | BackendPidGetProc(int pid) |
2410 | 11 | { |
2411 | 11 | PGPROC *result; |
2412 | | |
2413 | 11 | if (pid == 0) /* never match dummy PGPROCs */ |
2414 | 0 | return NULL; |
2415 | | |
2416 | 11 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2417 | | |
2418 | 11 | result = BackendPidGetProcWithLock(pid); |
2419 | | |
2420 | 11 | LWLockRelease(ProcArrayLock); |
2421 | | |
2422 | 11 | return result; |
2423 | 11 | } |
2424 | | |
2425 | | /* |
2426 | | * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID |
2427 | | * |
2428 | | * Same as above, except caller must be holding ProcArrayLock. The found |
2429 | | * entry, if any, can be assumed to be valid as long as the lock remains held. |
2430 | | */ |
2431 | | PGPROC * |
2432 | | BackendPidGetProcWithLock(int pid) |
2433 | 6.72k | { |
2434 | 6.72k | PGPROC *result = NULL; |
2435 | 6.72k | ProcArrayStruct *arrayP = procArray; |
2436 | 6.72k | int index; |
2437 | | |
2438 | 6.72k | if (pid == 0) /* never match dummy PGPROCs */ |
2439 | 0 | return NULL; |
2440 | | |
2441 | 11.7k | for (index = 0; index < arrayP->numProcs; index++) |
2442 | 11.7k | { |
2443 | 11.7k | PGPROC *proc = &allProcs[arrayP->pgprocnos[index]]; |
2444 | | |
2445 | 11.7k | if (proc->pid == pid) |
2446 | 6.72k | { |
2447 | 6.72k | result = proc; |
2448 | 6.72k | break; |
2449 | 6.72k | } |
2450 | 11.7k | } |
2451 | | |
2452 | 6.72k | return result; |
2453 | 6.72k | } |
2454 | | |
2455 | | /* |
2456 | | * BackendXidGetPid -- get a backend's pid given its XID |
2457 | | * |
2458 | | * Returns 0 if not found or it's a prepared transaction. Note that |
2459 | | * it is up to the caller to be sure that the question remains |
2460 | | * meaningful for long enough for the answer to be used ... |
2461 | | * |
2462 | | * Only main transaction Ids are considered. This function is mainly |
2463 | | * useful for determining what backend owns a lock. |
2464 | | * |
2465 | | * Beware that not every xact has an XID assigned. However, as long as you |
2466 | | * only call this using an XID found on disk, you're safe. |
2467 | | */ |
2468 | | int |
2469 | | BackendXidGetPid(TransactionId xid) |
2470 | 0 | { |
2471 | 0 | int result = 0; |
2472 | 0 | ProcArrayStruct *arrayP = procArray; |
2473 | 0 | int index; |
2474 | |
|
2475 | 0 | if (xid == InvalidTransactionId) /* never match invalid xid */ |
2476 | 0 | return 0; |
2477 | | |
2478 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2479 | |
|
2480 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2481 | 0 | { |
2482 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2483 | 0 | volatile PGPROC *proc = &allProcs[pgprocno]; |
2484 | 0 | volatile PGXACT *pgxact = &allPgXact[pgprocno]; |
2485 | |
|
2486 | 0 | if (pgxact->xid == xid) |
2487 | 0 | { |
2488 | 0 | result = proc->pid; |
2489 | 0 | break; |
2490 | 0 | } |
2491 | 0 | } |
2492 | |
|
2493 | 0 | LWLockRelease(ProcArrayLock); |
2494 | |
|
2495 | 0 | return result; |
2496 | 0 | } |
2497 | | |
2498 | | /* |
2499 | | * IsBackendPid -- is a given pid a running backend |
2500 | | * |
2501 | | * This is not called by the backend, but is called by external modules. |
2502 | | */ |
2503 | | bool |
2504 | | IsBackendPid(int pid) |
2505 | 0 | { |
2506 | 0 | return (BackendPidGetProc(pid) != NULL); |
2507 | 0 | } |
2508 | | |
2509 | | |
2510 | | /* |
2511 | | * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs. |
2512 | | * |
2513 | | * The array is palloc'd. The number of valid entries is returned into *nvxids. |
2514 | | * |
2515 | | * The arguments allow filtering the set of VXIDs returned. Our own process |
2516 | | * is always skipped. In addition: |
2517 | | * If limitXmin is not InvalidTransactionId, skip processes with |
2518 | | * xmin > limitXmin. |
2519 | | * If excludeXmin0 is true, skip processes with xmin = 0. |
2520 | | * If allDbs is false, skip processes attached to other databases. |
2521 | | * If excludeVacuum isn't zero, skip processes for which |
2522 | | * (vacuumFlags & excludeVacuum) is not zero. |
2523 | | * |
2524 | | * Note: the purpose of the limitXmin and excludeXmin0 parameters is to |
2525 | | * allow skipping backends whose oldest live snapshot is no older than |
2526 | | * some snapshot we have. Since we examine the procarray with only shared |
2527 | | * lock, there are race conditions: a backend could set its xmin just after |
2528 | | * we look. Indeed, on multiprocessors with weak memory ordering, the |
2529 | | * other backend could have set its xmin *before* we look. We know however |
2530 | | * that such a backend must have held shared ProcArrayLock overlapping our |
2531 | | * own hold of ProcArrayLock, else we would see its xmin update. Therefore, |
2532 | | * any snapshot the other backend is taking concurrently with our scan cannot |
2533 | | * consider any transactions as still running that we think are committed |
2534 | | * (since backends must hold ProcArrayLock exclusive to commit). |
2535 | | */ |
2536 | | VirtualTransactionId * |
2537 | | GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, |
2538 | | bool allDbs, int excludeVacuum, |
2539 | | int *nvxids) |
2540 | 0 | { |
2541 | 0 | VirtualTransactionId *vxids; |
2542 | 0 | ProcArrayStruct *arrayP = procArray; |
2543 | 0 | int count = 0; |
2544 | 0 | int index; |
2545 | | |
2546 | | /* allocate what's certainly enough result space */ |
2547 | 0 | vxids = (VirtualTransactionId *) |
2548 | 0 | palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs); |
2549 | |
|
2550 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2551 | |
|
2552 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2553 | 0 | { |
2554 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2555 | 0 | volatile PGPROC *proc = &allProcs[pgprocno]; |
2556 | 0 | volatile PGXACT *pgxact = &allPgXact[pgprocno]; |
2557 | |
|
2558 | 0 | if (proc == MyProc) |
2559 | 0 | continue; |
2560 | | |
2561 | 0 | if (excludeVacuum & pgxact->vacuumFlags) |
2562 | 0 | continue; |
2563 | | |
2564 | 0 | if (allDbs || proc->databaseId == MyDatabaseId) |
2565 | 0 | { |
2566 | | /* Fetch xmin just once - might change on us */ |
2567 | 0 | TransactionId pxmin = pgxact->xmin; |
2568 | |
|
2569 | 0 | if (excludeXmin0 && !TransactionIdIsValid(pxmin)) |
2570 | 0 | continue; |
2571 | | |
2572 | | /* |
2573 | | * InvalidTransactionId precedes all other XIDs, so a proc that |
2574 | | * hasn't set xmin yet will not be rejected by this test. |
2575 | | */ |
2576 | 0 | if (!TransactionIdIsValid(limitXmin) || |
2577 | 0 | TransactionIdPrecedesOrEquals(pxmin, limitXmin)) |
2578 | 0 | { |
2579 | 0 | VirtualTransactionId vxid; |
2580 | |
|
2581 | 0 | GET_VXID_FROM_PGPROC(vxid, *proc); |
2582 | 0 | if (VirtualTransactionIdIsValid(vxid)) |
2583 | 0 | vxids[count++] = vxid; |
2584 | 0 | } |
2585 | 0 | } |
2586 | 0 | } |
2587 | |
|
2588 | 0 | LWLockRelease(ProcArrayLock); |
2589 | |
|
2590 | 0 | *nvxids = count; |
2591 | 0 | return vxids; |
2592 | 0 | } |
2593 | | |
2594 | | /* |
2595 | | * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs. |
2596 | | * |
2597 | | * Usage is limited to conflict resolution during recovery on standby servers. |
2598 | | * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId |
2599 | | * in cases where we cannot accurately determine a value for latestRemovedXid. |
2600 | | * |
2601 | | * If limitXmin is InvalidTransactionId then we want to kill everybody, |
2602 | | * so we're not worried if they have a snapshot or not, nor does it really |
2603 | | * matter what type of lock we hold. |
2604 | | * |
2605 | | * All callers that are checking xmins always now supply a valid and useful |
2606 | | * value for limitXmin. The limitXmin is always lower than the lowest |
2607 | | * numbered KnownAssignedXid that is not already a FATAL error. This is |
2608 | | * because we only care about cleanup records that are cleaning up tuple |
2609 | | * versions from committed transactions. In that case they will only occur |
2610 | | * at the point where the record is less than the lowest running xid. That |
2611 | | * allows us to say that if any backend takes a snapshot concurrently with |
2612 | | * us then the conflict assessment made here would never include the snapshot |
2613 | | * that is being derived. So we take LW_SHARED on the ProcArray and allow |
2614 | | * concurrent snapshots when limitXmin is valid. We might think about adding |
2615 | | * Assert(limitXmin < lowest(KnownAssignedXids)) |
2616 | | * but that would not be true in the case of FATAL errors lagging in array, |
2617 | | * but we already know those are bogus anyway, so we skip that test. |
2618 | | * |
2619 | | * If dbOid is valid we skip backends attached to other databases. |
2620 | | * |
2621 | | * Be careful to *not* pfree the result from this function. We reuse |
2622 | | * this array sufficiently often that we use malloc for the result. |
2623 | | */ |
VirtualTransactionId *
GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
{
	/* Static: workspace persists across calls; callers must NOT pfree it */
	static VirtualTransactionId *vxids;
	ProcArrayStruct *arrayP = procArray;
	int			count = 0;
	int			index;

	/*
	 * If first time through, get workspace to remember main XIDs in. We
	 * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
	 * result space, remembering room for a terminator.
	 */
	if (vxids == NULL)
	{
		/* maxProcs + 1: the extra slot holds the terminator entry below */
		vxids = (VirtualTransactionId *)
			malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
		if (vxids == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
	}

	LWLockAcquire(ProcArrayLock, LW_SHARED);

	for (index = 0; index < arrayP->numProcs; index++)
	{
		int			pgprocno = arrayP->pgprocnos[index];
		volatile PGPROC *proc = &allProcs[pgprocno];
		volatile PGXACT *pgxact = &allPgXact[pgprocno];

		/* Exclude prepared transactions (dummy PGPROCs have pid == 0) */
		if (proc->pid == 0)
			continue;

		/* Invalid dbOid means "consider backends in every database" */
		if (!OidIsValid(dbOid) ||
			proc->databaseId == dbOid)
		{
			/* Fetch xmin just once - can't change on us, but good coding */
			TransactionId pxmin = pgxact->xmin;

			/*
			 * We ignore an invalid pxmin because this means that backend has
			 * no snapshot currently. We hold a Share lock to avoid contention
			 * with users taking snapshots. That is not a problem because the
			 * current xmin is always at least one higher than the latest
			 * removed xid, so any new snapshot would never conflict with the
			 * test here.
			 */
			if (!TransactionIdIsValid(limitXmin) ||
				(TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
			{
				VirtualTransactionId vxid;

				GET_VXID_FROM_PGPROC(vxid, *proc);
				if (VirtualTransactionIdIsValid(vxid))
					vxids[count++] = vxid;
			}
		}
	}

	LWLockRelease(ProcArrayLock);

	/* add the terminator (both fields invalid marks end-of-array) */
	vxids[count].backendId = InvalidBackendId;
	vxids[count].localTransactionId = InvalidLocalTransactionId;

	return vxids;
}
2693 | | |
2694 | | /* |
2695 | | * CancelVirtualTransaction - used in recovery conflict processing |
2696 | | * |
2697 | | * Returns pid of the process signaled, or 0 if not found. |
2698 | | */ |
2699 | | pid_t |
2700 | | CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) |
2701 | 0 | { |
2702 | 0 | ProcArrayStruct *arrayP = procArray; |
2703 | 0 | int index; |
2704 | 0 | pid_t pid = 0; |
2705 | |
|
2706 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2707 | |
|
2708 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2709 | 0 | { |
2710 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2711 | 0 | volatile PGPROC *proc = &allProcs[pgprocno]; |
2712 | 0 | VirtualTransactionId procvxid; |
2713 | |
|
2714 | 0 | GET_VXID_FROM_PGPROC(procvxid, *proc); |
2715 | |
|
2716 | 0 | if (procvxid.backendId == vxid.backendId && |
2717 | 0 | procvxid.localTransactionId == vxid.localTransactionId) |
2718 | 0 | { |
2719 | 0 | proc->recoveryConflictPending = true; |
2720 | 0 | pid = proc->pid; |
2721 | 0 | if (pid != 0) |
2722 | 0 | { |
2723 | | /* |
2724 | | * Kill the pid if it's still here. If not, that's what we |
2725 | | * wanted so ignore any errors. |
2726 | | */ |
2727 | 0 | (void) SendProcSignal(pid, sigmode, vxid.backendId); |
2728 | 0 | } |
2729 | 0 | break; |
2730 | 0 | } |
2731 | 0 | } |
2732 | |
|
2733 | 0 | LWLockRelease(ProcArrayLock); |
2734 | |
|
2735 | 0 | return pid; |
2736 | 0 | } |
2737 | | |
2738 | | /* |
2739 | | * MinimumActiveBackends --- count backends (other than myself) that are |
2740 | | * in active transactions. Return true if the count exceeds the |
2741 | | * minimum threshold passed. This is used as a heuristic to decide if |
2742 | | * a pre-XLOG-flush delay is worthwhile during commit. |
2743 | | * |
2744 | | * Do not count backends that are blocked waiting for locks, since they are |
2745 | | * not going to get to run until someone else commits. |
2746 | | */ |
bool
MinimumActiveBackends(int min)
{
	ProcArrayStruct *arrayP = procArray;
	int			count = 0;
	int			index;

	/* Quick short-circuit if no minimum is specified */
	if (min == 0)
		return true;

	/*
	 * Note: for speed, we don't acquire ProcArrayLock. This is a little bit
	 * bogus, but since we are only testing fields for zero or nonzero, it
	 * should be OK. The result is only used for heuristic purposes anyway...
	 */
	for (index = 0; index < arrayP->numProcs; index++)
	{
		int			pgprocno = arrayP->pgprocnos[index];
		volatile PGPROC *proc = &allProcs[pgprocno];
		volatile PGXACT *pgxact = &allPgXact[pgprocno];

		/*
		 * Since we're not holding a lock, need to be prepared to deal with
		 * garbage, as someone could have incremented numProcs but not yet
		 * filled the structure.
		 *
		 * If someone just decremented numProcs, 'proc' could also point to a
		 * PGPROC entry that's no longer in the array. It still points to a
		 * PGPROC struct, though, because freed PGPROC entries just go to the
		 * free list and are recycled. Its contents are nonsense in that case,
		 * but that's acceptable for this function.
		 */
		if (pgprocno == -1)
			continue;			/* do not count deleted entries */
		if (proc == MyProc)
			continue;			/* do not count myself */
		if (pgxact->xid == InvalidTransactionId)
			continue;			/* do not count if no XID assigned */
		if (proc->pid == 0)
			continue;			/* do not count prepared xacts */
		if (proc->waitLock != NULL)
			continue;			/* do not count if blocked on a lock */
		count++;
		/* Threshold reached: no point in scanning the rest of the array */
		if (count >= min)
			break;
	}

	return count >= min;
}
2797 | | |
2798 | | /* |
2799 | | * CountDBBackends --- count backends that are using specified database |
2800 | | */ |
2801 | | int |
2802 | | CountDBBackends(Oid databaseid) |
2803 | 0 | { |
2804 | 0 | ProcArrayStruct *arrayP = procArray; |
2805 | 0 | int count = 0; |
2806 | 0 | int index; |
2807 | |
|
2808 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2809 | |
|
2810 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2811 | 0 | { |
2812 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2813 | 0 | volatile PGPROC *proc = &allProcs[pgprocno]; |
2814 | |
|
2815 | 0 | if (proc->pid == 0) |
2816 | 0 | continue; /* do not count prepared xacts */ |
2817 | 0 | if (!OidIsValid(databaseid) || |
2818 | 0 | proc->databaseId == databaseid) |
2819 | 0 | count++; |
2820 | 0 | } |
2821 | |
|
2822 | 0 | LWLockRelease(ProcArrayLock); |
2823 | |
|
2824 | 0 | return count; |
2825 | 0 | } |
2826 | | |
2827 | | /* |
2828 | | * CountDBConnections --- counts database backends ignoring any background |
2829 | | * worker processes |
2830 | | */ |
2831 | | int |
2832 | | CountDBConnections(Oid databaseid) |
2833 | 0 | { |
2834 | 0 | ProcArrayStruct *arrayP = procArray; |
2835 | 0 | int count = 0; |
2836 | 0 | int index; |
2837 | |
|
2838 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2839 | |
|
2840 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2841 | 0 | { |
2842 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2843 | 0 | volatile PGPROC *proc = &allProcs[pgprocno]; |
2844 | |
|
2845 | 0 | if (proc->pid == 0) |
2846 | 0 | continue; /* do not count prepared xacts */ |
2847 | 0 | if (proc->isBackgroundWorker) |
2848 | 0 | continue; /* do not count background workers */ |
2849 | 0 | if (!OidIsValid(databaseid) || |
2850 | 0 | proc->databaseId == databaseid) |
2851 | 0 | count++; |
2852 | 0 | } |
2853 | |
|
2854 | 0 | LWLockRelease(ProcArrayLock); |
2855 | |
|
2856 | 0 | return count; |
2857 | 0 | } |
2858 | | |
2859 | | /* |
2860 | | * CancelDBBackends --- cancel backends that are using specified database |
2861 | | */ |
2862 | | void |
2863 | | CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending) |
2864 | 0 | { |
2865 | 0 | ProcArrayStruct *arrayP = procArray; |
2866 | 0 | int index; |
2867 | 0 | pid_t pid = 0; |
2868 | | |
2869 | | /* tell all backends to die */ |
2870 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
2871 | |
|
2872 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2873 | 0 | { |
2874 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2875 | 0 | volatile PGPROC *proc = &allProcs[pgprocno]; |
2876 | |
|
2877 | 0 | if (databaseid == InvalidOid || proc->databaseId == databaseid) |
2878 | 0 | { |
2879 | 0 | VirtualTransactionId procvxid; |
2880 | |
|
2881 | 0 | GET_VXID_FROM_PGPROC(procvxid, *proc); |
2882 | |
|
2883 | 0 | proc->recoveryConflictPending = conflictPending; |
2884 | 0 | pid = proc->pid; |
2885 | 0 | if (pid != 0) |
2886 | 0 | { |
2887 | | /* |
2888 | | * Kill the pid if it's still here. If not, that's what we |
2889 | | * wanted so ignore any errors. |
2890 | | */ |
2891 | 0 | (void) SendProcSignal(pid, sigmode, procvxid.backendId); |
2892 | 0 | } |
2893 | 0 | } |
2894 | 0 | } |
2895 | |
|
2896 | 0 | LWLockRelease(ProcArrayLock); |
2897 | 0 | } |
2898 | | |
2899 | | /* |
2900 | | * CountUserBackends --- count backends that are used by specified user |
2901 | | */ |
2902 | | int |
2903 | | CountUserBackends(Oid roleid) |
2904 | 6 | { |
2905 | 6 | ProcArrayStruct *arrayP = procArray; |
2906 | 6 | int count = 0; |
2907 | 6 | int index; |
2908 | | |
2909 | 6 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2910 | | |
2911 | 22 | for (index = 0; index < arrayP->numProcs; index++) |
2912 | 16 | { |
2913 | 16 | int pgprocno = arrayP->pgprocnos[index]; |
2914 | 16 | volatile PGPROC *proc = &allProcs[pgprocno]; |
2915 | | |
2916 | 16 | if (proc->pid == 0) |
2917 | 0 | continue; /* do not count prepared xacts */ |
2918 | 16 | if (proc->isBackgroundWorker) |
2919 | 0 | continue; /* do not count background workers */ |
2920 | 16 | if (proc->roleId == roleid) |
2921 | 10 | count++; |
2922 | 16 | } |
2923 | | |
2924 | 6 | LWLockRelease(ProcArrayLock); |
2925 | | |
2926 | 6 | return count; |
2927 | 6 | } |
2928 | | |
2929 | | /* |
2930 | | * CountOtherDBBackends -- check for other backends running in the given DB |
2931 | | * |
2932 | | * If there are other backends in the DB, we will wait a maximum of 5 seconds |
2933 | | * for them to exit. Autovacuum backends are encouraged to exit early by |
2934 | | * sending them SIGTERM, but normal user backends are just waited for. |
2935 | | * |
2936 | | * The current backend is always ignored; it is caller's responsibility to |
2937 | | * check whether the current backend uses the given DB, if it's important. |
2938 | | * |
2939 | | * Returns true if there are (still) other backends in the DB, false if not. |
2940 | | * Also, *nbackends and *nprepared are set to the number of other backends |
2941 | | * and prepared transactions in the DB, respectively. |
2942 | | * |
2943 | | * This function is used to interlock DROP DATABASE and related commands |
2944 | | * against there being any active backends in the target DB --- dropping the |
2945 | | * DB while active backends remain would be a Bad Thing. Note that we cannot |
2946 | | * detect here the possibility of a newly-started backend that is trying to |
2947 | | * connect to the doomed database, so additional interlocking is needed during |
2948 | | * backend startup. The caller should normally hold an exclusive lock on the |
2949 | | * target DB before calling this, which is one reason we mustn't wait |
2950 | | * indefinitely. |
2951 | | */ |
bool
CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
{
	ProcArrayStruct *arrayP = procArray;

#define MAXAUTOVACPIDS	10		/* max autovacs to SIGTERM per iteration */
	int			autovac_pids[MAXAUTOVACPIDS];
	int			tries;

	/* 50 tries with 100ms sleep between tries makes 5 sec total wait */
	for (tries = 0; tries < 50; tries++)
	{
		int			nautovacs = 0;
		bool		found = false;
		int			index;

		CHECK_FOR_INTERRUPTS();

		/* Recompute the output counts from scratch on every retry */
		*nbackends = *nprepared = 0;

		LWLockAcquire(ProcArrayLock, LW_SHARED);

		for (index = 0; index < arrayP->numProcs; index++)
		{
			int			pgprocno = arrayP->pgprocnos[index];
			volatile PGPROC *proc = &allProcs[pgprocno];
			volatile PGXACT *pgxact = &allPgXact[pgprocno];

			if (proc->databaseId != databaseId)
				continue;
			if (proc == MyProc)
				continue;		/* current backend is always ignored */

			found = true;

			/* pid == 0 is a dummy PGPROC for a prepared transaction */
			if (proc->pid == 0)
				(*nprepared)++;
			else
			{
				(*nbackends)++;
				/* Remember autovacuum workers so we can SIGTERM them below */
				if ((pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) &&
					nautovacs < MAXAUTOVACPIDS)
					autovac_pids[nautovacs++] = proc->pid;
			}
		}

		LWLockRelease(ProcArrayLock);

		if (!found)
			return false;		/* no conflicting backends, so done */

		/*
		 * Send SIGTERM to any conflicting autovacuums before sleeping. We
		 * postpone this step until after the loop because we don't want to
		 * hold ProcArrayLock while issuing kill(). We have no idea what might
		 * block kill() inside the kernel...
		 */
		for (index = 0; index < nautovacs; index++)
			(void) kill(autovac_pids[index], SIGTERM);	/* ignore any error */

		/* sleep, then try again */
		pg_usleep(100 * 1000L); /* 100ms */
	}

	return true;				/* timed out, still conflicts */
}
3018 | | |
3019 | | /* |
3020 | | * Terminate existing connections to the specified database. This routine |
3021 | | * is used by the DROP DATABASE command when user has asked to forcefully |
3022 | | * drop the database. |
3023 | | * |
3024 | | * The current backend is always ignored; it is caller's responsibility to |
3025 | | * check whether the current backend uses the given DB, if it's important. |
3026 | | * |
 * It does not allow terminating the connections if there is even one
 * backend with a prepared transaction in the target database.
3029 | | */ |
void
TerminateOtherDBBackends(Oid databaseId)
{
	ProcArrayStruct *arrayP = procArray;
	List	   *pids = NIL;
	int			nprepared = 0;
	int			i;

	LWLockAcquire(ProcArrayLock, LW_SHARED);

	/* Collect pids of live backends in the target DB; count prepared xacts */
	for (i = 0; i < procArray->numProcs; i++)
	{
		int			pgprocno = arrayP->pgprocnos[i];
		PGPROC	   *proc = &allProcs[pgprocno];

		if (proc->databaseId != databaseId)
			continue;
		if (proc == MyProc)
			continue;			/* current backend is always ignored */

		/* pid == 0 is a dummy PGPROC for a prepared transaction */
		if (proc->pid != 0)
			pids = lappend_int(pids, proc->pid);
		else
			nprepared++;
	}

	LWLockRelease(ProcArrayLock);

	/* Prepared transactions cannot be killed; error out instead */
	if (nprepared > 0)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_IN_USE),
				 errmsg("database \"%s\" is being used by prepared transaction",
						get_database_name(databaseId)),
				 errdetail_plural("There is %d prepared transaction using the database.",
								  "There are %d prepared transactions using the database.",
								  nprepared,
								  nprepared)));

	if (pids)
	{
		ListCell   *lc;

		/*
		 * Check whether we have the necessary rights to terminate other
		 * sessions. We don't terminate any session until we ensure that we
		 * have rights on all the sessions to be terminated. These checks are
		 * the same as we do in pg_terminate_backend.
		 *
		 * In this case we don't raise some warnings - like "PID %d is not a
		 * PostgreSQL server process", because for us already finished session
		 * is not a problem.
		 */
		foreach(lc, pids)
		{
			int			pid = lfirst_int(lc);
			PGPROC	   *proc = BackendPidGetProc(pid);

			if (proc != NULL)
			{
				/* Only allow superusers to signal superuser-owned backends. */
				if (superuser_arg(proc->roleId) && !superuser())
					ereport(ERROR,
							(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
							 (errmsg("must be a superuser to terminate superuser process"))));

				/* Users can signal backends they have role membership in. */
				if (!has_privs_of_role(GetUserId(), proc->roleId) &&
					!has_privs_of_role(GetUserId(), DEFAULT_ROLE_SIGNAL_BACKENDID))
					ereport(ERROR,
							(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
							 (errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend"))));
			}
		}

		/*
		 * There's a race condition here: once we release the ProcArrayLock,
		 * it's possible for the session to exit before we issue kill. That
		 * race condition possibility seems too unlikely to worry about. See
		 * pg_signal_backend.
		 */
		foreach(lc, pids)
		{
			int			pid = lfirst_int(lc);
			PGPROC	   *proc = BackendPidGetProc(pid);

			if (proc != NULL)
			{
				/*
				 * If we have setsid(), signal the backend's whole process
				 * group
				 */
#ifdef HAVE_SETSID
				(void) kill(-pid, SIGTERM);
#else
				(void) kill(pid, SIGTERM);
#endif
			}
		}
	}
}
3130 | | |
3131 | | /* |
3132 | | * ProcArraySetReplicationSlotXmin |
3133 | | * |
3134 | | * Install limits to future computations of the xmin horizon to prevent vacuum |
3135 | | * and HOT pruning from removing affected rows still needed by clients with |
3136 | | * replication slots. |
3137 | | */ |
3138 | | void |
3139 | | ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, |
3140 | | bool already_locked) |
3141 | 1.80k | { |
3142 | 1.80k | Assert(!already_locked || LWLockHeldByMe(ProcArrayLock)); |
3143 | | |
3144 | 1.80k | if (!already_locked) |
3145 | 1.80k | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
3146 | | |
3147 | 1.80k | procArray->replication_slot_xmin = xmin; |
3148 | 1.80k | procArray->replication_slot_catalog_xmin = catalog_xmin; |
3149 | | |
3150 | 1.80k | if (!already_locked) |
3151 | 1.80k | LWLockRelease(ProcArrayLock); |
3152 | 1.80k | } |
3153 | | |
3154 | | /* |
3155 | | * ProcArrayGetReplicationSlotXmin |
3156 | | * |
3157 | | * Return the current slot xmin limits. That's useful to be able to remove |
3158 | | * data that's older than those limits. |
3159 | | */ |
3160 | | void |
3161 | | ProcArrayGetReplicationSlotXmin(TransactionId *xmin, |
3162 | | TransactionId *catalog_xmin) |
3163 | 0 | { |
3164 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3165 | |
|
3166 | 0 | if (xmin != NULL) |
3167 | 0 | *xmin = procArray->replication_slot_xmin; |
3168 | |
|
3169 | 0 | if (catalog_xmin != NULL) |
3170 | 0 | *catalog_xmin = procArray->replication_slot_catalog_xmin; |
3171 | |
|
3172 | 0 | LWLockRelease(ProcArrayLock); |
3173 | 0 | } |
3174 | | |
3175 | | |
/*
 * XidCacheRemove(i)
 *		Delete entry i from MyProc's subxid cache by overwriting it with the
 *		last cached entry and decrementing the count.  Note this does NOT
 *		preserve the order of the remaining entries.
 *		Callers in this file invoke it while holding ProcArrayLock
 *		exclusively (see XidCacheRemoveRunningXids).
 */
#define XidCacheRemove(i) \
	do { \
		MyProc->subxids.xids[i] = MyProc->subxids.xids[MyPgXact->nxids - 1]; \
		MyPgXact->nxids--; \
	} while (0)
3181 | | |
/*
 * XidCacheRemoveRunningXids
 *
 * Remove a bunch of TransactionIds from the list of known-running
 * subtransactions for my backend.  Both the specified xid and those in
 * the xids[] array (of length nxids) are removed from the subxids cache.
 * latestXid must be the latest XID among the group.
 */
void
XidCacheRemoveRunningXids(TransactionId xid,
						  int nxids, const TransactionId *xids,
						  TransactionId latestXid)
{
	int			i,
				j;

	Assert(TransactionIdIsValid(xid));

	/*
	 * We must hold ProcArrayLock exclusively in order to remove transactions
	 * from the PGPROC array.  (See src/backend/access/transam/README.)  It's
	 * possible this could be relaxed since we know this routine is only used
	 * to abort subtransactions, but pending closer analysis we'd best be
	 * conservative.
	 */
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

	/*
	 * Under normal circumstances xid and xids[] will be in increasing order,
	 * as will be the entries in subxids.  Scan backwards to avoid O(N^2)
	 * behavior when removing a lot of xids.
	 */
	for (i = nxids - 1; i >= 0; i--)
	{
		TransactionId anxid = xids[i];

		for (j = MyPgXact->nxids - 1; j >= 0; j--)
		{
			if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
			{
				/* XidCacheRemove swaps in the last entry, so order changes */
				XidCacheRemove(j);
				break;
			}
		}

		/*
		 * Ordinarily we should have found it, unless the cache has
		 * overflowed.  However it's also possible for this routine to be
		 * invoked multiple times for the same subtransaction, in case of an
		 * error during AbortSubTransaction.  So instead of Assert, emit a
		 * debug warning.  (j < 0 means the inner loop exited without a
		 * match.)
		 */
		if (j < 0 && !MyPgXact->overflowed)
			elog(WARNING, "did not find subXID %u in MyProc", anxid);
	}

	/* Now remove the group's top xid itself, using the same backward scan. */
	for (j = MyPgXact->nxids - 1; j >= 0; j--)
	{
		if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
		{
			XidCacheRemove(j);
			break;
		}
	}
	/* Ordinarily we should have found it, unless the cache has overflowed */
	if (j < 0 && !MyPgXact->overflowed)
		elog(WARNING, "did not find subXID %u in MyProc", xid);

	/* Also advance global latestCompletedXid while holding the lock */
	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
							  latestXid))
		ShmemVariableCache->latestCompletedXid = latestXid;

	LWLockRelease(ProcArrayLock);
}
3257 | | |
3258 | | #ifdef XIDCACHE_DEBUG |
3259 | | |
3260 | | /* |
3261 | | * Print stats about effectiveness of XID cache |
3262 | | */ |
3263 | | static void |
3264 | | DisplayXidCache(void) |
3265 | | { |
3266 | | fprintf(stderr, |
3267 | | "XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n", |
3268 | | xc_by_recent_xmin, |
3269 | | xc_by_known_xact, |
3270 | | xc_by_my_xact, |
3271 | | xc_by_latest_xid, |
3272 | | xc_by_main_xid, |
3273 | | xc_by_child_xid, |
3274 | | xc_by_known_assigned, |
3275 | | xc_no_overflow, |
3276 | | xc_slow_answer); |
3277 | | } |
3278 | | #endif /* XIDCACHE_DEBUG */ |
3279 | | |
3280 | | |
3281 | | /* ---------------------------------------------- |
3282 | | * KnownAssignedTransactionIds sub-module |
3283 | | * ---------------------------------------------- |
3284 | | */ |
3285 | | |
3286 | | /* |
3287 | | * In Hot Standby mode, we maintain a list of transactions that are (or were) |
3288 | | * running in the master at the current point in WAL. These XIDs must be |
3289 | | * treated as running by standby transactions, even though they are not in |
3290 | | * the standby server's PGXACT array. |
3291 | | * |
3292 | | * We record all XIDs that we know have been assigned. That includes all the |
3293 | | * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have |
3294 | | * been assigned. We can deduce the existence of unobserved XIDs because we |
3295 | | * know XIDs are assigned in sequence, with no gaps. The KnownAssignedXids |
3296 | | * list expands as new XIDs are observed or inferred, and contracts when |
3297 | | * transaction completion records arrive. |
3298 | | * |
3299 | | * During hot standby we do not fret too much about the distinction between |
3300 | | * top-level XIDs and subtransaction XIDs. We store both together in the |
3301 | | * KnownAssignedXids list. In backends, this is copied into snapshots in |
3302 | | * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot() |
3303 | | * doesn't care about the distinction either. Subtransaction XIDs are |
3304 | | * effectively treated as top-level XIDs and in the typical case pg_subtrans |
3305 | | * links are *not* maintained (which does not affect visibility). |
3306 | | * |
3307 | | * We have room in KnownAssignedXids and in snapshots to hold maxProcs * |
3308 | | * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every master transaction must |
3309 | | * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at |
3310 | | * least every PGPROC_MAX_CACHED_SUBXIDS. When we receive one of these |
3311 | | * records, we mark the subXIDs as children of the top XID in pg_subtrans, |
3312 | | * and then remove them from KnownAssignedXids. This prevents overflow of |
3313 | | * KnownAssignedXids and snapshots, at the cost that status checks for these |
3314 | | * subXIDs will take a slower path through TransactionIdIsInProgress(). |
3315 | | * This means that KnownAssignedXids is not necessarily complete for subXIDs, |
3316 | | * though it should be complete for top-level XIDs; this is the same situation |
3317 | | * that holds with respect to the PGPROC entries in normal running. |
3318 | | * |
3319 | | * When we throw away subXIDs from KnownAssignedXids, we need to keep track of |
3320 | | * that, similarly to tracking overflow of a PGPROC's subxids array. We do |
3321 | | * that by remembering the lastOverflowedXID, ie the last thrown-away subXID. |
3322 | | * As long as that is within the range of interesting XIDs, we have to assume |
3323 | | * that subXIDs are missing from snapshots. (Note that subXID overflow occurs |
3324 | | * on primary when 65th subXID arrives, whereas on standby it occurs when 64th |
3325 | | * subXID arrives - that is not an error.) |
3326 | | * |
3327 | | * Should a backend on primary somehow disappear before it can write an abort |
3328 | | * record, then we just leave those XIDs in KnownAssignedXids. They actually |
3329 | | * aborted but we think they were running; the distinction is irrelevant |
3330 | | * because either way any changes done by the transaction are not visible to |
3331 | | * backends in the standby. We prune KnownAssignedXids when |
3332 | | * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the |
3333 | | * array due to such dead XIDs. |
3334 | | */ |
3335 | | |
/*
 * RecordKnownAssignedTransactionIds
 *		Record the given XID in KnownAssignedXids, as well as any preceding
 *		unobserved XIDs.
 *
 * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
 * associated with a transaction.  Must be called for each record after we
 * have executed StartupCLOG() et al, since we must ExtendCLOG() etc..
 *
 * Called during recovery in analogy with and in place of GetNewTransactionId()
 */
void
RecordKnownAssignedTransactionIds(TransactionId xid)
{
	Assert(standbyState >= STANDBY_INITIALIZED);
	Assert(TransactionIdIsValid(xid));
	Assert(TransactionIdIsValid(latestObservedXid));

	elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
		 xid, latestObservedXid);

	/*
	 * When a newly observed xid arrives, it is frequently the case that it is
	 * *not* the next xid in sequence.  When this occurs, we must treat the
	 * intervening xids as running also.  (If xid does not follow
	 * latestObservedXid, it has already been accounted for and the whole
	 * function is a no-op.)
	 */
	if (TransactionIdFollows(xid, latestObservedXid))
	{
		TransactionId next_expected_xid;

		/*
		 * Extend subtrans like we do in GetNewTransactionId() during normal
		 * operation using individual extend steps.  Note that we do not need
		 * to extend clog since its extensions are WAL logged.
		 *
		 * This part has to be done regardless of standbyState since we
		 * immediately start assigning subtransactions to their toplevel
		 * transactions.
		 */
		next_expected_xid = latestObservedXid;
		while (TransactionIdPrecedes(next_expected_xid, xid))
		{
			TransactionIdAdvance(next_expected_xid);
			ExtendSUBTRANS(next_expected_xid);
		}
		Assert(next_expected_xid == xid);

		/*
		 * If the KnownAssignedXids machinery isn't up yet, there's nothing
		 * more to do since we don't track assigned xids yet.
		 */
		if (standbyState <= STANDBY_INITIALIZED)
		{
			latestObservedXid = xid;
			return;
		}

		/*
		 * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
		 */
		next_expected_xid = latestObservedXid;
		TransactionIdAdvance(next_expected_xid);
		KnownAssignedXidsAdd(next_expected_xid, xid, false);

		/*
		 * Now we can advance latestObservedXid
		 */
		latestObservedXid = xid;

		/* ShmemVariableCache->nextXid must be beyond any observed xid */
		next_expected_xid = latestObservedXid;
		TransactionIdAdvance(next_expected_xid);
		LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
		ShmemVariableCache->nextXid = next_expected_xid;
		LWLockRelease(XidGenLock);
	}
}
3413 | | |
3414 | | /* |
3415 | | * ExpireTreeKnownAssignedTransactionIds |
3416 | | * Remove the given XIDs from KnownAssignedXids. |
3417 | | * |
3418 | | * Called during recovery in analogy with and in place of ProcArrayEndTransaction() |
3419 | | */ |
3420 | | void |
3421 | | ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, |
3422 | | TransactionId *subxids, TransactionId max_xid) |
3423 | 0 | { |
3424 | 0 | Assert(standbyState >= STANDBY_INITIALIZED); |
3425 | | |
3426 | | /* |
3427 | | * Uses same locking as transaction commit |
3428 | | */ |
3429 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
3430 | |
|
3431 | 0 | KnownAssignedXidsRemoveTree(xid, nsubxids, subxids); |
3432 | | |
3433 | | /* As in ProcArrayEndTransaction, advance latestCompletedXid */ |
3434 | 0 | if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, |
3435 | 0 | max_xid)) |
3436 | 0 | ShmemVariableCache->latestCompletedXid = max_xid; |
3437 | |
|
3438 | 0 | LWLockRelease(ProcArrayLock); |
3439 | 0 | } |
3440 | | |
3441 | | /* |
3442 | | * ExpireAllKnownAssignedTransactionIds |
3443 | | * Remove all entries in KnownAssignedXids |
3444 | | */ |
3445 | | void |
3446 | | ExpireAllKnownAssignedTransactionIds(void) |
3447 | 0 | { |
3448 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
3449 | 0 | KnownAssignedXidsRemovePreceding(InvalidTransactionId); |
3450 | 0 | LWLockRelease(ProcArrayLock); |
3451 | 0 | } |
3452 | | |
3453 | | /* |
3454 | | * ExpireOldKnownAssignedTransactionIds |
3455 | | * Remove KnownAssignedXids entries preceding the given XID |
3456 | | */ |
3457 | | void |
3458 | | ExpireOldKnownAssignedTransactionIds(TransactionId xid) |
3459 | 0 | { |
3460 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
3461 | 0 | KnownAssignedXidsRemovePreceding(xid); |
3462 | 0 | LWLockRelease(ProcArrayLock); |
3463 | 0 | } |
3464 | | |
3465 | | |
3466 | | /* |
3467 | | * Private module functions to manipulate KnownAssignedXids |
3468 | | * |
3469 | | * There are 5 main uses of the KnownAssignedXids data structure: |
3470 | | * |
3471 | | * * backends taking snapshots - all valid XIDs need to be copied out |
3472 | | * * backends seeking to determine presence of a specific XID |
3473 | | * * startup process adding new known-assigned XIDs |
3474 | | * * startup process removing specific XIDs as transactions end |
3475 | | * * startup process pruning array when special WAL records arrive |
3476 | | * |
3477 | | * This data structure is known to be a hot spot during Hot Standby, so we |
3478 | | * go to some lengths to make these operations as efficient and as concurrent |
3479 | | * as possible. |
3480 | | * |
3481 | | * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes |
3482 | | * order, to be exact --- to allow binary search for specific XIDs. Note: |
3483 | | * in general TransactionIdPrecedes would not provide a total order, but |
3484 | | * we know that the entries present at any instant should not extend across |
3485 | | * a large enough fraction of XID space to wrap around (the master would |
3486 | | * shut down for fear of XID wrap long before that happens). So it's OK to |
3487 | | * use TransactionIdPrecedes as a binary-search comparator. |
3488 | | * |
3489 | | * It's cheap to maintain the sortedness during insertions, since new known |
3490 | | * XIDs are always reported in XID order; we just append them at the right. |
3491 | | * |
3492 | | * To keep individual deletions cheap, we need to allow gaps in the array. |
3493 | | * This is implemented by marking array elements as valid or invalid using |
3494 | | * the parallel boolean array KnownAssignedXidsValid[]. A deletion is done |
3495 | | * by setting KnownAssignedXidsValid[i] to false, *without* clearing the |
3496 | | * XID entry itself. This preserves the property that the XID entries are |
3497 | | * sorted, so we can do binary searches easily. Periodically we compress |
3498 | | * out the unused entries; that's much cheaper than having to compress the |
3499 | | * array immediately on every deletion. |
3500 | | * |
3501 | | * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[] |
3502 | | * are those with indexes tail <= i < head; items outside this subscript range |
3503 | | * have unspecified contents. When head reaches the end of the array, we |
3504 | | * force compression of unused entries rather than wrapping around, since |
3505 | | * allowing wraparound would greatly complicate the search logic. We maintain |
3506 | | * an explicit tail pointer so that pruning of old XIDs can be done without |
3507 | | * immediately moving the array contents. In most cases only a small fraction |
3508 | | * of the array contains valid entries at any instant. |
3509 | | * |
3510 | | * Although only the startup process can ever change the KnownAssignedXids |
3511 | | * data structure, we still need interlocking so that standby backends will |
3512 | | * not observe invalid intermediate states. The convention is that backends |
3513 | | * must hold shared ProcArrayLock to examine the array. To remove XIDs from |
3514 | | * the array, the startup process must hold ProcArrayLock exclusively, for |
3515 | | * the usual transactional reasons (compare commit/abort of a transaction |
3516 | | * during normal running). Compressing unused entries out of the array |
3517 | | * likewise requires exclusive lock. To add XIDs to the array, we just insert |
3518 | | * them into slots to the right of the head pointer and then advance the head |
3519 | | * pointer. This wouldn't require any lock at all, except that on machines |
3520 | | * with weak memory ordering we need to be careful that other processors |
3521 | | * see the array element changes before they see the head pointer change. |
3522 | | * We handle this by using a spinlock to protect reads and writes of the |
3523 | | * head/tail pointers. (We could dispense with the spinlock if we were to |
3524 | | * create suitable memory access barrier primitives and use those instead.) |
3525 | | * The spinlock must be taken to read or write the head/tail pointers unless |
3526 | | * the caller holds ProcArrayLock exclusively. |
3527 | | * |
3528 | | * Algorithmic analysis: |
3529 | | * |
3530 | | * If we have a maximum of M slots, with N XIDs currently spread across |
3531 | | * S elements then we have N <= S <= M always. |
3532 | | * |
3533 | | * * Adding a new XID is O(1) and needs little locking (unless compression |
3534 | | * must happen) |
3535 | | * * Compressing the array is O(S) and requires exclusive lock |
3536 | | * * Removing an XID is O(logS) and requires exclusive lock |
3537 | | * * Taking a snapshot is O(S) and requires shared lock |
3538 | | * * Checking for an XID is O(logS) and requires shared lock |
3539 | | * |
3540 | | * In comparison, using a hash table for KnownAssignedXids would mean that |
3541 | | * taking snapshots would be O(M). If we can maintain S << M then the |
3542 | | * sorted array technique will deliver significantly faster snapshots. |
3543 | | * If we try to keep S too small then we will spend too much time compressing, |
3544 | | * so there is an optimal point for any workload mix. We use a heuristic to |
3545 | | * decide when to compress the array, though trimming also helps reduce |
3546 | | * frequency of compressing. The heuristic requires us to track the number of |
3547 | | * currently valid XIDs in the array. |
3548 | | */ |
3549 | | |
3550 | | |
3551 | | /* |
3552 | | * Compress KnownAssignedXids by shifting valid data down to the start of the |
3553 | | * array, removing any gaps. |
3554 | | * |
3555 | | * A compression step is forced if "force" is true, otherwise we do it |
3556 | | * only if a heuristic indicates it's a good time to do it. |
3557 | | * |
3558 | | * Caller must hold ProcArrayLock in exclusive mode. |
3559 | | */ |
3560 | | static void |
3561 | | KnownAssignedXidsCompress(bool force) |
3562 | 0 | { |
3563 | | /* use volatile pointer to prevent code rearrangement */ |
3564 | 0 | volatile ProcArrayStruct *pArray = procArray; |
3565 | 0 | int head, |
3566 | 0 | tail; |
3567 | 0 | int compress_index; |
3568 | 0 | int i; |
3569 | | |
3570 | | /* no spinlock required since we hold ProcArrayLock exclusively */ |
3571 | 0 | head = pArray->headKnownAssignedXids; |
3572 | 0 | tail = pArray->tailKnownAssignedXids; |
3573 | |
|
3574 | 0 | if (!force) |
3575 | 0 | { |
3576 | | /* |
3577 | | * If we can choose how much to compress, use a heuristic to avoid |
3578 | | * compressing too often or not often enough. |
3579 | | * |
3580 | | * Heuristic is if we have a large enough current spread and less than |
3581 | | * 50% of the elements are currently in use, then compress. This |
3582 | | * should ensure we compress fairly infrequently. We could compress |
3583 | | * less often though the virtual array would spread out more and |
3584 | | * snapshots would become more expensive. |
3585 | | */ |
3586 | 0 | int nelements = head - tail; |
3587 | |
|
3588 | 0 | if (nelements < 4 * PROCARRAY_MAXPROCS || |
3589 | 0 | nelements < 2 * pArray->numKnownAssignedXids) |
3590 | 0 | return; |
3591 | 0 | } |
3592 | | |
3593 | | /* |
3594 | | * We compress the array by reading the valid values from tail to head, |
3595 | | * re-aligning data to 0th element. |
3596 | | */ |
3597 | 0 | compress_index = 0; |
3598 | 0 | for (i = tail; i < head; i++) |
3599 | 0 | { |
3600 | 0 | if (KnownAssignedXidsValid[i]) |
3601 | 0 | { |
3602 | 0 | KnownAssignedXids[compress_index] = KnownAssignedXids[i]; |
3603 | 0 | KnownAssignedXidsValid[compress_index] = true; |
3604 | 0 | compress_index++; |
3605 | 0 | } |
3606 | 0 | } |
3607 | |
|
3608 | 0 | pArray->tailKnownAssignedXids = 0; |
3609 | 0 | pArray->headKnownAssignedXids = compress_index; |
3610 | 0 | } |
3611 | | |
/*
 * Add xids into KnownAssignedXids at the head of the array.
 *
 * xids from from_xid to to_xid, inclusive, are added to the array.
 *
 * If exclusive_lock is true then caller already holds ProcArrayLock in
 * exclusive mode, so we need no extra locking here.  Else caller holds no
 * lock, so we need to be sure we maintain sufficient interlocks against
 * concurrent readers.  (Only the startup process ever calls this, so no need
 * to worry about concurrent writers.)
 *
 * Errors out (elog ERROR) if the XIDs do not fit even after compression.
 */
static void
KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
					 bool exclusive_lock)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile ProcArrayStruct *pArray = procArray;
	TransactionId next_xid;
	int			head,
				tail;
	int			nxids;
	int			i;

	Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));

	/*
	 * Calculate how many array slots we'll need.  Normally this is cheap; in
	 * the unusual case where the XIDs cross the wrap point, we do it the hard
	 * way.
	 */
	if (to_xid >= from_xid)
		nxids = to_xid - from_xid + 1;
	else
	{
		/* wrapped range: count by advancing one XID at a time */
		nxids = 1;
		next_xid = from_xid;
		while (TransactionIdPrecedes(next_xid, to_xid))
		{
			nxids++;
			TransactionIdAdvance(next_xid);
		}
	}

	/*
	 * Since only the startup process modifies the head/tail pointers, we
	 * don't need a lock to read them here.
	 */
	head = pArray->headKnownAssignedXids;
	tail = pArray->tailKnownAssignedXids;

	Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
	Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);

	/*
	 * Verify that insertions occur in TransactionId sequence.  Note that even
	 * if the last existing element is marked invalid, it must still have a
	 * correctly sequenced XID value.
	 */
	if (head > tail &&
		TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
	{
		KnownAssignedXidsDisplay(LOG);
		elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
	}

	/*
	 * If our xids won't fit in the remaining space, compress out free space
	 */
	if (head + nxids > pArray->maxKnownAssignedXids)
	{
		/* must hold lock to compress */
		if (!exclusive_lock)
			LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

		KnownAssignedXidsCompress(true);

		head = pArray->headKnownAssignedXids;
		/* note: we no longer care about the tail pointer */

		if (!exclusive_lock)
			LWLockRelease(ProcArrayLock);

		/*
		 * If it still won't fit then we're out of memory
		 */
		if (head + nxids > pArray->maxKnownAssignedXids)
			elog(ERROR, "too many KnownAssignedXids");
	}

	/* Now we can insert the xids into the space starting at head */
	next_xid = from_xid;
	for (i = 0; i < nxids; i++)
	{
		KnownAssignedXids[head] = next_xid;
		KnownAssignedXidsValid[head] = true;
		TransactionIdAdvance(next_xid);
		head++;
	}

	/* Adjust count of number of valid entries */
	pArray->numKnownAssignedXids += nxids;

	/*
	 * Now update the head pointer.  We use a spinlock to protect this
	 * pointer, not because the update is likely to be non-atomic, but to
	 * ensure that other processors see the above array updates before they
	 * see the head pointer change.
	 *
	 * If we're holding ProcArrayLock exclusively, there's no need to take the
	 * spinlock.
	 */
	if (exclusive_lock)
		pArray->headKnownAssignedXids = head;
	else
	{
		SpinLockAcquire(&pArray->known_assigned_xids_lck);
		pArray->headKnownAssignedXids = head;
		SpinLockRelease(&pArray->known_assigned_xids_lck);
	}
}
3732 | | |
/*
 * KnownAssignedXidsSearch
 *
 * Searches KnownAssignedXids for a specific xid and optionally removes it.
 * Returns true if it was found, false if not.
 *
 * Caller must hold ProcArrayLock in shared or exclusive mode.
 * Exclusive lock must be held for remove = true.
 */
static bool
KnownAssignedXidsSearch(TransactionId xid, bool remove)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile ProcArrayStruct *pArray = procArray;
	int			first,
				last;
	int			head;
	int			tail;
	int			result_index = -1;

	if (remove)
	{
		/* we hold ProcArrayLock exclusively, so no need for spinlock */
		tail = pArray->tailKnownAssignedXids;
		head = pArray->headKnownAssignedXids;
	}
	else
	{
		/* take spinlock to ensure we see up-to-date array contents */
		SpinLockAcquire(&pArray->known_assigned_xids_lck);
		tail = pArray->tailKnownAssignedXids;
		head = pArray->headKnownAssignedXids;
		SpinLockRelease(&pArray->known_assigned_xids_lck);
	}

	/*
	 * Standard binary search.  Note we can ignore the KnownAssignedXidsValid
	 * array here, since even invalid entries will contain sorted XIDs.
	 */
	first = tail;
	last = head - 1;
	while (first <= last)
	{
		int			mid_index;
		TransactionId mid_xid;

		mid_index = (first + last) / 2;
		mid_xid = KnownAssignedXids[mid_index];

		if (xid == mid_xid)
		{
			result_index = mid_index;
			break;
		}
		else if (TransactionIdPrecedes(xid, mid_xid))
			last = mid_index - 1;
		else
			first = mid_index + 1;
	}

	if (result_index < 0)
		return false;			/* not in array */

	if (!KnownAssignedXidsValid[result_index])
		return false;			/* in array, but invalid */

	if (remove)
	{
		/* Logical deletion: just clear the valid flag, keep the sorted XID */
		KnownAssignedXidsValid[result_index] = false;

		pArray->numKnownAssignedXids--;
		Assert(pArray->numKnownAssignedXids >= 0);

		/*
		 * If we're removing the tail element then advance tail pointer over
		 * any invalid elements.  This will speed future searches.
		 */
		if (result_index == tail)
		{
			tail++;
			while (tail < head && !KnownAssignedXidsValid[tail])
				tail++;
			if (tail >= head)
			{
				/* Array is empty, so we can reset both pointers */
				pArray->headKnownAssignedXids = 0;
				pArray->tailKnownAssignedXids = 0;
			}
			else
			{
				pArray->tailKnownAssignedXids = tail;
			}
		}
	}

	return true;
}
3830 | | |
3831 | | /* |
3832 | | * Is the specified XID present in KnownAssignedXids[]? |
3833 | | * |
3834 | | * Caller must hold ProcArrayLock in shared or exclusive mode. |
3835 | | */ |
3836 | | static bool |
3837 | | KnownAssignedXidExists(TransactionId xid) |
3838 | 0 | { |
3839 | 0 | Assert(TransactionIdIsValid(xid)); |
3840 | |
|
3841 | 0 | return KnownAssignedXidsSearch(xid, false); |
3842 | 0 | } |
3843 | | |
3844 | | /* |
3845 | | * Remove the specified XID from KnownAssignedXids[]. |
3846 | | * |
3847 | | * Caller must hold ProcArrayLock in exclusive mode. |
3848 | | */ |
3849 | | static void |
3850 | | KnownAssignedXidsRemove(TransactionId xid) |
3851 | 0 | { |
3852 | 0 | Assert(TransactionIdIsValid(xid)); |
3853 | |
|
3854 | 0 | elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid); |
3855 | | |
3856 | | /* |
3857 | | * Note: we cannot consider it an error to remove an XID that's not |
3858 | | * present. We intentionally remove subxact IDs while processing |
3859 | | * XLOG_XACT_ASSIGNMENT, to avoid array overflow. Then those XIDs will be |
3860 | | * removed again when the top-level xact commits or aborts. |
3861 | | * |
3862 | | * It might be possible to track such XIDs to distinguish this case from |
3863 | | * actual errors, but it would be complicated and probably not worth it. |
3864 | | * So, just ignore the search result. |
3865 | | */ |
3866 | 0 | (void) KnownAssignedXidsSearch(xid, true); |
3867 | 0 | } |
3868 | | |
3869 | | /* |
3870 | | * KnownAssignedXidsRemoveTree |
3871 | | * Remove xid (if it's not InvalidTransactionId) and all the subxids. |
3872 | | * |
3873 | | * Caller must hold ProcArrayLock in exclusive mode. |
3874 | | */ |
3875 | | static void |
3876 | | KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, |
3877 | | TransactionId *subxids) |
3878 | 0 | { |
3879 | 0 | int i; |
3880 | |
|
3881 | 0 | if (TransactionIdIsValid(xid)) |
3882 | 0 | KnownAssignedXidsRemove(xid); |
3883 | |
|
3884 | 0 | for (i = 0; i < nsubxids; i++) |
3885 | 0 | KnownAssignedXidsRemove(subxids[i]); |
3886 | | |
3887 | | /* Opportunistically compress the array */ |
3888 | 0 | KnownAssignedXidsCompress(false); |
3889 | 0 | } |
3890 | | |
/*
 * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
 * then clear the whole table.
 *
 * Entries for prepared transactions are retained even when they precede
 * removeXid, since those transactions are still in progress on the standby.
 *
 * Caller must hold ProcArrayLock in exclusive mode.
 */
static void
KnownAssignedXidsRemovePreceding(TransactionId removeXid)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile ProcArrayStruct *pArray = procArray;
	int			count = 0;		/* number of entries invalidated below */
	int			head,
				tail,
				i;

	/* Invalid removeXid means "clear everything": just reset the counters. */
	if (!TransactionIdIsValid(removeXid))
	{
		elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
		pArray->numKnownAssignedXids = 0;
		pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
		return;
	}

	elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);

	/*
	 * Mark entries invalid starting at the tail. Since array is sorted, we
	 * can stop as soon as we reach an entry >= removeXid.
	 */
	tail = pArray->tailKnownAssignedXids;
	head = pArray->headKnownAssignedXids;

	for (i = tail; i < head; i++)
	{
		if (KnownAssignedXidsValid[i])
		{
			TransactionId knownXid = KnownAssignedXids[i];

			if (TransactionIdFollowsOrEquals(knownXid, removeXid))
				break;

			/* Keep prepared xacts; invalidate everything else. */
			if (!StandbyTransactionIdIsPrepared(knownXid))
			{
				KnownAssignedXidsValid[i] = false;
				count++;
			}
		}
	}

	pArray->numKnownAssignedXids -= count;
	Assert(pArray->numKnownAssignedXids >= 0);

	/*
	 * Advance the tail pointer if we've marked the tail item invalid.
	 */
	for (i = tail; i < head; i++)
	{
		if (KnownAssignedXidsValid[i])
			break;
	}
	if (i >= head)
	{
		/* Array is empty, so we can reset both pointers */
		pArray->headKnownAssignedXids = 0;
		pArray->tailKnownAssignedXids = 0;
	}
	else
	{
		/* i is the first still-valid entry; it becomes the new tail. */
		pArray->tailKnownAssignedXids = i;
	}

	/* Opportunistically compress the array */
	KnownAssignedXidsCompress(false);
}
3966 | | |
3967 | | /* |
3968 | | * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids. |
3969 | | * We filter out anything >= xmax. |
3970 | | * |
3971 | | * Returns the number of XIDs stored into xarray[]. Caller is responsible |
3972 | | * that array is large enough. |
3973 | | * |
3974 | | * Caller must hold ProcArrayLock in (at least) shared mode. |
3975 | | */ |
3976 | | static int |
3977 | | KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax) |
3978 | 0 | { |
3979 | 0 | TransactionId xtmp = InvalidTransactionId; |
3980 | |
|
3981 | 0 | return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax); |
3982 | 0 | } |
3983 | | |
/*
 * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
 * we reduce *xmin to the lowest xid value seen if not already lower.
 *
 * If xmax is a valid XID, entries >= xmax are excluded from the output;
 * an invalid xmax means "no upper filter".
 *
 * Caller must hold ProcArrayLock in (at least) shared mode.
 */
static int
KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
							   TransactionId xmax)
{
	int			count = 0;		/* number of XIDs copied to xarray[] */
	int			head,
				tail;
	int			i;

	/*
	 * Fetch head just once, since it may change while we loop. We can stop
	 * once we reach the initially seen head, since we are certain that an xid
	 * cannot enter and then leave the array while we hold ProcArrayLock. We
	 * might miss newly-added xids, but they should be >= xmax so irrelevant
	 * anyway.
	 *
	 * Must take spinlock to ensure we see up-to-date array contents.
	 */
	SpinLockAcquire(&procArray->known_assigned_xids_lck);
	tail = procArray->tailKnownAssignedXids;
	head = procArray->headKnownAssignedXids;
	SpinLockRelease(&procArray->known_assigned_xids_lck);

	for (i = tail; i < head; i++)
	{
		/* Skip any gaps in the array */
		if (KnownAssignedXidsValid[i])
		{
			TransactionId knownXid = KnownAssignedXids[i];

			/*
			 * Update xmin if required. Only the first XID need be checked,
			 * since the array is sorted.
			 */
			if (count == 0 &&
				TransactionIdPrecedes(knownXid, *xmin))
				*xmin = knownXid;

			/*
			 * Filter out anything >= xmax, again relying on sorted property
			 * of array.
			 */
			if (TransactionIdIsValid(xmax) &&
				TransactionIdFollowsOrEquals(knownXid, xmax))
				break;

			/* Add knownXid into output array */
			xarray[count++] = knownXid;
		}
	}

	return count;
}
4043 | | |
4044 | | /* |
4045 | | * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId |
4046 | | * if nothing there. |
4047 | | */ |
4048 | | static TransactionId |
4049 | | KnownAssignedXidsGetOldestXmin(void) |
4050 | 0 | { |
4051 | 0 | int head, |
4052 | 0 | tail; |
4053 | 0 | int i; |
4054 | | |
4055 | | /* |
4056 | | * Fetch head just once, since it may change while we loop. |
4057 | | */ |
4058 | 0 | SpinLockAcquire(&procArray->known_assigned_xids_lck); |
4059 | 0 | tail = procArray->tailKnownAssignedXids; |
4060 | 0 | head = procArray->headKnownAssignedXids; |
4061 | 0 | SpinLockRelease(&procArray->known_assigned_xids_lck); |
4062 | |
|
4063 | 0 | for (i = tail; i < head; i++) |
4064 | 0 | { |
4065 | | /* Skip any gaps in the array */ |
4066 | 0 | if (KnownAssignedXidsValid[i]) |
4067 | 0 | return KnownAssignedXids[i]; |
4068 | 0 | } |
4069 | |
|
4070 | 0 | return InvalidTransactionId; |
4071 | 0 | } |
4072 | | |
4073 | | /* |
4074 | | * Display KnownAssignedXids to provide debug trail |
4075 | | * |
4076 | | * Currently this is only called within startup process, so we need no |
4077 | | * special locking. |
4078 | | * |
4079 | | * Note this is pretty expensive, and much of the expense will be incurred |
4080 | | * even if the elog message will get discarded. It's not currently called |
4081 | | * in any performance-critical places, however, so no need to be tenser. |
4082 | | */ |
4083 | | static void |
4084 | | KnownAssignedXidsDisplay(int trace_level) |
4085 | 0 | { |
4086 | | /* use volatile pointer to prevent code rearrangement */ |
4087 | 0 | volatile ProcArrayStruct *pArray = procArray; |
4088 | 0 | StringInfoData buf; |
4089 | 0 | int head, |
4090 | 0 | tail, |
4091 | 0 | i; |
4092 | 0 | int nxids = 0; |
4093 | |
|
4094 | 0 | tail = pArray->tailKnownAssignedXids; |
4095 | 0 | head = pArray->headKnownAssignedXids; |
4096 | |
|
4097 | 0 | initStringInfo(&buf); |
4098 | |
|
4099 | 0 | for (i = tail; i < head; i++) |
4100 | 0 | { |
4101 | 0 | if (KnownAssignedXidsValid[i]) |
4102 | 0 | { |
4103 | 0 | nxids++; |
4104 | 0 | appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]); |
4105 | 0 | } |
4106 | 0 | } |
4107 | |
|
4108 | 0 | elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s", |
4109 | 0 | nxids, |
4110 | 0 | pArray->numKnownAssignedXids, |
4111 | 0 | pArray->tailKnownAssignedXids, |
4112 | 0 | pArray->headKnownAssignedXids, |
4113 | 0 | buf.data); |
4114 | |
|
4115 | 0 | pfree(buf.data); |
4116 | 0 | } |
4117 | | |
4118 | | /* |
4119 | | * KnownAssignedXidsReset |
4120 | | * Resets KnownAssignedXids to be empty |
4121 | | */ |
4122 | | static void |
4123 | | KnownAssignedXidsReset(void) |
4124 | 0 | { |
4125 | | /* use volatile pointer to prevent code rearrangement */ |
4126 | 0 | volatile ProcArrayStruct *pArray = procArray; |
4127 | |
|
4128 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
4129 | |
|
4130 | 0 | pArray->numKnownAssignedXids = 0; |
4131 | 0 | pArray->tailKnownAssignedXids = 0; |
4132 | 0 | pArray->headKnownAssignedXids = 0; |
4133 | |
|
4134 | 0 | LWLockRelease(ProcArrayLock); |
4135 | 0 | } |