/Users/deen/code/yugabyte-db/src/postgres/src/backend/postmaster/bgworker.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*-------------------------------------------------------------------- |
2 | | * bgworker.c |
3 | | * POSTGRES pluggable background workers implementation |
4 | | * |
5 | | * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group |
6 | | * |
7 | | * IDENTIFICATION |
8 | | * src/backend/postmaster/bgworker.c |
9 | | * |
10 | | *------------------------------------------------------------------------- |
11 | | */ |
12 | | |
13 | | #include "postgres.h" |
14 | | |
15 | | #include <unistd.h> |
16 | | |
17 | | #include "libpq/pqsignal.h" |
18 | | #include "access/parallel.h" |
19 | | #include "miscadmin.h" |
20 | | #include "pgstat.h" |
21 | | #include "port/atomics.h" |
22 | | #include "postmaster/bgworker_internals.h" |
23 | | #include "postmaster/postmaster.h" |
24 | | #include "replication/logicallauncher.h" |
25 | | #include "replication/logicalworker.h" |
26 | | #include "storage/dsm.h" |
27 | | #include "storage/ipc.h" |
28 | | #include "storage/latch.h" |
29 | | #include "storage/lwlock.h" |
30 | | #include "storage/pg_shmem.h" |
31 | | #include "storage/pmsignal.h" |
32 | | #include "storage/proc.h" |
33 | | #include "storage/procsignal.h" |
34 | | #include "storage/shmem.h" |
35 | | #include "tcop/tcopprot.h" |
36 | | #include "utils/ascii.h" |
37 | | #include "utils/ps_status.h" |
38 | | #include "utils/timeout.h" |
39 | | |
40 | | /* |
41 | | * The postmaster's list of registered background workers, in private memory. |
42 | | */ |
43 | | slist_head BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList); |
44 | | |
45 | | /* |
46 | | * BackgroundWorkerSlots exist in shared memory and can be accessed (via |
47 | | * the BackgroundWorkerArray) by both the postmaster and by regular backends. |
48 | | * However, the postmaster cannot take locks, even spinlocks, because this |
49 | | * might allow it to crash or become wedged if shared memory gets corrupted. |
50 | | * Such an outcome is intolerable. Therefore, we need a lockless protocol |
51 | | * for coordinating access to this data. |
52 | | * |
53 | | * The 'in_use' flag is used to hand off responsibility for the slot between |
54 | | * the postmaster and the rest of the system. When 'in_use' is false, |
55 | | * the postmaster will ignore the slot entirely, except for the 'in_use' flag |
56 | | * itself, which it may read. In this state, regular backends may modify the |
57 | | * slot. Once a backend sets 'in_use' to true, the slot becomes the |
58 | | * responsibility of the postmaster. Regular backends may no longer modify it, |
59 | | * but the postmaster may examine it. Thus, a backend initializing a slot |
60 | | * must fully initialize the slot - and insert a write memory barrier - before |
61 | | * marking it as in use. |
62 | | * |
63 | | * As an exception, however, even when the slot is in use, regular backends |
64 | | * may set the 'terminate' flag for a slot, telling the postmaster not |
65 | | * to restart it. Once the background worker is no longer running, the slot |
66 | | * will be released for reuse. |
67 | | * |
68 | | * In addition to coordinating with the postmaster, backends modifying this |
69 | | * data structure must coordinate with each other. Since they can take locks, |
70 | | * this is straightforward: any backend wishing to manipulate a slot must |
71 | | * take BackgroundWorkerLock in exclusive mode. Backends wishing to read |
72 | | * data that might get concurrently modified by other backends should take |
73 | | * this lock in shared mode. No matter what, backends reading this data |
74 | | * structure must be able to tolerate concurrent modifications by the |
75 | | * postmaster. |
76 | | */ |
77 | | typedef struct BackgroundWorkerSlot |
78 | | { |
79 | | bool in_use; /* true once the slot has been handed to the postmaster */ |
80 | | bool terminate; /* set by a backend to tell the postmaster not to restart the worker */ |
81 | | pid_t pid; /* InvalidPid = not started yet; 0 = dead */ |
82 | | uint64 generation; /* incremented when slot is recycled */ |
83 | | BackgroundWorker worker; /* copy of the worker's registration data */ |
84 | | } BackgroundWorkerSlot; |
85 | | |
86 | | /* |
87 | | * In order to limit the total number of parallel workers (according to |
88 | | * max_parallel_workers GUC), we maintain the number of active parallel |
89 | | * workers. Since the postmaster cannot take locks, two variables are used for |
90 | | * this purpose: the number of registered parallel workers (modified by the |
91 | | * backends, protected by BackgroundWorkerLock) and the number of terminated |
92 | | * parallel workers (modified only by the postmaster, lockless). The active |
93 | | * number of parallel workers is the number of registered workers minus the |
94 | | * terminated ones. These counters can of course overflow, but it's not |
95 | | * important here since the subtraction will still give the right number. |
96 | | */ |
97 | | typedef struct BackgroundWorkerArray |
98 | | { |
99 | | int total_slots; /* number of entries in slot[]; set from max_worker_processes */ |
100 | | uint32 parallel_register_count; /* parallel workers registered; changed by backends under BackgroundWorkerLock */ |
101 | | uint32 parallel_terminate_count; /* parallel workers terminated; changed only by the postmaster, lockless */ |
102 | | BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER]; /* one slot per possible worker */ |
103 | | } BackgroundWorkerArray; |
104 | | |
105 | | struct BackgroundWorkerHandle |
106 | | { |
107 | | int slot; /* presumably an index into BackgroundWorkerData->slot[] -- confirm at the users */ |
108 | | uint64 generation; /* presumably the slot's generation when the handle was made -- confirm */ |
109 | | }; |
110 | | |
111 | | static BackgroundWorkerArray *BackgroundWorkerData; /* shared-memory array; assigned in BackgroundWorkerShmemInit() */ |
112 | | |
113 | | /* |
114 | | * List of internal background worker entry points. We need this for |
115 | | * reasons explained in LookupBackgroundWorkerFunction(), below. |
116 | | */ |
117 | | static const struct |
118 | | { |
119 | | const char *fn_name; /* function name; presumably compared with bgw_function_name -- confirm in LookupBackgroundWorkerFunction() */ |
120 | | bgworker_main_type fn_addr; /* address of the matching entry point */ |
121 | | } InternalBGWorkers[] = |
122 | | |
123 | | { |
124 | | { |
125 | | "ParallelWorkerMain", ParallelWorkerMain |
126 | | }, |
127 | | { |
128 | | "ApplyLauncherMain", ApplyLauncherMain |
129 | | }, |
130 | | { |
131 | | "ApplyWorkerMain", ApplyWorkerMain |
132 | | } |
133 | | }; |
134 | | |
135 | | /* Private functions. */ |
136 | | static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname); |
137 | | |
138 | | |
139 | | /* |
140 | | * Calculate shared memory needed. |
141 | | */ |
142 | | Size |
143 | | BackgroundWorkerShmemSize(void) |
144 | 7.22k | { |
145 | 7.22k | Size size; |
146 | | |
147 | | /* Array of workers is variably sized. */ |
148 | 7.22k | size = offsetof(BackgroundWorkerArray, slot); |
149 | 7.22k | size = add_size(size, mul_size(max_worker_processes, |
150 | 7.22k | sizeof(BackgroundWorkerSlot))); |
151 | | |
152 | 7.22k | return size; |
153 | 7.22k | } |
154 | | |
155 | | /* |
156 | | * Initialize shared memory. |
157 | | */ |
158 | | void |
159 | | BackgroundWorkerShmemInit(void) |
160 | 3.61k | { |
161 | 3.61k | bool found; |
162 | | |
163 | 3.61k | BackgroundWorkerData = ShmemInitStruct("Background Worker Data", |
164 | 3.61k | BackgroundWorkerShmemSize(), |
165 | 3.61k | &found); |
166 | 3.61k | if (!IsUnderPostmaster) |
167 | 3.61k | { |
168 | 3.61k | slist_iter siter; |
169 | 3.61k | int slotno = 0; |
170 | | |
171 | 3.61k | BackgroundWorkerData->total_slots = max_worker_processes; |
172 | 3.61k | BackgroundWorkerData->parallel_register_count = 0; |
173 | 3.61k | BackgroundWorkerData->parallel_terminate_count = 0; |
174 | | |
175 | | /* |
176 | | * Copy contents of worker list into shared memory. Record the shared |
177 | | * memory slot assigned to each worker. This ensures a 1-to-1 |
178 | | * correspondence between the postmaster's private list and the array |
179 | | * in shared memory. |
180 | | */ |
181 | 3.61k | slist_foreach(siter, &BackgroundWorkerList) |
182 | 902 | { |
183 | 902 | BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno]; |
184 | 902 | RegisteredBgWorker *rw; |
185 | | |
186 | 902 | rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); |
187 | 902 | Assert(slotno < max_worker_processes); |
188 | 902 | slot->in_use = true; |
189 | 902 | slot->terminate = false; |
190 | 902 | slot->pid = InvalidPid; |
191 | 902 | slot->generation = 0; |
192 | 902 | rw->rw_shmem_slot = slotno; |
193 | 902 | rw->rw_worker.bgw_notify_pid = 0; /* might be reinit after crash */ |
194 | 902 | memcpy(&slot->worker, &rw->rw_worker, sizeof(BackgroundWorker)); |
195 | 902 | ++slotno; |
196 | 902 | } |
197 | | |
198 | | /* |
199 | | * Mark any remaining slots as not in use. |
200 | | */ |
201 | 31.6k | while (slotno < max_worker_processes) |
202 | 28.0k | { |
203 | 28.0k | BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno]; |
204 | | |
205 | 28.0k | slot->in_use = false; |
206 | 28.0k | ++slotno; |
207 | 28.0k | } |
208 | 3.61k | } |
209 | 3.61k | else |
210 | 3.61k | Assert(found); |
211 | 3.61k | } |
212 | | |
213 | | /* |
214 | | * Search the postmaster's backend-private list of RegisteredBgWorker objects |
215 | | * for the one that maps to the given slot number. |
216 | | */ |
217 | | static RegisteredBgWorker * |
218 | | FindRegisteredWorkerBySlotNumber(int slotno) |
219 | 0 | { |
220 | 0 | slist_iter siter; |
221 | |
|
222 | 0 | slist_foreach(siter, &BackgroundWorkerList) |
223 | 0 | { |
224 | 0 | RegisteredBgWorker *rw; |
225 | |
|
226 | 0 | rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); |
227 | 0 | if (rw->rw_shmem_slot == slotno) |
228 | 0 | return rw; |
229 | 0 | } |
230 | |
|
231 | 0 | return NULL; |
232 | 0 | } |
233 | | |
234 | | /* |
235 | | * Notice changes to shared memory made by other backends. This code |
236 | | * runs in the postmaster, so we must be very careful not to assume that |
237 | | * shared memory contents are sane. Otherwise, a rogue backend could take |
238 | | * out the postmaster. |
239 | | */ |
240 | | void |
241 | | BackgroundWorkerStateChange(void) |
242 | 0 | { |
243 | 0 | int slotno; |
244 | | |
245 | | /* |
246 | | * The total number of slots stored in shared memory should match our |
247 | | * notion of max_worker_processes. If it does not, something is very |
248 | | * wrong. Further down, we always refer to this value as |
249 | | * max_worker_processes, in case shared memory gets corrupted while we're |
250 | | * looping. |
251 | | */ |
252 | 0 | if (max_worker_processes != BackgroundWorkerData->total_slots) |
253 | 0 | { |
254 | 0 | elog(LOG, |
255 | 0 | "inconsistent background worker state (max_worker_processes=%d, total_slots=%d", |
256 | 0 | max_worker_processes, |
257 | 0 | BackgroundWorkerData->total_slots); |
258 | 0 | return; |
259 | 0 | } |
260 | | |
261 | | /* |
262 | | * Iterate through slots, looking for newly-registered workers or workers |
263 | | * who must die. |
264 | | */ |
265 | 0 | for (slotno = 0; slotno < max_worker_processes; ++slotno) |
266 | 0 | { |
267 | 0 | BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno]; |
268 | 0 | RegisteredBgWorker *rw; |
269 | |
|
270 | 0 | if (!slot->in_use) |
271 | 0 | continue; |
272 | | |
273 | | /* |
274 | | * Make sure we don't see the in_use flag before the updated slot |
275 | | * contents. |
276 | | */ |
277 | 0 | pg_read_barrier(); |
278 | | |
279 | | /* See whether we already know about this worker. */ |
280 | 0 | rw = FindRegisteredWorkerBySlotNumber(slotno); |
281 | 0 | if (rw != NULL) |
282 | 0 | { |
283 | | /* |
284 | | * In general, the worker data can't change after it's initially |
285 | | * registered. However, someone can set the terminate flag. |
286 | | */ |
287 | 0 | if (slot->terminate && !rw->rw_terminate) |
288 | 0 | { |
289 | 0 | rw->rw_terminate = true; |
290 | 0 | if (rw->rw_pid != 0) |
291 | 0 | kill(rw->rw_pid, SIGTERM); |
292 | 0 | else |
293 | 0 | { |
294 | | /* Report never-started, now-terminated worker as dead. */ |
295 | 0 | ReportBackgroundWorkerPID(rw); |
296 | 0 | } |
297 | 0 | } |
298 | 0 | continue; |
299 | 0 | } |
300 | | |
301 | | /* |
302 | | * If the worker is marked for termination, we don't need to add it to |
303 | | * the registered workers list; we can just free the slot. However, if |
304 | | * bgw_notify_pid is set, the process that registered the worker may |
305 | | * need to know that we've processed the terminate request, so be sure |
306 | | * to signal it. |
307 | | */ |
308 | 0 | if (slot->terminate) |
309 | 0 | { |
310 | 0 | int notify_pid; |
311 | | |
312 | | /* |
313 | | * We need a memory barrier here to make sure that the load of |
314 | | * bgw_notify_pid and the update of parallel_terminate_count |
315 | | * complete before the store to in_use. |
316 | | */ |
317 | 0 | notify_pid = slot->worker.bgw_notify_pid; |
318 | 0 | if ((slot->worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) |
319 | 0 | BackgroundWorkerData->parallel_terminate_count++; |
320 | 0 | pg_memory_barrier(); |
321 | 0 | slot->pid = 0; |
322 | 0 | slot->in_use = false; |
323 | 0 | if (notify_pid != 0) |
324 | 0 | kill(notify_pid, SIGUSR1); |
325 | |
|
326 | 0 | continue; |
327 | 0 | } |
328 | | |
329 | | /* |
330 | | * Copy the registration data into the registered workers list. |
331 | | */ |
332 | 0 | rw = malloc(sizeof(RegisteredBgWorker)); |
333 | 0 | if (rw == NULL) |
334 | 0 | { |
335 | 0 | ereport(LOG, |
336 | 0 | (errcode(ERRCODE_OUT_OF_MEMORY), |
337 | 0 | errmsg("out of memory"))); |
338 | 0 | return; |
339 | 0 | } |
340 | | |
341 | | /* |
342 | | * Copy strings in a paranoid way. If shared memory is corrupted, the |
343 | | * source data might not even be NUL-terminated. |
344 | | */ |
345 | 0 | ascii_safe_strlcpy(rw->rw_worker.bgw_name, |
346 | 0 | slot->worker.bgw_name, BGW_MAXLEN); |
347 | 0 | ascii_safe_strlcpy(rw->rw_worker.bgw_type, |
348 | 0 | slot->worker.bgw_type, BGW_MAXLEN); |
349 | 0 | ascii_safe_strlcpy(rw->rw_worker.bgw_library_name, |
350 | 0 | slot->worker.bgw_library_name, BGW_MAXLEN); |
351 | 0 | ascii_safe_strlcpy(rw->rw_worker.bgw_function_name, |
352 | 0 | slot->worker.bgw_function_name, BGW_MAXLEN); |
353 | | |
354 | | /* |
355 | | * Copy various fixed-size fields. |
356 | | * |
357 | | * flags, start_time, and restart_time are examined by the postmaster, |
358 | | * but nothing too bad will happen if they are corrupted. The |
359 | | * remaining fields will only be examined by the child process. It |
360 | | * might crash, but we won't. |
361 | | */ |
362 | 0 | rw->rw_worker.bgw_flags = slot->worker.bgw_flags; |
363 | 0 | rw->rw_worker.bgw_start_time = slot->worker.bgw_start_time; |
364 | 0 | rw->rw_worker.bgw_restart_time = slot->worker.bgw_restart_time; |
365 | 0 | rw->rw_worker.bgw_main_arg = slot->worker.bgw_main_arg; |
366 | 0 | memcpy(rw->rw_worker.bgw_extra, slot->worker.bgw_extra, BGW_EXTRALEN); |
367 | | |
368 | | /* |
369 | | * Copy the PID to be notified about state changes, but only if the |
370 | | * postmaster knows about a backend with that PID. It isn't an error |
371 | | * if the postmaster doesn't know about the PID, because the backend |
372 | | * that requested the worker could have died (or been killed) just |
373 | | * after doing so. Nonetheless, at least until we get some experience |
374 | | * with how this plays out in the wild, log a message at a relative |
375 | | * high debug level. |
376 | | */ |
377 | 0 | rw->rw_worker.bgw_notify_pid = slot->worker.bgw_notify_pid; |
378 | 0 | if (!PostmasterMarkPIDForWorkerNotify(rw->rw_worker.bgw_notify_pid)) |
379 | 0 | { |
380 | 0 | elog(DEBUG1, "worker notification PID %lu is not valid", |
381 | 0 | (long) rw->rw_worker.bgw_notify_pid); |
382 | 0 | rw->rw_worker.bgw_notify_pid = 0; |
383 | 0 | } |
384 | | |
385 | | /* Initialize postmaster bookkeeping. */ |
386 | 0 | rw->rw_backend = NULL; |
387 | 0 | rw->rw_pid = 0; |
388 | 0 | rw->rw_child_slot = 0; |
389 | 0 | rw->rw_crashed_at = 0; |
390 | 0 | rw->rw_shmem_slot = slotno; |
391 | 0 | rw->rw_terminate = false; |
392 | | |
393 | | /* Log it! */ |
394 | 0 | ereport(DEBUG1, |
395 | 0 | (errmsg("registering background worker \"%s\"", |
396 | 0 | rw->rw_worker.bgw_name))); |
397 | |
|
398 | 0 | slist_push_head(&BackgroundWorkerList, &rw->rw_lnode); |
399 | 0 | } |
400 | 0 | } |
401 | | |
402 | | /* |
403 | | * Forget about a background worker that's no longer needed. |
404 | | * |
405 | | * The worker must be identified by passing an slist_mutable_iter that |
406 | | * points to it. This convention allows deletion of workers during |
407 | | * searches of the worker list, and saves having to search the list again. |
408 | | * |
409 | | * This function must be invoked only in the postmaster. |
410 | | */ |
411 | | void |
412 | | ForgetBackgroundWorker(slist_mutable_iter *cur) |
413 | 0 | { |
414 | 0 | RegisteredBgWorker *rw; |
415 | 0 | BackgroundWorkerSlot *slot; |
416 | |
|
417 | 0 | rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur); |
418 | |
|
419 | 0 | Assert(rw->rw_shmem_slot < max_worker_processes); |
420 | 0 | slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot]; |
421 | 0 | if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) |
422 | 0 | BackgroundWorkerData->parallel_terminate_count++; |
423 | |
|
424 | 0 | slot->in_use = false; |
425 | |
|
426 | 0 | ereport(DEBUG1, |
427 | 0 | (errmsg("unregistering background worker \"%s\"", |
428 | 0 | rw->rw_worker.bgw_name))); |
429 | |
|
430 | 0 | slist_delete_current(cur); |
431 | 0 | free(rw); |
432 | 0 | } |
433 | | |
434 | | /* |
435 | | * Report the PID of a newly-launched background worker in shared memory. |
436 | | * |
437 | | * This function should only be called from the postmaster. |
438 | | */ |
439 | | void |
440 | | ReportBackgroundWorkerPID(RegisteredBgWorker *rw) |
441 | 902 | { |
442 | 902 | BackgroundWorkerSlot *slot; |
443 | | |
444 | 902 | Assert(rw->rw_shmem_slot < max_worker_processes); |
445 | 902 | slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot]; |
446 | 902 | slot->pid = rw->rw_pid; |
447 | | |
448 | 902 | if (rw->rw_worker.bgw_notify_pid != 0) |
449 | 0 | kill(rw->rw_worker.bgw_notify_pid, SIGUSR1); |
450 | 902 | } |
451 | | |
452 | | /* |
453 | | * Report that the PID of a background worker is now zero because a |
454 | | * previously-running background worker has exited. |
455 | | * |
456 | | * This function should only be called from the postmaster. |
457 | | */ |
458 | | void |
459 | | ReportBackgroundWorkerExit(slist_mutable_iter *cur) |
460 | 0 | { |
461 | 0 | RegisteredBgWorker *rw; |
462 | 0 | BackgroundWorkerSlot *slot; |
463 | 0 | int notify_pid; |
464 | |
|
465 | 0 | rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur); |
466 | |
|
467 | 0 | Assert(rw->rw_shmem_slot < max_worker_processes); |
468 | 0 | slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot]; |
469 | 0 | slot->pid = rw->rw_pid; |
470 | 0 | notify_pid = rw->rw_worker.bgw_notify_pid; |
471 | | |
472 | | /* |
473 | | * If this worker is slated for deregistration, do that before notifying |
474 | | * the process which started it. Otherwise, if that process tries to |
475 | | * reuse the slot immediately, it might not be available yet. In theory |
476 | | * that could happen anyway if the process checks slot->pid at just the |
477 | | * wrong moment, but this makes the window narrower. |
478 | | */ |
479 | 0 | if (rw->rw_terminate || |
480 | 0 | rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) |
481 | 0 | ForgetBackgroundWorker(cur); |
482 | |
|
483 | 0 | if (notify_pid != 0) |
484 | 0 | kill(notify_pid, SIGUSR1); |
485 | 0 | } |
486 | | |
487 | | /* |
488 | | * Cancel SIGUSR1 notifications for a PID belonging to an exiting backend. |
489 | | * |
490 | | * This function should only be called from the postmaster. |
491 | | */ |
492 | | void |
493 | | BackgroundWorkerStopNotifications(pid_t pid) |
494 | 0 | { |
495 | 0 | slist_iter siter; |
496 | |
|
497 | 0 | slist_foreach(siter, &BackgroundWorkerList) |
498 | 0 | { |
499 | 0 | RegisteredBgWorker *rw; |
500 | |
|
501 | 0 | rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); |
502 | 0 | if (rw->rw_worker.bgw_notify_pid == pid) |
503 | 0 | rw->rw_worker.bgw_notify_pid = 0; |
504 | 0 | } |
505 | 0 | } |
506 | | |
507 | | /* |
508 | | * Reset background worker crash state. |
509 | | * |
510 | | * We assume that, after a crash-and-restart cycle, background workers without |
511 | | * the never-restart flag should be restarted immediately, instead of waiting |
512 | | * for bgw_restart_time to elapse. |
513 | | */ |
514 | | void |
515 | | ResetBackgroundWorkerCrashTimes(void) |
516 | 0 | { |
517 | 0 | slist_mutable_iter iter; |
518 | |
|
519 | 0 | slist_foreach_modify(iter, &BackgroundWorkerList) |
520 | 0 | { |
521 | 0 | RegisteredBgWorker *rw; |
522 | |
|
523 | 0 | rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); |
524 | |
|
525 | 0 | if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) |
526 | 0 | { |
527 | | /* |
528 | | * Workers marked BGW_NVER_RESTART shouldn't get relaunched after |
529 | | * the crash, so forget about them. (If we wait until after the |
530 | | * crash to forget about them, and they are parallel workers, |
531 | | * parallel_terminate_count will get incremented after we've |
532 | | * already zeroed parallel_register_count, which would be bad.) |
533 | | */ |
534 | 0 | ForgetBackgroundWorker(&iter); |
535 | 0 | } |
536 | 0 | else |
537 | 0 | { |
538 | | /* |
539 | | * The accounting which we do via parallel_register_count and |
540 | | * parallel_terminate_count would get messed up if a worker marked |
541 | | * parallel could survive a crash and restart cycle. All such |
542 | | * workers should be marked BGW_NEVER_RESTART, and thus control |
543 | | * should never reach this branch. |
544 | | */ |
545 | 0 | Assert((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) == 0); |
546 | | |
547 | | /* |
548 | | * Allow this worker to be restarted immediately after we finish |
549 | | * resetting. |
550 | | */ |
551 | 0 | rw->rw_crashed_at = 0; |
552 | 0 | } |
553 | 0 | } |
554 | 0 | } |
555 | | |
#ifdef EXEC_BACKEND
/*
 * In EXEC_BACKEND mode, a worker calls this to fetch its own registration
 * details from the shared-memory slot numbered slotno.
 *
 * The result points at a function-static copy, so it is overwritten by the
 * next call.
 */
BackgroundWorker *
BackgroundWorkerEntry(int slotno)
{
	static BackgroundWorker entry_copy;

	Assert(slotno < BackgroundWorkerData->total_slots);
	Assert(BackgroundWorkerData->slot[slotno].in_use);

	/* must copy this in case we don't intend to retain shmem access */
	memcpy(&entry_copy,
		   &BackgroundWorkerData->slot[slotno].worker,
		   sizeof(entry_copy));

	return &entry_copy;
}
#endif
576 | | |
577 | | /* |
578 | | * Complain about the BackgroundWorker definition using error level elevel. |
579 | | * Return true if it looks ok, false if not (unless elevel >= ERROR, in |
580 | | * which case we won't return at all in the not-OK case). |
581 | | */ |
582 | | static bool |
583 | | SanityCheckBackgroundWorker(BackgroundWorker *worker, int elevel) |
584 | 902 | { |
585 | | /* sanity check for flags */ |
586 | 902 | if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION) |
587 | 0 | { |
588 | 0 | if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS)) |
589 | 0 | { |
590 | 0 | ereport(elevel, |
591 | 0 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
592 | 0 | errmsg("background worker \"%s\": must attach to shared memory in order to request a database connection", |
593 | 0 | worker->bgw_name))); |
594 | 0 | return false; |
595 | 0 | } |
596 | | |
597 | 0 | if (worker->bgw_start_time == BgWorkerStart_PostmasterStart) |
598 | 0 | { |
599 | 0 | ereport(elevel, |
600 | 0 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
601 | 0 | errmsg("background worker \"%s\": cannot request database access if starting at postmaster start", |
602 | 0 | worker->bgw_name))); |
603 | 0 | return false; |
604 | 902 | } |
605 | | |
606 | | /* XXX other checks? */ |
607 | 0 | } |
608 | | |
609 | 902 | if ((worker->bgw_restart_time < 0 && |
610 | 0 | worker->bgw_restart_time != BGW_NEVER_RESTART) || |
611 | 902 | (worker->bgw_restart_time > USECS_PER_DAY / 1000)) |
612 | 0 | { |
613 | 0 | ereport(elevel, |
614 | 0 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
615 | 0 | errmsg("background worker \"%s\": invalid restart interval", |
616 | 0 | worker->bgw_name))); |
617 | 0 | return false; |
618 | 902 | } |
619 | | |
620 | | /* |
621 | | * Parallel workers may not be configured for restart, because the |
622 | | * parallel_register_count/parallel_terminate_count accounting can't |
623 | | * handle parallel workers lasting through a crash-and-restart cycle. |
624 | | */ |
625 | 902 | if (worker->bgw_restart_time != BGW_NEVER_RESTART && |
626 | 902 | (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) |
627 | 0 | { |
628 | 0 | ereport(elevel, |
629 | 0 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
630 | 0 | errmsg("background worker \"%s\": parallel workers may not be configured for restart", |
631 | 0 | worker->bgw_name))); |
632 | 0 | return false; |
633 | 902 | } |
634 | | |
635 | | /* |
636 | | * If bgw_type is not filled in, use bgw_name. |
637 | | */ |
638 | 902 | if (strcmp(worker->bgw_type, "") == 0) |
639 | 902 | strcpy(worker->bgw_type, worker->bgw_name); |
640 | | |
641 | 902 | return true; |
642 | 902 | } |
643 | | |
/*
 * SIGQUIT handler for background workers: leave immediately.
 *
 * We DO NOT want to run proc_exit() or atexit() callbacks here -- we got
 * this signal because shared memory may be corrupted, so trying to clean up
 * our transaction would be unwise, and those callbacks wouldn't be safe to
 * run from a signal handler anyway.
 *
 * The exit status is 2, not 0, to force the postmaster into a system reset
 * cycle if someone sends a manual SIGQUIT to a random backend; that is
 * needed precisely because our shared memory state is not cleaned up.  (The
 * "dead man switch" mechanism in pmsignal.c should make the postmaster see
 * this as a crash too, but there's no harm in being doubly sure.)
 */
static void
bgworker_quickdie(SIGNAL_ARGS)
{
	_exit(2);
}
663 | | |
664 | | /* |
665 | | * Standard SIGTERM handler for background workers |
666 | | */ |
667 | | static void |
668 | | bgworker_die(SIGNAL_ARGS) |
669 | 0 | { |
670 | 0 | PG_SETMASK(&BlockSig); |
671 | |
|
672 | 0 | ereport(FATAL, |
673 | 0 | (errcode(ERRCODE_ADMIN_SHUTDOWN), |
674 | 0 | errmsg("terminating background worker \"%s\" due to administrator command", |
675 | 0 | MyBgworkerEntry->bgw_type))); |
676 | 0 | } |
677 | | |
/*
 * Standard SIGUSR1 handler for unconnected workers
 *
 * Forwards the signal to the latch code so that an unconnected worker will
 * at least heed latch activity.  errno is preserved across the call.
 */
static void
bgworker_sigusr1_handler(SIGNAL_ARGS)
{
	const int	errno_on_entry = errno;

	latch_sigusr1_handler();

	errno = errno_on_entry;
}
693 | | |
694 | | /* |
695 | | * Start a new background worker |
696 | | * |
697 | | * This is the main entry point for background worker, to be called from |
698 | | * postmaster. |
699 | | */ |
700 | | void |
701 | | StartBackgroundWorker(void) |
702 | 902 | { |
703 | 902 | sigjmp_buf local_sigjmp_buf; |
704 | 902 | BackgroundWorker *worker = MyBgworkerEntry; |
705 | 902 | bgworker_main_type entrypt; |
706 | | |
707 | 902 | if (worker == NULL) |
708 | 0 | elog(FATAL, "unable to find bgworker entry"); |
709 | | |
710 | 902 | IsBackgroundWorker = true; |
711 | | |
712 | | /* Identify myself via ps */ |
713 | 902 | init_ps_display(worker->bgw_name, "", "", ""); |
714 | | |
715 | | /* |
716 | | * If we're not supposed to have shared memory access, then detach from |
717 | | * shared memory. If we didn't request shared memory access, the |
718 | | * postmaster won't force a cluster-wide restart if we exit unexpectedly, |
719 | | * so we'd better make sure that we don't mess anything up that would |
720 | | * require that sort of cleanup. |
721 | | */ |
722 | 902 | if ((worker->bgw_flags & BGWORKER_SHMEM_ACCESS) == 0) |
723 | 0 | { |
724 | 0 | dsm_detach_all(); |
725 | 0 | PGSharedMemoryDetach(); |
726 | 0 | } |
727 | | |
728 | 902 | SetProcessingMode(InitProcessing); |
729 | | |
730 | | /* Apply PostAuthDelay */ |
731 | 902 | if (PostAuthDelay > 0) |
732 | 0 | pg_usleep(PostAuthDelay * 1000000L); |
733 | | |
734 | | /* |
735 | | * Set up signal handlers. |
736 | | */ |
737 | 902 | if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION) |
738 | 0 | { |
739 | | /* |
740 | | * SIGINT is used to signal canceling the current action |
741 | | */ |
742 | 0 | pqsignal(SIGINT, StatementCancelHandler); |
743 | 0 | pqsignal(SIGUSR1, procsignal_sigusr1_handler); |
744 | 0 | pqsignal(SIGFPE, FloatExceptionHandler); |
745 | | |
746 | | /* XXX Any other handlers needed here? */ |
747 | 0 | } |
748 | 902 | else |
749 | 902 | { |
750 | 902 | pqsignal(SIGINT, SIG_IGN); |
751 | 902 | pqsignal(SIGUSR1, bgworker_sigusr1_handler); |
752 | 902 | pqsignal(SIGFPE, SIG_IGN); |
753 | 902 | } |
754 | 902 | pqsignal(SIGTERM, bgworker_die); |
755 | 902 | pqsignal(SIGHUP, SIG_IGN); |
756 | | |
757 | 902 | pqsignal(SIGQUIT, bgworker_quickdie); |
758 | 902 | InitializeTimeouts(); /* establishes SIGALRM handler */ |
759 | | |
760 | 902 | pqsignal(SIGPIPE, SIG_IGN); |
761 | 902 | pqsignal(SIGUSR2, SIG_IGN); |
762 | 902 | pqsignal(SIGCHLD, SIG_DFL); |
763 | | |
764 | | /* |
765 | | * If an exception is encountered, processing resumes here. |
766 | | * |
767 | | * We just need to clean up, report the error, and go away. |
768 | | */ |
769 | 902 | if (sigsetjmp(local_sigjmp_buf, 1) != 0) |
770 | 0 | { |
771 | | /* Since not using PG_TRY, must reset error stack by hand */ |
772 | 0 | error_context_stack = NULL; |
773 | | |
774 | | /* Prevent interrupts while cleaning up */ |
775 | 0 | HOLD_INTERRUPTS(); |
776 | | |
777 | | /* |
778 | | * sigsetjmp will have blocked all signals, but we may need to accept |
779 | | * signals while communicating with our parallel leader. Once we've |
780 | | * done HOLD_INTERRUPTS() it should be safe to unblock signals. |
781 | | */ |
782 | 0 | BackgroundWorkerUnblockSignals(); |
783 | | |
784 | | /* Report the error to the parallel leader and the server log */ |
785 | 0 | EmitErrorReport(); |
786 | | |
787 | | /* |
788 | | * Do we need more cleanup here? For shmem-connected bgworkers, we |
789 | | * will call InitProcess below, which will install ProcKill as exit |
790 | | * callback. That will take care of releasing locks, etc. |
791 | | */ |
792 | | |
793 | | /* and go away */ |
794 | 0 | proc_exit(1); |
795 | 0 | } |
796 | | |
797 | | /* We can now handle ereport(ERROR) */ |
798 | 902 | PG_exception_stack = &local_sigjmp_buf; |
799 | | |
800 | | /* |
801 | | * If the background worker request shared memory access, set that up now; |
802 | | * else, detach all shared memory segments. |
803 | | */ |
804 | 902 | if (worker->bgw_flags & BGWORKER_SHMEM_ACCESS) |
805 | 902 | { |
806 | | /* |
807 | | * Early initialization. Some of this could be useful even for |
808 | | * background workers that aren't using shared memory, but they can |
809 | | * call the individual startup routines for those subsystems if |
810 | | * needed. |
811 | | */ |
812 | 902 | BaseInit(); |
813 | | |
814 | | /* |
815 | | * Create a per-backend PGPROC struct in shared memory, except in the |
816 | | * EXEC_BACKEND case where this was done in SubPostmasterMain. We must |
817 | | * do this before we can use LWLocks (and in the EXEC_BACKEND case we |
818 | | * already had to do some stuff with LWLocks). |
819 | | */ |
820 | 902 | #ifndef EXEC_BACKEND |
821 | 902 | InitProcess(); |
822 | 902 | #endif |
823 | 902 | } |
824 | | |
825 | | /* |
826 | | * Look up the entry point function, loading its library if necessary. |
827 | | */ |
828 | 902 | entrypt = LookupBackgroundWorkerFunction(worker->bgw_library_name, |
829 | 902 | worker->bgw_function_name); |
830 | | |
831 | | /* |
832 | | * Note that in normal processes, we would call InitPostgres here. For a |
833 | | * worker, however, we don't know what database to connect to, yet; so we |
834 | | * need to wait until the user code does it via |
835 | | * BackgroundWorkerInitializeConnection(). |
836 | | */ |
837 | | |
838 | | /* |
839 | | * Now invoke the user-defined worker code |
840 | | */ |
841 | 902 | entrypt(worker->bgw_main_arg); |
842 | | |
843 | | /* ... and if it returns, we're done */ |
844 | 902 | proc_exit(0); |
845 | 902 | } |
846 | | |
847 | | /*
848 | | * Register a new static background worker (the contents of *worker are copied).
849 | | *
850 | | * This can only be called directly from postmaster or in the _PG_init
851 | | * function of a module library that's loaded by shared_preload_libraries;
852 | | * otherwise it will have no effect. Nothing is returned: a rejected request is dropped, in most cases after a LOG-level message.
853 | | */
854 | | void
855 | | RegisterBackgroundWorker(BackgroundWorker *worker)
856 | 902 | {
857 | 902 | RegisteredBgWorker *rw;
858 | 902 | static int numworkers = 0; /* persists across calls; compared with max_worker_processes below */
859 | |
860 | 902 | if (!IsUnderPostmaster)
861 | 902 | ereport(DEBUG1,
862 | 902 | (errmsg("registering background worker \"%s\"", worker->bgw_name)));
863 | |
864 | 902 | if (!process_shared_preload_libraries_in_progress && /* outside preload, only library "postgres" may register */
865 | 0 | strcmp(worker->bgw_library_name, "postgres") != 0)
866 | 0 | {
867 | 0 | if (!IsUnderPostmaster) /* complaint is logged only when not under postmaster; otherwise the request is dropped silently */
868 | 0 | ereport(LOG,
869 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
870 | 0 | errmsg("background worker \"%s\": must be registered in shared_preload_libraries",
871 | 0 | worker->bgw_name)));
872 | 0 | return;
873 | 902 | }
874 | |
875 | 902 | if (!SanityCheckBackgroundWorker(worker, LOG)) /* LOG is the level handed to the checker; a false result drops the request */
876 | 0 | return;
877 | |
878 | 902 | if (worker->bgw_notify_pid != 0) /* a notification PID is rejected for workers registered through this function */
879 | 0 | {
880 | 0 | ereport(LOG,
881 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
882 | 0 | errmsg("background worker \"%s\": only dynamic background workers can request notification",
883 | 0 | worker->bgw_name)));
884 | 0 | return;
885 | 902 | }
886 | |
887 | | /*
888 | | * Enforce maximum number of workers. Note this is overly restrictive: we
889 | | * could allow more non-shmem-connected workers, because these don't count
890 | | * towards the MAX_BACKENDS limit elsewhere. For now, it doesn't seem
891 | | * important to relax this restriction.
892 | | */
893 | 902 | if (++numworkers > max_worker_processes) /* pre-increment: the attempt stays counted even if it is rejected here or malloc fails below */
894 | 0 | {
895 | 0 | ereport(LOG,
896 | 0 | (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
897 | 0 | errmsg("too many background workers"),
898 | 0 | errdetail_plural("Up to %d background worker can be registered with the current settings.",
899 | 0 | "Up to %d background workers can be registered with the current settings.",
900 | 0 | max_worker_processes,
901 | 0 | max_worker_processes),
902 | 0 | errhint("Consider increasing the configuration parameter \"max_worker_processes\".")));
903 | 0 | return;
904 | 902 | }
905 | |
906 | | /*
907 | | * Copy the registration data into the registered workers list.
908 | | */
909 | 902 | rw = malloc(sizeof(RegisteredBgWorker)); /* plain malloc; nothing in this function frees the entry */
910 | 902 | if (rw == NULL)
911 | 0 | {
912 | 0 | ereport(LOG,
913 | 0 | (errcode(ERRCODE_OUT_OF_MEMORY),
914 | 0 | errmsg("out of memory")));
915 | 0 | return;
916 | 902 | }
917 | |
918 | 902 | rw->rw_worker = *worker; /* struct copy of the caller's request */
919 | 902 | rw->rw_backend = NULL; /* this and the following bookkeeping fields start out cleared */
920 | 902 | rw->rw_pid = 0;
921 | 902 | rw->rw_child_slot = 0;
922 | 902 | rw->rw_crashed_at = 0;
923 | 902 | rw->rw_terminate = false;
924 | |
925 | 902 | slist_push_head(&BackgroundWorkerList, &rw->rw_lnode); /* newest registration goes to the head of the list */
926 | 902 | }
927 | | |
928 | | /*
929 | | * Register a new background worker from a regular backend.
930 | | *
931 | | * Returns true on success and false on failure. Failure typically indicates
932 | | * that no background worker slots are currently available; false is also returned when not running under the postmaster, or when a parallel worker would exceed max_parallel_workers.
933 | | *
934 | | * If handle != NULL, we'll set *handle to a pointer that can subsequently
935 | | * be used as an argument to GetBackgroundWorkerPid(). The caller can
936 | | * free this pointer using pfree(), if desired. *handle is written only on success.
937 | | */
938 | | bool
939 | | RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
940 | | BackgroundWorkerHandle **handle)
941 | 0 | {
942 | 0 | int slotno;
943 | 0 | bool success = false;
944 | 0 | bool parallel;
945 | 0 | uint64 generation = 0;
946 | |
947 | | /*
948 | | * We can't register dynamic background workers from the postmaster. If
949 | | * this is a standalone backend, we're the only process and can't start
950 | | * any more. In a multi-process environment, it might be theoretically
951 | | * possible, but we don't currently support it due to locking
952 | | * considerations; see comments on the BackgroundWorkerSlot data
953 | | * structure.
954 | | */
955 | 0 | if (!IsUnderPostmaster)
956 | 0 | return false;
957 | |
958 | 0 | if (!SanityCheckBackgroundWorker(worker, ERROR)) /* checker is handed ERROR here; presumably it raises rather than returning false -- TODO confirm */
959 | 0 | return false;
960 | |
961 | 0 | parallel = (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0; /* true when the BGWORKER_CLASS_PARALLEL flag bit is set */
962 |

963 | 0 | LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
964 | |
965 | | /*
966 | | * If this is a parallel worker, check whether there are already too many
967 | | * parallel workers; if so, don't register another one. Our view of
968 | | * parallel_terminate_count may be slightly stale, but that doesn't really
969 | | * matter: we would have gotten the same result if we'd arrived here
970 | | * slightly earlier anyway. There's no help for it, either, since the
971 | | * postmaster must not take locks; a memory barrier wouldn't guarantee
972 | | * anything useful.
973 | | */
974 | 0 | if (parallel && (BackgroundWorkerData->parallel_register_count -
975 | 0 | BackgroundWorkerData->parallel_terminate_count) >=
976 | 0 | max_parallel_workers)
977 | 0 | {
978 | 0 | Assert(BackgroundWorkerData->parallel_register_count -
979 | 0 | BackgroundWorkerData->parallel_terminate_count <=
980 | 0 | MAX_PARALLEL_WORKER_LIMIT);
981 | 0 | LWLockRelease(BackgroundWorkerLock); /* release before the early return */
982 | 0 | return false;
983 | 0 | }
984 | |
985 | | /*
986 | | * Look for an unused slot. If we find one, grab it.
987 | | */
988 | 0 | for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno)
989 | 0 | {
990 | 0 | BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
991 |

992 | 0 | if (!slot->in_use)
993 | 0 | {
994 | 0 | memcpy(&slot->worker, worker, sizeof(BackgroundWorker));
995 | 0 | slot->pid = InvalidPid; /* indicates not started yet */
996 | 0 | slot->generation++; /* new generation: handles from an earlier use of this slot no longer match */
997 | 0 | slot->terminate = false;
998 | 0 | generation = slot->generation; /* remembered for the handle built after the lock is released */
999 | 0 | if (parallel)
1000 | 0 | BackgroundWorkerData->parallel_register_count++; /* feeds the max_parallel_workers check above */
1001 | |
1002 | | /*
1003 | | * Make sure postmaster doesn't see the slot as in use before it
1004 | | * sees the new contents.
1005 | | */
1006 | 0 | pg_write_barrier();
1007 |

1008 | 0 | slot->in_use = true;
1009 | 0 | success = true;
1010 | 0 | break;
1011 | 0 | }
1012 | 0 | }
1013 |

1014 | 0 | LWLockRelease(BackgroundWorkerLock);
1015 | |
1016 | | /* If we found a slot, tell the postmaster to notice the change. */
1017 | 0 | if (success)
1018 | 0 | SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE); /* sent after the lock has been released */
1019 | |
1020 | | /*
1021 | | * If we found a slot and the user has provided a handle, initialize it.
1022 | | */
1023 | 0 | if (success && handle)
1024 | 0 | {
1025 | 0 | *handle = palloc(sizeof(BackgroundWorkerHandle));
1026 | 0 | (*handle)->slot = slotno; /* index at which the search loop stopped */
1027 | 0 | (*handle)->generation = generation;
1028 | 0 | }
1029 |

1030 | 0 | return success;
1031 | 0 | }
1032 | | |
1033 | | /*
1034 | | * Get the PID of a dynamically-registered background worker.
1035 | | *
1036 | | * If the worker is determined to be running, the return value will be
1037 | | * BGWH_STARTED and *pidp will get the PID of the worker process. If the
1038 | | * postmaster has not yet attempted to start the worker, the return value will
1039 | | * be BGWH_NOT_YET_STARTED. Otherwise, the return value is BGWH_STOPPED. *pidp is written only when BGWH_STARTED is returned.
1040 | | *
1041 | | * BGWH_STOPPED can indicate either that the worker is temporarily stopped
1042 | | * (because it is configured for automatic restart and exited non-zero),
1043 | | * or that the worker is permanently stopped (because it exited with exit
1044 | | * code 0, or was not configured for automatic restart), or even that the
1045 | | * worker was unregistered without ever starting (either because startup
1046 | | * failed and the worker is not configured for automatic restart, or because
1047 | | * TerminateBackgroundWorker was used before the worker was successfully
1048 | | * started).
1049 | | */
1050 | | BgwHandleStatus
1051 | | GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
1052 | 0 | {
1053 | 0 | BackgroundWorkerSlot *slot;
1054 | 0 | pid_t pid;
1055 |

1056 | 0 | Assert(handle->slot < max_worker_processes); /* checked only in assert-enabled builds */
1057 | 0 | slot = &BackgroundWorkerData->slot[handle->slot];
1058 | |
1059 | | /*
1060 | | * We could probably arrange to synchronize access to data using memory
1061 | | * barriers only, but for now, let's just keep it simple and grab the
1062 | | * lock. It seems unlikely that there will be enough traffic here to
1063 | | * result in meaningful contention.
1064 | | */
1065 | 0 | LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1066 | |
1067 | | /*
1068 | | * The generation number can't be concurrently changed while we hold the
1069 | | * lock. The pid, which is updated by the postmaster, can change at any
1070 | | * time, but we assume such changes are atomic. So the value we read
1071 | | * won't be garbage, but it might be out of date by the time the caller
1072 | | * examines it (but that's unavoidable anyway).
1073 | | *
1074 | | * The in_use flag could be in the process of changing from true to false,
1075 | | * but if it is already false then it can't change further.
1076 | | */
1077 | 0 | if (handle->generation != slot->generation || !slot->in_use) /* slot was reused for another registration, or is free */
1078 | 0 | pid = 0; /* 0 is the local marker that maps to BGWH_STOPPED below */
1079 | 0 | else
1080 | 0 | pid = slot->pid; /* may still be InvalidPid, which maps to BGWH_NOT_YET_STARTED below */
1081 | |
1082 | | /* All done. */
1083 | 0 | LWLockRelease(BackgroundWorkerLock);
1084 |

1085 | 0 | if (pid == 0)
1086 | 0 | return BGWH_STOPPED;
1087 | 0 | else if (pid == InvalidPid)
1088 | 0 | return BGWH_NOT_YET_STARTED;
1089 | 0 | *pidp = pid; /* the only path that writes the output parameter */
1090 | 0 | return BGWH_STARTED;
1091 | 0 | }
1092 | | |
1093 | | /*
1094 | | * Wait for a background worker to start up.
1095 | | *
1096 | | * This is like GetBackgroundWorkerPid(), except that if the worker has not
1097 | | * yet started, we wait for it to do so; thus, BGWH_NOT_YET_STARTED is never
1098 | | * returned. However, if the postmaster has died, we give up and return
1099 | | * BGWH_POSTMASTER_DIED, since in that case we know that startup will not
1100 | | * take place. *pidp is written only when BGWH_STARTED is returned.
1101 | | */
1102 | | BgwHandleStatus
1103 | | WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *handle, pid_t *pidp)
1104 | 0 | {
1105 | 0 | BgwHandleStatus status;
1106 | 0 | int rc;
1107 |

1108 | 0 | for (;;) /* left only via the two break statements below */
1109 | 0 | {
1110 | 0 | pid_t pid;
1111 |

1112 | 0 | CHECK_FOR_INTERRUPTS();
1113 |

1114 | 0 | status = GetBackgroundWorkerPid(handle, &pid);
1115 | 0 | if (status == BGWH_STARTED)
1116 | 0 | *pidp = pid;
1117 | 0 | if (status != BGWH_NOT_YET_STARTED) /* started or stopped: either way there is nothing left to wait for */
1118 | 0 | break;
1119 | |
1120 | 0 | rc = WaitLatch(MyLatch,
1121 | 0 | WL_LATCH_SET | WL_POSTMASTER_DEATH, 0, /* WL_TIMEOUT is not requested; the 0 is presumably an unused timeout -- confirm against WaitLatch */
1122 | 0 | WAIT_EVENT_BGWORKER_STARTUP);
1123 |

1124 | 0 | if (rc & WL_POSTMASTER_DEATH)
1125 | 0 | {
1126 | 0 | status = BGWH_POSTMASTER_DIED;
1127 | 0 | break;
1128 | 0 | }
1129 | |
1130 | 0 | ResetLatch(MyLatch); /* clear the latch, then loop to re-check the worker's status */
1131 | 0 | }
1132 |

1133 | 0 | return status;
1134 | 0 | }
1135 | | |
1136 | | /*
1137 | | * Wait for a background worker to stop.
1138 | | *
1139 | | * If the worker hasn't yet started, or is running, we wait for it to stop
1140 | | * and then return BGWH_STOPPED. However, if the postmaster has died, we give
1141 | | * up and return BGWH_POSTMASTER_DIED, because it's the postmaster that
1142 | | * notifies us when a worker's state changes. These are the only two values this function returns.
1143 | | */
1144 | | BgwHandleStatus
1145 | | WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
1146 | 0 | {
1147 | 0 | BgwHandleStatus status;
1148 | 0 | int rc;
1149 |

1150 | 0 | for (;;) /* left only via the two break statements below */
1151 | 0 | {
1152 | 0 | pid_t pid; /* receives GetBackgroundWorkerPid's output; the value is not used here */
1153 |

1154 | 0 | CHECK_FOR_INTERRUPTS();
1155 |

1156 | 0 | status = GetBackgroundWorkerPid(handle, &pid);
1157 | 0 | if (status == BGWH_STOPPED)
1158 | 0 | break;
1159 | |
1160 | 0 | rc = WaitLatch(MyLatch,
1161 | 0 | WL_LATCH_SET | WL_POSTMASTER_DEATH, 0, /* WL_TIMEOUT is not requested; the 0 is presumably an unused timeout -- confirm against WaitLatch */
1162 | 0 | WAIT_EVENT_BGWORKER_SHUTDOWN);
1163 |

1164 | 0 | if (rc & WL_POSTMASTER_DEATH)
1165 | 0 | {
1166 | 0 | status = BGWH_POSTMASTER_DIED;
1167 | 0 | break;
1168 | 0 | }
1169 | |
1170 | 0 | ResetLatch(MyLatch); /* clear the latch, then loop to re-check the worker's status */
1171 | 0 | }
1172 |

1173 | 0 | return status;
1174 | 0 | }
1175 | | |
1176 | | /*
1177 | | * Instruct the postmaster to terminate a background worker.
1178 | | *
1179 | | * Note that it's safe to do this without regard to whether the worker is
1180 | | * still running, or even if the worker may already have exited and been
1181 | | * unregistered. A handle whose generation no longer matches its slot is ignored: nothing is flagged and the postmaster is not signalled.
1182 | | */
1183 | | void
1184 | | TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
1185 | 0 | {
1186 | 0 | BackgroundWorkerSlot *slot;
1187 | 0 | bool signal_postmaster = false;
1188 |

1189 | 0 | Assert(handle->slot < max_worker_processes); /* checked only in assert-enabled builds */
1190 | 0 | slot = &BackgroundWorkerData->slot[handle->slot];
1191 | |
1192 | | /* Set terminate flag in shared memory, unless slot has been reused. */
1193 | 0 | LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
1194 | 0 | if (handle->generation == slot->generation) /* in_use is not consulted here, only the generation */
1195 | 0 | {
1196 | 0 | slot->terminate = true;
1197 | 0 | signal_postmaster = true;
1198 | 0 | }
1199 | 0 | LWLockRelease(BackgroundWorkerLock);
1200 | |
1201 | | /* Make sure the postmaster notices the change to shared memory. */
1202 | 0 | if (signal_postmaster)
1203 | 0 | SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE); /* sent after the lock has been released */
1204 | 0 | }
1205 | | |
1206 | | /*
1207 | | * Look up (and possibly load) a bgworker entry point function.
1208 | | *
1209 | | * For functions contained in the core code, we use library name "postgres"
1210 | | * and consult the InternalBGWorkers array. External functions are
1211 | | * looked up, and loaded if necessary, using load_external_function(). An unknown "postgres" function name is reported with elog(ERROR).
1212 | | *
1213 | | * The point of this is to pass function names as strings across process
1214 | | * boundaries. We can't pass actual function addresses because of the
1215 | | * possibility that the function has been loaded at a different address
1216 | | * in a different process. This is obviously a hazard for functions in
1217 | | * loadable libraries, but it can happen even for functions in the core code
1218 | | * on platforms using EXEC_BACKEND (e.g., Windows).
1219 | | *
1220 | | * At some point it might be worthwhile to get rid of InternalBGWorkers[]
1221 | | * in favor of applying load_external_function() for core functions too;
1222 | | * but that raises portability issues that are not worth addressing now.
1223 | | */
1224 | | static bgworker_main_type
1225 | | LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname)
1226 | 902 | {
1227 | | /*
1228 | | * If the function is to be loaded from postgres itself, search the
1229 | | * InternalBGWorkers array.
1230 | | */
1231 | 902 | if (strcmp(libraryname, "postgres") == 0)
1232 | 0 | {
1233 | 0 | int i;
1234 |

1235 | 0 | for (i = 0; i < lengthof(InternalBGWorkers); i++) /* linear scan; the first exact name match wins */
1236 | 0 | {
1237 | 0 | if (strcmp(InternalBGWorkers[i].fn_name, funcname) == 0)
1238 | 0 | return InternalBGWorkers[i].fn_addr;
1239 | 0 | }
1240 | |
1241 | | /* We can only reach this by programming error. */
1242 | 0 | elog(ERROR, "internal function \"%s\" not found", funcname);
1243 | 0 | }
1244 | |
1245 | | /* Otherwise load from external library. */
1246 | 902 | return (bgworker_main_type) /* the lookup result is cast to the worker entry-point type */
1247 | 902 | load_external_function(libraryname, funcname, true, NULL); /* meaning of the trailing (true, NULL) arguments is defined by load_external_function -- see its declaration */
1248 | 902 | }
1249 | | |
1250 | | /*
1251 | | * Given a PID, get the bgw_type of the background worker. Returns NULL if
1252 | | * not a valid background worker (that is, no slot holds a matching positive pid).
1253 | | *
1254 | | * The return value is in static memory belonging to this function, so it has
1255 | | * to be used before calling this function again. This is so that the caller
1256 | | * doesn't have to worry about the background worker locking protocol.
1257 | | */
1258 | | const char *
1259 | | GetBackgroundWorkerTypeByPid(pid_t pid)
1260 | 0 | {
1261 | 0 | int slotno;
1262 | 0 | bool found = false;
1263 | 0 | static char result[BGW_MAXLEN]; /* static: this is the buffer handed back to the caller */
1264 |

1265 | 0 | LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1266 |

1267 | 0 | for (slotno = 0; slotno < BackgroundWorkerData->total_slots; slotno++)
1268 | 0 | {
1269 | 0 | BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
1270 |

1271 | 0 | if (slot->pid > 0 && slot->pid == pid) /* only slots with a positive pid are considered; in_use is not consulted */
1272 | 0 | {
1273 | 0 | strcpy(result, slot->worker.bgw_type); /* copied while the lock is held; assumes bgw_type is NUL-terminated within BGW_MAXLEN bytes -- TODO confirm */
1274 | 0 | found = true;
1275 | 0 | break;
1276 | 0 | }
1277 | 0 | }
1278 |

1279 | 0 | LWLockRelease(BackgroundWorkerLock);
1280 |

1281 | 0 | if (!found)
1282 | 0 | return NULL;
1283 | |
1284 | 0 | return result;
1285 | 0 | }