YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/build/debugcov-clang-dynamic-arm64-ninja/postgres_build/src/backend/port/pg_shmem.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * sysv_shmem.c
4
 *    Implement shared memory using SysV facilities
5
 *
6
 * These routines used to be a fairly thin layer on top of SysV shared
7
 * memory functionality.  With the addition of anonymous-shmem logic,
8
 * they're a bit fatter now.  We still require a SysV shmem block to
9
 * exist, though, because mmap'd shmem provides no way to find out how
10
 * many processes are attached, which we need for interlocking purposes.
11
 *
12
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
13
 * Portions Copyright (c) 1994, Regents of the University of California
14
 *
15
 * IDENTIFICATION
16
 *    src/backend/port/sysv_shmem.c
17
 *
18
 *-------------------------------------------------------------------------
19
 */
20
#include "postgres.h"
21
22
#include <signal.h>
23
#include <unistd.h>
24
#include <sys/file.h>
25
#include <sys/mman.h>
26
#include <sys/stat.h>
27
#ifdef HAVE_SYS_IPC_H
28
#include <sys/ipc.h>
29
#endif
30
#ifdef HAVE_SYS_SHM_H
31
#include <sys/shm.h>
32
#endif
33
34
#include "miscadmin.h"
35
#include "portability/mem.h"
36
#include "storage/dsm.h"
37
#include "storage/fd.h"
38
#include "storage/ipc.h"
39
#include "storage/pg_shmem.h"
40
#include "utils/guc.h"
41
#include "utils/pidfile.h"
42
43
44
/*
45
 * As of PostgreSQL 9.3, we normally allocate only a very small amount of
46
 * System V shared memory, and only for the purposes of providing an
47
 * interlock to protect the data directory.  The real shared memory block
48
 * is allocated using mmap().  This works around the problem that many
49
 * systems have very low limits on the amount of System V shared memory
50
 * that can be allocated.  Even a limit of a few megabytes will be enough
51
 * to run many copies of PostgreSQL without needing to adjust system settings.
52
 *
53
 * We assume that no one will attempt to run PostgreSQL 9.3 or later on
54
 * systems that are ancient enough that anonymous shared memory is not
55
 * supported, such as pre-2.4 versions of Linux.  If that turns out to be
56
 * false, we might need to add compile and/or run-time tests here and do this
57
 * only if the running kernel supports it.
58
 *
59
 * However, we must always disable this logic in the EXEC_BACKEND case, and
60
 * fall back to the old method of allocating the entire segment using System V
61
 * shared memory, because there's no way to attach an anonymous mmap'd segment
62
 * to a process after exec().  Since EXEC_BACKEND is intended only for
63
 * developer use, this shouldn't be a big problem.  Because of this, we do
64
 * not worry about supporting anonymous shmem in the EXEC_BACKEND cases below.
65
 *
66
 * As of PostgreSQL 12, we regained the ability to use a large System V shared
67
 * memory region even in non-EXEC_BACKEND builds, if shared_memory_type is set
68
 * to sysv (though this is not the default).
69
 */
70
71
72
typedef key_t IpcMemoryKey;		/* shared memory key passed to shmget(2) */
typedef int IpcMemoryId;		/* shared memory ID returned by shmget(2) */

/*
 * How does a given IpcMemoryId relate to this PostgreSQL process?
 *
 * One could recycle unattached segments of different data directories if we
 * distinguished that case from other SHMSTATE_FOREIGN cases.  Doing so would
 * cause us to visit less of the key space, making us less likely to detect a
 * SHMSTATE_ATTACHED key.  It would also complicate the concurrency analysis,
 * in that postmasters of different data directories could simultaneously
 * attempt to recycle a given key.  We'll waste keys longer in some cases, but
 * avoiding the problems of the alternative justifies that loss.
 */
typedef enum
{
	SHMSTATE_ANALYSIS_FAILURE,	/* unexpected failure to analyze the ID */
	SHMSTATE_ATTACHED,			/* pertinent to DataDir, has attached PIDs */
	SHMSTATE_ENOENT,			/* no segment of that ID */
	SHMSTATE_FOREIGN,			/* exists, but not pertinent to DataDir */
	SHMSTATE_UNATTACHED			/* pertinent to DataDir, no attached PIDs */
} IpcMemoryState;

/*
 * Key and attach address of the SysV segment we created; exported so that
 * other modules (e.g. EXEC_BACKEND child startup) can reattach or detach.
 * Both are reset by PGSharedMemoryDetach()/PGSharedMemoryNoReAttach().
 */
unsigned long UsedShmemSegID = 0;
void	   *UsedShmemSegAddr = NULL;

/* Address/size of the anonymous mmap'd block, when shared_memory_type=mmap */
static Size AnonymousShmemSize;
static void *AnonymousShmem = NULL;

static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
static void IpcMemoryDetach(int status, Datum shmaddr);
static void IpcMemoryDelete(int status, Datum shmId);
static IpcMemoryState PGSharedMemoryAttach(IpcMemoryId shmId,
					 PGShmemHeader **addr);
107
108
109
/*
 *	InternalIpcMemoryCreate(memKey, size)
 *
 * Attempt to create a new shared memory segment with the specified key.
 * Will fail (return NULL) if such a segment already exists.  If successful,
 * attach the segment to the current process and return its attached address.
 * On success, callbacks are registered with on_shmem_exit to detach and
 * delete the segment when on_shmem_exit is called.
 *
 * If we fail with a failure code other than collision-with-existing-segment,
 * print out an error and abort.  Other types of errors are not recoverable.
 */
static void *
InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
{
	IpcMemoryId shmid;
	void	   *requestedAddress = NULL;
	void	   *memAddress;

	/*
	 * Normally we just pass requestedAddress = NULL to shmat(), allowing the
	 * system to choose where the segment gets mapped.  But in an EXEC_BACKEND
	 * build, it's possible for whatever is chosen in the postmaster to not
	 * work for backends, due to variations in address space layout.  As a
	 * rather klugy workaround, allow the user to specify the address to use
	 * via setting the environment variable PG_SHMEM_ADDR.  (If this were of
	 * interest for anything except debugging, we'd probably create a cleaner
	 * and better-documented way to set it, such as a GUC.)
	 */
#ifdef EXEC_BACKEND
	{
		char	   *pg_shmem_addr = getenv("PG_SHMEM_ADDR");

		if (pg_shmem_addr)
			requestedAddress = (void *) strtoul(pg_shmem_addr, NULL, 0);
	}
#endif

	shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);

	if (shmid < 0)
	{
		/* Save errno now: later shmget/shmctl calls below may clobber it. */
		int			shmget_errno = errno;

		/*
		 * Fail quietly if error indicates a collision with existing segment.
		 * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
		 * we could get a permission violation instead?  Also, EIDRM might
		 * occur if an old seg is slated for destruction but not gone yet.
		 */
		if (shmget_errno == EEXIST || shmget_errno == EACCES
#ifdef EIDRM
			|| shmget_errno == EIDRM
#endif
			)
			return NULL;

		/*
		 * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if
		 * there is an existing segment but it's smaller than "size" (this is
		 * a result of poorly-thought-out ordering of error tests). To
		 * distinguish between collision and invalid size in such cases, we
		 * make a second try with size = 0.  These kernels do not test size
		 * against SHMMIN in the preexisting-segment case, so we will not get
		 * EINVAL a second time if there is such a segment.
		 */
		if (shmget_errno == EINVAL)
		{
			shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection);

			if (shmid < 0)
			{
				/* As above, fail quietly if we verify a collision */
				if (errno == EEXIST || errno == EACCES
#ifdef EIDRM
					|| errno == EIDRM
#endif
					)
					return NULL;
				/* Otherwise, fall through to report the original error */
			}
			else
			{
				/*
				 * On most platforms we cannot get here because SHMMIN is
				 * greater than zero.  However, if we do succeed in creating a
				 * zero-size segment, free it and then fall through to report
				 * the original error.
				 */
				if (shmctl(shmid, IPC_RMID, NULL) < 0)
					elog(LOG, "shmctl(%d, %d, 0) failed: %m",
						 (int) shmid, IPC_RMID);
			}
		}

		/*
		 * Else complain and abort.
		 *
		 * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX
		 * is violated.  SHMALL violation might be reported as either ENOMEM
		 * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which
		 * it should be.  SHMMNI violation is ENOSPC, per spec.  Just plain
		 * not-enough-RAM is ENOMEM.
		 */
		errno = shmget_errno;
		ereport(FATAL,
				(errmsg("could not create shared memory segment: %m"),
				 errdetail("Failed system call was shmget(key=%lu, size=%zu, 0%o).",
						   (unsigned long) memKey, size,
						   IPC_CREAT | IPC_EXCL | IPCProtection),
				 (shmget_errno == EINVAL) ?
				 errhint("This error usually means that PostgreSQL's request for a shared memory "
						 "segment exceeded your kernel's SHMMAX parameter, or possibly that "
						 "it is less than "
						 "your kernel's SHMMIN parameter.\n"
						 "The PostgreSQL documentation contains more information about shared "
						 "memory configuration.") : 0,
				 (shmget_errno == ENOMEM) ?
				 errhint("This error usually means that PostgreSQL's request for a shared "
						 "memory segment exceeded your kernel's SHMALL parameter.  You might need "
						 "to reconfigure the kernel with larger SHMALL.\n"
						 "The PostgreSQL documentation contains more information about shared "
						 "memory configuration.") : 0,
				 (shmget_errno == ENOSPC) ?
				 errhint("This error does *not* mean that you have run out of disk space.  "
						 "It occurs either if all available shared memory IDs have been taken, "
						 "in which case you need to raise the SHMMNI parameter in your kernel, "
						 "or because the system's overall limit for shared memory has been "
						 "reached.\n"
						 "The PostgreSQL documentation contains more information about shared "
						 "memory configuration.") : 0));
	}

	/* Register on-exit routine to delete the new segment */
	on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));

	/* OK, should be able to attach to the segment */
	memAddress = shmat(shmid, requestedAddress, PG_SHMAT_FLAGS);

	if (memAddress == (void *) -1)
		elog(FATAL, "shmat(id=%d, addr=%p, flags=0x%x) failed: %m",
			 shmid, requestedAddress, PG_SHMAT_FLAGS);

	/* Register on-exit routine to detach new segment before deleting */
	on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));

	/*
	 * Store shmem key and ID in data directory lockfile.  Format to try to
	 * keep it the same length always (trailing junk in the lockfile won't
	 * hurt, but might confuse humans).
	 */
	{
		char		line[64];

		sprintf(line, "%9lu %9lu",
				(unsigned long) memKey, (unsigned long) shmid);
		AddToDataDirLockFile(LOCK_FILE_LINE_SHMEM_KEY, line);
	}

	return memAddress;
}
270
271
/****************************************************************************/
272
/*  IpcMemoryDetach(status, shmaddr)  removes a shared memory segment   */
273
/*                    from process' address space     */
274
/*  (called as an on_shmem_exit callback, hence funny argument list)    */
275
/****************************************************************************/
276
static void
277
IpcMemoryDetach(int status, Datum shmaddr)
278
3.61k
{
279
  /* Detach System V shared memory block. */
280
3.61k
  if (shmdt(DatumGetPointer(shmaddr)) < 0)
281
0
    elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr));
282
3.61k
}
283
284
/****************************************************************************/
285
/*  IpcMemoryDelete(status, shmId)    deletes a shared memory segment   */
286
/*  (called as an on_shmem_exit callback, hence funny argument list)    */
287
/****************************************************************************/
288
static void
289
IpcMemoryDelete(int status, Datum shmId)
290
3.61k
{
291
3.61k
  if (shmctl(DatumGetInt32(shmId), IPC_RMID, NULL) < 0)
292
0
    elog(LOG, "shmctl(%d, %d, 0) failed: %m",
293
3.61k
       DatumGetInt32(shmId), IPC_RMID);
294
3.61k
}
295
296
/*
297
 * PGSharedMemoryIsInUse
298
 *
299
 * Is a previously-existing shmem segment still existing and in use?
300
 *
301
 * The point of this exercise is to detect the case where a prior postmaster
302
 * crashed, but it left child backends that are still running.  Therefore
303
 * we only care about shmem segments that are associated with the intended
304
 * DataDir.  This is an important consideration since accidental matches of
305
 * shmem segment IDs are reasonably common.
306
 */
307
bool
308
PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
309
0
{
310
0
  PGShmemHeader *memAddress;
311
0
  IpcMemoryState state;
312
313
0
  state = PGSharedMemoryAttach((IpcMemoryId) id2, &memAddress);
314
0
  if (memAddress && shmdt(memAddress) < 0)
315
0
    elog(LOG, "shmdt(%p) failed: %m", memAddress);
316
0
  switch (state)
317
0
  {
318
0
    case SHMSTATE_ENOENT:
319
0
    case SHMSTATE_FOREIGN:
320
0
    case SHMSTATE_UNATTACHED:
321
0
      return false;
322
0
    case SHMSTATE_ANALYSIS_FAILURE:
323
0
    case SHMSTATE_ATTACHED:
324
0
      return true;
325
0
  }
326
0
  return true;
327
0
}
328
329
/*
 * See comment at IpcMemoryState.
 *
 * If *addr comes back non-NULL, this process is attached to the segment at
 * that address, and the caller is responsible for shmdt()'ing it (as
 * PGSharedMemoryIsInUse and PGSharedMemoryCreate do).
 */
static IpcMemoryState
PGSharedMemoryAttach(IpcMemoryId shmId,
					 PGShmemHeader **addr)
{
	struct shmid_ds shmStat;
	struct stat statbuf;
	PGShmemHeader *hdr;

	*addr = NULL;

	/*
	 * We detect whether a shared memory segment is in use by seeing whether
	 * it (a) exists and (b) has any processes attached to it.
	 */
	if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
	{
		/*
		 * EINVAL actually has multiple possible causes documented in the
		 * shmctl man page, but we assume it must mean the segment no longer
		 * exists.
		 */
		if (errno == EINVAL)
			return SHMSTATE_ENOENT;

		/*
		 * EACCES implies we have no read permission, which means it is not a
		 * Postgres shmem segment (or at least, not one that is relevant to
		 * our data directory).
		 */
		if (errno == EACCES)
			return SHMSTATE_FOREIGN;

		/*
		 * Some Linux kernel versions (in fact, all of them as of July 2007)
		 * sometimes return EIDRM when EINVAL is correct.  The Linux kernel
		 * actually does not have any internal state that would justify
		 * returning EIDRM, so we can get away with assuming that EIDRM is
		 * equivalent to EINVAL on that platform.
		 */
#ifdef HAVE_LINUX_EIDRM_BUG
		if (errno == EIDRM)
			return SHMSTATE_ENOENT;
#endif

		/*
		 * Otherwise, we had better assume that the segment is in use. The
		 * only likely case is EIDRM, which implies that the segment has been
		 * IPC_RMID'd but there are still processes attached to it.
		 */
		return SHMSTATE_ANALYSIS_FAILURE;
	}

	/*
	 * Try to attach to the segment and see if it matches our data directory.
	 * This avoids shmid-conflict problems on machines that are running
	 * several postmasters under the same userid.
	 */
	if (stat(DataDir, &statbuf) < 0)
		return SHMSTATE_ANALYSIS_FAILURE;	/* can't stat; be conservative */

	/*
	 * Attachment fails if we have no write permission.  Since that will never
	 * happen with Postgres IPCProtection, such a failure shows the segment is
	 * not a Postgres segment.  If attachment fails for some other reason, be
	 * conservative.
	 */
	hdr = (PGShmemHeader *) shmat(shmId, UsedShmemSegAddr, PG_SHMAT_FLAGS);
	if (hdr == (PGShmemHeader *) -1)
	{
		if (errno == EACCES)
			return SHMSTATE_FOREIGN;
		else
			return SHMSTATE_ANALYSIS_FAILURE;
	}
	/* Report the attach address even if we classify the segment as foreign. */
	*addr = hdr;

	/* Check the header's identity stamp against our data directory. */
	if (hdr->magic != PGShmemMagic ||
		hdr->device != statbuf.st_dev ||
		hdr->inode != statbuf.st_ino)
	{
		/*
		 * It's either not a Postgres segment, or not one for my data
		 * directory.
		 */
		return SHMSTATE_FOREIGN;
	}

	/* Ours: classify by whether any process is still attached to it. */
	return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : SHMSTATE_ATTACHED;
}
419
420
#ifdef MAP_HUGETLB
421
422
/*
423
 * Identify the huge page size to use.
424
 *
425
 * Some Linux kernel versions have a bug causing mmap() to fail on requests
426
 * that are not a multiple of the hugepage size.  Versions without that bug
427
 * instead silently round the request up to the next hugepage multiple ---
428
 * and then munmap() fails when we give it a size different from that.
429
 * So we have to round our request up to a multiple of the actual hugepage
430
 * size to avoid trouble.
431
 *
432
 * Doing the round-up ourselves also lets us make use of the extra memory,
433
 * rather than just wasting it.  Currently, we just increase the available
434
 * space recorded in the shmem header, which will make the extra usable for
435
 * purposes such as additional locktable entries.  Someday, for very large
436
 * hugepage sizes, we might want to think about more invasive strategies,
437
 * such as increasing shared_buffers to absorb the extra space.
438
 *
439
 * Returns the (real or assumed) page size into *hugepagesize,
440
 * and the hugepage-related mmap flags to use into *mmap_flags.
441
 *
442
 * Currently *mmap_flags is always just MAP_HUGETLB.  Someday, on systems
443
 * that support it, we might OR in additional bits to specify a particular
444
 * non-default huge page size.
445
 */
446
static void
447
GetHugePageSize(Size *hugepagesize, int *mmap_flags)
448
{
449
  /*
450
   * If we fail to find out the system's default huge page size, assume it
451
   * is 2MB.  This will work fine when the actual size is less.  If it's
452
   * more, we might get mmap() or munmap() failures due to unaligned
453
   * requests; but at this writing, there are no reports of any non-Linux
454
   * systems being picky about that.
455
   */
456
  *hugepagesize = 2 * 1024 * 1024;
457
  *mmap_flags = MAP_HUGETLB;
458
459
  /*
460
   * System-dependent code to find out the default huge page size.
461
   *
462
   * On Linux, read /proc/meminfo looking for a line like "Hugepagesize:
463
   * nnnn kB".  Ignore any failures, falling back to the preset default.
464
   */
465
#ifdef __linux__
466
  {
467
    FILE     *fp = AllocateFile("/proc/meminfo", "r");
468
    char    buf[128];
469
    unsigned int sz;
470
    char    ch;
471
472
    if (fp)
473
    {
474
      while (fgets(buf, sizeof(buf), fp))
475
      {
476
        if (sscanf(buf, "Hugepagesize: %u %c", &sz, &ch) == 2)
477
        {
478
          if (ch == 'k')
479
          {
480
            *hugepagesize = sz * (Size) 1024;
481
            break;
482
          }
483
          /* We could accept other units besides kB, if needed */
484
        }
485
      }
486
      FreeFile(fp);
487
    }
488
  }
489
#endif              /* __linux__ */
490
}
491
492
#endif              /* MAP_HUGETLB */
493
494
/*
 * Creates an anonymous mmap()ed shared memory segment.
 *
 * Pass the requested size in *size.  This function will modify *size to the
 * actual size of the allocation, if it ends up allocating a segment that is
 * larger than requested (e.g. after rounding up to the huge page size).
 *
 * Aborts via ereport(FATAL) if no mapping can be obtained.
 */
static void *
CreateAnonymousSegment(Size *size)
{
	Size		allocsize = *size;
	void	   *ptr = MAP_FAILED;
	int			mmap_errno = 0;

#ifndef MAP_HUGETLB
	/* PGSharedMemoryCreate should have dealt with this case */
	Assert(huge_pages != HUGE_PAGES_ON);
#else
	if (huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY)
	{
		/*
		 * Round up the request size to a suitable large value.
		 */
		Size		hugepagesize;
		int			mmap_flags;

		GetHugePageSize(&hugepagesize, &mmap_flags);

		if (allocsize % hugepagesize != 0)
			allocsize += hugepagesize - (allocsize % hugepagesize);

		ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
				   PG_MMAP_FLAGS | mmap_flags, -1, 0);
		/* capture errno immediately; the elog below could clobber it */
		mmap_errno = errno;
		if (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED)
			elog(DEBUG1, "mmap(%zu) with MAP_HUGETLB failed, huge pages disabled: %m",
				 allocsize);
	}
#endif

	/* With huge_pages=try, silently fall back to ordinary pages on failure. */
	if (ptr == MAP_FAILED && huge_pages != HUGE_PAGES_ON)
	{
		/*
		 * Use the original size, not the rounded-up value, when falling back
		 * to non-huge pages.
		 */
		allocsize = *size;
		ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
				   PG_MMAP_FLAGS, -1, 0);
		mmap_errno = errno;
	}

	if (ptr == MAP_FAILED)
	{
		errno = mmap_errno;
		ereport(FATAL,
				(errmsg("could not map anonymous shared memory: %m"),
				 (mmap_errno == ENOMEM) ?
				 errhint("This error usually means that PostgreSQL's request "
						 "for a shared memory segment exceeded available memory, "
						 "swap space, or huge pages. To reduce the request size "
						 "(currently %zu bytes), reduce PostgreSQL's shared "
						 "memory usage, perhaps by reducing shared_buffers or "
						 "max_connections.",
						 *size) : 0));
	}

	/* Report the actual (possibly rounded-up) allocation size back. */
	*size = allocsize;
	return ptr;
}
564
565
/*
566
 * AnonymousShmemDetach --- detach from an anonymous mmap'd block
567
 * (called as an on_shmem_exit callback, hence funny argument list)
568
 */
569
static void
570
AnonymousShmemDetach(int status, Datum arg)
571
3.61k
{
572
  /* Release anonymous shared memory block, if any. */
573
3.61k
  if (AnonymousShmem != NULL)
574
3.61k
  {
575
3.61k
    if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
576
0
      elog(LOG, "munmap(%p, %zu) failed: %m",
577
3.61k
         AnonymousShmem, AnonymousShmemSize);
578
3.61k
    AnonymousShmem = NULL;
579
3.61k
  }
580
3.61k
}
581
582
/*
 * PGSharedMemoryCreate
 *
 * Create a shared memory segment of the given size and initialize its
 * standard header.  Also, register an on_shmem_exit callback to release
 * the storage.
 *
 * Dead Postgres segments pertinent to this DataDir are recycled if found, but
 * we do not fail upon collision with foreign shmem segments.  The idea here
 * is to detect and re-use keys that may have been assigned by a crashed
 * postmaster or backend.
 *
 * The port number is passed for possible use as a key (for SysV, we use
 * it to generate the starting shmem key).
 *
 * On return, *shim points at the SysV segment's header; the returned pointer
 * is either that same header (sysv mode) or the anonymous mmap'd block whose
 * first bytes are a copy of the header (mmap mode).
 */
PGShmemHeader *
PGSharedMemoryCreate(Size size, int port,
					 PGShmemHeader **shim)
{
	IpcMemoryKey NextShmemSegID;
	void	   *memAddress;
	PGShmemHeader *hdr;
	struct stat statbuf;
	Size		sysvsize;

	/* Complain if hugepages demanded but we can't possibly support them */
#if !defined(MAP_HUGETLB)
	if (huge_pages == HUGE_PAGES_ON)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("huge pages not supported on this platform")));
#endif

	/* Room for a header? */
	Assert(size > MAXALIGN(sizeof(PGShmemHeader)));

	if (shared_memory_type == SHMEM_TYPE_MMAP)
	{
		/* Real shared memory is anonymous mmap; note size may get rounded up */
		AnonymousShmem = CreateAnonymousSegment(&size);
		AnonymousShmemSize = size;

		/* Register on-exit routine to unmap the anonymous segment */
		on_shmem_exit(AnonymousShmemDetach, (Datum) 0);

		/* Now we need only allocate a minimal-sized SysV shmem block. */
		sysvsize = sizeof(PGShmemHeader);
	}
	else
		sysvsize = size;

	/* Make sure PGSharedMemoryAttach doesn't fail without need */
	UsedShmemSegAddr = NULL;

	/*
	 * Loop till we find a free IPC key.  Trust CreateDataDirLockFile() to
	 * ensure no more than one postmaster per data directory can enter this
	 * loop simultaneously.  (CreateDataDirLockFile() does not ensure that,
	 * but prefer fixing it over coping here.)
	 */
	NextShmemSegID = 1 + port * 1000;

	for (;;)
	{
		IpcMemoryId shmid;
		PGShmemHeader *oldhdr;
		IpcMemoryState state;

		/* Try to create new segment */
		memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
		if (memAddress)
			break;				/* successful create and attach */

		/* Check shared memory and possibly remove and recreate */

		/*
		 * shmget() failure is typically EACCES, hence SHMSTATE_FOREIGN.
		 * ENOENT, a narrow possibility, implies SHMSTATE_ENOENT, but one can
		 * safely treat SHMSTATE_ENOENT like SHMSTATE_FOREIGN.
		 */
		shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
		if (shmid < 0)
		{
			oldhdr = NULL;
			state = SHMSTATE_FOREIGN;
		}
		else
			state = PGSharedMemoryAttach(shmid, &oldhdr);

		switch (state)
		{
			case SHMSTATE_ANALYSIS_FAILURE:
			case SHMSTATE_ATTACHED:
				/* Segment (apparently) in live use: refuse to start. */
				ereport(FATAL,
						(errcode(ERRCODE_LOCK_FILE_EXISTS),
						 errmsg("pre-existing shared memory block (key %lu, ID %lu) is still in use",
								(unsigned long) NextShmemSegID,
								(unsigned long) shmid),
						 errhint("Terminate any old server processes associated with data directory \"%s\".",
								 DataDir)));
				break;
			case SHMSTATE_ENOENT:

				/*
				 * To our surprise, some other process deleted since our last
				 * InternalIpcMemoryCreate().  Moments earlier, we would have
				 * seen SHMSTATE_FOREIGN.  Try that same ID again.
				 */
				elog(LOG,
					 "shared memory block (key %lu, ID %lu) deleted during startup",
					 (unsigned long) NextShmemSegID,
					 (unsigned long) shmid);
				break;
			case SHMSTATE_FOREIGN:
				NextShmemSegID++;
				break;
			case SHMSTATE_UNATTACHED:

				/*
				 * The segment pertains to DataDir, and every process that had
				 * used it has died or detached.  Zap it, if possible, and any
				 * associated dynamic shared memory segments, as well.  This
				 * shouldn't fail, but if it does, assume the segment belongs
				 * to someone else after all, and try the next candidate.
				 * Otherwise, try again to create the segment.  That may fail
				 * if some other process creates the same shmem key before we
				 * do, in which case we'll try the next key.
				 */
				if (oldhdr->dsm_control != 0)
					dsm_cleanup_using_control_segment(oldhdr->dsm_control);
				if (shmctl(shmid, IPC_RMID, NULL) < 0)
					NextShmemSegID++;
				break;
		}

		/* Undo any attach made by PGSharedMemoryAttach above. */
		if (oldhdr && shmdt(oldhdr) < 0)
			elog(LOG, "shmdt(%p) failed: %m", oldhdr);
	}

	/* Initialize new segment. */
	hdr = (PGShmemHeader *) memAddress;
	hdr->creatorPID = getpid();
	hdr->magic = PGShmemMagic;
	hdr->dsm_control = 0;

	/* Fill in the data directory ID info, too */
	if (stat(DataDir, &statbuf) < 0)
		ereport(FATAL,
				(errcode_for_file_access(),
				 errmsg("could not stat data directory \"%s\": %m",
						DataDir)));
	hdr->device = statbuf.st_dev;
	hdr->inode = statbuf.st_ino;

	/*
	 * Initialize space allocation status for segment.
	 */
	hdr->totalsize = size;
	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
	*shim = hdr;

	/* Save info for possible future use */
	UsedShmemSegAddr = memAddress;
	UsedShmemSegID = (unsigned long) NextShmemSegID;

	/*
	 * If AnonymousShmem is NULL here, then we're not using anonymous shared
	 * memory, and should return a pointer to the System V shared memory
	 * block. Otherwise, the System V shared memory block is only a shim, and
	 * we must return a pointer to the real block.
	 */
	if (AnonymousShmem == NULL)
		return hdr;
	memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader));
	return (PGShmemHeader *) AnonymousShmem;
}
757
758
#ifdef EXEC_BACKEND
759
760
/*
761
 * PGSharedMemoryReAttach
762
 *
763
 * This is called during startup of a postmaster child process to re-attach to
764
 * an already existing shared memory segment.  This is needed only in the
765
 * EXEC_BACKEND case; otherwise postmaster children inherit the shared memory
766
 * segment attachment via fork().
767
 *
768
 * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
769
 * routine.  The caller must have already restored them to the postmaster's
770
 * values.
771
 */
772
void
773
PGSharedMemoryReAttach(void)
774
{
775
  IpcMemoryId shmid;
776
  PGShmemHeader *hdr;
777
  IpcMemoryState state;
778
  void     *origUsedShmemSegAddr = UsedShmemSegAddr;
779
780
  Assert(UsedShmemSegAddr != NULL);
781
  Assert(IsUnderPostmaster);
782
783
#ifdef __CYGWIN__
784
  /* cygipc (currently) appears to not detach on exec. */
785
  PGSharedMemoryDetach();
786
  UsedShmemSegAddr = origUsedShmemSegAddr;
787
#endif
788
789
  elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
790
  shmid = shmget(UsedShmemSegID, sizeof(PGShmemHeader), 0);
791
  if (shmid < 0)
792
    state = SHMSTATE_FOREIGN;
793
  else
794
    state = PGSharedMemoryAttach(shmid, &hdr);
795
  if (state != SHMSTATE_ATTACHED)
796
    elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
797
       (int) UsedShmemSegID, UsedShmemSegAddr);
798
  if (hdr != origUsedShmemSegAddr)
799
    elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
800
       hdr, origUsedShmemSegAddr);
801
  dsm_set_control_handle(hdr->dsm_control);
802
803
  UsedShmemSegAddr = hdr;   /* probably redundant */
804
}
805
806
/*
807
 * PGSharedMemoryNoReAttach
808
 *
809
 * This is called during startup of a postmaster child process when we choose
810
 * *not* to re-attach to the existing shared memory segment.  We must clean up
811
 * to leave things in the appropriate state.  This is not used in the non
812
 * EXEC_BACKEND case, either.
813
 *
814
 * The child process startup logic might or might not call PGSharedMemoryDetach
815
 * after this; make sure that it will be a no-op if called.
816
 *
817
 * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
818
 * routine.  The caller must have already restored them to the postmaster's
819
 * values.
820
 */
821
void
822
PGSharedMemoryNoReAttach(void)
823
{
824
  Assert(UsedShmemSegAddr != NULL);
825
  Assert(IsUnderPostmaster);
826
827
#ifdef __CYGWIN__
828
  /* cygipc (currently) appears to not detach on exec. */
829
  PGSharedMemoryDetach();
830
#endif
831
832
  /* For cleanliness, reset UsedShmemSegAddr to show we're not attached. */
833
  UsedShmemSegAddr = NULL;
834
  /* And the same for UsedShmemSegID. */
835
  UsedShmemSegID = 0;
836
}
837
838
#endif              /* EXEC_BACKEND */
839
840
/*
 * PGSharedMemoryDetach
 *
 * Detach from the shared memory segment, if still attached.  This is not
 * intended to be called explicitly by the process that originally created the
 * segment (it will have on_shmem_exit callback(s) registered to do that).
 * Rather, this is for subprocesses that have inherited an attachment and want
 * to get rid of it.
 *
 * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
 * routine, also AnonymousShmem and AnonymousShmemSize.
 *
 * Idempotent: both state variables are reset, so a second call is a no-op.
 */
void
PGSharedMemoryDetach(void)
{
	/* First, the SysV segment (or shim), if we're attached to one. */
	if (UsedShmemSegAddr != NULL)
	{
		if ((shmdt(UsedShmemSegAddr) < 0)
#if defined(EXEC_BACKEND) && defined(__CYGWIN__)
		/* Work-around for cygipc exec bug */
			&& shmdt(NULL) < 0
#endif
			)
			elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr);
		UsedShmemSegAddr = NULL;
	}

	/* Then, the anonymous mmap'd block, if any. */
	if (AnonymousShmem != NULL)
	{
		if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
			elog(LOG, "munmap(%p, %zu) failed: %m",
				 AnonymousShmem, AnonymousShmemSize);
		AnonymousShmem = NULL;
	}
}