YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/postgres/src/backend/storage/ipc/shm_toc.c
Coverage summary: every instrumented line in this file has an execution count of 0 (nothing in shm_toc.c was exercised by this run).

Source:
/*-------------------------------------------------------------------------
 *
 * shm_toc.c
 *    shared memory segment table of contents
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/backend/storage/ipc/shm_toc.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "port/atomics.h"
#include "storage/shm_toc.h"
#include "storage/spin.h"

typedef struct shm_toc_entry
{
  uint64    key;      /* Arbitrary identifier */
  Size      offset;   /* Offset, in bytes, from TOC start */
} shm_toc_entry;

struct shm_toc
{
  uint64    toc_magic;            /* Magic number identifying this TOC */
  slock_t   toc_mutex;            /* Spinlock for mutual exclusion */
  Size      toc_total_bytes;      /* Bytes managed by this TOC */
  Size      toc_allocated_bytes;  /* Bytes allocated of those managed */
  uint32    toc_nentry;           /* Number of entries in TOC */
  shm_toc_entry toc_entry[FLEXIBLE_ARRAY_MEMBER];
};

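For orientation, the segment layout these two structs imply, with TOC entries growing forward from the header and allocated chunks growing backward from the end (a rough sketch, not to scale; see shm_toc_allocate() below):

/*
 * +----------+---------------------+--------------+------------------+
 * | shm_toc  | toc_entry[0..n-1]   |  free space  | allocated chunks |
 * | header   | (grows forward)     |              | (grow backward)  |
 * +----------+---------------------+--------------+------------------+
 * start of segment                                     end of segment
 */
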
/*
 * Initialize a region of shared memory with a table of contents.
 */
shm_toc *
shm_toc_create(uint64 magic, void *address, Size nbytes)
{
  shm_toc    *toc = (shm_toc *) address;

  Assert(nbytes > offsetof(shm_toc, toc_entry));
  toc->toc_magic = magic;
  SpinLockInit(&toc->toc_mutex);

  /*
   * The alignment code in shm_toc_allocate() assumes that the starting
   * value is buffer-aligned.
   */
  toc->toc_total_bytes = BUFFERALIGN_DOWN(nbytes);
  toc->toc_allocated_bytes = 0;
  toc->toc_nentry = 0;

  return toc;
}

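A minimal sketch of the writer-side setup. EXAMPLE_MAGIC is a made-up constant, and in PostgreSQL the address would normally come from a dynamic shared memory segment (e.g. the result of dsm_segment_address()) rather than an arbitrary buffer:

/* Hedged sketch; EXAMPLE_MAGIC is a hypothetical, arbitrary identifier. */
#define EXAMPLE_MAGIC  UINT64CONST(0x8a2f5e31)

static shm_toc *
example_setup(void *seg_base, Size seg_size)
{
  /* seg_base would typically be dsm_segment_address() of a fresh segment */
  return shm_toc_create(EXAMPLE_MAGIC, seg_base, seg_size);
}
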
/*
 * Attach to an existing table of contents.  If the magic number found at
 * the target address doesn't match our expectations, return NULL.
 */
shm_toc *
shm_toc_attach(uint64 magic, void *address)
{
  shm_toc    *toc = (shm_toc *) address;

  if (toc->toc_magic != magic)
    return NULL;

  Assert(toc->toc_total_bytes >= toc->toc_allocated_bytes);
  Assert(toc->toc_total_bytes > offsetof(shm_toc, toc_entry));

  return toc;
}

/*
 * Allocate shared memory from a segment managed by a table of contents.
 *
 * This is not a full-blown allocator; there's no way to free memory.  It's
 * just a way of dividing a single physical shared memory segment into logical
 * chunks that may be used for different purposes.
 *
 * We allocate backwards from the end of the segment, so that the TOC entries
 * can grow forward from the start of the segment.
 */
void *
shm_toc_allocate(shm_toc *toc, Size nbytes)
{
  volatile shm_toc *vtoc = toc;
  Size    total_bytes;
  Size    allocated_bytes;
  Size    nentry;
  Size    toc_bytes;

  /*
   * Make sure request is well-aligned.  XXX: MAXALIGN is not enough,
   * because atomic ops might need a wider alignment.  We don't have a
   * proper definition for the minimum to make atomic ops safe, but
   * BUFFERALIGN ought to be enough.
   */
  nbytes = BUFFERALIGN(nbytes);

  SpinLockAcquire(&toc->toc_mutex);

  total_bytes = vtoc->toc_total_bytes;
  allocated_bytes = vtoc->toc_allocated_bytes;
  nentry = vtoc->toc_nentry;
  toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
    + allocated_bytes;

  /* Check for memory exhaustion and overflow. */
  if (toc_bytes + nbytes > total_bytes || toc_bytes + nbytes < toc_bytes)
  {
    SpinLockRelease(&toc->toc_mutex);
    ereport(ERROR,
        (errcode(ERRCODE_OUT_OF_MEMORY),
         errmsg("out of shared memory")));
  }
  vtoc->toc_allocated_bytes += nbytes;

  SpinLockRelease(&toc->toc_mutex);

  return ((char *) toc) + (total_bytes - allocated_bytes - nbytes);
}

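To make the backward allocation concrete, a worked example assuming the usual ALIGNOF_BUFFER of 32, a TOC created as above with toc_total_bytes = 8192, and nothing yet allocated:

char *base = (char *) toc;
void *a = shm_toc_allocate(toc, 100);  /* request rounded to BUFFERALIGN(100) = 128;
                                        * returns base + 8192 - 128 = base + 8064 */
void *b = shm_toc_allocate(toc, 64);   /* returns base + 8192 - 128 - 64 = base + 8000 */
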
/*
 * Return the number of bytes that can still be allocated.
 */
Size
shm_toc_freespace(shm_toc *toc)
{
  volatile shm_toc *vtoc = toc;
  Size    total_bytes;
  Size    allocated_bytes;
  Size    nentry;
  Size    toc_bytes;

  SpinLockAcquire(&toc->toc_mutex);
  total_bytes = vtoc->toc_total_bytes;
  allocated_bytes = vtoc->toc_allocated_bytes;
  nentry = vtoc->toc_nentry;
  SpinLockRelease(&toc->toc_mutex);

  toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry);
  Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes);
  return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes));
}

/*
 * Insert a TOC entry.
 *
 * The idea here is that the process setting up the shared memory segment will
 * register the addresses of data structures within the segment using this
 * function.  Each data structure will be identified using a 64-bit key, which
 * is assumed to be a well-known or discoverable integer.  Other processes
 * accessing the shared memory segment can pass the same key to
 * shm_toc_lookup() to discover the addresses of those data structures.
 *
 * Since the shared memory segment may be mapped at different addresses within
 * different backends, we store relative rather than absolute pointers.
 *
 * This won't scale well to a large number of keys.  Hopefully, that isn't
 * necessary; if it proves to be, we might need to provide a more sophisticated
 * data structure here.  But the real idea here is just to give someone mapping
 * a dynamic shared memory the ability to find the bare minimum number of
 * pointers that they need to bootstrap.  If you're storing a lot of stuff in
 * the TOC, you're doing it wrong.
 */
void
shm_toc_insert(shm_toc *toc, uint64 key, void *address)
{
  volatile shm_toc *vtoc = toc;
  Size    total_bytes;
  Size    allocated_bytes;
  Size    nentry;
  Size    toc_bytes;
  Size    offset;

  /* Relativize pointer. */
  Assert(address > (void *) toc);
  offset = ((char *) address) - (char *) toc;

  SpinLockAcquire(&toc->toc_mutex);

  total_bytes = vtoc->toc_total_bytes;
  allocated_bytes = vtoc->toc_allocated_bytes;
  nentry = vtoc->toc_nentry;
  toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
    + allocated_bytes;

  /* Check for memory exhaustion and overflow. */
  if (toc_bytes + sizeof(shm_toc_entry) > total_bytes ||
    toc_bytes + sizeof(shm_toc_entry) < toc_bytes ||
    nentry >= PG_UINT32_MAX)
  {
    SpinLockRelease(&toc->toc_mutex);
    ereport(ERROR,
        (errcode(ERRCODE_OUT_OF_MEMORY),
         errmsg("out of shared memory")));
  }

  Assert(offset < total_bytes);
  vtoc->toc_entry[nentry].key = key;
  vtoc->toc_entry[nentry].offset = offset;

  /*
   * By placing a write barrier after filling in the entry and before
   * updating the number of entries, we make it safe to read the TOC
   * unlocked.
   */
  pg_write_barrier();

  vtoc->toc_nentry++;

  SpinLockRelease(&toc->toc_mutex);
}

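A sketch of how the writer registers a structure. EXAMPLE_KEY_STATE and ExampleState are hypothetical names standing in for a well-known key and a shared struct that writer and readers agree on:

/* Hypothetical key and struct, known to both sides. */
#define EXAMPLE_KEY_STATE  UINT64CONST(0)

typedef struct ExampleState
{
  int   counter;
} ExampleState;

ExampleState *state = shm_toc_allocate(toc, sizeof(ExampleState));
state->counter = 0;
shm_toc_insert(toc, EXAMPLE_KEY_STATE, state);  /* stores an offset, not the pointer */
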
/*
 * Look up a TOC entry.
 *
 * If the key is not found, returns NULL if noError is true, otherwise
 * throws elog(ERROR).
 *
 * Unlike the other functions in this file, this operation acquires no lock;
 * it uses only barriers.  It probably wouldn't hurt concurrency very much even
 * if it did get a lock, but since it's reasonably likely that a group of
 * worker processes could each read a series of entries from the same TOC
 * right around the same time, there seems to be some value in avoiding it.
 */
void *
shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
{
  uint32    nentry;
  uint32    i;

  /*
   * Read the number of entries before we examine any entry.  We assume that
   * reading a uint32 is atomic.
   */
  nentry = toc->toc_nentry;
  pg_read_barrier();

  /* Now search for a matching entry. */
  for (i = 0; i < nentry; ++i)
  {
    if (toc->toc_entry[i].key == key)
      return ((char *) toc) + toc->toc_entry[i].offset;
  }

  /* No matching entry was found. */
  if (!noError)
    elog(ERROR, "could not find key " UINT64_FORMAT " in shm TOC at %p",
       key, toc);
  return NULL;
}

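The matching reader side, continuing the hypothetical EXAMPLE_MAGIC / EXAMPLE_KEY_STATE names from the sketches above. A process that has mapped the same segment, possibly at a different address, re-derives a usable pointer from the stored offset:

shm_toc *toc = shm_toc_attach(EXAMPLE_MAGIC, seg_base);

if (toc == NULL)
  elog(ERROR, "invalid magic number in shared memory segment");

/* noError = false, so a missing key raises elog(ERROR) for us */
ExampleState *state = shm_toc_lookup(toc, EXAMPLE_KEY_STATE, false);
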
/*
 * Estimate how much shared memory will be required to store a TOC and its
 * dependent data structures.
 */
Size
shm_toc_estimate(shm_toc_estimator *e)
{
  Size    sz;

  sz = offsetof(shm_toc, toc_entry);
  sz = add_size(sz, mul_size(e->number_of_keys, sizeof(shm_toc_entry)));
  sz = add_size(sz, e->space_for_chunks);

  return BUFFERALIGN(sz);
}
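
The estimator is normally driven through the companion macros in storage/shm_toc.h (shm_toc_initialize_estimator, shm_toc_estimate_chunk, shm_toc_estimate_keys). A sketch of the usual estimate-then-create flow, reusing the hypothetical ExampleState from above:

shm_toc_estimator e;
Size    segsize;

shm_toc_initialize_estimator(&e);
shm_toc_estimate_chunk(&e, sizeof(ExampleState));  /* one chunk per planned allocation */
shm_toc_estimate_keys(&e, 1);                      /* one TOC entry per chunk */
segsize = shm_toc_estimate(&e);
/* ... obtain a segment of at least segsize bytes, then shm_toc_create() ... */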