/Users/deen/code/yugabyte-db/src/postgres/src/backend/access/gin/ginentrypage.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * ginentrypage.c |
4 | | * routines for handling GIN entry tree pages. |
5 | | * |
6 | | * |
7 | | * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group |
8 | | * Portions Copyright (c) 1994, Regents of the University of California |
9 | | * |
10 | | * IDENTIFICATION |
11 | | * src/backend/access/gin/ginentrypage.c |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | |
15 | | #include "postgres.h" |
16 | | |
17 | | #include "access/gin_private.h" |
18 | | #include "access/ginxlog.h" |
19 | | #include "access/xloginsert.h" |
20 | | #include "miscadmin.h" |
21 | | #include "utils/rel.h" |
22 | | |
23 | | static void entrySplitPage(GinBtree btree, Buffer origbuf, |
24 | | GinBtreeStack *stack, |
25 | | GinBtreeEntryInsertData *insertData, |
26 | | BlockNumber updateblkno, |
27 | | Page *newlpage, Page *newrpage); |
28 | | |
29 | | /* |
30 | | * Form a tuple for entry tree. |
31 | | * |
32 | | * If the tuple would be too big to be stored, function throws a suitable |
33 | | * error if errorTooBig is true, or returns NULL if errorTooBig is false. |
34 | | * |
35 | | * See src/backend/access/gin/README for a description of the index tuple |
36 | | * format that is being built here. We build on the assumption that we |
37 | | * are making a leaf-level key entry containing a posting list of nipd items. |
38 | | * If the caller is actually trying to make a posting-tree entry, non-leaf |
39 | | * entry, or pending-list entry, it should pass dataSize = 0 and then overwrite |
40 | | * the t_tid fields as necessary. In any case, 'data' can be NULL to skip |
41 | | * filling in the posting list; the caller is responsible for filling it |
42 | | * afterwards if data = NULL and nipd > 0. |
43 | | */ |
44 | | IndexTuple |
45 | | GinFormTuple(GinState *ginstate, |
46 | | OffsetNumber attnum, Datum key, GinNullCategory category, |
47 | | Pointer data, Size dataSize, int nipd, |
48 | | bool errorTooBig) |
49 | 114 | { |
50 | 114 | Datum datums[2]; |
51 | 114 | bool isnull[2]; |
52 | 114 | IndexTuple itup; |
53 | 114 | uint32 newsize; |
54 | | |
55 | | /* Build the basic tuple: optional column number, plus key datum */ |
56 | 114 | if (ginstate->oneCol) |
57 | 85 | { |
58 | 85 | datums[0] = key; |
59 | 85 | isnull[0] = (category != GIN_CAT_NORM_KEY); |
60 | 85 | } |
61 | 29 | else |
62 | 29 | { |
63 | 29 | datums[0] = UInt16GetDatum(attnum); |
64 | 29 | isnull[0] = false; |
65 | 29 | datums[1] = key; |
66 | 29 | isnull[1] = (category != GIN_CAT_NORM_KEY); |
67 | 29 | } |
68 | | |
69 | 114 | itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull); |
70 | | |
71 | | /* |
72 | | * Determine and store offset to the posting list, making sure there is |
73 | | * room for the category byte if needed. |
74 | | * |
75 | | * Note: because index_form_tuple MAXALIGNs the tuple size, there may well |
76 | | * be some wasted pad space. Is it worth recomputing the data length to |
77 | | * prevent that? That would also allow us to Assert that the real data |
78 | | * doesn't overlap the GinNullCategory byte, which this code currently |
79 | | * takes on faith. |
80 | | */ |
81 | 114 | newsize = IndexTupleSize(itup); |
82 | | |
83 | 114 | if (IndexTupleHasNulls(itup)) |
84 | 0 | { |
85 | 0 | uint32 minsize; |
86 | |
|
87 | 0 | Assert(category != GIN_CAT_NORM_KEY); |
88 | 0 | minsize = GinCategoryOffset(itup, ginstate) + sizeof(GinNullCategory); |
89 | 0 | newsize = Max(newsize, minsize); |
90 | 0 | } |
91 | | |
92 | 114 | newsize = SHORTALIGN(newsize); |
93 | | |
94 | 114 | GinSetPostingOffset(itup, newsize); |
95 | 114 | GinSetNPosting(itup, nipd); |
96 | | |
97 | | /* |
98 | | * Add space needed for posting list, if any. Then check that the tuple |
99 | | * won't be too big to store. |
100 | | */ |
101 | 0 | newsize += dataSize; |
102 | | |
103 | 114 | newsize = MAXALIGN(newsize); |
104 | | |
105 | 114 | if (newsize > GinMaxItemSize) |
106 | 0 | { |
107 | 0 | if (errorTooBig) |
108 | 0 | ereport(ERROR, |
109 | 0 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
110 | 0 | errmsg("index row size %zu exceeds maximum %zu for index \"%s\"", |
111 | 0 | (Size) newsize, (Size) GinMaxItemSize, |
112 | 0 | RelationGetRelationName(ginstate->index)))); |
113 | 0 | pfree(itup); |
114 | 0 | return NULL; |
115 | 0 | } |
116 | | |
117 | | /* |
118 | | * Resize tuple if needed |
119 | | */ |
120 | 114 | if (newsize != IndexTupleSize(itup)) |
121 | 73 | { |
122 | 73 | itup = repalloc(itup, newsize); |
123 | | |
124 | | /* |
125 | | * PostgreSQL 9.3 and earlier did not clear this new space, so we |
126 | | * might find uninitialized padding when reading tuples from disk. |
127 | | */ |
128 | 73 | memset((char *) itup + IndexTupleSize(itup), |
129 | 73 | 0, newsize - IndexTupleSize(itup)); |
130 | | /* set new size in tuple header */ |
131 | 73 | itup->t_info &= ~INDEX_SIZE_MASK; |
132 | 73 | itup->t_info |= newsize; |
133 | 73 | } |
134 | | |
135 | | /* |
136 | | * Copy in the posting list, if provided |
137 | | */ |
138 | 114 | if (data) |
139 | 73 | { |
140 | 73 | char *ptr = GinGetPosting(itup); |
141 | | |
142 | 0 | memcpy(ptr, data, dataSize); |
143 | 73 | } |
144 | | |
145 | | /* |
146 | | * Insert category byte, if needed |
147 | | */ |
148 | 114 | if (category != GIN_CAT_NORM_KEY) |
149 | 0 | { |
150 | 0 | Assert(IndexTupleHasNulls(itup)); |
151 | 0 | GinSetNullCategory(itup, ginstate, category); |
152 | 0 | } |
153 | 114 | return itup; |
154 | 114 | } |
155 | | |
156 | | /* |
157 | | * Read item pointers from leaf entry tuple. |
158 | | * |
159 | | * Returns a palloc'd array of ItemPointers. The number of items is returned |
160 | | * in *nitems. |
161 | | */ |
162 | | ItemPointer |
163 | | ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup, |
164 | | int *nitems) |
165 | 61 | { |
166 | 61 | Pointer ptr = GinGetPosting(itup); |
167 | 61 | int nipd = GinGetNPosting(itup); |
168 | 61 | ItemPointer ipd; |
169 | 61 | int ndecoded; |
170 | | |
171 | 61 | if (GinItupIsCompressed(itup)) |
172 | 61 | { |
173 | 61 | if (nipd > 0) |
174 | 61 | { |
175 | 61 | ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded); |
176 | 61 | if (nipd != ndecoded) |
177 | 0 | elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded", |
178 | 61 | nipd, ndecoded); |
179 | 61 | } |
180 | 0 | else |
181 | 0 | { |
182 | 0 | ipd = palloc(0); |
183 | 0 | } |
184 | 61 | } |
185 | 0 | else |
186 | 0 | { |
187 | 0 | ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd); |
188 | 0 | memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd); |
189 | 0 | } |
190 | 61 | *nitems = nipd; |
191 | 61 | return ipd; |
192 | 61 | } |
193 | | |
194 | | /* |
195 | | * Form a non-leaf entry tuple by copying the key data from the given tuple, |
196 | | * which can be either a leaf or non-leaf entry tuple. |
197 | | * |
198 | | * Any posting list in the source tuple is not copied. The specified child |
199 | | * block number is inserted into t_tid. |
200 | | */ |
201 | | static IndexTuple |
202 | | GinFormInteriorTuple(IndexTuple itup, Page page, BlockNumber childblk) |
203 | 0 | { |
204 | 0 | IndexTuple nitup; |
205 | |
|
206 | 0 | if (GinPageIsLeaf(page) && !GinIsPostingTree(itup)) |
207 | 0 | { |
208 | | /* Tuple contains a posting list, just copy stuff before that */ |
209 | 0 | uint32 origsize = GinGetPostingOffset(itup); |
210 | | |
211 | 0 | origsize = MAXALIGN(origsize); |
212 | 0 | nitup = (IndexTuple) palloc(origsize); |
213 | 0 | memcpy(nitup, itup, origsize); |
214 | | /* ... be sure to fix the size header field ... */ |
215 | 0 | nitup->t_info &= ~INDEX_SIZE_MASK; |
216 | 0 | nitup->t_info |= origsize; |
217 | 0 | } |
218 | 0 | else |
219 | 0 | { |
220 | | /* Copy the tuple as-is */ |
221 | 0 | nitup = (IndexTuple) palloc(IndexTupleSize(itup)); |
222 | 0 | memcpy(nitup, itup, IndexTupleSize(itup)); |
223 | 0 | } |
224 | | |
225 | | /* Now insert the correct downlink */ |
226 | 0 | GinSetDownlink(nitup, childblk); |
227 | | |
228 | 0 | return nitup; |
229 | 0 | } |
230 | | |
231 | | /* |
232 | | * Entry tree is a "static", ie tuple never deletes from it, |
233 | | * so we don't use right bound, we use rightmost key instead. |
234 | | */ |
235 | | static IndexTuple |
236 | | getRightMostTuple(Page page) |
237 | 0 | { |
238 | 0 | OffsetNumber maxoff = PageGetMaxOffsetNumber(page); |
239 | |
|
240 | 0 | return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff)); |
241 | 0 | } |
242 | | |
243 | | static bool |
244 | | entryIsMoveRight(GinBtree btree, Page page) |
245 | 0 | { |
246 | 0 | IndexTuple itup; |
247 | 0 | OffsetNumber attnum; |
248 | 0 | Datum key; |
249 | 0 | GinNullCategory category; |
250 | |
|
251 | 0 | if (GinPageRightMost(page)) |
252 | 0 | return false; |
253 | | |
254 | 0 | itup = getRightMostTuple(page); |
255 | 0 | attnum = gintuple_get_attrnum(btree->ginstate, itup); |
256 | 0 | key = gintuple_get_key(btree->ginstate, itup, &category); |
257 | |
|
258 | 0 | if (ginCompareAttEntries(btree->ginstate, |
259 | 0 | btree->entryAttnum, btree->entryKey, btree->entryCategory, |
260 | 0 | attnum, key, category) > 0) |
261 | 0 | return true; |
262 | | |
263 | 0 | return false; |
264 | 0 | } |
265 | | |
266 | | /* |
267 | | * Find correct tuple in non-leaf page. It supposed that |
268 | | * page correctly chosen and searching value SHOULD be on page |
269 | | */ |
270 | | static BlockNumber |
271 | | entryLocateEntry(GinBtree btree, GinBtreeStack *stack) |
272 | 0 | { |
273 | 0 | OffsetNumber low, |
274 | 0 | high, |
275 | 0 | maxoff; |
276 | 0 | IndexTuple itup = NULL; |
277 | 0 | int result; |
278 | 0 | Page page = BufferGetPage(stack->buffer); |
279 | | |
280 | 0 | Assert(!GinPageIsLeaf(page)); |
281 | 0 | Assert(!GinPageIsData(page)); |
282 | | |
283 | 0 | if (btree->fullScan) |
284 | 0 | { |
285 | 0 | stack->off = FirstOffsetNumber; |
286 | 0 | stack->predictNumber *= PageGetMaxOffsetNumber(page); |
287 | 0 | return btree->getLeftMostChild(btree, page); |
288 | 0 | } |
289 | | |
290 | 0 | low = FirstOffsetNumber; |
291 | 0 | maxoff = high = PageGetMaxOffsetNumber(page); |
292 | 0 | Assert(high >= low); |
293 | | |
294 | 0 | high++; |
295 | |
|
296 | 0 | while (high > low) |
297 | 0 | { |
298 | 0 | OffsetNumber mid = low + ((high - low) / 2); |
299 | |
|
300 | 0 | if (mid == maxoff && GinPageRightMost(page)) |
301 | 0 | { |
302 | | /* Right infinity */ |
303 | 0 | result = -1; |
304 | 0 | } |
305 | 0 | else |
306 | 0 | { |
307 | 0 | OffsetNumber attnum; |
308 | 0 | Datum key; |
309 | 0 | GinNullCategory category; |
310 | |
|
311 | 0 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); |
312 | 0 | attnum = gintuple_get_attrnum(btree->ginstate, itup); |
313 | 0 | key = gintuple_get_key(btree->ginstate, itup, &category); |
314 | 0 | result = ginCompareAttEntries(btree->ginstate, |
315 | 0 | btree->entryAttnum, |
316 | 0 | btree->entryKey, |
317 | 0 | btree->entryCategory, |
318 | 0 | attnum, key, category); |
319 | 0 | } |
320 | | |
321 | 0 | if (result == 0) |
322 | 0 | { |
323 | 0 | stack->off = mid; |
324 | 0 | Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO); |
325 | 0 | return GinGetDownlink(itup); |
326 | 0 | } |
327 | 0 | else if (result > 0) |
328 | 0 | low = mid + 1; |
329 | 0 | else |
330 | 0 | high = mid; |
331 | 0 | } |
332 | | |
333 | 0 | Assert(high >= FirstOffsetNumber && high <= maxoff); |
334 | | |
335 | 0 | stack->off = high; |
336 | 0 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high)); |
337 | 0 | Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO); |
338 | 0 | return GinGetDownlink(itup); |
339 | 0 | } |
340 | | |
341 | | /* |
342 | | * Searches correct position for value on leaf page. |
343 | | * Page should be correctly chosen. |
344 | | * Returns true if value found on page. |
345 | | */ |
346 | | static bool |
347 | | entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) |
348 | 127 | { |
349 | 127 | Page page = BufferGetPage(stack->buffer); |
350 | 0 | OffsetNumber low, |
351 | 127 | high; |
352 | | |
353 | 127 | Assert(GinPageIsLeaf(page)); |
354 | 127 | Assert(!GinPageIsData(page)); |
355 | | |
356 | 127 | if (btree->fullScan) |
357 | 0 | { |
358 | 0 | stack->off = FirstOffsetNumber; |
359 | 0 | return true; |
360 | 0 | } |
361 | | |
362 | 127 | low = FirstOffsetNumber; |
363 | 127 | high = PageGetMaxOffsetNumber(page); |
364 | | |
365 | 127 | if (high < low) |
366 | 14 | { |
367 | 14 | stack->off = FirstOffsetNumber; |
368 | 14 | return false; |
369 | 14 | } |
370 | | |
371 | 113 | high++; |
372 | | |
373 | 311 | while (high > low) |
374 | 239 | { |
375 | 239 | OffsetNumber mid = low + ((high - low) / 2); |
376 | 239 | IndexTuple itup; |
377 | 239 | OffsetNumber attnum; |
378 | 239 | Datum key; |
379 | 239 | GinNullCategory category; |
380 | 239 | int result; |
381 | | |
382 | 239 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); |
383 | 0 | attnum = gintuple_get_attrnum(btree->ginstate, itup); |
384 | 239 | key = gintuple_get_key(btree->ginstate, itup, &category); |
385 | 239 | result = ginCompareAttEntries(btree->ginstate, |
386 | 239 | btree->entryAttnum, |
387 | 239 | btree->entryKey, |
388 | 239 | btree->entryCategory, |
389 | 239 | attnum, key, category); |
390 | 239 | if (result == 0) |
391 | 41 | { |
392 | 41 | stack->off = mid; |
393 | 41 | return true; |
394 | 41 | } |
395 | 198 | else if (result > 0) |
396 | 129 | low = mid + 1; |
397 | 69 | else |
398 | 69 | high = mid; |
399 | 239 | } |
400 | | |
401 | 72 | stack->off = high; |
402 | 72 | return false; |
403 | 113 | } |
404 | | |
405 | | static OffsetNumber |
406 | | entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) |
407 | 0 | { |
408 | 0 | OffsetNumber i, |
409 | 0 | maxoff = PageGetMaxOffsetNumber(page); |
410 | 0 | IndexTuple itup; |
411 | |
|
412 | 0 | Assert(!GinPageIsLeaf(page)); |
413 | 0 | Assert(!GinPageIsData(page)); |
414 | | |
415 | | /* if page isn't changed, we returns storedOff */ |
416 | 0 | if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) |
417 | 0 | { |
418 | 0 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff)); |
419 | 0 | if (GinGetDownlink(itup) == blkno) |
420 | 0 | return storedOff; |
421 | | |
422 | | /* |
423 | | * we hope, that needed pointer goes to right. It's true if there |
424 | | * wasn't a deletion |
425 | | */ |
426 | 0 | for (i = storedOff + 1; i <= maxoff; i++) |
427 | 0 | { |
428 | 0 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); |
429 | 0 | if (GinGetDownlink(itup) == blkno) |
430 | 0 | return i; |
431 | 0 | } |
432 | 0 | maxoff = storedOff - 1; |
433 | 0 | } |
434 | | |
435 | | /* last chance */ |
436 | 0 | for (i = FirstOffsetNumber; i <= maxoff; i++) |
437 | 0 | { |
438 | 0 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); |
439 | 0 | if (GinGetDownlink(itup) == blkno) |
440 | 0 | return i; |
441 | 0 | } |
442 | | |
443 | 0 | return InvalidOffsetNumber; |
444 | 0 | } |
445 | | |
446 | | static BlockNumber |
447 | | entryGetLeftMostPage(GinBtree btree, Page page) |
448 | 0 | { |
449 | 0 | IndexTuple itup; |
450 | |
|
451 | 0 | Assert(!GinPageIsLeaf(page)); |
452 | 0 | Assert(!GinPageIsData(page)); |
453 | 0 | Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); |
454 | | |
455 | 0 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); |
456 | 0 | return GinGetDownlink(itup); |
457 | 0 | } |
458 | | |
459 | | static bool |
460 | | entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off, |
461 | | GinBtreeEntryInsertData *insertData) |
462 | 73 | { |
463 | 73 | Size releasedsz = 0; |
464 | 73 | Size addedsz; |
465 | 73 | Page page = BufferGetPage(buf); |
466 | | |
467 | 73 | Assert(insertData->entry); |
468 | 73 | Assert(!GinPageIsData(page)); |
469 | | |
470 | 73 | if (insertData->isDelete) |
471 | 0 | { |
472 | 0 | IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); |
473 | | |
474 | 0 | releasedsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); |
475 | 0 | } |
476 | | |
477 | 73 | addedsz = MAXALIGN(IndexTupleSize(insertData->entry)) + sizeof(ItemIdData); |
478 | | |
479 | 73 | if (PageGetFreeSpace(page) + releasedsz >= addedsz) |
480 | 73 | return true; |
481 | | |
482 | 0 | return false; |
483 | 73 | } |
484 | | |
485 | | /* |
486 | | * Delete tuple on leaf page if tuples existed and we |
487 | | * should update it, update old child blkno to new right page |
488 | | * if child split occurred |
489 | | */ |
490 | | static void |
491 | | entryPreparePage(GinBtree btree, Page page, OffsetNumber off, |
492 | | GinBtreeEntryInsertData *insertData, BlockNumber updateblkno) |
493 | 73 | { |
494 | 73 | Assert(insertData->entry); |
495 | 73 | Assert(!GinPageIsData(page)); |
496 | | |
497 | 73 | if (insertData->isDelete) |
498 | 0 | { |
499 | 0 | Assert(GinPageIsLeaf(page)); |
500 | 0 | PageIndexTupleDelete(page, off); |
501 | 0 | } |
502 | | |
503 | 73 | if (!GinPageIsLeaf(page) && updateblkno != 0 InvalidBlockNumber0 ) |
504 | 0 | { |
505 | 0 | IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); |
506 | | |
507 | 0 | GinSetDownlink(itup, updateblkno); |
508 | 0 | } |
509 | 73 | } |
510 | | |
511 | | /* |
512 | | * Prepare to insert data on an entry page. |
513 | | * |
514 | | * If it will fit, return GPTP_INSERT after doing whatever setup is needed |
515 | | * before we enter the insertion critical section. *ptp_workspace can be |
516 | | * set to pass information along to the execPlaceToPage function. |
517 | | * |
518 | | * If it won't fit, perform a page split and return two temporary page |
519 | | * images into *newlpage and *newrpage, with result GPTP_SPLIT. |
520 | | * |
521 | | * In neither case should the given page buffer be modified here. |
522 | | * |
523 | | * Note: on insertion to an internal node, in addition to inserting the given |
524 | | * item, the downlink of the existing item at stack->off will be updated to |
525 | | * point to updateblkno. |
526 | | */ |
527 | | static GinPlaceToPageRC |
528 | | entryBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, |
529 | | void *insertPayload, BlockNumber updateblkno, |
530 | | void **ptp_workspace, |
531 | | Page *newlpage, Page *newrpage) |
532 | 73 | { |
533 | 73 | GinBtreeEntryInsertData *insertData = insertPayload; |
534 | 73 | OffsetNumber off = stack->off; |
535 | | |
536 | | /* If it doesn't fit, deal with split case */ |
537 | 73 | if (!entryIsEnoughSpace(btree, buf, off, insertData)) |
538 | 0 | { |
539 | 0 | entrySplitPage(btree, buf, stack, insertData, updateblkno, |
540 | 0 | newlpage, newrpage); |
541 | 0 | return GPTP_SPLIT; |
542 | 0 | } |
543 | | |
544 | | /* Else, we're ready to proceed with insertion */ |
545 | 73 | return GPTP_INSERT; |
546 | 73 | } |
547 | | |
548 | | /* |
549 | | * Perform data insertion after beginPlaceToPage has decided it will fit. |
550 | | * |
551 | | * This is invoked within a critical section, and XLOG record creation (if |
552 | | * needed) is already started. The target buffer is registered in slot 0. |
553 | | */ |
554 | | static void |
555 | | entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, |
556 | | void *insertPayload, BlockNumber updateblkno, |
557 | | void *ptp_workspace) |
558 | 73 | { |
559 | 73 | GinBtreeEntryInsertData *insertData = insertPayload; |
560 | 73 | Page page = BufferGetPage(buf); |
561 | 0 | OffsetNumber off = stack->off; |
562 | 73 | OffsetNumber placed; |
563 | | |
564 | 73 | entryPreparePage(btree, page, off, insertData, updateblkno); |
565 | | |
566 | 73 | placed = PageAddItem(page, |
567 | 73 | (Item) insertData->entry, |
568 | 73 | IndexTupleSize(insertData->entry), |
569 | 73 | off, false, false); |
570 | 73 | if (placed != off) |
571 | 0 | elog(ERROR, "failed to add item to index page in \"%s\"", |
572 | 73 | RelationGetRelationName(btree->index)); |
573 | | |
574 | 73 | if (RelationNeedsWAL(btree->index)) |
575 | 0 | { |
576 | | /* |
577 | | * This must be static, because it has to survive until XLogInsert, |
578 | | * and we can't palloc here. Ugly, but the XLogInsert infrastructure |
579 | | * isn't reentrant anyway. |
580 | | */ |
581 | 0 | static ginxlogInsertEntry data; |
582 | |
|
583 | 0 | data.isDelete = insertData->isDelete; |
584 | 0 | data.offset = off; |
585 | |
|
586 | 0 | XLogRegisterBufData(0, (char *) &data, |
587 | 0 | offsetof(ginxlogInsertEntry, tuple)); |
588 | 0 | XLogRegisterBufData(0, (char *) insertData->entry, |
589 | 0 | IndexTupleSize(insertData->entry)); |
590 | 0 | } |
591 | 73 | } |
592 | | |
593 | | /* |
594 | | * Split entry page and insert new data. |
595 | | * |
596 | | * Returns new temp pages to *newlpage and *newrpage. |
597 | | * The original buffer is left untouched. |
598 | | */ |
599 | | static void |
600 | | entrySplitPage(GinBtree btree, Buffer origbuf, |
601 | | GinBtreeStack *stack, |
602 | | GinBtreeEntryInsertData *insertData, |
603 | | BlockNumber updateblkno, |
604 | | Page *newlpage, Page *newrpage) |
605 | 0 | { |
606 | 0 | OffsetNumber off = stack->off; |
607 | 0 | OffsetNumber i, |
608 | 0 | maxoff, |
609 | 0 | separator = InvalidOffsetNumber; |
610 | 0 | Size totalsize = 0; |
611 | 0 | Size lsize = 0, |
612 | 0 | size; |
613 | 0 | char *ptr; |
614 | 0 | IndexTuple itup; |
615 | 0 | Page page; |
616 | 0 | Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf)); |
617 | 0 | Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf)); |
618 | 0 | Size pageSize = PageGetPageSize(lpage); |
619 | 0 | PGAlignedBlock tupstore[2]; /* could need 2 pages' worth of tuples */ |
620 | |
|
621 | 0 | entryPreparePage(btree, lpage, off, insertData, updateblkno); |
622 | | |
623 | | /* |
624 | | * First, append all the existing tuples and the new tuple we're inserting |
625 | | * one after another in a temporary workspace. |
626 | | */ |
627 | 0 | maxoff = PageGetMaxOffsetNumber(lpage); |
628 | 0 | ptr = tupstore[0].data; |
629 | 0 | for (i = FirstOffsetNumber; i <= maxoff; i++) |
630 | 0 | { |
631 | 0 | if (i == off) |
632 | 0 | { |
633 | 0 | size = MAXALIGN(IndexTupleSize(insertData->entry)); |
634 | 0 | memcpy(ptr, insertData->entry, size); |
635 | 0 | ptr += size; |
636 | 0 | totalsize += size + sizeof(ItemIdData); |
637 | 0 | } |
638 | |
|
639 | 0 | itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i)); |
640 | 0 | size = MAXALIGN(IndexTupleSize(itup)); |
641 | 0 | memcpy(ptr, itup, size); |
642 | 0 | ptr += size; |
643 | 0 | totalsize += size + sizeof(ItemIdData); |
644 | 0 | } |
645 | |
|
646 | 0 | if (off == maxoff + 1) |
647 | 0 | { |
648 | 0 | size = MAXALIGN(IndexTupleSize(insertData->entry)); |
649 | 0 | memcpy(ptr, insertData->entry, size); |
650 | 0 | ptr += size; |
651 | 0 | totalsize += size + sizeof(ItemIdData); |
652 | 0 | } |
653 | | |
654 | | /* |
655 | | * Initialize the left and right pages, and copy all the tuples back to |
656 | | * them. |
657 | | */ |
658 | 0 | GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); |
659 | 0 | GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); |
660 | | |
661 | 0 | ptr = tupstore[0].data; |
662 | 0 | maxoff++; |
663 | 0 | lsize = 0; |
664 | |
|
665 | 0 | page = lpage; |
666 | 0 | for (i = FirstOffsetNumber; i <= maxoff; i++) |
667 | 0 | { |
668 | 0 | itup = (IndexTuple) ptr; |
669 | | |
670 | | /* |
671 | | * Decide where to split. We try to equalize the pages' total data |
672 | | * size, not number of tuples. |
673 | | */ |
674 | 0 | if (lsize > totalsize / 2) |
675 | 0 | { |
676 | 0 | if (separator == InvalidOffsetNumber) |
677 | 0 | separator = i - 1; |
678 | 0 | page = rpage; |
679 | 0 | } |
680 | 0 | else |
681 | 0 | { |
682 | 0 | lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); |
683 | 0 | } |
684 | |
|
685 | 0 | if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) |
686 | 0 | elog(ERROR, "failed to add item to index page in \"%s\"", |
687 | 0 | RelationGetRelationName(btree->index)); |
688 | 0 | ptr += MAXALIGN(IndexTupleSize(itup)); |
689 | 0 | } |
690 | | |
691 | | /* return temp pages to caller */ |
692 | 0 | *newlpage = lpage; |
693 | 0 | *newrpage = rpage; |
694 | 0 | } |
695 | | |
696 | | /* |
697 | | * Construct insertion payload for inserting the downlink for given buffer. |
698 | | */ |
699 | | static void * |
700 | | entryPrepareDownlink(GinBtree btree, Buffer lbuf) |
701 | 0 | { |
702 | 0 | GinBtreeEntryInsertData *insertData; |
703 | 0 | Page lpage = BufferGetPage(lbuf); |
704 | 0 | BlockNumber lblkno = BufferGetBlockNumber(lbuf); |
705 | 0 | IndexTuple itup; |
706 | |
|
707 | 0 | itup = getRightMostTuple(lpage); |
708 | |
|
709 | 0 | insertData = palloc(sizeof(GinBtreeEntryInsertData)); |
710 | 0 | insertData->entry = GinFormInteriorTuple(itup, lpage, lblkno); |
711 | 0 | insertData->isDelete = false; |
712 | |
|
713 | 0 | return insertData; |
714 | 0 | } |
715 | | |
716 | | /* |
717 | | * Fills new root by rightest values from child. |
718 | | * Also called from ginxlog, should not use btree |
719 | | */ |
720 | | void |
721 | | ginEntryFillRoot(GinBtree btree, Page root, |
722 | | BlockNumber lblkno, Page lpage, |
723 | | BlockNumber rblkno, Page rpage) |
724 | 0 | { |
725 | 0 | IndexTuple itup; |
726 | |
|
727 | 0 | itup = GinFormInteriorTuple(getRightMostTuple(lpage), lpage, lblkno); |
728 | 0 | if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) |
729 | 0 | elog(ERROR, "failed to add item to index root page"); |
730 | 0 | pfree(itup); |
731 | |
|
732 | 0 | itup = GinFormInteriorTuple(getRightMostTuple(rpage), rpage, rblkno); |
733 | 0 | if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) |
734 | 0 | elog(ERROR, "failed to add item to index root page"); |
735 | 0 | pfree(itup); |
736 | 0 | } |
737 | | |
738 | | /* |
739 | | * Set up GinBtree for entry page access |
740 | | * |
741 | | * Note: during WAL recovery, there may be no valid data in ginstate |
742 | | * other than a faked-up Relation pointer; the key datum is bogus too. |
743 | | */ |
744 | | void |
745 | | ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, |
746 | | Datum key, GinNullCategory category, |
747 | | GinState *ginstate) |
748 | 127 | { |
749 | 127 | memset(btree, 0, sizeof(GinBtreeData)); |
750 | | |
751 | 127 | btree->index = ginstate->index; |
752 | 127 | btree->rootBlkno = GIN_ROOT_BLKNO; |
753 | 127 | btree->ginstate = ginstate; |
754 | | |
755 | 127 | btree->findChildPage = entryLocateEntry; |
756 | 127 | btree->getLeftMostChild = entryGetLeftMostPage; |
757 | 127 | btree->isMoveRight = entryIsMoveRight; |
758 | 127 | btree->findItem = entryLocateLeafEntry; |
759 | 127 | btree->findChildPtr = entryFindChildPtr; |
760 | 127 | btree->beginPlaceToPage = entryBeginPlaceToPage; |
761 | 127 | btree->execPlaceToPage = entryExecPlaceToPage; |
762 | 127 | btree->fillRoot = ginEntryFillRoot; |
763 | 127 | btree->prepareDownlink = entryPrepareDownlink; |
764 | | |
765 | 127 | btree->isData = false; |
766 | 127 | btree->fullScan = false; |
767 | 127 | btree->isBuild = false; |
768 | | |
769 | 127 | btree->entryAttnum = attnum; |
770 | 127 | btree->entryKey = key; |
771 | 127 | btree->entryCategory = category; |
772 | 127 | } |