YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/postgres/src/backend/executor/nodeBitmapHeapscan.c
/*-------------------------------------------------------------------------
 *
 * nodeBitmapHeapscan.c
 *	  Routines to support bitmapped scans of relations
 *
 * NOTE: it is critical that this plan type only be used with MVCC-compliant
 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
 * special snapshots).  The reason is that since index and heap scans are
 * decoupled, there can be no assurance that the index tuple prompting a
 * visit to a particular heap TID still exists when the visit is made.
 * Therefore the tuple might not exist anymore either (which is OK because
 * heap_fetch will cope) --- but worse, the tuple slot could have been
 * re-used for a newer tuple.  With an MVCC snapshot the newer tuple is
 * certain to fail the time qual and so it will not be mistakenly returned,
 * but with anything else we might return a tuple that doesn't meet the
 * required index qual conditions.
 *
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeBitmapHeapscan.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *		ExecBitmapHeapScan			scans a relation using bitmap info
 *		ExecBitmapHeapNext			workhorse for above
 *		ExecInitBitmapHeapScan		creates and initializes state info.
 *		ExecReScanBitmapHeapScan	prepares to rescan the plan.
 *		ExecEndBitmapHeapScan		releases all storage.
 */
#include "postgres.h"

#include <math.h>

#include "access/relscan.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/spccache.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"


static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
static inline void BitmapDoneInitializingSharedState(
								  ParallelBitmapHeapState *pstate);
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
							 TBMIterateResult *tbmres);
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
static inline void BitmapPrefetch(BitmapHeapScanState *node,
			   HeapScanDesc scan);
static bool BitmapShouldInitializeSharedState(
								  ParallelBitmapHeapState *pstate);


/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve next tuple from the BitmapHeapScan node's currentRelation
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	ExprContext *econtext;
	HeapScanDesc scan;
	TIDBitmap  *tbm;
	TBMIterator *tbmiterator = NULL;
	TBMSharedIterator *shared_tbmiterator = NULL;
	TBMIterateResult *tbmres;
	OffsetNumber targoffset;
	TupleTableSlot *slot;
	ParallelBitmapHeapState *pstate = node->pstate;
	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

	/*
	 * extract necessary information from index scan node
	 */
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scan = node->ss.ss_currentScanDesc;
	tbm = node->tbm;
	if (pstate == NULL)
		tbmiterator = node->tbmiterator;
	else
		shared_tbmiterator = node->shared_tbmiterator;
	tbmres = node->tbmres;

	/*
	 * If we haven't yet performed the underlying index scan, do it, and begin
	 * the iteration over the bitmap.
	 *
	 * For prefetching, we use *two* iterators, one for the pages we are
	 * actually scanning and another that runs ahead of the first for
	 * prefetching.  node->prefetch_pages tracks exactly how many pages ahead
	 * the prefetch iterator is.  Also, node->prefetch_target tracks the
	 * desired prefetch distance, which starts small and increases up to the
	 * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in
	 * a scan that stops after a few tuples because of a LIMIT.
	 */
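	/*
	 * In the serial case both iterators are private TBMIterators created by
	 * tbm_begin_iterate on the same bitmap.  In the parallel case the leader
	 * publishes shared iterator state via tbm_prepare_shared_iterate, and
	 * every backend (leader included) attaches to it with
	 * tbm_attach_shared_iterate below.
	 */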
	if (!node->initialized)
	{
		if (!pstate)
		{
			tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

			if (!tbm || !IsA(tbm, TIDBitmap))
				elog(ERROR, "unrecognized result from subplan");

			node->tbm = tbm;
			node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
			node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
			if (node->prefetch_maximum > 0)
			{
				node->prefetch_iterator = tbm_begin_iterate(tbm);
				node->prefetch_pages = 0;
				node->prefetch_target = -1;
			}
#endif							/* USE_PREFETCH */
		}
		else
		{
			/*
			 * The leader will immediately come out of the function, but
			 * others will be blocked until the leader populates the TBM and
			 * wakes them up.
			 */
			if (BitmapShouldInitializeSharedState(pstate))
			{
				tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
				if (!tbm || !IsA(tbm, TIDBitmap))
					elog(ERROR, "unrecognized result from subplan");

				node->tbm = tbm;

				/*
				 * Prepare to iterate over the TBM. This will return the
				 * dsa_pointer of the iterator state which will be used by
				 * multiple processes to iterate jointly.
				 */
				pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
#ifdef USE_PREFETCH
				if (node->prefetch_maximum > 0)
				{
					pstate->prefetch_iterator =
						tbm_prepare_shared_iterate(tbm);

					/*
					 * We don't need the mutex here as we haven't yet woken
					 * up others.
					 */
					pstate->prefetch_pages = 0;
					pstate->prefetch_target = -1;
				}
#endif

				/* We have initialized the shared state so wake up others. */
				BitmapDoneInitializingSharedState(pstate);
			}

			/* Allocate a private iterator and attach the shared state to it */
			node->shared_tbmiterator = shared_tbmiterator =
				tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
			node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
			if (node->prefetch_maximum > 0)
			{
				node->shared_prefetch_iterator =
					tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
			}
#endif							/* USE_PREFETCH */
		}
		node->initialized = true;
	}

	for (;;)
	{
		Page		dp;
		ItemId		lp;

		CHECK_FOR_INTERRUPTS();

		/*
		 * Get next page of results if needed
		 */
		if (tbmres == NULL)
		{
			if (!pstate)
				node->tbmres = tbmres = tbm_iterate(tbmiterator);
			else
				node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
			if (tbmres == NULL)
			{
				/* no more entries in the bitmap */
				break;
			}

			BitmapAdjustPrefetchIterator(node, tbmres);

			/*
			 * Ignore any claimed entries past what we think is the end of the
			 * relation.  (This is probably not necessary given that we got at
			 * least AccessShareLock on the table before performing any of the
			 * indexscans, but let's be safe.)
			 */
			if (tbmres->blockno >= scan->rs_nblocks)
			{
				node->tbmres = tbmres = NULL;
				continue;
			}

			/*
			 * We can skip fetching the heap page if we don't need any fields
			 * from the heap, and the bitmap entries don't need rechecking,
			 * and all tuples on the page are visible to our transaction.
			 */
			node->skip_fetch = (node->can_skip_fetch &&
								!tbmres->recheck &&
								VM_ALL_VISIBLE(node->ss.ss_currentRelation,
											   tbmres->blockno,
											   &node->vmbuffer));
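			/*
			 * This mirrors an index-only scan: e.g. a
			 *		SELECT count(*) FROM t WHERE key = 42
			 * whose qual is enforced entirely by the index needs no heap
			 * columns, so an all-visible page can contribute its
			 * tbmres->ntuples matches without being read.
			 */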

			if (node->skip_fetch)
			{
				/*
				 * The number of tuples on this page is put into
				 * scan->rs_ntuples; note we don't fill scan->rs_vistuples.
				 */
				scan->rs_ntuples = tbmres->ntuples;
			}
			else
			{
				/*
				 * Fetch the current heap page and identify candidate tuples.
				 */
				bitgetpage(scan, tbmres);
			}

			if (tbmres->ntuples >= 0)
				node->exact_pages++;
			else
				node->lossy_pages++;

			/*
			 * Set rs_cindex to first slot to examine
			 */
			scan->rs_cindex = 0;

			/* Adjust the prefetch target */
			BitmapAdjustPrefetchTarget(node);
		}
		else
		{
			/*
			 * Continuing in previously obtained page; advance rs_cindex
			 */
			scan->rs_cindex++;

#ifdef USE_PREFETCH

			/*
			 * Try to prefetch at least a few pages even before we get to the
			 * second page if we don't stop reading after the first tuple.
			 */
			if (!pstate)
			{
				if (node->prefetch_target < node->prefetch_maximum)
					node->prefetch_target++;
			}
			else if (pstate->prefetch_target < node->prefetch_maximum)
			{
				/* take spinlock while updating shared state */
				SpinLockAcquire(&pstate->mutex);
				if (pstate->prefetch_target < node->prefetch_maximum)
					pstate->prefetch_target++;
				SpinLockRelease(&pstate->mutex);
			}
#endif							/* USE_PREFETCH */
		}

		/*
		 * Out of range?  If so, nothing more to look at on this page
		 */
		if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
		{
			node->tbmres = tbmres = NULL;
			continue;
		}

		/*
		 * We issue prefetch requests *after* fetching the current page to try
		 * to avoid having prefetching interfere with the main I/O. Also, this
		 * should happen only when we have determined there is still something
		 * to do on the current page, else we may uselessly prefetch the same
		 * page we are just about to request for real.
		 */
		BitmapPrefetch(node, scan);

		if (node->skip_fetch)
		{
			/*
			 * If we don't have to fetch the tuple, just return nulls.
			 */
			ExecStoreAllNullTuple(slot);
		}
		else
		{
			/*
			 * Okay to fetch the tuple.
			 */
			targoffset = scan->rs_vistuples[scan->rs_cindex];
			dp = (Page) BufferGetPage(scan->rs_cbuf);
			lp = PageGetItemId(dp, targoffset);
			Assert(ItemIdIsNormal(lp));

			scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
			scan->rs_ctup.t_len = ItemIdGetLength(lp);
			scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
			ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);

			pgstat_count_heap_fetch(scan->rs_rd);

			/*
			 * Set up the result slot to point to this tuple.  Note that the
			 * slot acquires a pin on the buffer.
			 */
			ExecStoreBufferHeapTuple(&scan->rs_ctup,
									 slot,
									 scan->rs_cbuf);

			/*
			 * If we are using lossy info, we have to recheck the qual
			 * conditions at every tuple.
			 */
			if (tbmres->recheck)
			{
				econtext->ecxt_scantuple = slot;
				if (!ExecQualAndReset(node->bitmapqualorig, econtext))
				{
					/* Fails recheck, so drop it and loop back for another */
					InstrCountFiltered2(node, 1);
					ExecClearTuple(slot);
					continue;
				}
			}
		}

		/* OK to return this tuple */
		return slot;
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	return ExecClearTuple(slot);
}

/*
 * bitgetpage - subroutine for BitmapHeapNext()
 *
 * This routine reads and pins the specified page of the relation, then
 * builds an array indicating which tuples on the page are both potentially
 * interesting according to the bitmap, and visible according to the snapshot.
 */
static void
bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
{
	BlockNumber page = tbmres->blockno;
	Buffer		buffer;
	Snapshot	snapshot;
	int			ntup;

	/*
	 * Acquire pin on the target heap page, trading in any pin we held before.
	 */
	Assert(page < scan->rs_nblocks);

	scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
										 scan->rs_rd,
										 page);
	buffer = scan->rs_cbuf;
	snapshot = scan->rs_snapshot;

	ntup = 0;

	/*
	 * Prune and repair fragmentation for the whole page, if possible.
	 */
	heap_page_prune_opt(scan->rs_rd, buffer);

	/*
	 * We must hold share lock on the buffer content while examining tuple
	 * visibility.  Afterwards, however, the tuples we have found to be
	 * visible are guaranteed good as long as we hold the buffer pin.
	 */
	LockBuffer(buffer, BUFFER_LOCK_SHARE);

	/*
	 * We need two separate strategies for lossy and non-lossy cases.
	 */
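	/*
	 * An exact page has tbmres->ntuples >= 0 and its matching offsets in
	 * tbmres->offsets[]; a lossy page has ntuples == -1, meaning only the
	 * block number was kept, so every item on the page must be examined.
	 */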
	if (tbmres->ntuples >= 0)
	{
		/*
		 * Bitmap is non-lossy, so we just look through the offsets listed in
		 * tbmres; but we have to follow any HOT chain starting at each such
		 * offset.
		 */
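		/*
		 * (The bitmap stores the root line pointer of each HOT chain, since
		 * that is what the index points at; heap_hot_search_buffer walks the
		 * chain and reports the member visible under our snapshot, if any.)
		 */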
		int			curslot;

		for (curslot = 0; curslot < tbmres->ntuples; curslot++)
		{
			OffsetNumber offnum = tbmres->offsets[curslot];
			ItemPointerData tid;
			HeapTupleData heapTuple;

			ItemPointerSet(&tid, page, offnum);
			if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
									   &heapTuple, NULL, true))
				scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
		}
	}
	else
	{
		/*
		 * Bitmap is lossy, so we must examine each item pointer on the page.
		 * But we can ignore HOT chains, since we'll check each tuple anyway.
		 */
		Page		dp = (Page) BufferGetPage(buffer);
		OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
		OffsetNumber offnum;

		for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
		{
			ItemId		lp;
			HeapTupleData loctup;
			bool		valid;

			lp = PageGetItemId(dp, offnum);
			if (!ItemIdIsNormal(lp))
				continue;
			loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
			loctup.t_len = ItemIdGetLength(lp);
			loctup.t_tableOid = scan->rs_rd->rd_id;
			ItemPointerSet(&loctup.t_self, page, offnum);
			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
			if (valid)
			{
				scan->rs_vistuples[ntup++] = offnum;
				PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
			}
			CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
											buffer, snapshot);
		}
	}

	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

	Assert(ntup <= MaxHeapTuplesPerPage);
	scan->rs_ntuples = ntup;
}

/*
 *	BitmapDoneInitializingSharedState - Shared state is initialized
 *
 *	By this time the leader has already populated the TBM and initialized the
 *	shared state, so wake up the other processes.
 */
static inline void
BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
{
	SpinLockAcquire(&pstate->mutex);
	pstate->state = BM_FINISHED;
	SpinLockRelease(&pstate->mutex);
	ConditionVariableBroadcast(&pstate->cv);
}

/*
 *	BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
 */
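/*
 * node->prefetch_pages counts how many pages the prefetch iterator is ahead
 * of the main iterator.  Each time the main iterator advances a page, either
 * that distance shrinks by one, or (at distance zero) the prefetch iterator
 * must step in lockstep with it.
 */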
static inline void
BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
							 TBMIterateResult *tbmres)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;

	if (pstate == NULL)
	{
		TBMIterator *prefetch_iterator = node->prefetch_iterator;

		if (node->prefetch_pages > 0)
		{
			/* The main iterator has closed the distance by one page */
			node->prefetch_pages--;
		}
		else if (prefetch_iterator)
		{
			/* Do not let the prefetch iterator get behind the main one */
			TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

			if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
				elog(ERROR, "prefetch and main iterators are out of sync");
		}
		return;
	}

	if (node->prefetch_maximum > 0)
	{
		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

		SpinLockAcquire(&pstate->mutex);
		if (pstate->prefetch_pages > 0)
		{
			pstate->prefetch_pages--;
			SpinLockRelease(&pstate->mutex);
		}
		else
		{
			/* Release the mutex before iterating */
			SpinLockRelease(&pstate->mutex);

			/*
			 * In shared mode we cannot ensure that the current blockno of
			 * the main iterator and that of the prefetch iterator are the
			 * same.  It's possible that whatever blockno we are prefetching
			 * will be processed by another process.  Therefore, we don't
			 * validate the blockno here as we do in the non-parallel case.
			 */
			if (prefetch_iterator)
				tbm_shared_iterate(prefetch_iterator);
		}
	}
#endif							/* USE_PREFETCH */
}

/*
 * BitmapAdjustPrefetchTarget - Adjust the prefetch target
 *
 * Increase prefetch target if it's not yet at the max.  Note that
 * we will increase it to zero after fetching the very first
 * page/tuple, then to one after the second tuple is fetched, then
 * it doubles as later pages are fetched.
 */
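/*
 * For example, with prefetch_maximum = 8 the target advances
 * -1 -> 0 -> 1 -> 2 -> 4 -> 8 over successive calls here.
 */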
static inline void
BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;

	if (pstate == NULL)
	{
		if (node->prefetch_target >= node->prefetch_maximum)
			 /* don't increase any further */ ;
		else if (node->prefetch_target >= node->prefetch_maximum / 2)
			node->prefetch_target = node->prefetch_maximum;
		else if (node->prefetch_target > 0)
			node->prefetch_target *= 2;
		else
			node->prefetch_target++;
		return;
	}

	/* Do an unlocked check first to save spinlock acquisitions. */
	if (pstate->prefetch_target < node->prefetch_maximum)
	{
		SpinLockAcquire(&pstate->mutex);
		if (pstate->prefetch_target >= node->prefetch_maximum)
			 /* don't increase any further */ ;
		else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
			pstate->prefetch_target = node->prefetch_maximum;
		else if (pstate->prefetch_target > 0)
			pstate->prefetch_target *= 2;
		else
			pstate->prefetch_target++;
		SpinLockRelease(&pstate->mutex);
	}
#endif							/* USE_PREFETCH */
}

/*
 * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
 */
static inline void
BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;

	if (pstate == NULL)
	{
		TBMIterator *prefetch_iterator = node->prefetch_iterator;

		if (prefetch_iterator)
		{
			while (node->prefetch_pages < node->prefetch_target)
			{
				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
				bool		skip_fetch;

				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_iterate(prefetch_iterator);
					node->prefetch_iterator = NULL;
					break;
				}
				node->prefetch_pages++;

				/*
				 * If we expect not to have to actually read this heap page,
				 * skip this prefetch call, but continue to run the prefetch
				 * logic normally.  (Would it be better not to increment
				 * prefetch_pages?)
				 *
				 * This depends on the assumption that the index AM will
				 * report the same recheck flag for this future heap page as
				 * it did for the current heap page; which is not a certainty
				 * but is true in many cases.
				 */
				skip_fetch = (node->can_skip_fetch &&
							  (node->tbmres ? !node->tbmres->recheck : false) &&
							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
											 tbmpre->blockno,
											 &node->pvmbuffer));

				if (!skip_fetch)
					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}

		return;
	}

	if (pstate->prefetch_pages < pstate->prefetch_target)
	{
		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

		if (prefetch_iterator)
		{
			while (1)
			{
				TBMIterateResult *tbmpre;
				bool		do_prefetch = false;
				bool		skip_fetch;

				/*
				 * Recheck under the mutex.  If some other process has
				 * already done enough prefetching then we need not do
				 * anything.
				 */
				SpinLockAcquire(&pstate->mutex);
				if (pstate->prefetch_pages < pstate->prefetch_target)
				{
					pstate->prefetch_pages++;
					do_prefetch = true;
				}
				SpinLockRelease(&pstate->mutex);

				if (!do_prefetch)
					return;

				tbmpre = tbm_shared_iterate(prefetch_iterator);
				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_shared_iterate(prefetch_iterator);
					node->shared_prefetch_iterator = NULL;
					break;
				}

				/* As above, skip prefetch if we expect not to need page */
				skip_fetch = (node->can_skip_fetch &&
							  (node->tbmres ? !node->tbmres->recheck : false) &&
							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
											 tbmpre->blockno,
											 &node->pvmbuffer));

				if (!skip_fetch)
					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}
	}
#endif							/* USE_PREFETCH */
}

/*
 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
 */
static bool
BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
{
	ExprContext *econtext;

	/*
	 * extract necessary information from index scan node
	 */
	econtext = node->ss.ps.ps_ExprContext;

	/* Does the tuple meet the original qual conditions? */
	econtext->ecxt_scantuple = slot;
	return ExecQualAndReset(node->bitmapqualorig, econtext);
}

/* ----------------------------------------------------------------
 *		ExecBitmapHeapScan(node)
 * ----------------------------------------------------------------
 */
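/*
 * This is just the ExecScan driver: BitmapHeapNext supplies each candidate
 * tuple, and BitmapHeapRecheck is the callback used to re-evaluate
 * bitmapqualorig during EvalPlanQual rechecks.
 */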
static TupleTableSlot *
ExecBitmapHeapScan(PlanState *pstate)
{
	BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);

	return ExecScan(&node->ss,
					(ExecScanAccessMtd) BitmapHeapNext,
					(ExecScanRecheckMtd) BitmapHeapRecheck);
}

/* ----------------------------------------------------------------
 *		ExecReScanBitmapHeapScan(node)
 * ----------------------------------------------------------------
 */
void
ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
{
	PlanState  *outerPlan = outerPlanState(node);

	/* rescan to release any page pin */
	heap_rescan(node->ss.ss_currentScanDesc, NULL);

	/* release bitmaps and buffers if any */
	if (node->tbmiterator)
		tbm_end_iterate(node->tbmiterator);
	if (node->prefetch_iterator)
		tbm_end_iterate(node->prefetch_iterator);
	if (node->shared_tbmiterator)
		tbm_end_shared_iterate(node->shared_tbmiterator);
	if (node->shared_prefetch_iterator)
		tbm_end_shared_iterate(node->shared_prefetch_iterator);
	if (node->tbm)
		tbm_free(node->tbm);
	if (node->vmbuffer != InvalidBuffer)
		ReleaseBuffer(node->vmbuffer);
	if (node->pvmbuffer != InvalidBuffer)
		ReleaseBuffer(node->pvmbuffer);
	node->tbm = NULL;
	node->tbmiterator = NULL;
	node->tbmres = NULL;
	node->prefetch_iterator = NULL;
	node->initialized = false;
	node->shared_tbmiterator = NULL;
	node->shared_prefetch_iterator = NULL;
	node->vmbuffer = InvalidBuffer;
	node->pvmbuffer = InvalidBuffer;

	ExecScanReScan(&node->ss);

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (outerPlan->chgParam == NULL)
		ExecReScan(outerPlan);
}

/* ----------------------------------------------------------------
 *		ExecEndBitmapHeapScan
 * ----------------------------------------------------------------
 */
void
ExecEndBitmapHeapScan(BitmapHeapScanState *node)
{
	Relation	relation;
	HeapScanDesc scanDesc;

	/*
	 * extract information from the node
	 */
	relation = node->ss.ss_currentRelation;
	scanDesc = node->ss.ss_currentScanDesc;

	/*
	 * Free the exprcontext
	 */
	ExecFreeExprContext(&node->ss.ps);

	/*
	 * clear out tuple table slots
	 */
	if (node->ss.ps.ps_ResultTupleSlot)
		ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
	ExecClearTuple(node->ss.ss_ScanTupleSlot);

	/*
	 * close down subplans
	 */
	ExecEndNode(outerPlanState(node));

	/*
	 * release bitmaps and buffers if any
	 */
	if (node->tbmiterator)
		tbm_end_iterate(node->tbmiterator);
	if (node->prefetch_iterator)
		tbm_end_iterate(node->prefetch_iterator);
	if (node->tbm)
		tbm_free(node->tbm);
	if (node->shared_tbmiterator)
		tbm_end_shared_iterate(node->shared_tbmiterator);
	if (node->shared_prefetch_iterator)
		tbm_end_shared_iterate(node->shared_prefetch_iterator);
	if (node->vmbuffer != InvalidBuffer)
		ReleaseBuffer(node->vmbuffer);
	if (node->pvmbuffer != InvalidBuffer)
		ReleaseBuffer(node->pvmbuffer);

	/*
	 * close heap scan
	 */
	heap_endscan(scanDesc);

	/*
	 * close the heap relation.
	 */
	ExecCloseScanRelation(relation);
}

/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
	BitmapHeapScanState *scanstate;
	Relation	currentRelation;
	int			io_concurrency;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * Assert caller didn't ask for an unsafe snapshot --- see comments at
	 * head of file.
	 */
	Assert(IsMVCCSnapshot(estate->es_snapshot));

	/*
	 * create state structure
	 */
	scanstate = makeNode(BitmapHeapScanState);
	scanstate->ss.ps.plan = (Plan *) node;
	scanstate->ss.ps.state = estate;
	scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;

	scanstate->tbm = NULL;
	scanstate->tbmiterator = NULL;
	scanstate->tbmres = NULL;
	scanstate->skip_fetch = false;
	scanstate->vmbuffer = InvalidBuffer;
	scanstate->pvmbuffer = InvalidBuffer;
	scanstate->exact_pages = 0;
	scanstate->lossy_pages = 0;
	scanstate->prefetch_iterator = NULL;
	scanstate->prefetch_pages = 0;
	scanstate->prefetch_target = 0;
	/* may be updated below */
	scanstate->prefetch_maximum = target_prefetch_pages;
	scanstate->pscan_len = 0;
	scanstate->initialized = false;
	scanstate->shared_tbmiterator = NULL;
	scanstate->shared_prefetch_iterator = NULL;
	scanstate->pstate = NULL;

	/*
	 * We can potentially skip fetching heap pages if we do not need any
	 * columns of the table, either for checking non-indexable quals or for
	 * returning data.  This test is a bit simplistic, as it checks the
	 * stronger condition that there's no qual or return tlist at all.  But in
	 * most cases it's probably not worth working harder than that.
	 */
	scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
								 node->scan.plan.targetlist == NIL);

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &scanstate->ss.ps);

	/*
	 * open the base relation and acquire appropriate lock on it.
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

	/*
	 * initialize child nodes
	 *
	 * We do this after ExecOpenScanRelation because the child nodes will open
	 * indexscans on our relation's indexes, and we want to be sure we have
	 * acquired a lock on the relation first.
	 */
	outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * get the scan type from the relation descriptor.
	 */
	ExecInitScanTupleSlot(estate, &scanstate->ss,
						  RelationGetDescr(currentRelation));

	/*
	 * Initialize result type and projection.
	 */
	ExecInitResultTypeTL(&scanstate->ss.ps);
	ExecAssignScanProjectionInfo(&scanstate->ss);

	/*
	 * initialize child expressions
	 */
	scanstate->ss.ps.qual =
		ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
	scanstate->bitmapqualorig =
		ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);

	/*
	 * Determine the maximum for prefetch_target.  If the tablespace has a
	 * specific IO concurrency set, use that to compute the corresponding
	 * maximum value; otherwise, we already initialized to the value computed
	 * by the GUC machinery.
	 */
	io_concurrency =
		get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
	if (io_concurrency != effective_io_concurrency)
	{
		double		maximum;

		if (ComputeIoConcurrency(io_concurrency, &maximum))
			scanstate->prefetch_maximum = rint(maximum);
	}
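	/*
	 * ComputeIoConcurrency performs the same translation from an
	 * effective_io_concurrency setting to a prefetch-page count that the GUC
	 * machinery used to derive target_prefetch_pages, so a per-tablespace
	 * setting simply overrides the global default here.
	 */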

	scanstate->ss.ss_currentRelation = currentRelation;

	/*
	 * Even though we aren't going to do a conventional seqscan, it is useful
	 * to create a HeapScanDesc --- most of the fields in it are usable.
	 */
	scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
														 estate->es_snapshot,
														 0,
														 NULL);

	/*
	 * all done.
	 */
	return scanstate;
}

/*----------------
 *		BitmapShouldInitializeSharedState
 *
 *		The first process to come here and see the state set to BM_INITIAL
 *		will become the leader for the parallel bitmap scan and will be
 *		responsible for populating the TIDBitmap.  The other processes will
 *		be blocked by the condition variable until the leader wakes them up.
 * ---------------
 */
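/*
 * The shared state advances BM_INITIAL -> BM_INPROGRESS -> BM_FINISHED:
 * BM_INITIAL is set in ExecBitmapHeapInitializeDSM (and restored by
 * ExecBitmapHeapReInitializeDSM), BM_INPROGRESS is set below by the process
 * that becomes the leader, and BM_FINISHED by
 * BitmapDoneInitializingSharedState once the bitmap is ready.
 */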
static bool
BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
{
	SharedBitmapState state;

	while (1)
	{
		SpinLockAcquire(&pstate->mutex);
		state = pstate->state;
		if (pstate->state == BM_INITIAL)
			pstate->state = BM_INPROGRESS;
		SpinLockRelease(&pstate->mutex);

		/* Exit if bitmap is done, or if we're the leader. */
		if (state != BM_INPROGRESS)
			break;

		/* Wait for the leader to wake us up. */
		ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
	}

	ConditionVariableCancelSleep();

	return (state == BM_INITIAL);
}

/* ----------------------------------------------------------------
 *		ExecBitmapHeapEstimate
 *
 *		Compute the amount of space we'll need in the parallel
 *		query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapEstimate(BitmapHeapScanState *node,
					   ParallelContext *pcxt)
{
	EState	   *estate = node->ss.ps.state;

	node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
										phs_snapshot_data),
							   EstimateSnapshotSpace(estate->es_snapshot));
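	/*
	 * phs_snapshot_data is the flexible-array tail of
	 * ParallelBitmapHeapState, so the struct and the serialized snapshot
	 * occupy a single DSM chunk of pscan_len bytes.
	 */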

	shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);
}

/* ----------------------------------------------------------------
 *		ExecBitmapHeapInitializeDSM
 *
 *		Set up a parallel bitmap heap scan descriptor.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
							ParallelContext *pcxt)
{
	ParallelBitmapHeapState *pstate;
	EState	   *estate = node->ss.ps.state;
	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

	/* If there's no DSA, there are no workers; initialize nothing. */
	if (dsa == NULL)
		return;

	pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);

	pstate->tbmiterator = 0;
	pstate->prefetch_iterator = 0;

	/* Initialize the mutex */
	SpinLockInit(&pstate->mutex);
	pstate->prefetch_pages = 0;
	pstate->prefetch_target = 0;
	pstate->state = BM_INITIAL;

	ConditionVariableInit(&pstate->cv);
	SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);

	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
	node->pstate = pstate;
}

/* ----------------------------------------------------------------
 *		ExecBitmapHeapReInitializeDSM
 *
 *		Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
							  ParallelContext *pcxt)
{
	ParallelBitmapHeapState *pstate = node->pstate;
	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

	/* If there's no DSA, there are no workers; do nothing. */
	if (dsa == NULL)
		return;

	pstate->state = BM_INITIAL;

	if (DsaPointerIsValid(pstate->tbmiterator))
		tbm_free_shared_area(dsa, pstate->tbmiterator);

	if (DsaPointerIsValid(pstate->prefetch_iterator))
		tbm_free_shared_area(dsa, pstate->prefetch_iterator);

	pstate->tbmiterator = InvalidDsaPointer;
	pstate->prefetch_iterator = InvalidDsaPointer;
}

/* ----------------------------------------------------------------
 *		ExecBitmapHeapInitializeWorker
 *
 *		Copy relevant information from TOC into planstate.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
							   ParallelWorkerContext *pwcxt)
{
	ParallelBitmapHeapState *pstate;
	Snapshot	snapshot;

	Assert(node->ss.ps.state->es_query_dsa != NULL);

	pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
	node->pstate = pstate;

	snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
	heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
}