YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/postgres/src/backend/statistics/dependencies.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * dependencies.c
4
 *    POSTGRES functional dependencies
5
 *
6
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7
 * Portions Copyright (c) 1994, Regents of the University of California
8
 *
9
 * IDENTIFICATION
10
 *    src/backend/statistics/dependencies.c
11
 *
12
 *-------------------------------------------------------------------------
13
 */
14
#include "postgres.h"
15
16
#include "access/htup_details.h"
17
#include "access/sysattr.h"
18
#include "catalog/pg_operator.h"
19
#include "catalog/pg_statistic_ext.h"
20
#include "lib/stringinfo.h"
21
#include "optimizer/clauses.h"
22
#include "optimizer/cost.h"
23
#include "optimizer/var.h"
24
#include "nodes/nodes.h"
25
#include "nodes/relation.h"
26
#include "statistics/extended_stats_internal.h"
27
#include "statistics/statistics.h"
28
#include "utils/bytea.h"
29
#include "utils/fmgroids.h"
30
#include "utils/fmgrprotos.h"
31
#include "utils/lsyscache.h"
32
#include "utils/syscache.h"
33
#include "utils/typcache.h"
34
35
/*
36
 * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
37
 * k-permutations of n elements, except that the order does not matter for the
38
 * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
39
 */
40
typedef struct DependencyGeneratorData
41
{
42
  int     k;        /* size of the dependency */
43
  int     n;        /* number of possible attributes */
44
  int     current;    /* next dependency to return (index) */
45
  AttrNumber  ndependencies;  /* number of dependencies generated */
46
  AttrNumber *dependencies; /* array of pre-generated dependencies  */
47
} DependencyGeneratorData;
48
49
typedef DependencyGeneratorData *DependencyGenerator;
50
51
static void generate_dependencies_recurse(DependencyGenerator state,
52
                int index, AttrNumber start, AttrNumber *current);
53
static void generate_dependencies(DependencyGenerator state);
54
static DependencyGenerator DependencyGenerator_init(int n, int k);
55
static void DependencyGenerator_free(DependencyGenerator state);
56
static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
57
static double dependency_degree(int numrows, HeapTuple *rows, int k,
58
          AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs);
59
static bool dependency_is_fully_matched(MVDependency *dependency,
60
              Bitmapset *attnums);
61
static bool dependency_implies_attribute(MVDependency *dependency,
62
               AttrNumber attnum);
63
static bool dependency_is_compatible_clause(Node *clause, Index relid,
64
                AttrNumber *attnum);
65
static MVDependency *find_strongest_dependency(StatisticExtInfo *stats,
66
              MVDependencies *dependencies,
67
              Bitmapset *attnums);
68
69
static void
70
generate_dependencies_recurse(DependencyGenerator state, int index,
71
                AttrNumber start, AttrNumber *current)
72
0
{
73
  /*
74
   * The generator handles the first (k-1) elements differently from the
75
   * last element.
76
   */
77
0
  if (index < (state->k - 1))
78
0
  {
79
0
    AttrNumber  i;
80
81
    /*
82
     * The first (k-1) values have to be in ascending order, which we
83
     * generate recursively.
84
     */
85
86
0
    for (i = start; i < state->n; i++)
87
0
    {
88
0
      current[index] = i;
89
0
      generate_dependencies_recurse(state, (index + 1), (i + 1), current);
90
0
    }
91
0
  }
92
0
  else
93
0
  {
94
0
    int     i;
95
96
    /*
97
     * the last element is the implied value, which does not respect the
98
     * ascending order. We just need to check that the value is not in the
99
     * first (k-1) elements.
100
     */
101
102
0
    for (i = 0; i < state->n; i++)
103
0
    {
104
0
      int     j;
105
0
      bool    match = false;
106
107
0
      current[index] = i;
108
109
0
      for (j = 0; j < index; j++)
110
0
      {
111
0
        if (current[j] == i)
112
0
        {
113
0
          match = true;
114
0
          break;
115
0
        }
116
0
      }
117
118
      /*
119
       * If the value is not found in the first part of the dependency,
120
       * we're done.
121
       */
122
0
      if (!match)
123
0
      {
124
0
        state->dependencies = (AttrNumber *) repalloc(state->dependencies,
125
0
                                state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
126
0
        memcpy(&state->dependencies[(state->k * state->ndependencies)],
127
0
             current, state->k * sizeof(AttrNumber));
128
0
        state->ndependencies++;
129
0
      }
130
0
    }
131
0
  }
132
0
}
133
134
/* generate all dependencies (k-permutations of n elements) */
135
static void
136
generate_dependencies(DependencyGenerator state)
137
0
{
138
0
  AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
139
140
0
  generate_dependencies_recurse(state, 0, 0, current);
141
142
0
  pfree(current);
143
0
}
144
145
/*
146
 * initialize the DependencyGenerator of variations, and prebuild the variations
147
 *
148
 * This pre-builds all the variations. We could also generate them in
149
 * DependencyGenerator_next(), but this seems simpler.
150
 */
151
static DependencyGenerator
152
DependencyGenerator_init(int n, int k)
153
0
{
154
0
  DependencyGenerator state;
155
156
0
  Assert((n >= k) && (k > 0));
157
158
  /* allocate the DependencyGenerator state */
159
0
  state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
160
0
  state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
161
162
0
  state->ndependencies = 0;
163
0
  state->current = 0;
164
0
  state->k = k;
165
0
  state->n = n;
166
167
  /* now actually pre-generate all the variations */
168
0
  generate_dependencies(state);
169
170
0
  return state;
171
0
}
172
173
/* free the DependencyGenerator state */
174
static void
175
DependencyGenerator_free(DependencyGenerator state)
176
0
{
177
0
  pfree(state->dependencies);
178
0
  pfree(state);
179
180
0
}
181
182
/* generate next combination */
183
static AttrNumber *
184
DependencyGenerator_next(DependencyGenerator state)
185
0
{
186
0
  if (state->current == state->ndependencies)
187
0
    return NULL;
188
189
0
  return &state->dependencies[state->k * state->current++];
190
0
}
191
192
193
/*
194
 * validates functional dependency on the data
195
 *
196
 * An actual work horse of detecting functional dependencies. Given a variation
197
 * of k attributes, it checks that the first (k-1) are sufficient to determine
198
 * the last one.
199
 */
200
static double
201
dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
202
          VacAttrStats **stats, Bitmapset *attrs)
203
0
{
204
0
  int     i,
205
0
        j;
206
0
  int     nvalues = numrows * k;
207
0
  MultiSortSupport mss;
208
0
  SortItem   *items;
209
0
  Datum    *values;
210
0
  bool     *isnull;
211
0
  int      *attnums;
212
213
  /* counters valid within a group */
214
0
  int     group_size = 0;
215
0
  int     n_violations = 0;
216
217
  /* total number of rows supporting (consistent with) the dependency */
218
0
  int     n_supporting_rows = 0;
219
220
  /* Make sure we have at least two input attributes. */
221
0
  Assert(k >= 2);
222
223
  /* sort info for all attributes columns */
224
0
  mss = multi_sort_init(k);
225
226
  /* data for the sort */
227
0
  items = (SortItem *) palloc(numrows * sizeof(SortItem));
228
0
  values = (Datum *) palloc(sizeof(Datum) * nvalues);
229
0
  isnull = (bool *) palloc(sizeof(bool) * nvalues);
230
231
  /* fix the pointers to values/isnull */
232
0
  for (i = 0; i < numrows; i++)
233
0
  {
234
0
    items[i].values = &values[i * k];
235
0
    items[i].isnull = &isnull[i * k];
236
0
  }
237
238
  /*
239
   * Transform the bms into an array, to make accessing i-th member easier.
240
   */
241
0
  attnums = (int *) palloc(sizeof(int) * bms_num_members(attrs));
242
0
  i = 0;
243
0
  j = -1;
244
0
  while ((j = bms_next_member(attrs, j)) >= 0)
245
0
    attnums[i++] = j;
246
247
  /*
248
   * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
249
   *
250
   * (a) sort the data lexicographically
251
   *
252
   * (b) split the data into groups by first (k-1) columns
253
   *
254
   * (c) for each group count different values in the last column
255
   */
256
257
  /* prepare the sort function for the first dimension, and SortItem array */
258
0
  for (i = 0; i < k; i++)
259
0
  {
260
0
    VacAttrStats *colstat = stats[dependency[i]];
261
0
    TypeCacheEntry *type;
262
263
0
    type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
264
0
    if (type->lt_opr == InvalidOid) /* shouldn't happen */
265
0
      elog(ERROR, "cache lookup failed for ordering operator for type %u",
266
0
         colstat->attrtypid);
267
268
    /* prepare the sort function for this dimension */
269
0
    multi_sort_add_dimension(mss, i, type->lt_opr);
270
271
    /* accumulate all the data for both columns into an array and sort it */
272
0
    for (j = 0; j < numrows; j++)
273
0
    {
274
0
      items[j].values[i] =
275
0
        heap_getattr(rows[j], attnums[dependency[i]],
276
0
               stats[i]->tupDesc, &items[j].isnull[i]);
277
0
    }
278
0
  }
279
280
  /* sort the items so that we can detect the groups */
281
0
  qsort_arg((void *) items, numrows, sizeof(SortItem),
282
0
        multi_sort_compare, mss);
283
284
  /*
285
   * Walk through the sorted array, split it into rows according to the
286
   * first (k-1) columns. If there's a single value in the last column, we
287
   * count the group as 'supporting' the functional dependency. Otherwise we
288
   * count it as contradicting.
289
   */
290
291
  /* start with the first row forming a group */
292
0
  group_size = 1;
293
294
  /* loop 1 beyond the end of the array so that we count the final group */
295
0
  for (i = 1; i <= numrows; i++)
296
0
  {
297
    /*
298
     * Check if the group ended, which may be either because we processed
299
     * all the items (i==numrows), or because the i-th item is not equal
300
     * to the preceding one.
301
     */
302
0
    if (i == numrows ||
303
0
      multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
304
0
    {
305
      /*
306
       * If no violations were found in the group then track the rows of
307
       * the group as supporting the functional dependency.
308
       */
309
0
      if (n_violations == 0)
310
0
        n_supporting_rows += group_size;
311
312
      /* Reset counters for the new group */
313
0
      n_violations = 0;
314
0
      group_size = 1;
315
0
      continue;
316
0
    }
317
    /* first columns match, but the last one does not (so contradicting) */
318
0
    else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
319
0
      n_violations++;
320
321
0
    group_size++;
322
0
  }
323
324
0
  pfree(items);
325
0
  pfree(values);
326
0
  pfree(isnull);
327
0
  pfree(mss);
328
329
  /* Compute the 'degree of validity' as (supporting/total). */
330
0
  return (n_supporting_rows * 1.0 / numrows);
331
0
}
332
333
/*
334
 * detects functional dependencies between groups of columns
335
 *
336
 * Generates all possible subsets of columns (variations) and computes
337
 * the degree of validity for each one. For example when creating statistics
338
 * on three columns (a,b,c) there are 9 possible dependencies
339
 *
340
 *     two columns        three columns
341
 *     -----------        -------------
342
 *     (a) -> b         (a,b) -> c
343
 *     (a) -> c         (a,c) -> b
344
 *     (b) -> a         (b,c) -> a
345
 *     (b) -> c
346
 *     (c) -> a
347
 *     (c) -> b
348
 */
349
MVDependencies *
350
statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
351
               VacAttrStats **stats)
352
0
{
353
0
  int     i,
354
0
        j,
355
0
        k;
356
0
  int     numattrs;
357
0
  int      *attnums;
358
359
  /* result */
360
0
  MVDependencies *dependencies = NULL;
361
362
0
  numattrs = bms_num_members(attrs);
363
364
  /*
365
   * Transform the bms into an array, to make accessing i-th member easier.
366
   */
367
0
  attnums = palloc(sizeof(int) * bms_num_members(attrs));
368
0
  i = 0;
369
0
  j = -1;
370
0
  while ((j = bms_next_member(attrs, j)) >= 0)
371
0
    attnums[i++] = j;
372
373
0
  Assert(numattrs >= 2);
374
375
  /*
376
   * We'll try build functional dependencies starting from the smallest ones
377
   * covering just 2 columns, to the largest ones, covering all columns
378
   * included in the statistics object.  We start from the smallest ones
379
   * because we want to be able to skip already implied ones.
380
   */
381
0
  for (k = 2; k <= numattrs; k++)
382
0
  {
383
0
    AttrNumber *dependency; /* array with k elements */
384
385
    /* prepare a DependencyGenerator of variation */
386
0
    DependencyGenerator DependencyGenerator = DependencyGenerator_init(numattrs, k);
387
388
    /* generate all possible variations of k values (out of n) */
389
0
    while ((dependency = DependencyGenerator_next(DependencyGenerator)))
390
0
    {
391
0
      double    degree;
392
0
      MVDependency *d;
393
394
      /* compute how valid the dependency seems */
395
0
      degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
396
397
      /*
398
       * if the dependency seems entirely invalid, don't store it
399
       */
400
0
      if (degree == 0.0)
401
0
        continue;
402
403
0
      d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
404
0
                     + k * sizeof(AttrNumber));
405
406
      /* copy the dependency (and keep the indexes into stxkeys) */
407
0
      d->degree = degree;
408
0
      d->nattributes = k;
409
0
      for (i = 0; i < k; i++)
410
0
        d->attributes[i] = attnums[dependency[i]];
411
412
      /* initialize the list of dependencies */
413
0
      if (dependencies == NULL)
414
0
      {
415
0
        dependencies
416
0
          = (MVDependencies *) palloc0(sizeof(MVDependencies));
417
418
0
        dependencies->magic = STATS_DEPS_MAGIC;
419
0
        dependencies->type = STATS_DEPS_TYPE_BASIC;
420
0
        dependencies->ndeps = 0;
421
0
      }
422
423
0
      dependencies->ndeps++;
424
0
      dependencies = (MVDependencies *) repalloc(dependencies,
425
0
                             offsetof(MVDependencies, deps)
426
0
                             + dependencies->ndeps * sizeof(MVDependency));
427
428
0
      dependencies->deps[dependencies->ndeps - 1] = d;
429
0
    }
430
431
    /*
432
     * we're done with variations of k elements, so free the
433
     * DependencyGenerator
434
     */
435
0
    DependencyGenerator_free(DependencyGenerator);
436
0
  }
437
438
0
  return dependencies;
439
0
}
440
441
442
/*
443
 * Serialize list of dependencies into a bytea value.
444
 */
445
bytea *
446
statext_dependencies_serialize(MVDependencies *dependencies)
447
0
{
448
0
  int     i;
449
0
  bytea    *output;
450
0
  char     *tmp;
451
0
  Size    len;
452
453
  /* we need to store ndeps, with a number of attributes for each one */
454
0
  len = VARHDRSZ + SizeOfDependencies
455
0
    + dependencies->ndeps * SizeOfDependency;
456
457
  /* and also include space for the actual attribute numbers and degrees */
458
0
  for (i = 0; i < dependencies->ndeps; i++)
459
0
    len += (sizeof(AttrNumber) * dependencies->deps[i]->nattributes);
460
461
0
  output = (bytea *) palloc0(len);
462
0
  SET_VARSIZE(output, len);
463
464
0
  tmp = VARDATA(output);
465
466
  /* Store the base struct values (magic, type, ndeps) */
467
0
  memcpy(tmp, &dependencies->magic, sizeof(uint32));
468
0
  tmp += sizeof(uint32);
469
0
  memcpy(tmp, &dependencies->type, sizeof(uint32));
470
0
  tmp += sizeof(uint32);
471
0
  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
472
0
  tmp += sizeof(uint32);
473
474
  /* store number of attributes and attribute numbers for each dependency */
475
0
  for (i = 0; i < dependencies->ndeps; i++)
476
0
  {
477
0
    MVDependency *d = dependencies->deps[i];
478
479
0
    memcpy(tmp, d, SizeOfDependency);
480
0
    tmp += SizeOfDependency;
481
482
0
    memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
483
0
    tmp += sizeof(AttrNumber) * d->nattributes;
484
485
0
    Assert(tmp <= ((char *) output + len));
486
0
  }
487
488
0
  return output;
489
0
}
490
491
/*
492
 * Reads serialized dependencies into MVDependencies structure.
493
 */
494
MVDependencies *
495
statext_dependencies_deserialize(bytea *data)
496
0
{
497
0
  int     i;
498
0
  Size    min_expected_size;
499
0
  MVDependencies *dependencies;
500
0
  char     *tmp;
501
502
0
  if (data == NULL)
503
0
    return NULL;
504
505
0
  if (VARSIZE_ANY_EXHDR(data) < SizeOfDependencies)
506
0
    elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
507
0
       VARSIZE_ANY_EXHDR(data), SizeOfDependencies);
508
509
  /* read the MVDependencies header */
510
0
  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
511
512
  /* initialize pointer to the data part (skip the varlena header) */
513
0
  tmp = VARDATA_ANY(data);
514
515
  /* read the header fields and perform basic sanity checks */
516
0
  memcpy(&dependencies->magic, tmp, sizeof(uint32));
517
0
  tmp += sizeof(uint32);
518
0
  memcpy(&dependencies->type, tmp, sizeof(uint32));
519
0
  tmp += sizeof(uint32);
520
0
  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
521
0
  tmp += sizeof(uint32);
522
523
0
  if (dependencies->magic != STATS_DEPS_MAGIC)
524
0
    elog(ERROR, "invalid dependency magic %d (expected %d)",
525
0
       dependencies->magic, STATS_DEPS_MAGIC);
526
527
0
  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
528
0
    elog(ERROR, "invalid dependency type %d (expected %d)",
529
0
       dependencies->type, STATS_DEPS_TYPE_BASIC);
530
531
0
  if (dependencies->ndeps == 0)
532
0
    ereport(ERROR,
533
0
        (errcode(ERRCODE_DATA_CORRUPTED),
534
0
         errmsg("invalid zero-length item array in MVDependencies")));
535
536
  /* what minimum bytea size do we expect for those parameters */
537
0
  min_expected_size = SizeOfDependencies +
538
0
    dependencies->ndeps * (SizeOfDependency +
539
0
                 sizeof(AttrNumber) * 2);
540
541
0
  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
542
0
    elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
543
0
       VARSIZE_ANY_EXHDR(data), min_expected_size);
544
545
  /* allocate space for the MCV items */
546
0
  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
547
0
              + (dependencies->ndeps * sizeof(MVDependency *)));
548
549
0
  for (i = 0; i < dependencies->ndeps; i++)
550
0
  {
551
0
    double    degree;
552
0
    AttrNumber  k;
553
0
    MVDependency *d;
554
555
    /* degree of validity */
556
0
    memcpy(&degree, tmp, sizeof(double));
557
0
    tmp += sizeof(double);
558
559
    /* number of attributes */
560
0
    memcpy(&k, tmp, sizeof(AttrNumber));
561
0
    tmp += sizeof(AttrNumber);
562
563
    /* is the number of attributes valid? */
564
0
    Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
565
566
    /* now that we know the number of attributes, allocate the dependency */
567
0
    d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
568
0
                   + (k * sizeof(AttrNumber)));
569
570
0
    d->degree = degree;
571
0
    d->nattributes = k;
572
573
    /* copy attribute numbers */
574
0
    memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
575
0
    tmp += sizeof(AttrNumber) * d->nattributes;
576
577
0
    dependencies->deps[i] = d;
578
579
    /* still within the bytea */
580
0
    Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
581
0
  }
582
583
  /* we should have consumed the whole bytea exactly */
584
0
  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
585
586
0
  return dependencies;
587
0
}
588
589
/*
590
 * dependency_is_fully_matched
591
 *    checks that a functional dependency is fully matched given clauses on
592
 *    attributes (assuming the clauses are suitable equality clauses)
593
 */
594
static bool
595
dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
596
0
{
597
0
  int     j;
598
599
  /*
600
   * Check that the dependency actually is fully covered by clauses. We have
601
   * to translate all attribute numbers, as those are referenced
602
   */
603
0
  for (j = 0; j < dependency->nattributes; j++)
604
0
  {
605
0
    int     attnum = dependency->attributes[j];
606
607
0
    if (!bms_is_member(attnum, attnums))
608
0
      return false;
609
0
  }
610
611
0
  return true;
612
0
}
613
614
/*
615
 * dependency_implies_attribute
616
 *    check that the attnum matches is implied by the functional dependency
617
 */
618
static bool
619
dependency_implies_attribute(MVDependency *dependency, AttrNumber attnum)
620
0
{
621
0
  if (attnum == dependency->attributes[dependency->nattributes - 1])
622
0
    return true;
623
624
0
  return false;
625
0
}
626
627
/*
628
 * statext_dependencies_load
629
 *    Load the functional dependencies for the indicated pg_statistic_ext tuple
630
 */
631
MVDependencies *
632
statext_dependencies_load(Oid mvoid)
633
0
{
634
0
  MVDependencies *result;
635
0
  bool    isnull;
636
0
  Datum   deps;
637
0
  HeapTuple htup;
638
639
0
  htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid));
640
0
  if (!HeapTupleIsValid(htup))
641
0
    elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
642
643
0
  deps = SysCacheGetAttr(STATEXTOID, htup,
644
0
               Anum_pg_statistic_ext_stxdependencies, &isnull);
645
0
  if (isnull)
646
0
    elog(ERROR,
647
0
       "requested statistic kind \"%c\" is not yet built for statistics object %u",
648
0
       STATS_EXT_DEPENDENCIES, mvoid);
649
650
0
  result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
651
652
0
  ReleaseSysCache(htup);
653
654
0
  return result;
655
0
}
656
657
/*
658
 * pg_dependencies_in   - input routine for type pg_dependencies.
659
 *
660
 * pg_dependencies is real enough to be a table column, but it has no operations
661
 * of its own, and disallows input too
662
 */
663
Datum
664
pg_dependencies_in(PG_FUNCTION_ARGS)
665
0
{
666
  /*
667
   * pg_node_list stores the data in binary form and parsing text input is
668
   * not needed, so disallow this.
669
   */
670
0
  ereport(ERROR,
671
0
      (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
672
0
       errmsg("cannot accept a value of type %s", "pg_dependencies")));
673
674
0
  PG_RETURN_VOID();     /* keep compiler quiet */
675
0
}
676
677
/*
678
 * pg_dependencies    - output routine for type pg_dependencies.
679
 */
680
Datum
681
pg_dependencies_out(PG_FUNCTION_ARGS)
682
0
{
683
0
  bytea    *data = PG_GETARG_BYTEA_PP(0);
684
0
  MVDependencies *dependencies = statext_dependencies_deserialize(data);
685
0
  int     i,
686
0
        j;
687
0
  StringInfoData str;
688
689
0
  initStringInfo(&str);
690
0
  appendStringInfoChar(&str, '{');
691
692
0
  for (i = 0; i < dependencies->ndeps; i++)
693
0
  {
694
0
    MVDependency *dependency = dependencies->deps[i];
695
696
0
    if (i > 0)
697
0
      appendStringInfoString(&str, ", ");
698
699
0
    appendStringInfoChar(&str, '"');
700
0
    for (j = 0; j < dependency->nattributes; j++)
701
0
    {
702
0
      if (j == dependency->nattributes - 1)
703
0
        appendStringInfoString(&str, " => ");
704
0
      else if (j > 0)
705
0
        appendStringInfoString(&str, ", ");
706
707
0
      appendStringInfo(&str, "%d", dependency->attributes[j]);
708
0
    }
709
0
    appendStringInfo(&str, "\": %f", dependency->degree);
710
0
  }
711
712
0
  appendStringInfoChar(&str, '}');
713
714
0
  PG_RETURN_CSTRING(str.data);
715
0
}
716
717
/*
718
 * pg_dependencies_recv   - binary input routine for type pg_dependencies.
719
 */
720
Datum
721
pg_dependencies_recv(PG_FUNCTION_ARGS)
722
0
{
723
0
  ereport(ERROR,
724
0
      (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
725
0
       errmsg("cannot accept a value of type %s", "pg_dependencies")));
726
727
0
  PG_RETURN_VOID();     /* keep compiler quiet */
728
0
}
729
730
/*
731
 * pg_dependencies_send   - binary output routine for type pg_dependencies.
732
 *
733
 * Functional dependencies are serialized in a bytea value (although the type
734
 * is named differently), so let's just send that.
735
 */
736
Datum
737
pg_dependencies_send(PG_FUNCTION_ARGS)
738
0
{
739
0
  return byteasend(fcinfo);
740
0
}
741
742
/*
743
 * dependency_is_compatible_clause
744
 *    Determines if the clause is compatible with functional dependencies
745
 *
746
 * Only clauses that have the form of equality to a pseudoconstant, or can be
747
 * interpreted that way, are currently accepted.  Furthermore the variable
748
 * part of the clause must be a simple Var belonging to the specified
749
 * relation, whose attribute number we return in *attnum on success.
750
 */
751
static bool
752
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
753
0
{
754
0
  RestrictInfo *rinfo = (RestrictInfo *) clause;
755
0
  Var      *var;
756
757
0
  if (!IsA(rinfo, RestrictInfo))
758
0
    return false;
759
760
  /* Pseudoconstants are not interesting (they couldn't contain a Var) */
761
0
  if (rinfo->pseudoconstant)
762
0
    return false;
763
764
  /* Clauses referencing multiple, or no, varnos are incompatible */
765
0
  if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
766
0
    return false;
767
768
0
  if (is_opclause(rinfo->clause))
769
0
  {
770
    /* If it's an opclause, check for Var = Const or Const = Var. */
771
0
    OpExpr     *expr = (OpExpr *) rinfo->clause;
772
773
    /* Only expressions with two arguments are candidates. */
774
0
    if (list_length(expr->args) != 2)
775
0
      return false;
776
777
    /* Make sure non-selected argument is a pseudoconstant. */
778
0
    if (is_pseudo_constant_clause(lsecond(expr->args)))
779
0
      var = linitial(expr->args);
780
0
    else if (is_pseudo_constant_clause(linitial(expr->args)))
781
0
      var = lsecond(expr->args);
782
0
    else
783
0
      return false;
784
785
    /*
786
     * If it's not an "=" operator, just ignore the clause, as it's not
787
     * compatible with functional dependencies.
788
     *
789
     * This uses the function for estimating selectivity, not the operator
790
     * directly (a bit awkward, but well ...).
791
     *
792
     * XXX this is pretty dubious; probably it'd be better to check btree
793
     * or hash opclass membership, so as not to be fooled by custom
794
     * selectivity functions, and to be more consistent with decisions
795
     * elsewhere in the planner.
796
     */
797
0
    if (get_oprrest(expr->opno) != F_EQSEL)
798
0
      return false;
799
800
    /* OK to proceed with checking "var" */
801
0
  }
802
0
  else if (not_clause((Node *) rinfo->clause))
803
0
  {
804
    /*
805
     * "NOT x" can be interpreted as "x = false", so get the argument and
806
     * proceed with seeing if it's a suitable Var.
807
     */
808
0
    var = (Var *) get_notclausearg(rinfo->clause);
809
0
  }
810
0
  else
811
0
  {
812
    /*
813
     * A boolean expression "x" can be interpreted as "x = true", so
814
     * proceed with seeing if it's a suitable Var.
815
     */
816
0
    var = (Var *) rinfo->clause;
817
0
  }
818
819
  /*
820
   * We may ignore any RelabelType node above the operand.  (There won't be
821
   * more than one, since eval_const_expressions has been applied already.)
822
   */
823
0
  if (IsA(var, RelabelType))
824
0
    var = (Var *) ((RelabelType *) var)->arg;
825
826
  /* We only support plain Vars for now */
827
0
  if (!IsA(var, Var))
828
0
    return false;
829
830
  /* Ensure Var is from the correct relation */
831
0
  if (var->varno != relid)
832
0
    return false;
833
834
  /* We also better ensure the Var is from the current level */
835
0
  if (var->varlevelsup != 0)
836
0
    return false;
837
838
  /* Also ignore system attributes (we don't allow stats on those) */
839
0
  if (!AttrNumberIsForUserDefinedAttr(var->varattno))
840
0
    return false;
841
842
0
  *attnum = var->varattno;
843
0
  return true;
844
0
}
845
846
/*
847
 * find_strongest_dependency
848
 *    find the strongest dependency on the attributes
849
 *
850
 * When applying functional dependencies, we start with the strongest
851
 * dependencies. That is, we select the dependency that:
852
 *
853
 * (a) has all attributes covered by equality clauses
854
 *
855
 * (b) has the most attributes
856
 *
857
 * (c) has the highest degree of validity
858
 *
859
 * This guarantees that we eliminate the most redundant conditions first
860
 * (see the comment in dependencies_clauselist_selectivity).
861
 */
862
static MVDependency *
863
find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
864
              Bitmapset *attnums)
865
0
{
866
0
  int     i;
867
0
  MVDependency *strongest = NULL;
868
869
  /* number of attnums in clauses */
870
0
  int     nattnums = bms_num_members(attnums);
871
872
  /*
873
   * Iterate over the MVDependency items and find the strongest one from the
874
   * fully-matched dependencies. We do the cheap checks first, before
875
   * matching it against the attnums.
876
   */
877
0
  for (i = 0; i < dependencies->ndeps; i++)
878
0
  {
879
0
    MVDependency *dependency = dependencies->deps[i];
880
881
    /*
882
     * Skip dependencies referencing more attributes than available
883
     * clauses, as those can't be fully matched.
884
     */
885
0
    if (dependency->nattributes > nattnums)
886
0
      continue;
887
888
0
    if (strongest)
889
0
    {
890
      /* skip dependencies on fewer attributes than the strongest. */
891
0
      if (dependency->nattributes < strongest->nattributes)
892
0
        continue;
893
894
      /* also skip weaker dependencies when attribute count matches */
895
0
      if (strongest->nattributes == dependency->nattributes &&
896
0
        strongest->degree > dependency->degree)
897
0
        continue;
898
0
    }
899
900
    /*
901
     * this dependency is stronger, but we must still check that it's
902
     * fully matched to these attnums. We perform this check last as it's
903
     * slightly more expensive than the previous checks.
904
     */
905
0
    if (dependency_is_fully_matched(dependency, attnums))
906
0
      strongest = dependency; /* save new best match */
907
0
  }
908
909
0
  return strongest;
910
0
}
911
912
/*
913
 * dependencies_clauselist_selectivity
914
 *    Return the estimated selectivity of (a subset of) the given clauses
915
 *    using functional dependency statistics, or 1.0 if no useful functional
916
 *    dependency statistic exists.
917
 *
918
 * 'estimatedclauses' is an output argument that gets a bit set corresponding
919
 * to the (zero-based) list index of each clause that is included in the
920
 * estimated selectivity.
921
 *
922
 * Given equality clauses on attributes (a,b) we find the strongest dependency
923
 * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
924
 * dependency, we then combine the per-clause selectivities using the formula
925
 *
926
 *     P(a,b) = P(a) * [f + (1-f)*P(b)]
927
 *
928
 * where 'f' is the degree of the dependency.
929
 *
930
 * With clauses on more than two attributes, the dependencies are applied
931
 * recursively, starting with the widest/strongest dependencies. For example
932
 * P(a,b,c) is first split like this:
933
 *
934
 *     P(a,b,c) = P(a,b) * [f + (1-f)*P(c)]
935
 *
936
 * assuming (a,b=>c) is the strongest dependency.
937
 */
938
Selectivity
939
dependencies_clauselist_selectivity(PlannerInfo *root,
940
                  List *clauses,
941
                  int varRelid,
942
                  JoinType jointype,
943
                  SpecialJoinInfo *sjinfo,
944
                  RelOptInfo *rel,
945
                  Bitmapset **estimatedclauses)
946
0
{
947
0
  Selectivity s1 = 1.0;
948
0
  ListCell   *l;
949
0
  Bitmapset  *clauses_attnums = NULL;
950
0
  StatisticExtInfo *stat;
951
0
  MVDependencies *dependencies;
952
0
  AttrNumber *list_attnums;
953
0
  int     listidx;
954
955
  /* initialize output argument */
956
0
  *estimatedclauses = NULL;
957
958
  /* check if there's any stats that might be useful for us. */
959
0
  if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
960
0
    return 1.0;
961
962
0
  list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
963
0
                     list_length(clauses));
964
965
  /*
966
   * Pre-process the clauses list to extract the attnums seen in each item.
967
   * We need to determine if there's any clauses which will be useful for
968
   * dependency selectivity estimations. Along the way we'll record all of
969
   * the attnums for each clause in a list which we'll reference later so we
970
   * don't need to repeat the same work again. We'll also keep track of all
971
   * attnums seen.
972
   */
973
0
  listidx = 0;
974
0
  foreach(l, clauses)
975
0
  {
976
0
    Node     *clause = (Node *) lfirst(l);
977
0
    AttrNumber  attnum;
978
979
0
    if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
980
0
    {
981
0
      list_attnums[listidx] = attnum;
982
0
      clauses_attnums = bms_add_member(clauses_attnums, attnum);
983
0
    }
984
0
    else
985
0
      list_attnums[listidx] = InvalidAttrNumber;
986
987
0
    listidx++;
988
0
  }
989
990
  /*
991
   * If there's not at least two distinct attnums then reject the whole list
992
   * of clauses. We must return 1.0 so the calling function's selectivity is
993
   * unaffected.
994
   */
995
0
  if (bms_num_members(clauses_attnums) < 2)
996
0
  {
997
0
    pfree(list_attnums);
998
0
    return 1.0;
999
0
  }
1000
1001
  /* find the best suited statistics object for these attnums */
1002
0
  stat = choose_best_statistics(rel->statlist, clauses_attnums,
1003
0
                  STATS_EXT_DEPENDENCIES);
1004
1005
  /* if no matching stats could be found then we've nothing to do */
1006
0
  if (!stat)
1007
0
  {
1008
0
    pfree(list_attnums);
1009
0
    return 1.0;
1010
0
  }
1011
1012
  /* load the dependency items stored in the statistics object */
1013
0
  dependencies = statext_dependencies_load(stat->statOid);
1014
1015
  /*
1016
   * Apply the dependencies recursively, starting with the widest/strongest
1017
   * ones, and proceeding to the smaller/weaker ones. At the end of each
1018
   * round we factor in the selectivity of clauses on the implied attribute,
1019
   * and remove the clauses from the list.
1020
   */
1021
0
  while (true)
1022
0
  {
1023
0
    Selectivity s2 = 1.0;
1024
0
    MVDependency *dependency;
1025
1026
    /* the widest/strongest dependency, fully matched by clauses */
1027
0
    dependency = find_strongest_dependency(stat, dependencies,
1028
0
                         clauses_attnums);
1029
1030
    /* if no suitable dependency was found, we're done */
1031
0
    if (!dependency)
1032
0
      break;
1033
1034
    /*
1035
     * We found an applicable dependency, so find all the clauses on the
1036
     * implied attribute - with dependency (a,b => c) we look for clauses
1037
     * on 'c'.
1038
     */
1039
0
    listidx = -1;
1040
0
    foreach(l, clauses)
1041
0
    {
1042
0
      Node     *clause;
1043
1044
0
      listidx++;
1045
1046
      /*
1047
       * Skip incompatible clauses, and ones we've already estimated on.
1048
       */
1049
0
      if (list_attnums[listidx] == InvalidAttrNumber ||
1050
0
        bms_is_member(listidx, *estimatedclauses))
1051
0
        continue;
1052
1053
      /*
1054
       * Technically we could find more than one clause for a given
1055
       * attnum. Since these clauses must be equality clauses, we choose
1056
       * to only take the selectivity estimate from the final clause in
1057
       * the list for this attnum. If the attnum happens to be compared
1058
       * to a different Const in another clause then no rows will match
1059
       * anyway. If it happens to be compared to the same Const, then
1060
       * ignoring the additional clause is just the thing to do.
1061
       */
1062
0
      if (dependency_implies_attribute(dependency,
1063
0
                       list_attnums[listidx]))
1064
0
      {
1065
0
        clause = (Node *) lfirst(l);
1066
1067
0
        s2 = clause_selectivity(root, clause, varRelid, jointype,
1068
0
                    sjinfo);
1069
1070
        /* mark this one as done, so we don't touch it again. */
1071
0
        *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1072
1073
        /*
1074
         * Mark that we've got and used the dependency on this clause.
1075
         * We'll want to ignore this when looking for the next
1076
         * strongest dependency above.
1077
         */
1078
0
        clauses_attnums = bms_del_member(clauses_attnums,
1079
0
                         list_attnums[listidx]);
1080
0
      }
1081
0
    }
1082
1083
    /*
1084
     * Now factor in the selectivity for all the "implied" clauses into
1085
     * the final one, using this formula:
1086
     *
1087
     * P(a,b) = P(a) * (f + (1-f) * P(b))
1088
     *
1089
     * where 'f' is the degree of validity of the dependency.
1090
     */
1091
0
    s1 *= (dependency->degree + (1 - dependency->degree) * s2);
1092
0
  }
1093
1094
0
  pfree(dependencies);
1095
0
  pfree(list_attnums);
1096
1097
0
  return s1;
1098
0
}