YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/postgres/src/backend/regex/regexec.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * re_*exec and friends - match REs
3
 *
4
 * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
5
 *
6
 * Development of this software was funded, in part, by Cray Research Inc.,
7
 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8
 * Corporation, none of whom are responsible for the results.  The author
9
 * thanks all of them.
10
 *
11
 * Redistribution and use in source and binary forms -- with or without
12
 * modification -- are permitted for any purpose, provided that
13
 * redistributions in source form retain this entire copyright notice and
14
 * indicate the origin and nature of any modifications.
15
 *
16
 * I'd appreciate being given credit for this package in the documentation
17
 * of software which uses it, but that is not a requirement.
18
 *
19
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
22
 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
 *
30
 * src/backend/regex/regexec.c
31
 *
32
 */
33
34
#include "regex/regguts.h"
35
36
37
38
/* lazy-DFA representation */
39
struct arcp
40
{               /* "pointer" to an outarc */
41
  struct sset *ss;
42
  color   co;
43
};
44
45
struct sset
46
{               /* state set */
47
  unsigned   *states;     /* pointer to bitvector */
48
  unsigned  hash;     /* hash of bitvector */
49
58.8k
#define  HASH(bv, nw)  (((nw) == 1) ? 
*(bv)56.1k
:
hash(bv, nw)2.66k
)
50
201k
#define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && 
(9.44k
(nw) == 19.44k
|| \
51
9.44k
    
memcmp(731
VS731
(bv),
VS731
((ss)->states), (nw)*sizeof(unsigned)) == 0))
52
  int     flags;
53
16.9k
#define  STARTER   01      /* the initial state set */
54
654k
#define  POSTSTATE   02      /* includes the goal state */
55
15.9k
#define  LOCKED    04      /* locked in cache */
56
101k
#define  NOPROGRESS  010    /* zero-progress state set */
57
  struct arcp ins;      /* chain of inarcs pointing here */
58
  chr      *lastseen;   /* last entered on arrival here */
59
  struct sset **outs;     /* outarc vector indexed by color */
60
  struct arcp *inchain;   /* chain-pointer vector for outarcs */
61
};
62
63
struct dfa
64
{
65
  int     nssets;     /* size of cache */
66
  int     nssused;    /* how many entries occupied yet */
67
  int     nstates;    /* number of states */
68
  int     ncolors;    /* length of outarc and inchain vectors */
69
  int     wordsper;   /* length of state-set bitvectors */
70
  struct sset *ssets;     /* state-set cache */
71
  unsigned   *statesarea;   /* bitvector storage */
72
  unsigned   *work;     /* pointer to work area within statesarea */
73
  struct sset **outsarea;   /* outarc-vector storage */
74
  struct arcp *incarea;   /* inchain storage */
75
  struct cnfa *cnfa;
76
  struct colormap *cm;
77
  chr      *lastpost;   /* location of last cache-flushed success */
78
  chr      *lastnopr;   /* location of last cache-flushed NOPROGRESS */
79
  struct sset *search;    /* replacement-search-pointer memory */
80
  int     cptsmalloced; /* were the areas individually malloced? */
81
  char     *mallocarea;   /* self, or master malloced area, or NULL */
82
};
83
84
#define WORK  1       /* number of work bitvectors needed */
85
86
/* setup for non-malloc allocation for small cases */
87
31.9k
#define FEWSTATES 20      /* must be less than UBITS */
88
2.40k
#define FEWCOLORS 15
89
struct smalldfa
90
{
91
  struct dfa  dfa;
92
  struct sset ssets[FEWSTATES * 2];
93
  unsigned  statesarea[FEWSTATES * 2 + WORK];
94
  struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
95
  struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
96
};
97
98
157
#define DOMALLOC  ((struct smalldfa *)NULL)  /* force malloc */
99
100
101
102
/* internal variables, bundled for easy passing around */
103
struct vars
104
{
105
  regex_t    *re;
106
  struct guts *g;
107
  int     eflags;     /* copies of arguments */
108
  size_t    nmatch;
109
  regmatch_t *pmatch;
110
  rm_detail_t *details;
111
  chr      *start;      /* start of string */
112
  chr      *search_start; /* search start of string */
113
  chr      *stop;     /* just past end of string */
114
  int     err;      /* error code if any (0 none) */
115
  struct dfa **subdfas;   /* per-tree-subre DFAs */
116
  struct dfa **ladfas;    /* per-lacon-subre DFAs */
117
  struct sset **lblastcss;  /* per-lacon-subre lookbehind restart data */
118
  chr     **lblastcp;   /* per-lacon-subre lookbehind restart data */
119
  struct smalldfa dfa1;
120
  struct smalldfa dfa2;
121
};
122
123
34.5k
#define VISERR(vv)  ((vv)->err != 0)  /* have we seen an error yet? */
124
34.5k
#define ISERR() VISERR(v)
125
0
#define VERR(vv,e)  ((vv)->err = ((vv)->err ? (vv)->err : (e)))
126
0
#define ERR(e)  VERR(v, e)    /* record an error */
127
31.9k
#define NOERR() {if (ISERR()) 
return v->err0
;} /* if error seen, return it */
128
400
#define OFF(p)  ((p) - v->start)
129
#define LOFF(p) ((long)OFF(p))
130
131
132
133
/*
134
 * forward declarations
135
 */
136
/* === regexec.c === */
137
static struct dfa *getsubdfa(struct vars *, struct subre *);
138
static struct dfa *getladfa(struct vars *, int);
139
static int  find(struct vars *, struct cnfa *, struct colormap *);
140
static int  cfind(struct vars *, struct cnfa *, struct colormap *);
141
static int  cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
142
static void zapallsubs(regmatch_t *, size_t);
143
static void zaptreesubs(struct vars *, struct subre *);
144
static void subset(struct vars *, struct subre *, chr *, chr *);
145
static int  cdissect(struct vars *, struct subre *, chr *, chr *);
146
static int  ccondissect(struct vars *, struct subre *, chr *, chr *);
147
static int  crevcondissect(struct vars *, struct subre *, chr *, chr *);
148
static int  cbrdissect(struct vars *, struct subre *, chr *, chr *);
149
static int  caltdissect(struct vars *, struct subre *, chr *, chr *);
150
static int  citerdissect(struct vars *, struct subre *, chr *, chr *);
151
static int  creviterdissect(struct vars *, struct subre *, chr *, chr *);
152
153
/* === rege_dfa.c === */
154
static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
155
static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
156
static int  matchuntil(struct vars *, struct dfa *, chr *, struct sset **, chr **);
157
static chr *lastcold(struct vars *, struct dfa *);
158
static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
159
static void freedfa(struct dfa *);
160
static unsigned hash(unsigned *, int);
161
static struct sset *initialize(struct vars *, struct dfa *, chr *);
162
static struct sset *miss(struct vars *, struct dfa *, struct sset *, color, chr *, chr *);
163
static int  lacon(struct vars *, struct cnfa *, chr *, color);
164
static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
165
static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
166
167
168
/*
169
 * pg_regexec - match regular expression
170
 */
171
int
172
pg_regexec(regex_t *re,
173
       const chr *string,
174
       size_t len,
175
       size_t search_start,
176
       rm_detail_t *details,
177
       size_t nmatch,
178
       regmatch_t pmatch[],
179
       int flags)
180
15.6k
{
181
15.6k
  struct vars var;
182
15.6k
  register struct vars *v = &var;
183
15.6k
  int     st;
184
15.6k
  size_t    n;
185
15.6k
  size_t    i;
186
15.6k
  int     backref;
187
188
15.6k
#define  LOCALMAT  
200
189
15.6k
  regmatch_t  mat[LOCALMAT];
190
191
15.6k
#define  LOCALDFAS   40
192
15.6k
  struct dfa *subdfas[LOCALDFAS];
193
194
  /* sanity checks */
195
15.6k
  if (re == NULL || string == NULL || re->re_magic != REMAGIC)
196
0
    return REG_INVARG;
197
15.6k
  if (re->re_csize != sizeof(chr))
198
0
    return REG_MIXED;
199
200
  /* Initialize locale-dependent support */
201
15.6k
  pg_set_regex_collation(re->re_collation);
202
203
  /* setup */
204
15.6k
  v->re = re;
205
15.6k
  v->g = (struct guts *) re->re_guts;
206
15.6k
  if ((v->g->cflags & REG_EXPECT) && 
details == NULL0
)
207
0
    return REG_INVARG;
208
15.6k
  if (v->g->info & REG_UIMPOSSIBLE)
209
0
    return REG_NOMATCH;
210
15.6k
  backref = (v->g->info & REG_UBACKREF) ? 
10
: 0;
211
15.6k
  v->eflags = flags;
212
15.6k
  if (v->g->cflags & REG_NOSUB)
213
31
    nmatch = 0;       /* override client */
214
15.6k
  v->nmatch = nmatch;
215
15.6k
  if (backref)
216
0
  {
217
    /* need work area */
218
0
    if (v->g->nsub + 1 <= LOCALMAT)
219
0
      v->pmatch = mat;
220
0
    else
221
0
      v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
222
0
                        sizeof(regmatch_t));
223
0
    if (v->pmatch == NULL)
224
0
      return REG_ESPACE;
225
0
    v->nmatch = v->g->nsub + 1;
226
0
  }
227
15.6k
  else
228
15.6k
    v->pmatch = pmatch;
229
15.6k
  v->details = details;
230
15.6k
  v->start = (chr *) string;
231
15.6k
  v->search_start = (chr *) string + search_start;
232
15.6k
  v->stop = (chr *) string + len;
233
15.6k
  v->err = 0;
234
15.6k
  v->subdfas = NULL;
235
15.6k
  v->ladfas = NULL;
236
15.6k
  v->lblastcss = NULL;
237
15.6k
  v->lblastcp = NULL;
238
  /* below this point, "goto cleanup" will behave sanely */
239
240
15.6k
  assert(v->g->ntree >= 0);
241
15.6k
  n = (size_t) v->g->ntree;
242
15.6k
  if (n <= LOCALDFAS)
243
15.6k
    v->subdfas = subdfas;
244
0
  else
245
0
  {
246
0
    v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
247
0
    if (v->subdfas == NULL)
248
0
    {
249
0
      st = REG_ESPACE;
250
0
      goto cleanup;
251
0
    }
252
0
  }
253
95.0k
  
for (i = 0; 15.6k
i < n;
i++79.3k
)
254
79.3k
    v->subdfas[i] = NULL;
255
256
15.6k
  assert(v->g->nlacons >= 0);
257
15.6k
  n = (size_t) v->g->nlacons;
258
15.6k
  if (n > 0)
259
1
  {
260
1
    v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
261
1
    if (v->ladfas == NULL)
262
0
    {
263
0
      st = REG_ESPACE;
264
0
      goto cleanup;
265
0
    }
266
3
    
for (i = 0; 1
i < n;
i++2
)
267
2
      v->ladfas[i] = NULL;
268
1
    v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
269
1
    v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
270
1
    if (v->lblastcss == NULL || v->lblastcp == NULL)
271
0
    {
272
0
      st = REG_ESPACE;
273
0
      goto cleanup;
274
0
    }
275
3
    
for (i = 0; 1
i < n;
i++2
)
276
2
    {
277
2
      v->lblastcss[i] = NULL;
278
2
      v->lblastcp[i] = NULL;
279
2
    }
280
1
  }
281
282
  /* do it */
283
15.6k
  assert(v->g->tree != NULL);
284
15.6k
  if (backref)
285
0
    st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
286
15.6k
  else
287
15.6k
    st = find(v, &v->g->tree->cnfa, &v->g->cmap);
288
289
  /* copy (portion of) match vector over if necessary */
290
15.6k
  if (st == REG_OKAY && 
v->pmatch != pmatch1.74k
&&
nmatch > 00
)
291
0
  {
292
0
    zapallsubs(pmatch, nmatch);
293
0
    n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
294
0
    memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
295
0
  }
296
297
  /* clean up */
298
15.6k
cleanup:
299
15.6k
  if (v->pmatch != pmatch && 
v->pmatch != mat0
)
300
0
    FREE(v->pmatch);
301
15.6k
  if (v->subdfas != NULL)
302
15.6k
  {
303
15.6k
    n = (size_t) v->g->ntree;
304
95.0k
    for (i = 0; i < n; 
i++79.3k
)
305
79.3k
    {
306
79.3k
      if (v->subdfas[i] != NULL)
307
156
        freedfa(v->subdfas[i]);
308
79.3k
    }
309
15.6k
    if (v->subdfas != subdfas)
310
0
      FREE(v->subdfas);
311
15.6k
  }
312
15.6k
  if (v->ladfas != NULL)
313
1
  {
314
1
    n = (size_t) v->g->nlacons;
315
3
    for (i = 0; i < n; 
i++2
)
316
2
    {
317
2
      if (v->ladfas[i] != NULL)
318
1
        freedfa(v->ladfas[i]);
319
2
    }
320
1
    FREE(v->ladfas);
321
1
  }
322
15.6k
  if (v->lblastcss != NULL)
323
1
    FREE(v->lblastcss);
324
15.6k
  if (v->lblastcp != NULL)
325
1
    FREE(v->lblastcp);
326
327
15.6k
  return st;
328
15.6k
}
329
330
/*
331
 * getsubdfa - create or re-fetch the DFA for a tree subre node
332
 *
333
 * We only need to create the DFA once per overall regex execution.
334
 * The DFA will be freed by the cleanup step in pg_regexec().
335
 */
336
static struct dfa *
337
getsubdfa(struct vars *v,
338
      struct subre *t)
339
156
{
340
156
  if (v->subdfas[t->id] == NULL)
341
156
  {
342
156
    v->subdfas[t->id] = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
343
156
    if (ISERR())
344
0
      return NULL;
345
156
  }
346
156
  return v->subdfas[t->id];
347
156
}
348
349
/*
350
 * getladfa - create or re-fetch the DFA for a LACON subre node
351
 *
352
 * Same as above, but for LACONs.
353
 */
354
static struct dfa *
355
getladfa(struct vars *v,
356
     int n)
357
1
{
358
1
  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
359
360
1
  if (v->ladfas[n] == NULL)
361
1
  {
362
1
    struct subre *sub = &v->g->lacons[n];
363
364
1
    v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
365
1
    if (ISERR())
366
0
      return NULL;
367
1
  }
368
1
  return v->ladfas[n];
369
1
}
370
371
/*
372
 * find - find a match for the main NFA (no-complications case)
373
 */
374
static int
375
find(struct vars *v,
376
   struct cnfa *cnfa,
377
   struct colormap *cm)
378
15.6k
{
379
15.6k
  struct dfa *s;
380
15.6k
  struct dfa *d;
381
15.6k
  chr      *begin;
382
15.6k
  chr      *end = NULL;
383
15.6k
  chr      *cold;
384
15.6k
  chr      *open;     /* open and close of range of possible starts */
385
15.6k
  chr      *close;
386
15.6k
  int     hitend;
387
15.6k
  int     shorter = (v->g->tree->flags & SHORTER) ? 
10
: 0;
388
389
  /* first, a shot with the search RE */
390
15.6k
  s = newdfa(v, &v->g->search, cm, &v->dfa1);
391
15.6k
  assert(!(ISERR() && s != NULL));
392
15.6k
  NOERR();
393
15.6k
  MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
394
15.6k
  cold = NULL;
395
15.6k
  close = shortest(v, s, v->search_start, v->search_start, v->stop,
396
15.6k
           &cold, (int *) NULL);
397
15.6k
  freedfa(s);
398
15.6k
  NOERR();
399
15.6k
  if (v->g->cflags & REG_EXPECT)
400
0
  {
401
0
    assert(v->details != NULL);
402
0
    if (cold != NULL)
403
0
      v->details->rm_extend.rm_so = OFF(cold);
404
0
    else
405
0
      v->details->rm_extend.rm_so = OFF(v->stop);
406
0
    v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
407
0
  }
408
15.6k
  if (close == NULL)      /* not found */
409
13.9k
    return REG_NOMATCH;
410
1.74k
  if (v->nmatch == 0)      /* found, don't need exact location */
411
1.58k
    return REG_OKAY;
412
413
  /* find starting point and match */
414
161
  assert(cold != NULL);
415
161
  open = cold;
416
161
  cold = NULL;
417
161
  MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
418
161
  d = newdfa(v, cnfa, cm, &v->dfa1);
419
161
  assert(!(ISERR() && d != NULL));
420
161
  NOERR();
421
940
  for (begin = open; begin <= close; 
begin++779
)
422
940
  {
423
940
    MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
424
940
    if (shorter)
425
0
      end = shortest(v, d, begin, begin, v->stop,
426
0
               (chr **) NULL, &hitend);
427
940
    else
428
940
      end = longest(v, d, begin, v->stop, &hitend);
429
940
    if (ISERR())
430
0
    {
431
0
      freedfa(d);
432
0
      return v->err;
433
0
    }
434
940
    if (hitend && 
cold == NULL50
)
435
50
      cold = begin;
436
940
    if (end != NULL)
437
161
      break;       /* NOTE BREAK OUT */
438
940
  }
439
161
  assert(end != NULL);   /* search RE succeeded so loop should */
440
161
  freedfa(d);
441
442
  /* and pin down details */
443
161
  assert(v->nmatch > 0);
444
161
  v->pmatch[0].rm_so = OFF(begin);
445
161
  v->pmatch[0].rm_eo = OFF(end);
446
161
  if (v->g->cflags & REG_EXPECT)
447
0
  {
448
0
    if (cold != NULL)
449
0
      v->details->rm_extend.rm_so = OFF(cold);
450
0
    else
451
0
      v->details->rm_extend.rm_so = OFF(v->stop);
452
0
    v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
453
0
  }
454
161
  if (v->nmatch == 1)      /* no need for submatches */
455
0
    return REG_OKAY;
456
457
  /* find submatches */
458
161
  zapallsubs(v->pmatch, v->nmatch);
459
161
  return cdissect(v, v->g->tree, begin, end);
460
161
}
461
462
/*
463
 * cfind - find a match for the main NFA (with complications)
464
 */
465
static int
466
cfind(struct vars *v,
467
    struct cnfa *cnfa,
468
    struct colormap *cm)
469
0
{
470
0
  struct dfa *s;
471
0
  struct dfa *d;
472
0
  chr      *cold;
473
0
  int     ret;
474
475
0
  s = newdfa(v, &v->g->search, cm, &v->dfa1);
476
0
  NOERR();
477
0
  d = newdfa(v, cnfa, cm, &v->dfa2);
478
0
  if (ISERR())
479
0
  {
480
0
    assert(d == NULL);
481
0
    freedfa(s);
482
0
    return v->err;
483
0
  }
484
485
0
  ret = cfindloop(v, cnfa, cm, d, s, &cold);
486
487
0
  freedfa(d);
488
0
  freedfa(s);
489
0
  NOERR();
490
0
  if (v->g->cflags & REG_EXPECT)
491
0
  {
492
0
    assert(v->details != NULL);
493
0
    if (cold != NULL)
494
0
      v->details->rm_extend.rm_so = OFF(cold);
495
0
    else
496
0
      v->details->rm_extend.rm_so = OFF(v->stop);
497
0
    v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
498
0
  }
499
0
  return ret;
500
0
}
501
502
/*
503
 * cfindloop - the heart of cfind
504
 */
505
static int
506
cfindloop(struct vars *v,
507
      struct cnfa *cnfa,
508
      struct colormap *cm,
509
      struct dfa *d,
510
      struct dfa *s,
511
      chr **coldp)      /* where to put coldstart pointer */
512
0
{
513
0
  chr      *begin;
514
0
  chr      *end;
515
0
  chr      *cold;
516
0
  chr      *open;     /* open and close of range of possible starts */
517
0
  chr      *close;
518
0
  chr      *estart;
519
0
  chr      *estop;
520
0
  int     er;
521
0
  int     shorter = v->g->tree->flags & SHORTER;
522
0
  int     hitend;
523
524
0
  assert(d != NULL && s != NULL);
525
0
  cold = NULL;
526
0
  close = v->search_start;
527
0
  do
528
0
  {
529
    /* Search with the search RE for match range at/beyond "close" */
530
0
    MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
531
0
    close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
532
0
    if (ISERR())
533
0
    {
534
0
      *coldp = cold;
535
0
      return v->err;
536
0
    }
537
0
    if (close == NULL)
538
0
      break;       /* no more possible match anywhere */
539
0
    assert(cold != NULL);
540
0
    open = cold;
541
0
    cold = NULL;
542
    /* Search for matches starting between "open" and "close" inclusive */
543
0
    MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
544
0
    for (begin = open; begin <= close; begin++)
545
0
    {
546
0
      MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
547
0
      estart = begin;
548
0
      estop = v->stop;
549
0
      for (;;)
550
0
      {
551
        /* Here we use the top node's detailed RE */
552
0
        if (shorter)
553
0
          end = shortest(v, d, begin, estart,
554
0
                   estop, (chr **) NULL, &hitend);
555
0
        else
556
0
          end = longest(v, d, begin, estop,
557
0
                  &hitend);
558
0
        if (ISERR())
559
0
        {
560
0
          *coldp = cold;
561
0
          return v->err;
562
0
        }
563
0
        if (hitend && cold == NULL)
564
0
          cold = begin;
565
0
        if (end == NULL)
566
0
          break;   /* no match with this begin point, try next */
567
0
        MDEBUG(("tentative end %ld\n", LOFF(end)));
568
        /* Dissect the potential match to see if it really matches */
569
0
        zapallsubs(v->pmatch, v->nmatch);
570
0
        er = cdissect(v, v->g->tree, begin, end);
571
0
        if (er == REG_OKAY)
572
0
        {
573
0
          if (v->nmatch > 0)
574
0
          {
575
0
            v->pmatch[0].rm_so = OFF(begin);
576
0
            v->pmatch[0].rm_eo = OFF(end);
577
0
          }
578
0
          *coldp = cold;
579
0
          return REG_OKAY;
580
0
        }
581
0
        if (er != REG_NOMATCH)
582
0
        {
583
0
          ERR(er);
584
0
          *coldp = cold;
585
0
          return er;
586
0
        }
587
        /* Try next longer/shorter match with same begin point */
588
0
        if (shorter)
589
0
        {
590
0
          if (end == estop)
591
0
            break; /* no more, so try next begin point */
592
0
          estart = end + 1;
593
0
        }
594
0
        else
595
0
        {
596
0
          if (end == begin)
597
0
            break; /* no more, so try next begin point */
598
0
          estop = end - 1;
599
0
        }
600
0
      }         /* end loop over endpoint positions */
601
0
    }           /* end loop over beginning positions */
602
603
    /*
604
     * If we get here, there is no possible match starting at or before
605
     * "close", so consider matches beyond that.  We'll do a fresh search
606
     * with the search RE to find a new promising match range.
607
     */
608
0
    close++;
609
0
  } while (close < v->stop);
610
611
0
  *coldp = cold;
612
0
  return REG_NOMATCH;
613
0
}
614
615
/*
616
 * zapallsubs - initialize all subexpression matches to "no match"
617
 */
618
static void
619
zapallsubs(regmatch_t *p,
620
       size_t n)
621
161
{
622
161
  size_t    i;
623
624
1.26k
  for (i = n - 1; i > 0; 
i--1.10k
)
625
1.10k
  {
626
1.10k
    p[i].rm_so = -1;
627
1.10k
    p[i].rm_eo = -1;
628
1.10k
  }
629
161
}
630
631
/*
632
 * zaptreesubs - initialize subexpressions within subtree to "no match"
633
 */
634
static void
635
zaptreesubs(struct vars *v,
636
      struct subre *t)
637
342
{
638
342
  if (t->op == '(')
639
114
  {
640
114
    int     n = t->subno;
641
642
114
    assert(n > 0);
643
114
    if ((size_t) n < v->nmatch)
644
114
    {
645
114
      v->pmatch[n].rm_so = -1;
646
114
      v->pmatch[n].rm_eo = -1;
647
114
    }
648
114
  }
649
650
342
  if (t->left != NULL)
651
114
    zaptreesubs(v, t->left);
652
342
  if (t->right != NULL)
653
0
    zaptreesubs(v, t->right);
654
342
}
655
656
/*
657
 * subset - set subexpression match data for a successful subre
658
 */
659
static void
660
subset(struct vars *v,
661
     struct subre *sub,
662
     chr *begin,
663
     chr *end)
664
39
{
665
39
  int     n = sub->subno;
666
667
39
  assert(n > 0);
668
39
  if ((size_t) n >= v->nmatch)
669
0
    return;
670
671
39
  MDEBUG(("setting %d\n", n));
672
39
  v->pmatch[n].rm_so = OFF(begin);
673
39
  v->pmatch[n].rm_eo = OFF(end);
674
39
}
675
676
/*
677
 * cdissect - check backrefs and determine subexpression matches
678
 *
679
 * cdissect recursively processes a subre tree to check matching of backrefs
680
 * and/or identify submatch boundaries for capture nodes.  The proposed match
681
 * runs from "begin" to "end" (not including "end"), and we are basically
682
 * "dissecting" it to see where the submatches are.
683
 *
684
 * Before calling any level of cdissect, the caller must have run the node's
685
 * DFA and found that the proposed substring satisfies the DFA.  (We make
686
 * the caller do that because in concatenation and iteration nodes, it's
687
 * much faster to check all the substrings against the child DFAs before we
688
 * recurse.)  Also, caller must have cleared subexpression match data via
689
 * zaptreesubs (or zapallsubs at the top level).
690
 */
691
static int            /* regexec return code */
692
cdissect(struct vars *v,
693
     struct subre *t,
694
     chr *begin,      /* beginning of relevant substring */
695
     chr *end)        /* end of same */
696
356
{
697
356
  int     er;
698
699
356
  assert(t != NULL);
700
356
  MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
701
702
  /* handy place to check for operation cancel */
703
356
  if (CANCEL_REQUESTED(v->re))
704
0
    return REG_CANCEL;
705
  /* ... and stack overrun */
706
356
  if (STACK_TOO_DEEP(v->re))
707
0
    return REG_ETOOBIG;
708
709
356
  switch (t->op)
710
356
  {
711
239
    case '=':       /* terminal node */
712
239
      assert(t->left == NULL && t->right == NULL);
713
239
      er = REG_OKAY;   /* no action, parent did the work */
714
239
      break;
715
0
    case 'b':       /* back reference */
716
0
      assert(t->left == NULL && t->right == NULL);
717
0
      er = cbrdissect(v, t, begin, end);
718
0
      break;
719
78
    case '.':       /* concatenation */
720
78
      assert(t->left != NULL && t->right != NULL);
721
78
      if (t->left->flags & SHORTER)  /* reverse scan */
722
0
        er = crevcondissect(v, t, begin, end);
723
78
      else
724
78
        er = ccondissect(v, t, begin, end);
725
78
      break;
726
0
    case '|':       /* alternation */
727
0
      assert(t->left != NULL);
728
0
      er = caltdissect(v, t, begin, end);
729
0
      break;
730
0
    case '*':       /* iteration */
731
0
      assert(t->left != NULL);
732
0
      if (t->left->flags & SHORTER) /* reverse scan */
733
0
        er = creviterdissect(v, t, begin, end);
734
0
      else
735
0
        er = citerdissect(v, t, begin, end);
736
0
      break;
737
39
    case '(':       /* capturing */
738
39
      assert(t->left != NULL && t->right == NULL);
739
39
      assert(t->subno > 0);
740
39
      er = cdissect(v, t->left, begin, end);
741
39
      if (er == REG_OKAY)
742
39
        subset(v, t, begin, end);
743
39
      break;
744
0
    default:
745
0
      er = REG_ASSERT;
746
0
      break;
747
356
  }
748
749
  /*
750
   * We should never have a match failure unless backrefs lurk below;
751
   * otherwise, either caller failed to check the DFA, or there's some
752
   * inconsistency between the DFA and the node's innards.
753
   */
754
356
  assert(er != REG_NOMATCH || (t->flags & BACKR));
755
756
356
  return er;
757
356
}
758
759
/*
760
 * ccondissect - dissect match for concatenation node
761
 */
762
static int            /* regexec return code */
763
ccondissect(struct vars *v,
764
      struct subre *t,
765
      chr *begin,     /* beginning of relevant substring */
766
      chr *end)     /* end of same */
767
78
{
768
78
  struct dfa *d;
769
78
  struct dfa *d2;
770
78
  chr      *mid;
771
78
  int     er;
772
773
78
  assert(t->op == '.');
774
78
  assert(t->left != NULL && t->left->cnfa.nstates > 0);
775
78
  assert(t->right != NULL && t->right->cnfa.nstates > 0);
776
78
  assert(!(t->left->flags & SHORTER));
777
778
78
  d = getsubdfa(v, t->left);
779
78
  NOERR();
780
78
  d2 = getsubdfa(v, t->right);
781
78
  NOERR();
782
78
  MDEBUG(("cconcat %d\n", t->id));
783
784
  /* pick a tentative midpoint */
785
78
  mid = longest(v, d, begin, end, (int *) NULL);
786
78
  NOERR();
787
78
  if (mid == NULL)
788
0
    return REG_NOMATCH;
789
78
  MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
790
791
  /* iterate until satisfaction or failure */
792
78
  for (;;)
793
192
  {
794
    /* try this midpoint on for size */
795
192
    if (longest(v, d2, mid, end, (int *) NULL) == end)
796
78
    {
797
78
      er = cdissect(v, t->left, begin, mid);
798
78
      if (er == REG_OKAY)
799
78
      {
800
78
        er = cdissect(v, t->right, mid, end);
801
78
        if (er == REG_OKAY)
802
78
        {
803
          /* satisfaction */
804
78
          MDEBUG(("successful\n"));
805
78
          return REG_OKAY;
806
78
        }
807
78
      }
808
0
      if (er != REG_NOMATCH)
809
0
        return er;
810
0
    }
811
114
    NOERR();
812
813
    /* that midpoint didn't work, find a new one */
814
114
    if (mid == begin)
815
0
    {
816
      /* all possibilities exhausted */
817
0
      MDEBUG(("%d no midpoint\n", t->id));
818
0
      return REG_NOMATCH;
819
0
    }
820
114
    mid = longest(v, d, begin, mid - 1, (int *) NULL);
821
114
    NOERR();
822
114
    if (mid == NULL)
823
0
    {
824
      /* failed to find a new one */
825
0
      MDEBUG(("%d failed midpoint\n", t->id));
826
0
      return REG_NOMATCH;
827
0
    }
828
114
    MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
829
114
    zaptreesubs(v, t->left);
830
114
    zaptreesubs(v, t->right);
831
114
  }
832
833
  /* can't get here */
834
0
  return REG_ASSERT;
835
78
}
836
837
/*
838
 * crevcondissect - dissect match for concatenation node, shortest-first
839
 */
840
static int            /* regexec return code */
841
crevcondissect(struct vars *v,
842
         struct subre *t,
843
         chr *begin,    /* beginning of relevant substring */
844
         chr *end)    /* end of same */
845
0
{
846
0
  struct dfa *d;
847
0
  struct dfa *d2;
848
0
  chr      *mid;
849
0
  int     er;
850
851
0
  assert(t->op == '.');
852
0
  assert(t->left != NULL && t->left->cnfa.nstates > 0);
853
0
  assert(t->right != NULL && t->right->cnfa.nstates > 0);
854
0
  assert(t->left->flags & SHORTER);
855
856
0
  d = getsubdfa(v, t->left);
857
0
  NOERR();
858
0
  d2 = getsubdfa(v, t->right);
859
0
  NOERR();
860
0
  MDEBUG(("crevcon %d\n", t->id));
861
862
  /* pick a tentative midpoint */
863
0
  mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
864
0
  NOERR();
865
0
  if (mid == NULL)
866
0
    return REG_NOMATCH;
867
0
  MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
868
869
  /* iterate until satisfaction or failure */
870
0
  for (;;)
871
0
  {
872
    /* try this midpoint on for size */
873
0
    if (longest(v, d2, mid, end, (int *) NULL) == end)
874
0
    {
875
0
      er = cdissect(v, t->left, begin, mid);
876
0
      if (er == REG_OKAY)
877
0
      {
878
0
        er = cdissect(v, t->right, mid, end);
879
0
        if (er == REG_OKAY)
880
0
        {
881
          /* satisfaction */
882
0
          MDEBUG(("successful\n"));
883
0
          return REG_OKAY;
884
0
        }
885
0
      }
886
0
      if (er != REG_NOMATCH)
887
0
        return er;
888
0
    }
889
0
    NOERR();
890
891
    /* that midpoint didn't work, find a new one */
892
0
    if (mid == end)
893
0
    {
894
      /* all possibilities exhausted */
895
0
      MDEBUG(("%d no midpoint\n", t->id));
896
0
      return REG_NOMATCH;
897
0
    }
898
0
    mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
899
0
    NOERR();
900
0
    if (mid == NULL)
901
0
    {
902
      /* failed to find a new one */
903
0
      MDEBUG(("%d failed midpoint\n", t->id));
904
0
      return REG_NOMATCH;
905
0
    }
906
0
    MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
907
0
    zaptreesubs(v, t->left);
908
0
    zaptreesubs(v, t->right);
909
0
  }
910
911
  /* can't get here */
912
0
  return REG_ASSERT;
913
0
}
914
915
/*
916
 * cbrdissect - dissect match for backref node
917
 */
918
static int            /* regexec return code */
919
cbrdissect(struct vars *v,
920
       struct subre *t,
921
       chr *begin,      /* beginning of relevant substring */
922
       chr *end)      /* end of same */
923
0
{
924
0
  int     n = t->subno;
925
0
  size_t    numreps;
926
0
  size_t    tlen;
927
0
  size_t    brlen;
928
0
  chr      *brstring;
929
0
  chr      *p;
930
0
  int     min = t->min;
931
0
  int     max = t->max;
932
933
0
  assert(t != NULL);
934
0
  assert(t->op == 'b');
935
0
  assert(n >= 0);
936
0
  assert((size_t) n < v->nmatch);
937
938
0
  MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));
939
940
  /* get the backreferenced string */
941
0
  if (v->pmatch[n].rm_so == -1)
942
0
    return REG_NOMATCH;
943
0
  brstring = v->start + v->pmatch[n].rm_so;
944
0
  brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
945
946
  /* special cases for zero-length strings */
947
0
  if (brlen == 0)
948
0
  {
949
    /*
950
     * matches only if target is zero length, but any number of
951
     * repetitions can be considered to be present
952
     */
953
0
    if (begin == end && min <= max)
954
0
    {
955
0
      MDEBUG(("cbackref matched trivially\n"));
956
0
      return REG_OKAY;
957
0
    }
958
0
    return REG_NOMATCH;
959
0
  }
960
0
  if (begin == end)
961
0
  {
962
    /* matches only if zero repetitions are okay */
963
0
    if (min == 0)
964
0
    {
965
0
      MDEBUG(("cbackref matched trivially\n"));
966
0
      return REG_OKAY;
967
0
    }
968
0
    return REG_NOMATCH;
969
0
  }
970
971
  /*
972
   * check target length to see if it could possibly be an allowed number of
973
   * repetitions of brstring
974
   */
975
0
  assert(end > begin);
976
0
  tlen = end - begin;
977
0
  if (tlen % brlen != 0)
978
0
    return REG_NOMATCH;
979
0
  numreps = tlen / brlen;
980
0
  if (numreps < min || (numreps > max && max != DUPINF))
981
0
    return REG_NOMATCH;
982
983
  /* okay, compare the actual string contents */
984
0
  p = begin;
985
0
  while (numreps-- > 0)
986
0
  {
987
0
    if ((*v->g->compare) (brstring, p, brlen) != 0)
988
0
      return REG_NOMATCH;
989
0
    p += brlen;
990
0
  }
991
992
0
  MDEBUG(("cbackref matched\n"));
993
0
  return REG_OKAY;
994
0
}
995
996
/*
997
 * caltdissect - dissect match for alternation node
998
 */
999
static int            /* regexec return code */
1000
caltdissect(struct vars *v,
1001
      struct subre *t,
1002
      chr *begin,     /* beginning of relevant substring */
1003
      chr *end)     /* end of same */
1004
0
{
1005
0
  struct dfa *d;
1006
0
  int     er;
1007
1008
  /* We loop, rather than tail-recurse, to handle a chain of alternatives */
1009
0
  while (t != NULL)
1010
0
  {
1011
0
    assert(t->op == '|');
1012
0
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
1013
1014
0
    MDEBUG(("calt n%d\n", t->id));
1015
1016
0
    d = getsubdfa(v, t->left);
1017
0
    NOERR();
1018
0
    if (longest(v, d, begin, end, (int *) NULL) == end)
1019
0
    {
1020
0
      MDEBUG(("calt matched\n"));
1021
0
      er = cdissect(v, t->left, begin, end);
1022
0
      if (er != REG_NOMATCH)
1023
0
        return er;
1024
0
    }
1025
0
    NOERR();
1026
1027
0
    t = t->right;
1028
0
  }
1029
1030
0
  return REG_NOMATCH;
1031
0
}
1032
1033
/*
1034
 * citerdissect - dissect match for iteration node
1035
 */
1036
static int            /* regexec return code */
1037
citerdissect(struct vars *v,
1038
       struct subre *t,
1039
       chr *begin,    /* beginning of relevant substring */
1040
       chr *end)      /* end of same */
1041
0
{
1042
0
  struct dfa *d;
1043
0
  chr     **endpts;
1044
0
  chr      *limit;
1045
0
  int     min_matches;
1046
0
  size_t    max_matches;
1047
0
  int     nverified;
1048
0
  int     k;
1049
0
  int     i;
1050
0
  int     er;
1051
1052
0
  assert(t->op == '*');
1053
0
  assert(t->left != NULL && t->left->cnfa.nstates > 0);
1054
0
  assert(!(t->left->flags & SHORTER));
1055
0
  assert(begin <= end);
1056
1057
  /*
1058
   * For the moment, assume the minimum number of matches is 1.  If zero
1059
   * matches are allowed, and the target string is empty, we are allowed to
1060
   * match regardless of the contents of the iter node --- but we would
1061
   * prefer to match once, so that capturing parens get set.  (An example of
1062
   * the concern here is a pattern like "()*\1", which historically this
1063
   * code has allowed to succeed.)  Therefore, we deal with the zero-matches
1064
   * case at the bottom, after failing to find any other way to match.
1065
   */
1066
0
  min_matches = t->min;
1067
0
  if (min_matches <= 0)
1068
0
    min_matches = 1;
1069
1070
  /*
1071
   * We need workspace to track the endpoints of each sub-match.  Normally
1072
   * we consider only nonzero-length sub-matches, so there can be at most
1073
   * end-begin of them.  However, if min is larger than that, we will also
1074
   * consider zero-length sub-matches in order to find enough matches.
1075
   *
1076
   * For convenience, endpts[0] contains the "begin" pointer and we store
1077
   * sub-match endpoints in endpts[1..max_matches].
1078
   */
1079
0
  max_matches = end - begin;
1080
0
  if (max_matches > t->max && t->max != DUPINF)
1081
0
    max_matches = t->max;
1082
0
  if (max_matches < min_matches)
1083
0
    max_matches = min_matches;
1084
0
  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1085
0
  if (endpts == NULL)
1086
0
    return REG_ESPACE;
1087
0
  endpts[0] = begin;
1088
1089
0
  d = getsubdfa(v, t->left);
1090
0
  if (ISERR())
1091
0
  {
1092
0
    FREE(endpts);
1093
0
    return v->err;
1094
0
  }
1095
0
  MDEBUG(("citer %d\n", t->id));
1096
1097
  /*
1098
   * Our strategy is to first find a set of sub-match endpoints that are
1099
   * valid according to the child node's DFA, and then recursively dissect
1100
   * each sub-match to confirm validity.  If any validity check fails,
1101
   * backtrack that sub-match and try again.  And, when we next try for a
1102
   * validity check, we need not recheck any successfully verified
1103
   * sub-matches that we didn't move the endpoints of.  nverified remembers
1104
   * how many sub-matches are currently known okay.
1105
   */
1106
1107
  /* initialize to consider first sub-match */
1108
0
  nverified = 0;
1109
0
  k = 1;
1110
0
  limit = end;
1111
1112
  /* iterate until satisfaction or failure */
1113
0
  while (k > 0)
1114
0
  {
1115
    /* try to find an endpoint for the k'th sub-match */
1116
0
    endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1117
0
    if (ISERR())
1118
0
    {
1119
0
      FREE(endpts);
1120
0
      return v->err;
1121
0
    }
1122
0
    if (endpts[k] == NULL)
1123
0
    {
1124
      /* no match possible, so see if we can shorten previous one */
1125
0
      k--;
1126
0
      goto backtrack;
1127
0
    }
1128
0
    MDEBUG(("%d: working endpoint %d: %ld\n",
1129
0
        t->id, k, LOFF(endpts[k])));
1130
1131
    /* k'th sub-match can no longer be considered verified */
1132
0
    if (nverified >= k)
1133
0
      nverified = k - 1;
1134
1135
0
    if (endpts[k] != end)
1136
0
    {
1137
      /* haven't reached end yet, try another iteration if allowed */
1138
0
      if (k >= max_matches)
1139
0
      {
1140
        /* must try to shorten some previous match */
1141
0
        k--;
1142
0
        goto backtrack;
1143
0
      }
1144
1145
      /* reject zero-length match unless necessary to achieve min */
1146
0
      if (endpts[k] == endpts[k - 1] &&
1147
0
        (k >= min_matches || min_matches - k < end - endpts[k]))
1148
0
        goto backtrack;
1149
1150
0
      k++;
1151
0
      limit = end;
1152
0
      continue;
1153
0
    }
1154
1155
    /*
1156
     * We've identified a way to divide the string into k sub-matches that
1157
     * works so far as the child DFA can tell.  If k is an allowed number
1158
     * of matches, start the slow part: recurse to verify each sub-match.
1159
     * We always have k <= max_matches, needn't check that.
1160
     */
1161
0
    if (k < min_matches)
1162
0
      goto backtrack;
1163
1164
0
    MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1165
1166
0
    for (i = nverified + 1; i <= k; i++)
1167
0
    {
1168
0
      zaptreesubs(v, t->left);
1169
0
      er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
1170
0
      if (er == REG_OKAY)
1171
0
      {
1172
0
        nverified = i;
1173
0
        continue;
1174
0
      }
1175
0
      if (er == REG_NOMATCH)
1176
0
        break;
1177
      /* oops, something failed */
1178
0
      FREE(endpts);
1179
0
      return er;
1180
0
    }
1181
1182
0
    if (i > k)
1183
0
    {
1184
      /* satisfaction */
1185
0
      MDEBUG(("%d successful\n", t->id));
1186
0
      FREE(endpts);
1187
0
      return REG_OKAY;
1188
0
    }
1189
1190
    /* i'th match failed to verify, so backtrack it */
1191
0
    k = i;
1192
1193
0
backtrack:
1194
1195
    /*
1196
     * Must consider shorter versions of the k'th sub-match.  However,
1197
     * we'll only ask for a zero-length match if necessary.
1198
     */
1199
0
    while (k > 0)
1200
0
    {
1201
0
      chr      *prev_end = endpts[k - 1];
1202
1203
0
      if (endpts[k] > prev_end)
1204
0
      {
1205
0
        limit = endpts[k] - 1;
1206
0
        if (limit > prev_end ||
1207
0
          (k < min_matches && min_matches - k >= end - prev_end))
1208
0
        {
1209
          /* break out of backtrack loop, continue the outer one */
1210
0
          break;
1211
0
        }
1212
0
      }
1213
      /* can't shorten k'th sub-match any more, consider previous one */
1214
0
      k--;
1215
0
    }
1216
0
  }
1217
1218
  /* all possibilities exhausted */
1219
0
  FREE(endpts);
1220
1221
  /*
1222
   * Now consider the possibility that we can match to a zero-length string
1223
   * by using zero repetitions.
1224
   */
1225
0
  if (t->min == 0 && begin == end)
1226
0
  {
1227
0
    MDEBUG(("%d allowing zero matches\n", t->id));
1228
0
    return REG_OKAY;
1229
0
  }
1230
1231
0
  MDEBUG(("%d failed\n", t->id));
1232
0
  return REG_NOMATCH;
1233
0
}
1234
1235
/*
1236
 * creviterdissect - dissect match for iteration node, shortest-first
1237
 */
1238
static int            /* regexec return code */
1239
creviterdissect(struct vars *v,
1240
        struct subre *t,
1241
        chr *begin,   /* beginning of relevant substring */
1242
        chr *end)   /* end of same */
1243
0
{
1244
0
  struct dfa *d;
1245
0
  chr     **endpts;
1246
0
  chr      *limit;
1247
0
  int     min_matches;
1248
0
  size_t    max_matches;
1249
0
  int     nverified;
1250
0
  int     k;
1251
0
  int     i;
1252
0
  int     er;
1253
1254
0
  assert(t->op == '*');
1255
0
  assert(t->left != NULL && t->left->cnfa.nstates > 0);
1256
0
  assert(t->left->flags & SHORTER);
1257
0
  assert(begin <= end);
1258
1259
  /*
1260
   * If zero matches are allowed, and target string is empty, just declare
1261
   * victory.  OTOH, if target string isn't empty, zero matches can't work
1262
   * so we pretend the min is 1.
1263
   */
1264
0
  min_matches = t->min;
1265
0
  if (min_matches <= 0)
1266
0
  {
1267
0
    if (begin == end)
1268
0
      return REG_OKAY;
1269
0
    min_matches = 1;
1270
0
  }
1271
1272
  /*
1273
   * We need workspace to track the endpoints of each sub-match.  Normally
1274
   * we consider only nonzero-length sub-matches, so there can be at most
1275
   * end-begin of them.  However, if min is larger than that, we will also
1276
   * consider zero-length sub-matches in order to find enough matches.
1277
   *
1278
   * For convenience, endpts[0] contains the "begin" pointer and we store
1279
   * sub-match endpoints in endpts[1..max_matches].
1280
   */
1281
0
  max_matches = end - begin;
1282
0
  if (max_matches > t->max && t->max != DUPINF)
1283
0
    max_matches = t->max;
1284
0
  if (max_matches < min_matches)
1285
0
    max_matches = min_matches;
1286
0
  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1287
0
  if (endpts == NULL)
1288
0
    return REG_ESPACE;
1289
0
  endpts[0] = begin;
1290
1291
0
  d = getsubdfa(v, t->left);
1292
0
  if (ISERR())
1293
0
  {
1294
0
    FREE(endpts);
1295
0
    return v->err;
1296
0
  }
1297
0
  MDEBUG(("creviter %d\n", t->id));
1298
1299
  /*
1300
   * Our strategy is to first find a set of sub-match endpoints that are
1301
   * valid according to the child node's DFA, and then recursively dissect
1302
   * each sub-match to confirm validity.  If any validity check fails,
1303
   * backtrack that sub-match and try again.  And, when we next try for a
1304
   * validity check, we need not recheck any successfully verified
1305
   * sub-matches that we didn't move the endpoints of.  nverified remembers
1306
   * how many sub-matches are currently known okay.
1307
   */
1308
1309
  /* initialize to consider first sub-match */
1310
0
  nverified = 0;
1311
0
  k = 1;
1312
0
  limit = begin;
1313
1314
  /* iterate until satisfaction or failure */
1315
0
  while (k > 0)
1316
0
  {
1317
    /* disallow zero-length match unless necessary to achieve min */
1318
0
    if (limit == endpts[k - 1] &&
1319
0
      limit != end &&
1320
0
      (k >= min_matches || min_matches - k < end - limit))
1321
0
      limit++;
1322
1323
    /* if this is the last allowed sub-match, it must reach to the end */
1324
0
    if (k >= max_matches)
1325
0
      limit = end;
1326
1327
    /* try to find an endpoint for the k'th sub-match */
1328
0
    endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1329
0
               (chr **) NULL, (int *) NULL);
1330
0
    if (ISERR())
1331
0
    {
1332
0
      FREE(endpts);
1333
0
      return v->err;
1334
0
    }
1335
0
    if (endpts[k] == NULL)
1336
0
    {
1337
      /* no match possible, so see if we can lengthen previous one */
1338
0
      k--;
1339
0
      goto backtrack;
1340
0
    }
1341
0
    MDEBUG(("%d: working endpoint %d: %ld\n",
1342
0
        t->id, k, LOFF(endpts[k])));
1343
1344
    /* k'th sub-match can no longer be considered verified */
1345
0
    if (nverified >= k)
1346
0
      nverified = k - 1;
1347
1348
0
    if (endpts[k] != end)
1349
0
    {
1350
      /* haven't reached end yet, try another iteration if allowed */
1351
0
      if (k >= max_matches)
1352
0
      {
1353
        /* must try to lengthen some previous match */
1354
0
        k--;
1355
0
        goto backtrack;
1356
0
      }
1357
1358
0
      k++;
1359
0
      limit = endpts[k - 1];
1360
0
      continue;
1361
0
    }
1362
1363
    /*
1364
     * We've identified a way to divide the string into k sub-matches that
1365
     * works so far as the child DFA can tell.  If k is an allowed number
1366
     * of matches, start the slow part: recurse to verify each sub-match.
1367
     * We always have k <= max_matches, needn't check that.
1368
     */
1369
0
    if (k < min_matches)
1370
0
      goto backtrack;
1371
1372
0
    MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1373
1374
0
    for (i = nverified + 1; i <= k; i++)
1375
0
    {
1376
0
      zaptreesubs(v, t->left);
1377
0
      er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
1378
0
      if (er == REG_OKAY)
1379
0
      {
1380
0
        nverified = i;
1381
0
        continue;
1382
0
      }
1383
0
      if (er == REG_NOMATCH)
1384
0
        break;
1385
      /* oops, something failed */
1386
0
      FREE(endpts);
1387
0
      return er;
1388
0
    }
1389
1390
0
    if (i > k)
1391
0
    {
1392
      /* satisfaction */
1393
0
      MDEBUG(("%d successful\n", t->id));
1394
0
      FREE(endpts);
1395
0
      return REG_OKAY;
1396
0
    }
1397
1398
    /* i'th match failed to verify, so backtrack it */
1399
0
    k = i;
1400
1401
0
backtrack:
1402
1403
    /*
1404
     * Must consider longer versions of the k'th sub-match.
1405
     */
1406
0
    while (k > 0)
1407
0
    {
1408
0
      if (endpts[k] < end)
1409
0
      {
1410
0
        limit = endpts[k] + 1;
1411
        /* break out of backtrack loop, continue the outer one */
1412
0
        break;
1413
0
      }
1414
      /* can't lengthen k'th sub-match any more, consider previous one */
1415
0
      k--;
1416
0
    }
1417
0
  }
1418
1419
  /* all possibilities exhausted */
1420
0
  MDEBUG(("%d failed\n", t->id));
1421
0
  FREE(endpts);
1422
0
  return REG_NOMATCH;
1423
0
}
1424
1425
1426
1427
#include "rege_dfa.c"