YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/postgres/src/bin/psql/stringutils.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * psql - the PostgreSQL interactive terminal
3
 *
4
 * Copyright (c) 2000-2018, PostgreSQL Global Development Group
5
 *
6
 * src/bin/psql/stringutils.c
7
 */
8
#include "postgres_fe.h"
9
10
#include <ctype.h>
11
12
#include "common.h"
13
#include "stringutils.h"
14
15
314
/*
 * Byte length of the multibyte character that starts at s, as computed by
 * PQmblen() for encoding e, clamped by strnlen() so that the result never
 * reaches past the NUL terminator of s.
 *
 * s is expanded twice, so it must not have side effects.
 */
#define PQmblenBounded(s, e)  strnlen((s), PQmblen((s), (e)))
16
17
18
/*
 * finish_token
 *
 * Terminate the token that ends just before 'end' and return the position
 * from which the next scan should resume.
 *
 * If 'end' already points at the string's terminating NUL, nothing is
 * written and 'end' itself is returned.  Otherwise a NUL is stored at 'end':
 * when the character there belongs to 'whitespace' it is simply overwritten;
 * when it does not, the remainder of the string (including its NUL) is first
 * shifted right by one byte so that the character is preserved for the next
 * call.  The caller's buffer must have room for that extra byte.
 */
static char *
finish_token(char *end, const char *whitespace)
{
	if (*end == '\0')
		return end;				/* at end of string, so no extra work */

	if (!strchr(whitespace, *end))
		memmove(end + 1, end, strlen(end) + 1);
	*end = '\0';

	return end + 1;
}

/*
 * Replacement for strtok() (a.k.a. poor man's flex)
 *
 * Splits a string into tokens, returning one token per call, then NULL
 * when no more tokens exist in the given string.
 *
 * The calling convention is similar to that of strtok, but with more
 * frammishes.
 *
 * s -			string to parse, if NULL continue parsing the last string
 * whitespace - set of whitespace characters that separate tokens
 * delim -		set of non-whitespace separator characters (or NULL)
 * quote -		set of characters that can quote a token (NULL if none)
 * escape -		character that can quote quotes (0 if none)
 * e_strings -	if true, treat E'...' syntax as a valid token
 * del_quotes - if true, strip quotes from the returned token, else return
 *				it exactly as found in the string
 * encoding -	the active character-set encoding
 *
 * Characters in 'delim', if any, will be returned as single-character
 * tokens unless part of a quoted token.
 *
 * Double occurrences of the quoting character are always taken to represent
 * a single quote character in the data.  If escape isn't 0, then escape
 * followed by anything (except \0) is a data character too.
 *
 * The combination of e_strings and del_quotes both true is not currently
 * handled.  This could be fixed but it's not needed anywhere at the moment.
 *
 * Note that the string s is _not_ overwritten in this implementation: the
 * function works on a private copy kept in static storage.  The returned
 * token points into that copy, so it is only valid until the copy is
 * released, which happens on the next call with a non-NULL s and on the
 * call that detects the end of the string.  Because of the static state,
 * only one string can be tokenized at a time.
 *
 * NB: it's okay to vary delim, quote, and escape from one call to the
 * next on a single source string, but changing whitespace is a bad idea
 * since you might lose data.
 */
char *
strtokx(const char *s,
		const char *whitespace,
		const char *delim,
		const char *quote,
		char escape,
		bool e_strings,
		bool del_quotes,
		int encoding)
{
	static char *storage = NULL;	/* store the local copy of the user's
									 * string here */
	static char *string = NULL; /* pointer into storage where to continue on
								 * next call */

	/* variously abused variables: */
	size_t		offset;			/* size_t: holds strspn()/strcspn() results */
	char	   *start;
	char	   *p;

	if (s)
	{
		free(storage);

		/*
		 * We may need extra space to insert delimiter nulls for adjacent
		 * tokens.  2X the space is a gross overestimate, but it's unlikely
		 * that this code will be used on huge strings anyway.
		 */
		storage = pg_malloc(2 * strlen(s) + 1);
		strcpy(storage, s);
		string = storage;
	}

	if (!storage)
		return NULL;

	/* skip leading whitespace */
	offset = strspn(string, whitespace);
	start = &string[offset];

	/* end of string reached? */
	if (*start == '\0')
	{
		/* technically we don't need to free here, but we're nice */
		free(storage);
		storage = NULL;
		string = NULL;
		return NULL;
	}

	/* test if delimiter character: it is returned as a one-byte token */
	if (delim && strchr(delim, *start))
	{
		string = finish_token(start + 1, whitespace);
		return start;
	}

	/* check for E string */
	p = start;
	if (e_strings &&
		(*p == 'E' || *p == 'e') &&
		p[1] == '\'')
	{
		quote = "'";
		escape = '\\';			/* if std strings before, not any more */
		p++;
	}

	/* test if quoting character */
	if (quote && strchr(quote, *p))
	{
		/* okay, we have a quoted token, now scan for the closer */
		char		thisquote = *p++;

		for (; *p; p += PQmblenBounded(p, encoding))
		{
			if (*p == escape && p[1] != '\0')
				p++;			/* process escaped anything */
			else if (*p == thisquote && p[1] == thisquote)
				p++;			/* process doubled quote */
			else if (*p == thisquote)
			{
				p++;			/* skip trailing quote */
				break;
			}
		}

		/* terminate the token before any in-place quote stripping */
		string = finish_token(p, whitespace);

		/* Clean up the token if caller wants that */
		if (del_quotes)
			strip_quotes(start, thisquote, escape, encoding);

		return start;
	}

	/*
	 * Otherwise no quoting character.  Scan till next whitespace, delimiter
	 * or quote.  NB: at this point, *start is known not to be '\0',
	 * whitespace, delim, or quote, so we will consume at least one character.
	 */
	offset = strcspn(start, whitespace);

	if (delim)
	{
		size_t		offset2 = strcspn(start, delim);

		if (offset > offset2)
			offset = offset2;
	}

	if (quote)
	{
		size_t		offset2 = strcspn(start, quote);

		if (offset > offset2)
			offset = offset2;
	}

	string = finish_token(start + offset, whitespace);

	return start;
}
229
230
231
/*
 * strip_quotes
 *
 * Undo quoting on the string at 'source', rewriting it in place.
 *
 * source -	  NUL-terminated string to modify; must not be NULL
 * quote -	  the quoting character; must not be '\0'
 * escape -	  escape character, or 0 if there is none
 * encoding - the active character-set encoding
 *
 * A leading 'quote' and a 'quote' that is the very last character are
 * dropped.  Inside the string, two adjacent 'quote' characters collapse to
 * one, and an 'escape' that is followed by another character is dropped so
 * that the following character is kept literally.  Characters are copied
 * one multibyte character at a time.
 */
void
strip_quotes(char *source, char quote, char escape, int encoding)
{
	const char *in;
	char	   *out;

	Assert(source != NULL);
	Assert(quote != '\0');

	in = source;
	out = source;

	/* step over an opening quote, if present */
	if (*in != '\0' && *in == quote)
		in++;

	for (;;)
	{
		char		first = *in;
		int			remaining;

		if (first == '\0')
			break;

		/* a quote that ends the string is the closing quote: stop here */
		if (first == quote && in[1] == '\0')
			break;

		/*
		 * The first byte of a doubled quote, or an escape that has
		 * something after it, is dropped; the character after it is
		 * copied below.
		 */
		if ((first == quote && in[1] == quote) ||
			(first == escape && in[1] != '\0'))
			in++;

		/* copy one whole (possibly multibyte) character */
		for (remaining = PQmblenBounded(in, encoding); remaining != 0; remaining--)
			*out++ = *in++;
	}

	*out = '\0';
}
274
275
276
/*
 * quote_if_needed
 *
 * Inverse of strip_quotes().  Returns NULL when "source" can stand for
 * itself without any quoting or escaping; otherwise returns a malloc'd,
 * quoted and escaped copy that the caller must free.
 *
 * source -			string to examine; must not be NULL
 * entails_quote -	if any of these characters occurs, outer quotes are needed
 * quote -			doubled wherever it occurs, and added at both ends;
 *					must not be '\0'
 * escape -			doubled wherever it occurs
 * encoding -		the active character-set encoding
 *
 * Do not use this as a substitute for PQescapeStringConn().  Use it for
 * strings to be parsed by strtokx() or psql_scan_slash_option().
 */
char *
quote_if_needed(const char *source, const char *entails_quote,
				char quote, char escape, int encoding)
{
	const char *in;
	char	   *result;
	char	   *out;
	bool		quoting_required = false;

	Assert(source != NULL);
	Assert(quote != '\0');

	/* worst case: every byte doubled, plus two quotes and the NUL */
	result = pg_malloc(2 * strlen(source) + 3); /* excess */
	out = result;

	*out++ = quote;

	for (in = source; *in;)
	{
		char		first = *in;
		int			remaining;

		if (first == quote || first == escape)
		{
			/* emit one extra copy so the character ends up doubled */
			quoting_required = true;
			*out++ = first;
		}
		else if (strchr(entails_quote, first))
			quoting_required = true;

		/* copy one whole (possibly multibyte) character */
		for (remaining = PQmblenBounded(in, encoding); remaining != 0; remaining--)
			*out++ = *in++;
	}

	*out++ = quote;
	*out = '\0';

	if (quoting_required)
		return result;

	/* nothing needed quoting: discard the speculative copy */
	free(result);
	return NULL;
}