YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/postgres/src/backend/parser/scansup.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * scansup.c
4
 *    support routines for the lex/flex scanner, used by both the normal
5
 * backend as well as the bootstrap backend
6
 *
7
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8
 * Portions Copyright (c) 1994, Regents of the University of California
9
 *
10
 *
11
 * IDENTIFICATION
12
 *    src/backend/parser/scansup.c
13
 *
14
 *-------------------------------------------------------------------------
15
 */
16
#include "postgres.h"
17
18
#include <ctype.h>
19
20
#include "parser/scansup.h"
21
#include "mb/pg_wchar.h"
22
23
24
/* ----------------
 *    scanstr
 *
 * Build a copy of the input string in which escape sequences have been
 * replaced by the characters they stand for:
 *
 *    ''              a doubled single quote collapses to one quote
 *    \b \f \n \r \t  the usual C control characters
 *    \ooo            one to three octal digits give the character code
 *    \x              any other backslashed character stands for itself
 *
 * A NULL or empty input produces an empty string.
 *
 * The result is palloc'd and should eventually be pfree'd by the caller.
 * ----------------
 */
char *
scanstr(const char *s)
{
  char     *out;
  int       srclen;
  int       rd;
  int       wr = 0;

  if (s == NULL || *s == '\0')
    return pstrdup("");

  srclen = strlen(s);

  /* Every escape shrinks the text, so the input length is enough room. */
  out = palloc(srclen + 1);

  for (rd = 0; rd < srclen; rd++)
  {
    char    c = s[rd];

    if (c == '\'')
    {
      /*
       * If the scanner is working right, unescaped quotes can only
       * appear in pairs, so a second quote must follow.  The bootstrap
       * parser is not as smart, hence the assertion.
       */
      rd++;
      Assert(s[rd] == '\'');
      c = s[rd];
    }
    else if (c == '\\')
    {
      char    esc = s[++rd];

      if (esc >= '0' && esc <= '7')
      {
        /* Accumulate at most three octal digits, starting at esc. */
        long    code = 0;
        int     ndigits;

        for (ndigits = 0; ndigits < 3; ndigits++)
        {
          char    digit = s[rd + ndigits];

          if (digit < '0' || digit > '7')
            break;
          code = (code << 3) + (digit - '0');
        }

        /* Leave rd on the last digit; the loop header steps past it. */
        rd += ndigits - 1;
        c = (char) code;
      }
      else
      {
        switch (esc)
        {
          case 'b':
            c = '\b';
            break;
          case 'f':
            c = '\f';
            break;
          case 'n':
            c = '\n';
            break;
          case 'r':
            c = '\r';
            break;
          case 't':
            c = '\t';
            break;
          default:
            /* Unknown escape: keep the escaped character as-is. */
            c = esc;
            break;
        }
      }
    }

    out[wr++] = c;
  }

  out[wr] = '\0';
  return out;
}
115
116
117
/*
 * downcase_truncate_identifier() --- downcase an unquoted identifier and
 * truncate it if it is too long, optionally warning about the truncation.
 *
 * The result is a palloc'd string holding the adjusted identifier.
 *
 * Note: some callers pass a string that is not null-terminated, which is
 * why the length is given explicitly.
 *
 * Note: the API is designed to allow downcasing transformations that make
 * the string longer, but that is not supported yet.  Implementing it would
 * also require fixing SplitIdentifierString() in utils/adt/varlena.c.
 */
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
  /* This entry point always asks the workhorse to truncate. */
  const bool  do_truncate = true;
  char       *adjusted;

  adjusted = downcase_identifier(ident, len, warn, do_truncate);
  return adjusted;
}
135
136
/*
137
 * a workhorse for downcase_truncate_identifier
138
 */
139
char *
140
downcase_identifier(const char *ident, int len, bool warn, bool truncate)
141
613k
{
142
613k
  char     *result;
143
613k
  int     i;
144
613k
  bool    enc_is_single_byte;
145
146
613k
  result = palloc(len + 1);
147
613k
  enc_is_single_byte = pg_database_encoding_max_length() == 1;
148
149
  /*
150
   * SQL99 specifies Unicode-aware case normalization, which we don't yet
151
   * have the infrastructure for.  Instead we use tolower() to provide a
152
   * locale-aware translation.  However, there are some locales where this
153
   * is not right either (eg, Turkish may do strange things with 'i' and
154
   * 'I').  Our current compromise is to use tolower() for characters with
155
   * the high bit set, as long as they aren't part of a multi-byte
156
   * character, and use an ASCII-only downcasing for 7-bit characters.
157
   */
158
5.03M
  for (i = 0; i < len; i++)
159
4.42M
  {
160
4.42M
    unsigned char ch = (unsigned char) ident[i];
161
162
4.42M
    if (ch >= 'A' && ch <= 'Z')
163
171k
      ch += 'a' - 'A';
164
4.25M
    else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
165
0
      ch = tolower(ch);
166
4.42M
    result[i] = (char) ch;
167
4.42M
  }
168
613k
  result[i] = '\0';
169
170
613k
  if (i >= NAMEDATALEN && truncate)
171
0
    truncate_identifier(result, i, warn);
172
173
613k
  return result;
174
613k
}
175
176
177
/*
178
 * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
179
 *
180
 * The given string is modified in-place, if necessary.  A warning is
181
 * issued if requested.
182
 *
183
 * We require the caller to pass in the string length since this saves a
184
 * strlen() call in some common usages.
185
 */
186
void
187
truncate_identifier(char *ident, int len, bool warn)
188
41.3k
{
189
41.3k
  if (len >= NAMEDATALEN)
190
0
  {
191
0
    len = pg_mbcliplen(ident, len, NAMEDATALEN - 1);
192
0
    if (warn)
193
0
    {
194
      /*
195
       * We avoid using %.*s here because it can misbehave if the data
196
       * is not valid in what libc thinks is the prevailing encoding.
197
       */
198
0
      char    buf[NAMEDATALEN];
199
200
0
      memcpy(buf, ident, len);
201
0
      buf[len] = '\0';
202
0
      ereport(NOTICE,
203
0
          (errcode(ERRCODE_NAME_TOO_LONG),
204
0
           errmsg("identifier \"%s\" will be truncated to \"%s\"",
205
0
              ident, buf)));
206
0
    }
207
0
    ident[len] = '\0';
208
0
  }
209
41.3k
}
210
211
/*
 * scanner_isspace() --- return true if flex scanner considers char whitespace
 *
 * Use this in place of isspace(), which may be locale-dependent, whenever
 * the result has to agree with the lexer.
 *
 * In principle similar functions could be needed for isalnum etc, but so
 * far only isspace has been required.
 */
bool
scanner_isspace(char ch)
{
  /* This must match scan.l's list of {space} characters */
  switch (ch)
  {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
      return true;
    default:
      return false;
  }
}