YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/build/debugcov-clang-dynamic-arm64-ninja/postgres_build/src/bin/initdb/encnames.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Encoding names and routines for working with them. Everything
3
 * in this file is shared between FE and BE.
4
 *
5
 * src/backend/utils/mb/encnames.c
6
 */
7
#ifdef FRONTEND
8
#include "postgres_fe.h"
9
#else
10
#include "postgres.h"
11
#include "utils/builtins.h"
12
#endif
13
14
#include <ctype.h>
15
#include <unistd.h>
16
17
#include "mb/pg_wchar.h"
18
19
20
/* ----------
21
 * All encoding names, sorted:     *** A L P H A B E T I C ***
22
 *
23
 * All names must be without irrelevant chars, search routines use
24
 * isalnum() chars only. It means ISO-8859-1, iso_8859-1 and Iso8859_1
25
 * are always converted to 'iso88591'. All must be lower case.
26
 *
27
 * The table doesn't contain 'cs' aliases (like csISOLatin1). Are they needed?
28
 *
29
 * Karel Zak, Aug 2001
30
 * ----------
31
 */
32
/*
 * One row of the name lookup table: a normalized encoding name (or alias)
 * paired with the encoding it denotes.
 */
typedef struct pg_encname
33
{
34
  const char *name;     /* key compared against the cleaned-up input name */
35
  pg_enc    encoding;   /* value returned by pg_char_to_encoding() on a match */
36
} pg_encname;
37
38
static const pg_encname pg_encname_tbl[] =
39
{
40
  {
41
    "abc", PG_WIN1258
42
  },              /* alias for WIN1258 */
43
  {
44
    "alt", PG_WIN866
45
  },              /* IBM866 */
46
  {
47
    "big5", PG_BIG5
48
  },              /* Big5; Chinese for Taiwan multibyte set */
49
  {
50
    "euccn", PG_EUC_CN
51
  },              /* EUC-CN; Extended Unix Code for simplified
52
                 * Chinese */
53
  {
54
    "eucjis2004", PG_EUC_JIS_2004
55
  },              /* EUC-JIS-2004; Extended UNIX Code fixed
56
                 * Width for Japanese, standard JIS X 0213 */
57
  {
58
    "eucjp", PG_EUC_JP
59
  },              /* EUC-JP; Extended UNIX Code fixed Width for
60
                 * Japanese, standard OSF */
61
  {
62
    "euckr", PG_EUC_KR
63
  },              /* EUC-KR; Extended Unix Code for Korean , KS
64
                 * X 1001 standard */
65
  {
66
    "euctw", PG_EUC_TW
67
  },              /* EUC-TW; Extended Unix Code for
68
                 *
69
                 * traditional Chinese */
70
  {
71
    "gb18030", PG_GB18030
72
  },              /* GB18030;GB18030 */
73
  {
74
    "gbk", PG_GBK
75
  },              /* GBK; Chinese Windows CodePage 936
76
                 * simplified Chinese */
77
  {
78
    "iso88591", PG_LATIN1
79
  },              /* ISO-8859-1; RFC1345,KXS2 */
80
  {
81
    "iso885910", PG_LATIN6
82
  },              /* ISO-8859-10; RFC1345,KXS2 */
83
  {
84
    "iso885913", PG_LATIN7
85
  },              /* ISO-8859-13; RFC1345,KXS2 */
86
  {
87
    "iso885914", PG_LATIN8
88
  },              /* ISO-8859-14; RFC1345,KXS2 */
89
  {
90
    "iso885915", PG_LATIN9
91
  },              /* ISO-8859-15; RFC1345,KXS2 */
92
  {
93
    "iso885916", PG_LATIN10
94
  },              /* ISO-8859-16; RFC1345,KXS2 */
95
  {
96
    "iso88592", PG_LATIN2
97
  },              /* ISO-8859-2; RFC1345,KXS2 */
98
  {
99
    "iso88593", PG_LATIN3
100
  },              /* ISO-8859-3; RFC1345,KXS2 */
101
  {
102
    "iso88594", PG_LATIN4
103
  },              /* ISO-8859-4; RFC1345,KXS2 */
104
  {
105
    "iso88595", PG_ISO_8859_5
106
  },              /* ISO-8859-5; RFC1345,KXS2 */
107
  {
108
    "iso88596", PG_ISO_8859_6
109
  },              /* ISO-8859-6; RFC1345,KXS2 */
110
  {
111
    "iso88597", PG_ISO_8859_7
112
  },              /* ISO-8859-7; RFC1345,KXS2 */
113
  {
114
    "iso88598", PG_ISO_8859_8
115
  },              /* ISO-8859-8; RFC1345,KXS2 */
116
  {
117
    "iso88599", PG_LATIN5
118
  },              /* ISO-8859-9; RFC1345,KXS2 */
119
  {
120
    "johab", PG_JOHAB
121
  },              /* JOHAB; Extended Unix Code for simplified
122
                 * Chinese */
123
  {
124
    "koi8", PG_KOI8R
125
  },              /* _dirty_ alias for KOI8-R (backward
126
                 * compatibility) */
127
  {
128
    "koi8r", PG_KOI8R
129
  },              /* KOI8-R; RFC1489 */
130
  {
131
    "koi8u", PG_KOI8U
132
  },              /* KOI8-U; RFC2319 */
133
  {
134
    "latin1", PG_LATIN1
135
  },              /* alias for ISO-8859-1 */
136
  {
137
    "latin10", PG_LATIN10
138
  },              /* alias for ISO-8859-16 */
139
  {
140
    "latin2", PG_LATIN2
141
  },              /* alias for ISO-8859-2 */
142
  {
143
    "latin3", PG_LATIN3
144
  },              /* alias for ISO-8859-3 */
145
  {
146
    "latin4", PG_LATIN4
147
  },              /* alias for ISO-8859-4 */
148
  {
149
    "latin5", PG_LATIN5
150
  },              /* alias for ISO-8859-9 */
151
  {
152
    "latin6", PG_LATIN6
153
  },              /* alias for ISO-8859-10 */
154
  {
155
    "latin7", PG_LATIN7
156
  },              /* alias for ISO-8859-13 */
157
  {
158
    "latin8", PG_LATIN8
159
  },              /* alias for ISO-8859-14 */
160
  {
161
    "latin9", PG_LATIN9
162
  },              /* alias for ISO-8859-15 */
163
  {
164
    "mskanji", PG_SJIS
165
  },              /* alias for Shift_JIS */
166
  {
167
    "muleinternal", PG_MULE_INTERNAL
168
  },
169
  {
170
    "shiftjis", PG_SJIS
171
  },              /* Shift_JIS; JIS X 0202-1991 */
172
173
  {
174
    "shiftjis2004", PG_SHIFT_JIS_2004
175
  },              /* SHIFT-JIS-2004; Shift JIS for Japanese,
176
                 * standard JIS X 0213 */
177
  {
178
    "sjis", PG_SJIS
179
  },              /* alias for Shift_JIS */
180
  {
181
    "sqlascii", PG_SQL_ASCII
182
  },
183
  {
184
    "tcvn", PG_WIN1258
185
  },              /* alias for WIN1258 */
186
  {
187
    "tcvn5712", PG_WIN1258
188
  },              /* alias for WIN1258 */
189
  {
190
    "uhc", PG_UHC
191
  },              /* UHC; Korean Windows CodePage 949 */
192
  {
193
    "unicode", PG_UTF8
194
  },              /* alias for UTF8 */
195
  {
196
    "utf8", PG_UTF8
197
  },              /* alias for UTF8 */
198
  {
199
    "vscii", PG_WIN1258
200
  },              /* alias for WIN1258 */
201
  {
202
    "win", PG_WIN1251
203
  },              /* _dirty_ alias for windows-1251 (backward
204
                 * compatibility) */
205
  {
206
    "win1250", PG_WIN1250
207
  },              /* alias for Windows-1250 */
208
  {
209
    "win1251", PG_WIN1251
210
  },              /* alias for Windows-1251 */
211
  {
212
    "win1252", PG_WIN1252
213
  },              /* alias for Windows-1252 */
214
  {
215
    "win1253", PG_WIN1253
216
  },              /* alias for Windows-1253 */
217
  {
218
    "win1254", PG_WIN1254
219
  },              /* alias for Windows-1254 */
220
  {
221
    "win1255", PG_WIN1255
222
  },              /* alias for Windows-1255 */
223
  {
224
    "win1256", PG_WIN1256
225
  },              /* alias for Windows-1256 */
226
  {
227
    "win1257", PG_WIN1257
228
  },              /* alias for Windows-1257 */
229
  {
230
    "win1258", PG_WIN1258
231
  },              /* alias for Windows-1258 */
232
  {
233
    "win866", PG_WIN866
234
  },              /* IBM866 */
235
  {
236
    "win874", PG_WIN874
237
  },              /* alias for Windows-874 */
238
  {
239
    "win932", PG_SJIS
240
  },              /* alias for Shift_JIS */
241
  {
242
    "win936", PG_GBK
243
  },              /* alias for GBK */
244
  {
245
    "win949", PG_UHC
246
  },              /* alias for UHC */
247
  {
248
    "win950", PG_BIG5
249
  },              /* alias for BIG5 */
250
  {
251
    "windows1250", PG_WIN1250
252
  },              /* Windows-1251; Microsoft */
253
  {
254
    "windows1251", PG_WIN1251
255
  },              /* Windows-1251; Microsoft */
256
  {
257
    "windows1252", PG_WIN1252
258
  },              /* Windows-1252; Microsoft */
259
  {
260
    "windows1253", PG_WIN1253
261
  },              /* Windows-1253; Microsoft */
262
  {
263
    "windows1254", PG_WIN1254
264
  },              /* Windows-1254; Microsoft */
265
  {
266
    "windows1255", PG_WIN1255
267
  },              /* Windows-1255; Microsoft */
268
  {
269
    "windows1256", PG_WIN1256
270
  },              /* Windows-1256; Microsoft */
271
  {
272
    "windows1257", PG_WIN1257
273
  },              /* Windows-1257; Microsoft */
274
  {
275
    "windows1258", PG_WIN1258
276
  },              /* Windows-1258; Microsoft */
277
  {
278
    "windows866", PG_WIN866
279
  },              /* IBM866 */
280
  {
281
    "windows874", PG_WIN874
282
  },              /* Windows-874; Microsoft */
283
  {
284
    "windows932", PG_SJIS
285
  },              /* alias for Shift_JIS */
286
  {
287
    "windows936", PG_GBK
288
  },              /* alias for GBK */
289
  {
290
    "windows949", PG_UHC
291
  },              /* alias for UHC */
292
  {
293
    "windows950", PG_BIG5
294
  }             /* alias for BIG5 */
295
};
296
297
/* ----------
298
 * These are "official" encoding names.
299
 * XXX must be kept in the same order as enum pg_enc (in mb/pg_wchar.h)
300
 * ----------
301
 */
302
/*
 * DEF_ENC2NAME(name, codepage) expands to one table initializer: the
 * stringified name and the matching PG_<name> constant.  Only the WIN32
 * variant also stores the codepage argument; elsewhere it is dropped.
 */
#ifndef WIN32
303
#define DEF_ENC2NAME(name, codepage) { #name, PG_##name }
304
#else
305
#define DEF_ENC2NAME(name, codepage) { #name, PG_##name, codepage }
306
#endif
307
/*
 * Indexed by encoding id: pg_encoding_to_char() reads
 * pg_enc2name_tbl[encoding] and asserts that the entry's encoding field
 * equals the index, so the entry order below must not change.
 * A codepage argument of 0 appears on entries with no code page given here.
 */
const pg_enc2name pg_enc2name_tbl[] =
308
{
309
  DEF_ENC2NAME(SQL_ASCII, 0),
310
  DEF_ENC2NAME(EUC_JP, 20932),
311
  DEF_ENC2NAME(EUC_CN, 20936),
312
  DEF_ENC2NAME(EUC_KR, 51949),
313
  DEF_ENC2NAME(EUC_TW, 0),
314
  DEF_ENC2NAME(EUC_JIS_2004, 20932),
315
  DEF_ENC2NAME(UTF8, 65001),
316
  DEF_ENC2NAME(MULE_INTERNAL, 0),
317
  DEF_ENC2NAME(LATIN1, 28591),
318
  DEF_ENC2NAME(LATIN2, 28592),
319
  DEF_ENC2NAME(LATIN3, 28593),
320
  DEF_ENC2NAME(LATIN4, 28594),
321
  DEF_ENC2NAME(LATIN5, 28599),
322
  DEF_ENC2NAME(LATIN6, 0),
323
  DEF_ENC2NAME(LATIN7, 0),
324
  DEF_ENC2NAME(LATIN8, 0),
325
  DEF_ENC2NAME(LATIN9, 28605),
326
  DEF_ENC2NAME(LATIN10, 0),
327
  DEF_ENC2NAME(WIN1256, 1256),
328
  DEF_ENC2NAME(WIN1258, 1258),
329
  DEF_ENC2NAME(WIN866, 866),
330
  DEF_ENC2NAME(WIN874, 874),
331
  DEF_ENC2NAME(KOI8R, 20866),
332
  DEF_ENC2NAME(WIN1251, 1251),
333
  DEF_ENC2NAME(WIN1252, 1252),
334
  DEF_ENC2NAME(ISO_8859_5, 28595),
335
  DEF_ENC2NAME(ISO_8859_6, 28596),
336
  DEF_ENC2NAME(ISO_8859_7, 28597),
337
  DEF_ENC2NAME(ISO_8859_8, 28598),
338
  DEF_ENC2NAME(WIN1250, 1250),
339
  DEF_ENC2NAME(WIN1253, 1253),
340
  DEF_ENC2NAME(WIN1254, 1254),
341
  DEF_ENC2NAME(WIN1255, 1255),
342
  DEF_ENC2NAME(WIN1257, 1257),
343
  DEF_ENC2NAME(KOI8U, 21866),
344
  DEF_ENC2NAME(SJIS, 932),
345
  DEF_ENC2NAME(BIG5, 950),
346
  DEF_ENC2NAME(GBK, 936),
347
  DEF_ENC2NAME(UHC, 949),
348
  DEF_ENC2NAME(GB18030, 54936),
349
  DEF_ENC2NAME(JOHAB, 0),
350
  DEF_ENC2NAME(SHIFT_JIS_2004, 932)
351
};
352
353
/* ----------
354
 * These are encoding names for gettext.
355
 *
356
 * This covers all encodings except MULE_INTERNAL, which is alien to gettext.
357
 * ----------
358
 */
359
/*
 * Pairs of (encoding id, name to use for that encoding with gettext).
 * The list ends with a {0, NULL} sentinel entry; presumably consumers stop
 * at the NULL name -- confirm against the callers, which are outside this
 * file.  Unlike pg_enc2name_tbl, the entries are not in enum order.
 */
const pg_enc2gettext pg_enc2gettext_tbl[] =
360
{
361
  {PG_SQL_ASCII, "US-ASCII"},
362
  {PG_UTF8, "UTF-8"},
363
  {PG_LATIN1, "LATIN1"},
364
  {PG_LATIN2, "LATIN2"},
365
  {PG_LATIN3, "LATIN3"},
366
  {PG_LATIN4, "LATIN4"},
367
  {PG_ISO_8859_5, "ISO-8859-5"},
368
  {PG_ISO_8859_6, "ISO_8859-6"},
369
  {PG_ISO_8859_7, "ISO-8859-7"},
370
  {PG_ISO_8859_8, "ISO-8859-8"},
371
  {PG_LATIN5, "LATIN5"},
372
  {PG_LATIN6, "LATIN6"},
373
  {PG_LATIN7, "LATIN7"},
374
  {PG_LATIN8, "LATIN8"},
375
  {PG_LATIN9, "LATIN-9"},
376
  {PG_LATIN10, "LATIN10"},
377
  {PG_KOI8R, "KOI8-R"},
378
  {PG_KOI8U, "KOI8-U"},
379
  {PG_WIN1250, "CP1250"},
380
  {PG_WIN1251, "CP1251"},
381
  {PG_WIN1252, "CP1252"},
382
  {PG_WIN1253, "CP1253"},
383
  {PG_WIN1254, "CP1254"},
384
  {PG_WIN1255, "CP1255"},
385
  {PG_WIN1256, "CP1256"},
386
  {PG_WIN1257, "CP1257"},
387
  {PG_WIN1258, "CP1258"},
388
  {PG_WIN866, "CP866"},
389
  {PG_WIN874, "CP874"},
390
  {PG_EUC_CN, "EUC-CN"},
391
  {PG_EUC_JP, "EUC-JP"},
392
  {PG_EUC_KR, "EUC-KR"},
393
  {PG_EUC_TW, "EUC-TW"},
394
  {PG_EUC_JIS_2004, "EUC-JP"},  /* same gettext name as PG_EUC_JP */
395
  {PG_SJIS, "SHIFT-JIS"},
396
  {PG_BIG5, "BIG5"},
397
  {PG_GBK, "GBK"},
398
  {PG_UHC, "UHC"},
399
  {PG_GB18030, "GB18030"},
400
  {PG_JOHAB, "JOHAB"},
401
  {PG_SHIFT_JIS_2004, "SHIFT_JISX0213"},
402
  {0, NULL}     /* end-of-table sentinel */
403
};
404
405
406
#ifndef FRONTEND
407
408
/*
409
 * Table of encoding names for ICU
410
 *
411
 * Reference: <https://ssl.icu-project.org/icu-bin/convexp>
412
 *
413
 * NULL entries are not supported by ICU, or their mapping is unclear.
414
 */
415
/*
 * ICU converter name for each encoding, indexed directly by encoding id
 * (the trailing comment on each entry names the id that slot belongs to).
 * get_encoding_name_for_icu() statically asserts that the table has exactly
 * PG_ENCODING_BE_LAST + 1 entries, so a new entry must be added here whenever
 * that limit grows.
 */
static const char *const pg_enc2icu_tbl[] =
416
{
417
  NULL,           /* PG_SQL_ASCII */
418
  "EUC-JP",         /* PG_EUC_JP */
419
  "EUC-CN",         /* PG_EUC_CN */
420
  "EUC-KR",         /* PG_EUC_KR */
421
  "EUC-TW",         /* PG_EUC_TW */
422
  NULL,           /* PG_EUC_JIS_2004 */
423
  "UTF-8",          /* PG_UTF8 */
424
  NULL,           /* PG_MULE_INTERNAL */
425
  "ISO-8859-1",       /* PG_LATIN1 */
426
  "ISO-8859-2",       /* PG_LATIN2 */
427
  "ISO-8859-3",       /* PG_LATIN3 */
428
  "ISO-8859-4",       /* PG_LATIN4 */
429
  "ISO-8859-9",       /* PG_LATIN5 */
430
  "ISO-8859-10",        /* PG_LATIN6 */
431
  "ISO-8859-13",        /* PG_LATIN7 */
432
  "ISO-8859-14",        /* PG_LATIN8 */
433
  "ISO-8859-15",        /* PG_LATIN9 */
434
  NULL,           /* PG_LATIN10 */
435
  "CP1256",         /* PG_WIN1256 */
436
  "CP1258",         /* PG_WIN1258 */
437
  "CP866",          /* PG_WIN866 */
438
  NULL,           /* PG_WIN874 */
439
  "KOI8-R",         /* PG_KOI8R */
440
  "CP1251",         /* PG_WIN1251 */
441
  "CP1252",         /* PG_WIN1252 */
442
  "ISO-8859-5",       /* PG_ISO_8859_5 */
443
  "ISO-8859-6",       /* PG_ISO_8859_6 */
444
  "ISO-8859-7",       /* PG_ISO_8859_7 */
445
  "ISO-8859-8",       /* PG_ISO_8859_8 */
446
  "CP1250",         /* PG_WIN1250 */
447
  "CP1253",         /* PG_WIN1253 */
448
  "CP1254",         /* PG_WIN1254 */
449
  "CP1255",         /* PG_WIN1255 */
450
  "CP1257",         /* PG_WIN1257 */
451
  "KOI8-U",         /* PG_KOI8U */
452
};
453
454
bool
455
is_encoding_supported_by_icu(int encoding)
456
{
457
  return (pg_enc2icu_tbl[encoding] != NULL);
458
}
459
460
const char *
461
get_encoding_name_for_icu(int encoding)
462
{
463
  const char *icu_encoding_name;
464
465
  StaticAssertStmt(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1,
466
           "pg_enc2icu_tbl incomplete");
467
468
  icu_encoding_name = pg_enc2icu_tbl[encoding];
469
470
  if (!icu_encoding_name)
471
    ereport(ERROR,
472
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
473
         errmsg("encoding \"%s\" not supported by ICU",
474
            pg_encoding_to_char(encoding))));
475
476
  return icu_encoding_name;
477
}
478
479
#endif              /* not FRONTEND */
480
481
482
/* ----------
 * Encoding validity checks.  The name-based variants return the encoding
 * id, or -1 on error.
 * ----------
 */

/*
 * pg_valid_client_encoding
 *
 * Resolve an encoding name with pg_char_to_encoding() and test the result
 * with PG_VALID_FE_ENCODING.  Returns the encoding id, or -1 if the name is
 * not recognized or the id fails that test.
 */
int
pg_valid_client_encoding(const char *name)
{
  int     enc_id = pg_char_to_encoding(name);

  if (enc_id >= 0 && PG_VALID_FE_ENCODING(enc_id))
    return enc_id;

  return -1;
}
499
500
/*
 * pg_valid_server_encoding
 *
 * Resolve an encoding name with pg_char_to_encoding() and test the result
 * with PG_VALID_BE_ENCODING.  Returns the encoding id, or -1 if the name is
 * not recognized or the id fails that test.
 */
int
pg_valid_server_encoding(const char *name)
{
  int     enc_id = pg_char_to_encoding(name);

  if (enc_id >= 0 && PG_VALID_BE_ENCODING(enc_id))
    return enc_id;

  return -1;
}
513
514
/*
 * pg_valid_server_encoding_id
 *
 * Function wrapper around the PG_VALID_BE_ENCODING macro: returns whatever
 * the macro yields for the given encoding id (unlike the name-based checks,
 * this is the macro's truth value, not an encoding id or -1).
 */
int
515
pg_valid_server_encoding_id(int encoding)
516
8.02k
{
517
8.02k
  return PG_VALID_BE_ENCODING(encoding);
518
8.02k
}
519
520
/* ----------
 * Normalize an encoding name for table lookup.
 *
 * Copies key into newkey, keeping only the characters for which isalnum()
 * is true and folding ASCII 'A'..'Z' to lower case; everything else
 * (dashes, underscores, spaces, ...) is dropped.  The result is always
 * NUL-terminated.
 *
 * newkey must have room for strlen(key) + 1 bytes.  Returns newkey.
 * ----------
 */
static char *
clean_encoding_name(const char *key, char *newkey)
{
  char     *out = newkey;

  while (*key != '\0')
  {
    char    c = *key++;

    /* skip punctuation and any other non-alphanumeric character */
    if (!isalnum((unsigned char) c))
      continue;

    *out++ = (c >= 'A' && c <= 'Z') ? (char) (c + 'a' - 'A') : c;
  }
  *out = '\0';

  return newkey;
}
543
544
/* ----------
545
 * Search encoding by encoding name
546
 *
547
 * Returns encoding ID, or -1 for error
548
 * ----------
549
 */
550
int
551
pg_char_to_encoding(const char *name)
552
352
{
553
352
  unsigned int nel = lengthof(pg_encname_tbl);
554
352
  const pg_encname *base = pg_encname_tbl,
555
352
         *last = base + nel - 1,
556
352
         *position;
557
352
  int     result;
558
352
  char    buff[NAMEDATALEN],
559
352
         *key;
560
561
352
  if (name == NULL || *name == '\0')
562
0
    return -1;
563
564
352
  if (strlen(name) >= NAMEDATALEN)
565
0
  {
566
0
#ifdef FRONTEND
567
0
    fprintf(stderr, "encoding name too long\n");
568
0
    return -1;
569
#else
570
    ereport(ERROR,
571
        (errcode(ERRCODE_NAME_TOO_LONG),
572
         errmsg("encoding name too long")));
573
#endif
574
0
  }
575
352
  key = clean_encoding_name(name, buff);
576
577
2.11k
  while (last >= base)
578
2.11k
  {
579
2.11k
    position = base + ((last - base) >> 1);
580
2.11k
    result = key[0] - position->name[0];
581
582
2.11k
    if (result == 0)
583
704
    {
584
704
      result = strcmp(key, position->name);
585
704
      if (result == 0)
586
352
        return position->encoding;
587
1.76k
    }
588
1.76k
    if (result < 0)
589
704
      last = position - 1;
590
1.05k
    else
591
1.05k
      base = position + 1;
592
1.76k
  }
593
0
  return -1;
594
352
}
595
596
#ifndef FRONTEND
597
Datum
598
PG_char_to_encoding(PG_FUNCTION_ARGS)
599
{
600
  Name    s = PG_GETARG_NAME(0);
601
602
  PG_RETURN_INT32(pg_char_to_encoding(NameStr(*s)));
603
}
604
#endif
605
606
const char *
607
pg_encoding_to_char(int encoding)
608
947
{
609
947
  if (PG_VALID_ENCODING(encoding))
610
947
  {
611
947
    const pg_enc2name *p = &pg_enc2name_tbl[encoding];
612
613
947
    Assert(encoding == p->encoding);
614
947
    return p->name;
615
947
  }
616
0
  return "";
617
0
}
618
619
#ifndef FRONTEND
620
Datum
621
PG_encoding_to_char(PG_FUNCTION_ARGS)
622
{
623
  int32   encoding = PG_GETARG_INT32(0);
624
  const char *encoding_name = pg_encoding_to_char(encoding);
625
626
  return DirectFunctionCall1(namein, CStringGetDatum(encoding_name));
627
}
628
629
#endif