YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/postgres/src/backend/utils/mb/conv.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 *    Utility functions for conversion procs.
4
 *
5
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
6
 * Portions Copyright (c) 1994, Regents of the University of California
7
 *
8
 * IDENTIFICATION
9
 *    src/backend/utils/mb/conv.c
10
 *
11
 *-------------------------------------------------------------------------
12
 */
13
#include "postgres.h"
14
#include "mb/pg_wchar.h"
15
16
17
/*
18
 * local2local: a generic single byte charset encoding
19
 * conversion between two ASCII-superset encodings.
20
 *
21
 * l points to the source string of length len
22
 * p is the output area (must be large enough!)
23
 * src_encoding is the PG identifier for the source encoding
24
 * dest_encoding is the PG identifier for the target encoding
25
 * tab holds conversion entries for the source charset
26
 * starting from 128 (0x80). each entry in the table holds the corresponding
27
 * code point for the target charset, or 0 if there is no equivalent code.
28
 */
29
void
30
local2local(const unsigned char *l,
31
      unsigned char *p,
32
      int len,
33
      int src_encoding,
34
      int dest_encoding,
35
      const unsigned char *tab)
36
0
{
37
0
  unsigned char c1,
38
0
        c2;
39
40
0
  while (len > 0)
41
0
  {
42
0
    c1 = *l;
43
0
    if (c1 == 0)
44
0
      report_invalid_encoding(src_encoding, (const char *) l, len);
45
0
    if (!IS_HIGHBIT_SET(c1))
46
0
      *p++ = c1;
47
0
    else
48
0
    {
49
0
      c2 = tab[c1 - HIGHBIT];
50
0
      if (c2)
51
0
        *p++ = c2;
52
0
      else
53
0
        report_untranslatable_char(src_encoding, dest_encoding,
54
0
                       (const char *) l, len);
55
0
    }
56
0
    l++;
57
0
    len--;
58
0
  }
59
0
  *p = '\0';
60
0
}
61
62
/*
 * LATINn ---> MIC, for charsets whose local codes map directly to MIC.
 *
 * l			source string; exactly len bytes are examined
 * p			output area (the caller must make it large enough; a high-bit
 *				input byte produces two output bytes)
 * len			number of source bytes
 * lc			mule character set id for the local encoding; emitted as a
 *				prefix byte before every high-bit source byte
 * encoding		PG identifier of the local encoding (used for error reports)
 *
 * The output is NUL-terminated.  An embedded zero byte in the input is
 * reported through report_invalid_encoding().
 */
void
latin2mic(const unsigned char *l, unsigned char *p, int len,
		  int lc, int encoding)
{
	int			pos;

	for (pos = 0; pos < len; pos++)
	{
		int			ch = l[pos];

		if (ch == 0)
			report_invalid_encoding(encoding,
									(const char *) (l + pos), len - pos);

		/* non-ASCII bytes get the charset id in front of them */
		if (IS_HIGHBIT_SET(ch))
			*p++ = lc;
		*p++ = ch;
	}

	*p = '\0';
}
89
90
/*
91
 * MIC ---> LATINn when the charset's local codes map directly to MIC
92
 *
93
 * mic points to the source string of length len
94
 * p is the output area (must be large enough!)
95
 * lc is the mule character set id for the local encoding
96
 * encoding is the PG identifier for the local encoding
97
 */
98
void
99
mic2latin(const unsigned char *mic, unsigned char *p, int len,
100
      int lc, int encoding)
101
0
{
102
0
  int     c1;
103
104
0
  while (len > 0)
105
0
  {
106
0
    c1 = *mic;
107
0
    if (c1 == 0)
108
0
      report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
109
0
    if (!IS_HIGHBIT_SET(c1))
110
0
    {
111
      /* easy for ASCII */
112
0
      *p++ = c1;
113
0
      mic++;
114
0
      len--;
115
0
    }
116
0
    else
117
0
    {
118
0
      int     l = pg_mic_mblen(mic);
119
120
0
      if (len < l)
121
0
        report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
122
0
                    len);
123
0
      if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
124
0
        report_untranslatable_char(PG_MULE_INTERNAL, encoding,
125
0
                       (const char *) mic, len);
126
0
      *p++ = mic[1];
127
0
      mic += 2;
128
0
      len -= 2;
129
0
    }
130
0
  }
131
0
  *p = '\0';
132
0
}
133
134
135
/*
136
 * ASCII ---> MIC
137
 *
138
 * While ordinarily SQL_ASCII encoding is forgiving of high-bit-set
139
 * characters, here we must take a hard line because we don't know
140
 * the appropriate MIC equivalent.
141
 */
142
void
143
pg_ascii2mic(const unsigned char *l, unsigned char *p, int len)
144
0
{
145
0
  int     c1;
146
147
0
  while (len > 0)
148
0
  {
149
0
    c1 = *l;
150
0
    if (c1 == 0 || IS_HIGHBIT_SET(c1))
151
0
      report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);
152
0
    *p++ = c1;
153
0
    l++;
154
0
    len--;
155
0
  }
156
0
  *p = '\0';
157
0
}
158
159
/*
160
 * MIC ---> ASCII
161
 */
162
void
163
pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len)
164
0
{
165
0
  int     c1;
166
167
0
  while (len > 0)
168
0
  {
169
0
    c1 = *mic;
170
0
    if (c1 == 0 || IS_HIGHBIT_SET(c1))
171
0
      report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
172
0
                     (const char *) mic, len);
173
0
    *p++ = c1;
174
0
    mic++;
175
0
    len--;
176
0
  }
177
0
  *p = '\0';
178
0
}
179
180
/*
181
 * latin2mic_with_table: a generic single byte charset encoding
182
 * conversion from a local charset to the mule internal code.
183
 *
184
 * l points to the source string of length len
185
 * p is the output area (must be large enough!)
186
 * lc is the mule character set id for the local encoding
187
 * encoding is the PG identifier for the local encoding
188
 * tab holds conversion entries for the local charset
189
 * starting from 128 (0x80). each entry in the table holds the corresponding
190
 * code point for the mule encoding, or 0 if there is no equivalent code.
191
 */
192
void
193
latin2mic_with_table(const unsigned char *l,
194
           unsigned char *p,
195
           int len,
196
           int lc,
197
           int encoding,
198
           const unsigned char *tab)
199
0
{
200
0
  unsigned char c1,
201
0
        c2;
202
203
0
  while (len > 0)
204
0
  {
205
0
    c1 = *l;
206
0
    if (c1 == 0)
207
0
      report_invalid_encoding(encoding, (const char *) l, len);
208
0
    if (!IS_HIGHBIT_SET(c1))
209
0
      *p++ = c1;
210
0
    else
211
0
    {
212
0
      c2 = tab[c1 - HIGHBIT];
213
0
      if (c2)
214
0
      {
215
0
        *p++ = lc;
216
0
        *p++ = c2;
217
0
      }
218
0
      else
219
0
        report_untranslatable_char(encoding, PG_MULE_INTERNAL,
220
0
                       (const char *) l, len);
221
0
    }
222
0
    l++;
223
0
    len--;
224
0
  }
225
0
  *p = '\0';
226
0
}
227
228
/*
229
 * mic2latin_with_table: a generic single byte charset encoding
230
 * conversion from the mule internal code to a local charset.
231
 *
232
 * mic points to the source string of length len
233
 * p is the output area (must be large enough!)
234
 * lc is the mule character set id for the local encoding
235
 * encoding is the PG identifier for the local encoding
236
 * tab holds conversion entries for the mule internal code's second byte,
237
 * starting from 128 (0x80). each entry in the table holds the corresponding
238
 * code point for the local charset, or 0 if there is no equivalent code.
239
 */
240
void
241
mic2latin_with_table(const unsigned char *mic,
242
           unsigned char *p,
243
           int len,
244
           int lc,
245
           int encoding,
246
           const unsigned char *tab)
247
0
{
248
0
  unsigned char c1,
249
0
        c2;
250
251
0
  while (len > 0)
252
0
  {
253
0
    c1 = *mic;
254
0
    if (c1 == 0)
255
0
      report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
256
0
    if (!IS_HIGHBIT_SET(c1))
257
0
    {
258
      /* easy for ASCII */
259
0
      *p++ = c1;
260
0
      mic++;
261
0
      len--;
262
0
    }
263
0
    else
264
0
    {
265
0
      int     l = pg_mic_mblen(mic);
266
267
0
      if (len < l)
268
0
        report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
269
0
                    len);
270
0
      if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
271
0
        (c2 = tab[mic[1] - HIGHBIT]) == 0)
272
0
      {
273
0
        report_untranslatable_char(PG_MULE_INTERNAL, encoding,
274
0
                       (const char *) mic, len);
275
0
        break;     /* keep compiler quiet */
276
0
      }
277
0
      *p++ = c2;
278
0
      mic += 2;
279
0
      len -= 2;
280
0
    }
281
0
  }
282
0
  *p = '\0';
283
0
}
284
285
/*
286
 * comparison routine for bsearch()
287
 * this routine is intended for combined UTF8 -> local code
288
 */
289
static int
290
compare3(const void *p1, const void *p2)
291
0
{
292
0
  uint32    s1,
293
0
        s2,
294
0
        d1,
295
0
        d2;
296
297
0
  s1 = *(const uint32 *) p1;
298
0
  s2 = *((const uint32 *) p1 + 1);
299
0
  d1 = ((const pg_utf_to_local_combined *) p2)->utf1;
300
0
  d2 = ((const pg_utf_to_local_combined *) p2)->utf2;
301
0
  return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
302
0
}
303
304
/*
305
 * comparison routine for bsearch()
306
 * this routine is intended for local code -> combined UTF8
307
 */
308
static int
309
compare4(const void *p1, const void *p2)
310
0
{
311
0
  uint32    v1,
312
0
        v2;
313
314
0
  v1 = *(const uint32 *) p1;
315
0
  v2 = ((const pg_local_to_utf_combined *) p2)->code;
316
0
  return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
317
0
}
318
319
/*
320
 * store 32bit character representation into multibyte stream
321
 */
322
static inline unsigned char *
323
store_coded_char(unsigned char *dest, uint32 code)
324
0
{
325
0
  if (code & 0xff000000)
326
0
    *dest++ = code >> 24;
327
0
  if (code & 0x00ff0000)
328
0
    *dest++ = code >> 16;
329
0
  if (code & 0x0000ff00)
330
0
    *dest++ = code >> 8;
331
0
  if (code & 0x000000ff)
332
0
    *dest++ = code;
333
0
  return dest;
334
0
}
335
336
/*
337
 * Convert a character using a conversion radix tree.
338
 *
339
 * 'l' is the length of the input character in bytes, and b1-b4 are
340
 * the input character's bytes.
341
 */
342
static inline uint32
343
pg_mb_radix_conv(const pg_mb_radix_tree *rt,
344
         int l,
345
         unsigned char b1,
346
         unsigned char b2,
347
         unsigned char b3,
348
         unsigned char b4)
349
0
{
350
0
  if (l == 4)
351
0
  {
352
    /* 4-byte code */
353
354
    /* check code validity */
355
0
    if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
356
0
      b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
357
0
      b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
358
0
      b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
359
0
      return 0;
360
361
    /* perform lookup */
362
0
    if (rt->chars32)
363
0
    {
364
0
      uint32    idx = rt->b4root;
365
366
0
      idx = rt->chars32[b1 + idx - rt->b4_1_lower];
367
0
      idx = rt->chars32[b2 + idx - rt->b4_2_lower];
368
0
      idx = rt->chars32[b3 + idx - rt->b4_3_lower];
369
0
      return rt->chars32[b4 + idx - rt->b4_4_lower];
370
0
    }
371
0
    else
372
0
    {
373
0
      uint16    idx = rt->b4root;
374
375
0
      idx = rt->chars16[b1 + idx - rt->b4_1_lower];
376
0
      idx = rt->chars16[b2 + idx - rt->b4_2_lower];
377
0
      idx = rt->chars16[b3 + idx - rt->b4_3_lower];
378
0
      return rt->chars16[b4 + idx - rt->b4_4_lower];
379
0
    }
380
0
  }
381
0
  else if (l == 3)
382
0
  {
383
    /* 3-byte code */
384
385
    /* check code validity */
386
0
    if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
387
0
      b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
388
0
      b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
389
0
      return 0;
390
391
    /* perform lookup */
392
0
    if (rt->chars32)
393
0
    {
394
0
      uint32    idx = rt->b3root;
395
396
0
      idx = rt->chars32[b2 + idx - rt->b3_1_lower];
397
0
      idx = rt->chars32[b3 + idx - rt->b3_2_lower];
398
0
      return rt->chars32[b4 + idx - rt->b3_3_lower];
399
0
    }
400
0
    else
401
0
    {
402
0
      uint16    idx = rt->b3root;
403
404
0
      idx = rt->chars16[b2 + idx - rt->b3_1_lower];
405
0
      idx = rt->chars16[b3 + idx - rt->b3_2_lower];
406
0
      return rt->chars16[b4 + idx - rt->b3_3_lower];
407
0
    }
408
0
  }
409
0
  else if (l == 2)
410
0
  {
411
    /* 2-byte code */
412
413
    /* check code validity - first byte */
414
0
    if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
415
0
      b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
416
0
      return 0;
417
418
    /* perform lookup */
419
0
    if (rt->chars32)
420
0
    {
421
0
      uint32    idx = rt->b2root;
422
423
0
      idx = rt->chars32[b3 + idx - rt->b2_1_lower];
424
0
      return rt->chars32[b4 + idx - rt->b2_2_lower];
425
0
    }
426
0
    else
427
0
    {
428
0
      uint16    idx = rt->b2root;
429
430
0
      idx = rt->chars16[b3 + idx - rt->b2_1_lower];
431
0
      return rt->chars16[b4 + idx - rt->b2_2_lower];
432
0
    }
433
0
  }
434
0
  else if (l == 1)
435
0
  {
436
    /* 1-byte code */
437
438
    /* check code validity - first byte */
439
0
    if (b4 < rt->b1_lower || b4 > rt->b1_upper)
440
0
      return 0;
441
442
    /* perform lookup */
443
0
    if (rt->chars32)
444
0
      return rt->chars32[b4 + rt->b1root - rt->b1_lower];
445
0
    else
446
0
      return rt->chars16[b4 + rt->b1root - rt->b1_lower];
447
0
  }
448
0
  return 0;         /* shouldn't happen */
449
0
}
450
451
/*
452
 * UTF8 ---> local code
453
 *
454
 * utf: input string in UTF8 encoding (need not be null-terminated)
455
 * len: length of input string (in bytes)
456
 * iso: pointer to the output area (must be large enough!)
457
      (output string will be null-terminated)
458
 * map: conversion map for single characters
459
 * cmap: conversion map for combined characters
460
 *      (optional, pass NULL if none)
461
 * cmapsize: number of entries in the conversion map for combined characters
462
 *      (optional, pass 0 if none)
463
 * conv_func: algorithmic encoding conversion function
464
 *      (optional, pass NULL if none)
465
 * encoding: PG identifier for the local encoding
466
 *
467
 * For each character, the cmap (if provided) is consulted first; if no match,
468
 * the map is consulted next; if still no match, the conv_func (if provided)
469
 * is applied.  An error is raised if no match is found.
470
 *
471
 * See pg_wchar.h for more details about the data structures used here.
472
 */
473
void
474
UtfToLocal(const unsigned char *utf, int len,
475
       unsigned char *iso,
476
       const pg_mb_radix_tree *map,
477
       const pg_utf_to_local_combined *cmap, int cmapsize,
478
       utf_local_conversion_func conv_func,
479
       int encoding)
480
0
{
481
0
  uint32    iutf;
482
0
  int     l;
483
0
  const pg_utf_to_local_combined *cp;
484
485
0
  if (!PG_VALID_ENCODING(encoding))
486
0
    ereport(ERROR,
487
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
488
0
         errmsg("invalid encoding number: %d", encoding)));
489
490
0
  for (; len > 0; len -= l)
491
0
  {
492
0
    unsigned char b1 = 0;
493
0
    unsigned char b2 = 0;
494
0
    unsigned char b3 = 0;
495
0
    unsigned char b4 = 0;
496
497
    /* "break" cases all represent errors */
498
0
    if (*utf == '\0')
499
0
      break;
500
501
0
    l = pg_utf_mblen(utf);
502
0
    if (len < l)
503
0
      break;
504
505
0
    if (!pg_utf8_islegal(utf, l))
506
0
      break;
507
508
0
    if (l == 1)
509
0
    {
510
      /* ASCII case is easy, assume it's one-to-one conversion */
511
0
      *iso++ = *utf++;
512
0
      continue;
513
0
    }
514
515
    /* collect coded char of length l */
516
0
    if (l == 2)
517
0
    {
518
0
      b3 = *utf++;
519
0
      b4 = *utf++;
520
0
    }
521
0
    else if (l == 3)
522
0
    {
523
0
      b2 = *utf++;
524
0
      b3 = *utf++;
525
0
      b4 = *utf++;
526
0
    }
527
0
    else if (l == 4)
528
0
    {
529
0
      b1 = *utf++;
530
0
      b2 = *utf++;
531
0
      b3 = *utf++;
532
0
      b4 = *utf++;
533
0
    }
534
0
    else
535
0
    {
536
0
      elog(ERROR, "unsupported character length %d", l);
537
0
      iutf = 0;     /* keep compiler quiet */
538
0
    }
539
0
    iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
540
541
    /* First, try with combined map if possible */
542
0
    if (cmap && len > l)
543
0
    {
544
0
      const unsigned char *utf_save = utf;
545
0
      int     len_save = len;
546
0
      int     l_save = l;
547
548
      /* collect next character, same as above */
549
0
      len -= l;
550
551
0
      l = pg_utf_mblen(utf);
552
0
      if (len < l)
553
0
        break;
554
555
0
      if (!pg_utf8_islegal(utf, l))
556
0
        break;
557
558
      /* We assume ASCII character cannot be in combined map */
559
0
      if (l > 1)
560
0
      {
561
0
        uint32    iutf2;
562
0
        uint32    cutf[2];
563
564
0
        if (l == 2)
565
0
        {
566
0
          iutf2 = *utf++ << 8;
567
0
          iutf2 |= *utf++;
568
0
        }
569
0
        else if (l == 3)
570
0
        {
571
0
          iutf2 = *utf++ << 16;
572
0
          iutf2 |= *utf++ << 8;
573
0
          iutf2 |= *utf++;
574
0
        }
575
0
        else if (l == 4)
576
0
        {
577
0
          iutf2 = *utf++ << 24;
578
0
          iutf2 |= *utf++ << 16;
579
0
          iutf2 |= *utf++ << 8;
580
0
          iutf2 |= *utf++;
581
0
        }
582
0
        else
583
0
        {
584
0
          elog(ERROR, "unsupported character length %d", l);
585
0
          iutf2 = 0;  /* keep compiler quiet */
586
0
        }
587
588
0
        cutf[0] = iutf;
589
0
        cutf[1] = iutf2;
590
591
0
        cp = bsearch(cutf, cmap, cmapsize,
592
0
               sizeof(pg_utf_to_local_combined), compare3);
593
594
0
        if (cp)
595
0
        {
596
0
          iso = store_coded_char(iso, cp->code);
597
0
          continue;
598
0
        }
599
0
      }
600
601
      /* fail, so back up to reprocess second character next time */
602
0
      utf = utf_save;
603
0
      len = len_save;
604
0
      l = l_save;
605
0
    }
606
607
    /* Now check ordinary map */
608
0
    if (map)
609
0
    {
610
0
      uint32    converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
611
612
0
      if (converted)
613
0
      {
614
0
        iso = store_coded_char(iso, converted);
615
0
        continue;
616
0
      }
617
0
    }
618
619
    /* if there's a conversion function, try that */
620
0
    if (conv_func)
621
0
    {
622
0
      uint32    converted = (*conv_func) (iutf);
623
624
0
      if (converted)
625
0
      {
626
0
        iso = store_coded_char(iso, converted);
627
0
        continue;
628
0
      }
629
0
    }
630
631
    /* failed to translate this character */
632
0
    report_untranslatable_char(PG_UTF8, encoding,
633
0
                   (const char *) (utf - l), len);
634
0
  }
635
636
  /* if we broke out of loop early, must be invalid input */
637
0
  if (len > 0)
638
0
    report_invalid_encoding(PG_UTF8, (const char *) utf, len);
639
640
0
  *iso = '\0';
641
0
}
642
643
/*
644
 * local code ---> UTF8
645
 *
646
 * iso: input string in local encoding (need not be null-terminated)
647
 * len: length of input string (in bytes)
648
 * utf: pointer to the output area (must be large enough!)
649
      (output string will be null-terminated)
650
 * map: conversion map for single characters
651
 * cmap: conversion map for combined characters
652
 *      (optional, pass NULL if none)
653
 * cmapsize: number of entries in the conversion map for combined characters
654
 *      (optional, pass 0 if none)
655
 * conv_func: algorithmic encoding conversion function
656
 *      (optional, pass NULL if none)
657
 * encoding: PG identifier for the local encoding
658
 *
659
 * For each character, the map is consulted first; if no match, the cmap
660
 * (if provided) is consulted next; if still no match, the conv_func
661
 * (if provided) is applied.  An error is raised if no match is found.
662
 *
663
 * See pg_wchar.h for more details about the data structures used here.
664
 */
665
void
666
LocalToUtf(const unsigned char *iso, int len,
667
       unsigned char *utf,
668
       const pg_mb_radix_tree *map,
669
       const pg_local_to_utf_combined *cmap, int cmapsize,
670
       utf_local_conversion_func conv_func,
671
       int encoding)
672
0
{
673
0
  uint32    iiso;
674
0
  int     l;
675
0
  const pg_local_to_utf_combined *cp;
676
677
0
  if (!PG_VALID_ENCODING(encoding))
678
0
    ereport(ERROR,
679
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
680
0
         errmsg("invalid encoding number: %d", encoding)));
681
682
0
  for (; len > 0; len -= l)
683
0
  {
684
0
    unsigned char b1 = 0;
685
0
    unsigned char b2 = 0;
686
0
    unsigned char b3 = 0;
687
0
    unsigned char b4 = 0;
688
689
    /* "break" cases all represent errors */
690
0
    if (*iso == '\0')
691
0
      break;
692
693
0
    if (!IS_HIGHBIT_SET(*iso))
694
0
    {
695
      /* ASCII case is easy, assume it's one-to-one conversion */
696
0
      *utf++ = *iso++;
697
0
      l = 1;
698
0
      continue;
699
0
    }
700
701
0
    l = pg_encoding_verifymb(encoding, (const char *) iso, len);
702
0
    if (l < 0)
703
0
      break;
704
705
    /* collect coded char of length l */
706
0
    if (l == 1)
707
0
      b4 = *iso++;
708
0
    else if (l == 2)
709
0
    {
710
0
      b3 = *iso++;
711
0
      b4 = *iso++;
712
0
    }
713
0
    else if (l == 3)
714
0
    {
715
0
      b2 = *iso++;
716
0
      b3 = *iso++;
717
0
      b4 = *iso++;
718
0
    }
719
0
    else if (l == 4)
720
0
    {
721
0
      b1 = *iso++;
722
0
      b2 = *iso++;
723
0
      b3 = *iso++;
724
0
      b4 = *iso++;
725
0
    }
726
0
    else
727
0
    {
728
0
      elog(ERROR, "unsupported character length %d", l);
729
0
      iiso = 0;     /* keep compiler quiet */
730
0
    }
731
0
    iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
732
733
0
    if (map)
734
0
    {
735
0
      uint32    converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
736
737
0
      if (converted)
738
0
      {
739
0
        utf = store_coded_char(utf, converted);
740
0
        continue;
741
0
      }
742
743
      /* If there's a combined character map, try that */
744
0
      if (cmap)
745
0
      {
746
0
        cp = bsearch(&iiso, cmap, cmapsize,
747
0
               sizeof(pg_local_to_utf_combined), compare4);
748
749
0
        if (cp)
750
0
        {
751
0
          utf = store_coded_char(utf, cp->utf1);
752
0
          utf = store_coded_char(utf, cp->utf2);
753
0
          continue;
754
0
        }
755
0
      }
756
0
    }
757
758
    /* if there's a conversion function, try that */
759
0
    if (conv_func)
760
0
    {
761
0
      uint32    converted = (*conv_func) (iiso);
762
763
0
      if (converted)
764
0
      {
765
0
        utf = store_coded_char(utf, converted);
766
0
        continue;
767
0
      }
768
0
    }
769
770
    /* failed to translate this character */
771
0
    report_untranslatable_char(encoding, PG_UTF8,
772
0
                   (const char *) (iso - l), len);
773
0
  }
774
775
  /* if we broke out of loop early, must be invalid input */
776
0
  if (len > 0)
777
0
    report_invalid_encoding(encoding, (const char *) iso, len);
778
779
0
  *utf = '\0';
780
0
}