/Users/deen/code/yugabyte-db/build/debugcov-clang-dynamic-arm64-ninja/postgres_build/src/interfaces/libpq/unicode_norm.c

Source (jump to first uncovered line)
/*-------------------------------------------------------------------------
 * unicode_norm.c
 *    Normalize a Unicode string to NFKC form
 *
 * This implements Unicode normalization, per the documentation at
 * http://www.unicode.org/reports/tr15/.
 *
 * Portions Copyright (c) 2017-2018, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *    src/common/unicode_norm.c
 *
 *-------------------------------------------------------------------------
 */
#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#include "common/unicode_norm.h"
#include "common/unicode_norm_table.h"

#ifndef FRONTEND
#define ALLOC(size) palloc(size)
#define FREE(size) pfree(size)
#else
#define ALLOC(size) malloc(size)
#define FREE(size) free(size)
#endif

/* Constants for calculations with Hangul characters */
#define SBASE   0xAC00    /* U+AC00 */
#define LBASE   0x1100    /* U+1100 */
#define VBASE   0x1161    /* U+1161 */
#define TBASE   0x11A7    /* U+11A7 */
#define LCOUNT    19
#define VCOUNT    21
#define TCOUNT    28
#define NCOUNT    VCOUNT * TCOUNT
#define SCOUNT    LCOUNT * NCOUNT

/* comparison routine for bsearch() of decomposition lookup table. */
static int
conv_compare(const void *p1, const void *p2)
{
  uint32    v1,
        v2;

  v1 = *(const uint32 *) p1;
  v2 = ((const pg_unicode_decomposition *) p2)->codepoint;
  return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
}

/*
 * Get the entry corresponding to code in the decomposition lookup table.
 */
static pg_unicode_decomposition *
get_code_entry(pg_wchar code)
{
  return bsearch(&(code),
           (void *) UnicodeDecompMain,
           lengthof(UnicodeDecompMain),
           sizeof(pg_unicode_decomposition),
           conv_compare);
}

/*
 * Given a decomposition entry looked up earlier, get the decomposed
 * characters.
 *
 * Note: the returned pointer can point to statically allocated buffer, and
 * is only valid until next call to this function!
 */
static const pg_wchar *
get_code_decomposition(pg_unicode_decomposition *entry, int *dec_size)
{
  static pg_wchar x;

  if (DECOMPOSITION_IS_INLINE(entry))
  {
    Assert(DECOMPOSITION_SIZE(entry) == 1);
    x = (pg_wchar) entry->dec_index;
    *dec_size = 1;
    return &x;
  }
  else
  {
    *dec_size = DECOMPOSITION_SIZE(entry);
    return &UnicodeDecomp_codepoints[entry->dec_index];
  }
}

/*
 * Calculate how many characters a given character will decompose to.
 *
 * This needs to recurse, if the character decomposes into characters that
 * are, in turn, decomposable.
 */
static int
get_decomposed_size(pg_wchar code)
{
  pg_unicode_decomposition *entry;
  int     size = 0;
  int     i;
  const uint32 *decomp;
  int     dec_size;

  /*
   * Fast path for Hangul characters not stored in tables to save memory as
   * decomposition is algorithmic. See
   * http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
   * the matter.
   */
  if (code >= SBASE && code < SBASE + SCOUNT)
  {
    uint32    tindex,
          sindex;

    sindex = code - SBASE;
    tindex = sindex % TCOUNT;

    if (tindex != 0)
      return 3;
    return 2;
  }

  entry = get_code_entry(code);

  /*
   * Just count current code if no other decompositions.  A NULL entry is
   * equivalent to a character with class 0 and no decompositions.
   */
  if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0)
    return 1;

  /*
   * If this entry has other decomposition codes look at them as well. First
   * get its decomposition in the list of tables available.
   */
  decomp = get_code_decomposition(entry, &dec_size);
  for (i = 0; i < dec_size; i++)
  {
    uint32    lcode = decomp[i];

    size += get_decomposed_size(lcode);
  }

  return size;
}

/*
 * Recompose a set of characters. For hangul characters, the calculation
 * is algorithmic. For others, an inverse lookup at the decomposition
 * table is necessary. Returns true if a recomposition can be done, and
 * false otherwise.
 */
static bool
recompose_code(uint32 start, uint32 code, uint32 *result)
{
  /*
   * Handle Hangul characters algorithmically, per the Unicode spec.
   *
   * Check if two current characters are L and V.
   */
  if (start >= LBASE && start < LBASE + LCOUNT &&
    code >= VBASE && code < VBASE + VCOUNT)
  {
    /* make syllable of form LV */
    uint32    lindex = start - LBASE;
    uint32    vindex = code - VBASE;

    *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
    return true;
  }
  /* Check if two current characters are LV and T */
  else if (start >= SBASE && start < (SBASE + SCOUNT) &&
       ((start - SBASE) % TCOUNT) == 0 &&
       code >= TBASE && code < (TBASE + TCOUNT))
  {
    /* make syllable of from LVT */
    uint32    tindex = code - TBASE;

    *result = start + tindex;
    return true;
  }
  else
  {
    int     i;

    /*
     * Do an inverse lookup of the decomposition tables to see if anything
     * matches. The comparison just needs to be a perfect match on the
     * sub-table of size two, because the start character has already been
     * recomposed partially.
     */
    for (i = 0; i < lengthof(UnicodeDecompMain); i++)
    {
      const pg_unicode_decomposition *entry = &UnicodeDecompMain[i];

      if (DECOMPOSITION_SIZE(entry) != 2)
        continue;

      if (DECOMPOSITION_NO_COMPOSE(entry))
        continue;

      if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
        code == UnicodeDecomp_codepoints[entry->dec_index + 1])
      {
        *result = entry->codepoint;
        return true;
      }
    }
  }

  return false;
}

/*
 * Decompose the given code into the array given by caller. The
 * decomposition begins at the position given by caller, saving one
 * lookup on the decomposition table. The current position needs to be
 * updated here to let the caller know from where to continue filling
 * in the array result.
 */
static void
decompose_code(pg_wchar code, pg_wchar **result, int *current)
{
  pg_unicode_decomposition *entry;
  int     i;
  const uint32 *decomp;
  int     dec_size;

  /*
   * Fast path for Hangul characters not stored in tables to save memory as
   * decomposition is algorithmic. See
   * http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
   * the matter.
   */
  if (code >= SBASE && code < SBASE + SCOUNT)
  {
    uint32    l,
          v,
          tindex,
          sindex;
    pg_wchar   *res = *result;

    sindex = code - SBASE;
    l = LBASE + sindex / (VCOUNT * TCOUNT);
    v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT;
    tindex = sindex % TCOUNT;

    res[*current] = l;
    (*current)++;
    res[*current] = v;
    (*current)++;

    if (tindex != 0)
    {
      res[*current] = TBASE + tindex;
      (*current)++;
    }

    return;
  }

  entry = get_code_entry(code);

  /*
   * Just fill in with the current decomposition if there are no
   * decomposition codes to recurse to.  A NULL entry is equivalent to a
   * character with class 0 and no decompositions, so just leave also in
   * this case.
   */
  if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0)
  {
    pg_wchar   *res = *result;

    res[*current] = code;
    (*current)++;
    return;
  }

  /*
   * If this entry has other decomposition codes look at them as well.
   */
  decomp = get_code_decomposition(entry, &dec_size);
  for (i = 0; i < dec_size; i++)
  {
    pg_wchar  lcode = (pg_wchar) decomp[i];

    /* Leave if no more decompositions */
    decompose_code(lcode, result, current);
  }
}

/*
 * unicode_normalize_kc - Normalize a Unicode string to NFKC form.
 *
 * The input is a 0-terminated array of codepoints.
 *
 * In frontend, returns a 0-terminated array of codepoints, allocated with
 * malloc. Or NULL if we run out of memory. In frontend, the returned
 * string is palloc'd instead, and OOM is reported with ereport().
 */
pg_wchar *
unicode_normalize_kc(const pg_wchar *input)
{
  pg_wchar   *decomp_chars;
  pg_wchar   *recomp_chars;
  int     decomp_size,
        current_size;
  int     count;
  const pg_wchar *p;

  /* variables for recomposition */
  int     last_class;
  int     starter_pos;
  int     target_pos;
  uint32    starter_ch;

  /* First, do character decomposition */

  /*
   * Calculate how many characters long the decomposed version will be.
   */
  decomp_size = 0;
  for (p = input; *p; p++)
    decomp_size += get_decomposed_size(*p);

  decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
  if (decomp_chars == NULL)
    return NULL;

  /*
   * Now fill in each entry recursively. This needs a second pass on the
   * decomposition table.
   */
  current_size = 0;
  for (p = input; *p; p++)
    decompose_code(*p, &decomp_chars, &current_size);
  decomp_chars[decomp_size] = '\0';
  Assert(decomp_size == current_size);

  /*
   * Now apply canonical ordering.
   */
  for (count = 1; count < decomp_size; count++)
  {
    pg_wchar  prev = decomp_chars[count - 1];
    pg_wchar  next = decomp_chars[count];
    pg_wchar  tmp;
    pg_unicode_decomposition *prevEntry = get_code_entry(prev);
    pg_unicode_decomposition *nextEntry = get_code_entry(next);

    /*
     * If no entries are found, the character used is either an Hangul
     * character or a character with a class of 0 and no decompositions,
     * so move to next result.
     */
    if (prevEntry == NULL || nextEntry == NULL)
      continue;

    /*
     * Per Unicode (http://unicode.org/reports/tr15/tr15-18.html) annex 4,
     * a sequence of two adjacent characters in a string is an
     * exchangeable pair if the combining class (from the Unicode
     * Character Database) for the first character is greater than the
     * combining class for the second, and the second is not a starter.  A
     * character is a starter if its combining class is 0.
     */
    if (nextEntry->comb_class == 0x0 || prevEntry->comb_class == 0x0)
      continue;

    if (prevEntry->comb_class <= nextEntry->comb_class)
      continue;

    /* exchange can happen */
    tmp = decomp_chars[count - 1];
    decomp_chars[count - 1] = decomp_chars[count];
    decomp_chars[count] = tmp;

    /* backtrack to check again */
    if (count > 1)
      count -= 2;
  }

  /*
   * The last phase of NFKC is the recomposition of the reordered Unicode
   * string using combining classes. The recomposed string cannot be longer
   * than the decomposed one, so make the allocation of the output string
   * based on that assumption.
   */
  recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
  if (!recomp_chars)
  {
    FREE(decomp_chars);
    return NULL;
  }

  last_class = -1;      /* this eliminates a special check */
  starter_pos = 0;
  target_pos = 1;
  starter_ch = recomp_chars[0] = decomp_chars[0];

  for (count = 1; count < decomp_size; count++)
  {
    pg_wchar  ch = decomp_chars[count];
    pg_unicode_decomposition *ch_entry = get_code_entry(ch);
    int     ch_class = (ch_entry == NULL) ? 0 : ch_entry->comb_class;
    pg_wchar  composite;

    if (last_class < ch_class &&
      recompose_code(starter_ch, ch, &composite))
    {
      recomp_chars[starter_pos] = composite;
      starter_ch = composite;
    }
    else if (ch_class == 0)
    {
      starter_pos = target_pos;
      starter_ch = ch;
      last_class = -1;
      recomp_chars[target_pos++] = ch;
    }
    else
    {
      last_class = ch_class;
      recomp_chars[target_pos++] = ch;
    }
  }
  recomp_chars[target_pos] = (pg_wchar) '\0';

  FREE(decomp_chars);

  return recomp_chars;
}

YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

Line	Count	Source (jump to first uncovered line)
1		/*-------------------------------------------------------------------------
2		* unicode_norm.c
3		* Normalize a Unicode string to NFKC form
4		*
5		* This implements Unicode normalization, per the documentation at
6		* http://www.unicode.org/reports/tr15/.
7		*
8		* Portions Copyright (c) 2017-2018, PostgreSQL Global Development Group
9		*
10		* IDENTIFICATION
11		* src/common/unicode_norm.c
12		*
13		*-------------------------------------------------------------------------
14		*/
15		#ifndef FRONTEND
16		#include "postgres.h"
17		#else
18		#include "postgres_fe.h"
19		#endif
20
21		#include "common/unicode_norm.h"
22		#include "common/unicode_norm_table.h"
23
24		#ifndef FRONTEND
25		#define ALLOC(size) palloc(size)
26		#define FREE(size) pfree(size)
27		#else
28	0	#define ALLOC(size) malloc(size)
29	0	#define FREE(size) free(size)
30		#endif
31
32		/* Constants for calculations with Hangul characters */
33	0	#define SBASE 0xAC00 /* U+AC00 */
34	0	#define LBASE 0x1100 /* U+1100 */
35	0	#define VBASE 0x1161 /* U+1161 */
36	0	#define TBASE 0x11A7 /* U+11A7 */
37	0	#define LCOUNT 19
38	0	#define VCOUNT 21
39	0	#define TCOUNT 28
40	0	#define NCOUNT VCOUNT * TCOUNT
41	0	#define SCOUNT LCOUNT * NCOUNT
42
43		/* comparison routine for bsearch() of decomposition lookup table. */
44		static int
45		conv_compare(const void p1, const void p2)
46	0	{
47	0	uint32 v1,
48	0	v2;
49
50	0	v1 = (const uint32 ) p1;
51	0	v2 = ((const pg_unicode_decomposition *) p2)->codepoint;
52	0	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
53	0	}
54
55		/*
56		* Get the entry corresponding to code in the decomposition lookup table.
57		*/
58		static pg_unicode_decomposition *
59		get_code_entry(pg_wchar code)
60	0	{
61	0	return bsearch(&(code),
62	0	(void *) UnicodeDecompMain,
63	0	lengthof(UnicodeDecompMain),
64	0	sizeof(pg_unicode_decomposition),
65	0	conv_compare);
66	0	}
67
68		/*
69		* Given a decomposition entry looked up earlier, get the decomposed
70		* characters.
71		*
72		* Note: the returned pointer can point to statically allocated buffer, and
73		* is only valid until next call to this function!
74		*/
75		static const pg_wchar *
76		get_code_decomposition(pg_unicode_decomposition entry, int dec_size)
77	0	{
78	0	static pg_wchar x;
79
80	0	if (DECOMPOSITION_IS_INLINE(entry))
81	0	{
82	0	Assert(DECOMPOSITION_SIZE(entry) == 1);
83	0	x = (pg_wchar) entry->dec_index;
84	0	*dec_size = 1;
85	0	return &x;
86	0	}
87	0	else
88	0	{
89	0	*dec_size = DECOMPOSITION_SIZE(entry);
90	0	return &UnicodeDecomp_codepoints[entry->dec_index];
91	0	}
92	0	}
93
94		/*
95		* Calculate how many characters a given character will decompose to.
96		*
97		* This needs to recurse, if the character decomposes into characters that
98		* are, in turn, decomposable.
99		*/
100		static int
101		get_decomposed_size(pg_wchar code)
102	0	{
103	0	pg_unicode_decomposition *entry;
104	0	int size = 0;
105	0	int i;
106	0	const uint32 *decomp;
107	0	int dec_size;
108
109		/*
110		* Fast path for Hangul characters not stored in tables to save memory as
111		* decomposition is algorithmic. See
112		* http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
113		* the matter.
114		*/
115	0	if (code >= SBASE && code < SBASE + SCOUNT)
116	0	{
117	0	uint32 tindex,
118	0	sindex;
119
120	0	sindex = code - SBASE;
121	0	tindex = sindex % TCOUNT;
122
123	0	if (tindex != 0)
124	0	return 3;
125	0	return 2;
126	0	}
127
128	0	entry = get_code_entry(code);
129
130		/*
131		* Just count current code if no other decompositions. A NULL entry is
132		* equivalent to a character with class 0 and no decompositions.
133		*/
134	0	if (entry == NULL \|\| DECOMPOSITION_SIZE(entry) == 0)
135	0	return 1;
136
137		/*
138		* If this entry has other decomposition codes look at them as well. First
139		* get its decomposition in the list of tables available.
140		*/
141	0	decomp = get_code_decomposition(entry, &dec_size);
142	0	for (i = 0; i < dec_size; i++)
143	0	{
144	0	uint32 lcode = decomp[i];
145
146	0	size += get_decomposed_size(lcode);
147	0	}
148
149	0	return size;
150	0	}
151
152		/*
153		* Recompose a set of characters. For hangul characters, the calculation
154		* is algorithmic. For others, an inverse lookup at the decomposition
155		* table is necessary. Returns true if a recomposition can be done, and
156		* false otherwise.
157		*/
158		static bool
159		recompose_code(uint32 start, uint32 code, uint32 *result)
160	0	{
161		/*
162		* Handle Hangul characters algorithmically, per the Unicode spec.
163		*
164		* Check if two current characters are L and V.
165		*/
166	0	if (start >= LBASE && start < LBASE + LCOUNT &&
167	0	code >= VBASE && code < VBASE + VCOUNT)
168	0	{
169		/* make syllable of form LV */
170	0	uint32 lindex = start - LBASE;
171	0	uint32 vindex = code - VBASE;
172
173	0	result = SBASE + (lindex VCOUNT + vindex) * TCOUNT;
174	0	return true;
175	0	}
176		/* Check if two current characters are LV and T */
177	0	else if (start >= SBASE && start < (SBASE + SCOUNT) &&
178	0	((start - SBASE) % TCOUNT) == 0 &&
179	0	code >= TBASE && code < (TBASE + TCOUNT))
180	0	{
181		/* make syllable of from LVT */
182	0	uint32 tindex = code - TBASE;
183
184	0	*result = start + tindex;
185	0	return true;
186	0	}
187	0	else
188	0	{
189	0	int i;
190
191		/*
192		* Do an inverse lookup of the decomposition tables to see if anything
193		* matches. The comparison just needs to be a perfect match on the
194		* sub-table of size two, because the start character has already been
195		* recomposed partially.
196		*/
197	0	for (i = 0; i < lengthof(UnicodeDecompMain); i++)
198	0	{
199	0	const pg_unicode_decomposition *entry = &UnicodeDecompMain[i];
200
201	0	if (DECOMPOSITION_SIZE(entry) != 2)
202	0	continue;
203
204	0	if (DECOMPOSITION_NO_COMPOSE(entry))
205	0	continue;
206
207	0	if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
208	0	code == UnicodeDecomp_codepoints[entry->dec_index + 1])
209	0	{
210	0	*result = entry->codepoint;
211	0	return true;
212	0	}
213	0	}
214	0	}
215
216	0	return false;
217	0	}
218
219		/*
220		* Decompose the given code into the array given by caller. The
221		* decomposition begins at the position given by caller, saving one
222		* lookup on the decomposition table. The current position needs to be
223		* updated here to let the caller know from where to continue filling
224		* in the array result.
225		*/
226		static void
227		decompose_code(pg_wchar code, pg_wchar *result, int current)
228	0	{
229	0	pg_unicode_decomposition *entry;
230	0	int i;
231	0	const uint32 *decomp;
232	0	int dec_size;
233
234		/*
235		* Fast path for Hangul characters not stored in tables to save memory as
236		* decomposition is algorithmic. See
237		* http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
238		* the matter.
239		*/
240	0	if (code >= SBASE && code < SBASE + SCOUNT)
241	0	{
242	0	uint32 l,
243	0	v,
244	0	tindex,
245	0	sindex;
246	0	pg_wchar res = result;
247
248	0	sindex = code - SBASE;
249	0	l = LBASE + sindex / (VCOUNT * TCOUNT);
250	0	v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT;
251	0	tindex = sindex % TCOUNT;
252
253	0	res[*current] = l;
254	0	(*current)++;
255	0	res[*current] = v;
256	0	(*current)++;
257
258	0	if (tindex != 0)
259	0	{
260	0	res[*current] = TBASE + tindex;
261	0	(*current)++;
262	0	}
263
264	0	return;
265	0	}
266
267	0	entry = get_code_entry(code);
268
269		/*
270		* Just fill in with the current decomposition if there are no
271		* decomposition codes to recurse to. A NULL entry is equivalent to a
272		* character with class 0 and no decompositions, so just leave also in
273		* this case.
274		*/
275	0	if (entry == NULL \|\| DECOMPOSITION_SIZE(entry) == 0)
276	0	{
277	0	pg_wchar res = result;
278
279	0	res[*current] = code;
280	0	(*current)++;
281	0	return;
282	0	}
283
284		/*
285		* If this entry has other decomposition codes look at them as well.
286		*/
287	0	decomp = get_code_decomposition(entry, &dec_size);
288	0	for (i = 0; i < dec_size; i++)
289	0	{
290	0	pg_wchar lcode = (pg_wchar) decomp[i];
291
292		/* Leave if no more decompositions */
293	0	decompose_code(lcode, result, current);
294	0	}
295	0	}
296
297		/*
298		* unicode_normalize_kc - Normalize a Unicode string to NFKC form.
299		*
300		* The input is a 0-terminated array of codepoints.
301		*
302		* In frontend, returns a 0-terminated array of codepoints, allocated with
303		* malloc. Or NULL if we run out of memory. In frontend, the returned
304		* string is palloc'd instead, and OOM is reported with ereport().
305		*/
306		pg_wchar *
307		unicode_normalize_kc(const pg_wchar *input)
308	0	{
309	0	pg_wchar *decomp_chars;
310	0	pg_wchar *recomp_chars;
311	0	int decomp_size,
312	0	current_size;
313	0	int count;
314	0	const pg_wchar *p;
315
316		/* variables for recomposition */
317	0	int last_class;
318	0	int starter_pos;
319	0	int target_pos;
320	0	uint32 starter_ch;
321
322		/* First, do character decomposition */
323
324		/*
325		* Calculate how many characters long the decomposed version will be.
326		*/
327	0	decomp_size = 0;
328	0	for (p = input; *p; p++)
329	0	decomp_size += get_decomposed_size(*p);
330
331	0	decomp_chars = (pg_wchar ) ALLOC((decomp_size + 1) sizeof(pg_wchar));
332	0	if (decomp_chars == NULL)
333	0	return NULL;
334
335		/*
336		* Now fill in each entry recursively. This needs a second pass on the
337		* decomposition table.
338		*/
339	0	current_size = 0;
340	0	for (p = input; *p; p++)
341	0	decompose_code(*p, &decomp_chars, &current_size);
342	0	decomp_chars[decomp_size] = '\0';
343	0	Assert(decomp_size == current_size);
344
345		/*
346		* Now apply canonical ordering.
347		*/
348	0	for (count = 1; count < decomp_size; count++)
349	0	{
350	0	pg_wchar prev = decomp_chars[count - 1];
351	0	pg_wchar next = decomp_chars[count];
352	0	pg_wchar tmp;
353	0	pg_unicode_decomposition *prevEntry = get_code_entry(prev);
354	0	pg_unicode_decomposition *nextEntry = get_code_entry(next);
355
356		/*
357		* If no entries are found, the character used is either an Hangul
358		* character or a character with a class of 0 and no decompositions,
359		* so move to next result.
360		*/
361	0	if (prevEntry == NULL \|\| nextEntry == NULL)
362	0	continue;
363
364		/*
365		* Per Unicode (http://unicode.org/reports/tr15/tr15-18.html) annex 4,
366		* a sequence of two adjacent characters in a string is an
367		* exchangeable pair if the combining class (from the Unicode
368		* Character Database) for the first character is greater than the
369		* combining class for the second, and the second is not a starter. A
370		* character is a starter if its combining class is 0.
371		*/
372	0	if (nextEntry->comb_class == 0x0 \|\| prevEntry->comb_class == 0x0)
373	0	continue;
374
375	0	if (prevEntry->comb_class <= nextEntry->comb_class)
376	0	continue;
377
378		/* exchange can happen */
379	0	tmp = decomp_chars[count - 1];
380	0	decomp_chars[count - 1] = decomp_chars[count];
381	0	decomp_chars[count] = tmp;
382
383		/* backtrack to check again */
384	0	if (count > 1)
385	0	count -= 2;
386	0	}
387
388		/*
389		* The last phase of NFKC is the recomposition of the reordered Unicode
390		* string using combining classes. The recomposed string cannot be longer
391		* than the decomposed one, so make the allocation of the output string
392		* based on that assumption.
393		*/
394	0	recomp_chars = (pg_wchar ) ALLOC((decomp_size + 1) sizeof(pg_wchar));
395	0	if (!recomp_chars)
396	0	{
397	0	FREE(decomp_chars);
398	0	return NULL;
399	0	}
400
401	0	last_class = -1; /* this eliminates a special check */
402	0	starter_pos = 0;
403	0	target_pos = 1;
404	0	starter_ch = recomp_chars[0] = decomp_chars[0];
405
406	0	for (count = 1; count < decomp_size; count++)
407	0	{
408	0	pg_wchar ch = decomp_chars[count];
409	0	pg_unicode_decomposition *ch_entry = get_code_entry(ch);
410	0	int ch_class = (ch_entry == NULL) ? 0 : ch_entry->comb_class;
411	0	pg_wchar composite;
412
413	0	if (last_class < ch_class &&
414	0	recompose_code(starter_ch, ch, &composite))
415	0	{
416	0	recomp_chars[starter_pos] = composite;
417	0	starter_ch = composite;
418	0	}
419	0	else if (ch_class == 0)
420	0	{
421	0	starter_pos = target_pos;
422	0	starter_ch = ch;
423	0	last_class = -1;
424	0	recomp_chars[target_pos++] = ch;
425	0	}
426	0	else
427	0	{
428	0	last_class = ch_class;
429	0	recomp_chars[target_pos++] = ch;
430	0	}
431	0	}
432	0	recomp_chars[target_pos] = (pg_wchar) '\0';
433
434	0	FREE(decomp_chars);
435
436	0	return recomp_chars;
437	0	}