1111 * Portions Copyright (c) 1994, Regents of the University of California
1212 *
1313 * IDENTIFICATION
14- *$PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.64 2006/03/05 15:58:42 momjian Exp $
14+ *$PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.65 2006/09/04 18:32:55 tgl Exp $
1515 *
1616 *-------------------------------------------------------------------------
1717 */
@@ -64,50 +64,23 @@ wchareq(char *p1, char *p2)
6464return 1 ;
6565}
6666
67- /*--------------------
68- * Support routine for MatchTextIC. Compares given multibyte streams
69- * as wide characters ignoring case.
70- * Returns 1 if they match, otherwise returns 0.
71- *--------------------
67+ /*
68+ * Formerly we had a routine iwchareq() here that tried to do case-insensitive
69+ * comparison of multibyte characters. It did not work at all, however,
70+ * because it relied on tolower() which has a single-byte API ... and
71+ * towlower() wouldn't be much better since we have no suitably cheap way
72+ * of getting a single character transformed to the system's wchar_t format.
73+ * So now, we just downcase the strings using lower() and apply regular LIKE
74+ * comparison. This should be revisited when we install better locale support.
75+ *
76+ * Note that MBMatchText and MBMatchTextIC do exactly the same thing now.
77+ * Is it worth refactoring to avoid duplicated code? They might become
78+ * different again in the future.
7279 */
73- #define CHARMAX 0x80
74-
75- static int
76- iwchareq (char * p1 ,char * p2 )
77- {
78- pg_wchar c1 [2 ],
79- c2 [2 ];
80- int l ;
81-
82- /*
83- * Shortcut: if both *p1 and *p2 are lower than CHARMAX, then we can assume
84- * they are ASCII.
85- */
86- if ((unsignedchar )* p1 < CHARMAX && (unsignedchar )* p2 < CHARMAX )
87- return (tolower ((unsignedchar )* p1 )== tolower ((unsignedchar )* p2 ));
88-
89- /*
90- * If one of them is ASCII while the other is not, then they must be
91- * different characters.
92- */
93- else if ((unsignedchar )* p1 < CHARMAX || (unsignedchar )* p2 < CHARMAX )
94- return 0 ;
95-
96- /*
97- * OK, *p1 and *p2 are both >= CHARMAX, so they must be multibyte
98- * characters.
99- */
100- l = pg_mblen (p1 );
101- (void )pg_mb2wchar_with_len (p1 ,c1 ,l );
102- c1 [0 ]= tolower (c1 [0 ]);
103- l = pg_mblen (p2 );
104- (void )pg_mb2wchar_with_len (p2 ,c2 ,l );
105- c2 [0 ]= tolower (c2 [0 ]);
106- return (c1 [0 ]== c2 [0 ]);
107- }
10880
81+ /* Set up to compile like_match.c for multibyte characters */
10982#define CHAREQ (p1 ,p2 ) wchareq(p1, p2)
110- #define ICHAREQ (p1 ,p2 )iwchareq (p1, p2)
83+ #define ICHAREQ (p1 ,p2 )wchareq (p1, p2)
11184#define NextChar (p ,plen ) \
11285do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
11386#define CopyAdvChar (dst ,src ,srclen ) \
@@ -120,7 +93,9 @@ iwchareq(char *p1, char *p2)
12093#define MatchText MBMatchText
12194#define MatchTextIC MBMatchTextIC
12295#define do_like_escape MB_do_like_escape
96+
12397#include "like_match.c"
98+
12499#undef CHAREQ
125100#undef ICHAREQ
126101#undef NextChar
@@ -129,15 +104,19 @@ iwchareq(char *p1, char *p2)
129104#undef MatchTextIC
130105#undef do_like_escape
131106
107+ /* Set up to compile like_match.c for single-byte characters */
132108#define CHAREQ (p1 ,p2 ) (*(p1) == *(p2))
133109#define ICHAREQ (p1 ,p2 ) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2)))
134110#define NextChar (p ,plen ) ((p)++, (plen)--)
135111#define CopyAdvChar (dst ,src ,srclen ) (*(dst)++ = *(src)++, (srclen)--)
136112
113+ #include "like_match.c"
114+
115+ /* And some support for BYTEA */
137116#define BYTEA_CHAREQ (p1 ,p2 ) (*(p1) == *(p2))
138117#define BYTEA_NextChar (p ,plen ) ((p)++, (plen)--)
139118#define BYTEA_CopyAdvChar (dst ,src ,srclen ) (*(dst)++ = *(src)++, (srclen)--)
140- #include "like_match.c"
119+
141120
142121/*
143122 *interface routines called by the function manager
@@ -296,15 +275,32 @@ nameiclike(PG_FUNCTION_ARGS)
296275int slen ,
297276plen ;
298277
299- s = NameStr (* str );
300- slen = strlen (s );
301- p = VARDATA (pat );
302- plen = (VARSIZE (pat )- VARHDRSZ );
303-
304278if (pg_database_encoding_max_length ()== 1 )
279+ {
280+ s = NameStr (* str );
281+ slen = strlen (s );
282+ p = VARDATA (pat );
283+ plen = (VARSIZE (pat )- VARHDRSZ );
305284result = (MatchTextIC (s ,slen ,p ,plen )== LIKE_TRUE );
285+ }
306286else
287+ {
288+ /* Force inputs to lower case to achieve case insensitivity */
289+ text * strtext ;
290+
291+ strtext = DatumGetTextP (DirectFunctionCall1 (name_text ,
292+ NameGetDatum (str )));
293+ strtext = DatumGetTextP (DirectFunctionCall1 (lower ,
294+ PointerGetDatum (strtext )));
295+ pat = DatumGetTextP (DirectFunctionCall1 (lower ,
296+ PointerGetDatum (pat )));
297+
298+ s = VARDATA (strtext );
299+ slen = (VARSIZE (strtext )- VARHDRSZ );
300+ p = VARDATA (pat );
301+ plen = (VARSIZE (pat )- VARHDRSZ );
307302result = (MBMatchTextIC (s ,slen ,p ,plen )== LIKE_TRUE );
303+ }
308304
309305PG_RETURN_BOOL (result );
310306}
@@ -320,15 +316,32 @@ nameicnlike(PG_FUNCTION_ARGS)
320316int slen ,
321317plen ;
322318
323- s = NameStr (* str );
324- slen = strlen (s );
325- p = VARDATA (pat );
326- plen = (VARSIZE (pat )- VARHDRSZ );
327-
328319if (pg_database_encoding_max_length ()== 1 )
320+ {
321+ s = NameStr (* str );
322+ slen = strlen (s );
323+ p = VARDATA (pat );
324+ plen = (VARSIZE (pat )- VARHDRSZ );
329325result = (MatchTextIC (s ,slen ,p ,plen )!= LIKE_TRUE );
326+ }
330327else
328+ {
329+ /* Force inputs to lower case to achieve case insensitivity */
330+ text * strtext ;
331+
332+ strtext = DatumGetTextP (DirectFunctionCall1 (name_text ,
333+ NameGetDatum (str )));
334+ strtext = DatumGetTextP (DirectFunctionCall1 (lower ,
335+ PointerGetDatum (strtext )));
336+ pat = DatumGetTextP (DirectFunctionCall1 (lower ,
337+ PointerGetDatum (pat )));
338+
339+ s = VARDATA (strtext );
340+ slen = (VARSIZE (strtext )- VARHDRSZ );
341+ p = VARDATA (pat );
342+ plen = (VARSIZE (pat )- VARHDRSZ );
331343result = (MBMatchTextIC (s ,slen ,p ,plen )!= LIKE_TRUE );
344+ }
332345
333346PG_RETURN_BOOL (result );
334347}
@@ -344,15 +357,27 @@ texticlike(PG_FUNCTION_ARGS)
344357int slen ,
345358plen ;
346359
347- s = VARDATA (str );
348- slen = (VARSIZE (str )- VARHDRSZ );
349- p = VARDATA (pat );
350- plen = (VARSIZE (pat )- VARHDRSZ );
351-
352360if (pg_database_encoding_max_length ()== 1 )
361+ {
362+ s = VARDATA (str );
363+ slen = (VARSIZE (str )- VARHDRSZ );
364+ p = VARDATA (pat );
365+ plen = (VARSIZE (pat )- VARHDRSZ );
353366result = (MatchTextIC (s ,slen ,p ,plen )== LIKE_TRUE );
367+ }
354368else
369+ {
370+ /* Force inputs to lower case to achieve case insensitivity */
371+ str = DatumGetTextP (DirectFunctionCall1 (lower ,
372+ PointerGetDatum (str )));
373+ pat = DatumGetTextP (DirectFunctionCall1 (lower ,
374+ PointerGetDatum (pat )));
375+ s = VARDATA (str );
376+ slen = (VARSIZE (str )- VARHDRSZ );
377+ p = VARDATA (pat );
378+ plen = (VARSIZE (pat )- VARHDRSZ );
355379result = (MBMatchTextIC (s ,slen ,p ,plen )== LIKE_TRUE );
380+ }
356381
357382PG_RETURN_BOOL (result );
358383}
@@ -368,15 +393,27 @@ texticnlike(PG_FUNCTION_ARGS)
368393int slen ,
369394plen ;
370395
371- s = VARDATA (str );
372- slen = (VARSIZE (str )- VARHDRSZ );
373- p = VARDATA (pat );
374- plen = (VARSIZE (pat )- VARHDRSZ );
375-
376396if (pg_database_encoding_max_length ()== 1 )
397+ {
398+ s = VARDATA (str );
399+ slen = (VARSIZE (str )- VARHDRSZ );
400+ p = VARDATA (pat );
401+ plen = (VARSIZE (pat )- VARHDRSZ );
377402result = (MatchTextIC (s ,slen ,p ,plen )!= LIKE_TRUE );
403+ }
378404else
405+ {
406+ /* Force inputs to lower case to achieve case insensitivity */
407+ str = DatumGetTextP (DirectFunctionCall1 (lower ,
408+ PointerGetDatum (str )));
409+ pat = DatumGetTextP (DirectFunctionCall1 (lower ,
410+ PointerGetDatum (pat )));
411+ s = VARDATA (str );
412+ slen = (VARSIZE (str )- VARHDRSZ );
413+ p = VARDATA (pat );
414+ plen = (VARSIZE (pat )- VARHDRSZ );
379415result = (MBMatchTextIC (s ,slen ,p ,plen )!= LIKE_TRUE );
416+ }
380417
381418PG_RETURN_BOOL (result );
382419}