11/* -----------------------------------------------------------------------
22 * formatting.c
33 *
4- * $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.142 2008/06/17 16:09:06 momjian Exp $
4+ * $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.143 2008/06/23 19:27:19 momjian Exp $
55 *
66 *
77 * Portions Copyright (c) 1999-2008, PostgreSQL Global Development Group
@@ -925,9 +925,6 @@ static char *get_th(char *num, int type);
925925static char * str_numth (char * dest ,char * num ,int type );
926926static int strspace_len (char * str );
927927static int strdigits_len (char * str );
928- static char * str_toupper (char * buff );
929- static char * str_tolower (char * buff );
930- static char * str_initcap (char * buff );
931928
932929static int seq_search (char * name ,char * * array ,int type ,int max ,int * len );
933930static void do_to_timestamp (text * date_txt ,text * fmt ,
@@ -1424,12 +1421,24 @@ str_numth(char *dest, char *num, int type)
14241421return dest ;
14251422}
14261423
1424+ /*
1425+ * If the system provides the needed functions for wide-character manipulation
1426+ * (which are all standardized by C99), then we implement upper/lower/initcap
1427+ * using wide-character functions, if necessary. Otherwise we use the
1428+ * traditional <ctype.h> functions, which of course will not work as desired
1429+ * in multibyte character sets. Note that in either case we are effectively
1430+ * assuming that the database character encoding matches the encoding implied
1431+ * by LC_CTYPE.
1432+ */
1433+
14271434/* ----------
1428- * Convert string to upper case. It is designed to be multibyte-aware.
1435+ * Convert a string to lower case, using wide characters where needed.
1436+ * The caller passes the length in bytes so that both varlena data and
1437+ * ordinary char* strings can be handled by this function.
14291438 * ----------
14301439 */
1431- static char *
1432- str_toupper (char * buff )
1440+ char *
1441+ str_tolower (char * buff , size_t nbytes )
14331442{
14341443char * result ;
14351444
@@ -1438,27 +1447,46 @@ str_toupper(char *buff)
14381447
14391448#ifdef USE_WIDE_UPPER_LOWER
14401449if (pg_database_encoding_max_length ()> 1 && !lc_ctype_is_c ())
1441- result = wstring_upper (buff );
1450+ {
1451+ wchar_t * workspace ;
1452+ int curr_char = 0 ;
1453+
1454+ /* At most one wide char per input byte, plus one slot for the terminator */
1455+ workspace = (wchar_t * )palloc ((nbytes + 1 )* sizeof (wchar_t ));
1456+ /* buff may be varlena data with no terminator: convert exactly nbytes */
1457+ char2wchar (workspace ,nbytes + 1 ,buff ,nbytes );
1458+
1459+ for (curr_char = 0 ;workspace [curr_char ]!= 0 ;curr_char ++ )
1460+ workspace [curr_char ]= towlower (workspace [curr_char ]);
1461+
1462+ /* Make result large enough; case change might change number of bytes */
1463+ result = palloc (curr_char * MB_CUR_MAX + 1 );
1464+
1465+ wchar2char (result ,workspace ,curr_char * MB_CUR_MAX + 1 );
1466+ pfree (workspace );
1467+ }
14421468else
14431469#endif /* USE_WIDE_UPPER_LOWER */
14441470{
14451471char * p ;
14461472
1447- result = pstrdup (buff );
1473+ result = pnstrdup (buff , nbytes );
14481474
14491475for (p = result ;* p ;p ++ )
1450- * p = pg_toupper ((unsignedchar )* p );
1476+ * p = pg_tolower ((unsignedchar )* p );
14511477}
14521478
14531479return result ;
14541480}
14551481
14561482/* ----------
1457- * Convert string to lower case. It is designed to be multibyte-aware.
1483+ * Convert a string to upper case, using wide characters where needed.
1484+ * The caller passes the length in bytes so that both varlena data and
1485+ * ordinary char* strings can be handled by this function.
14581486 * ----------
14591487 */
1460- static char *
1461- str_tolower (char * buff )
1488+ char *
1489+ str_toupper (char * buff , size_t nbytes )
14621490{
14631491char * result ;
14641492
@@ -1467,27 +1495,46 @@ str_tolower(char *buff)
14671495
14681496#ifdef USE_WIDE_UPPER_LOWER
14691497if (pg_database_encoding_max_length ()> 1 && !lc_ctype_is_c ())
1470- result = wstring_lower (buff );
1498+ {
1499+ wchar_t * workspace ;
1500+ int curr_char = 0 ;
1501+
1502+ /* At most one wide char per input byte, plus one slot for the terminator */
1503+ workspace = (wchar_t * )palloc ((nbytes + 1 )* sizeof (wchar_t ));
1504+ /* buff may be varlena data with no terminator: convert exactly nbytes */
1505+ char2wchar (workspace ,nbytes + 1 ,buff ,nbytes );
1506+
1507+ for (curr_char = 0 ;workspace [curr_char ]!= 0 ;curr_char ++ )
1508+ workspace [curr_char ]= towupper (workspace [curr_char ]);
1509+
1510+ /* Make result large enough; case change might change number of bytes */
1511+ result = palloc (curr_char * MB_CUR_MAX + 1 );
1512+
1513+ wchar2char (result ,workspace ,curr_char * MB_CUR_MAX + 1 );
1514+ pfree (workspace );
1515+ }
14711516else
14721517#endif /* USE_WIDE_UPPER_LOWER */
14731518{
14741519char * p ;
14751520
1476- result = pstrdup (buff );
1521+ result = pnstrdup (buff , nbytes );
14771522
14781523for (p = result ;* p ;p ++ )
1479- * p = pg_tolower ((unsignedchar )* p );
1524+ * p = pg_toupper ((unsignedchar )* p );
14801525}
14811526
14821527return result ;
14831528}
1484-
1529+
14851530/* ----------
14861531 * wide-character-aware initcap function
1532+ * The caller passes the length in bytes so that both varlena data and
1533+ * ordinary char* strings can be handled by this function.
14871534 * ----------
14881535 */
1489- static char *
1490- str_initcap (char * buff )
1536+ char *
1537+ str_initcap (char * buff , size_t nbytes )
14911538{
14921539char * result ;
14931540bool wasalnum = false;
@@ -1499,35 +1546,34 @@ str_initcap(char *buff)
14991546if (pg_database_encoding_max_length ()> 1 && !lc_ctype_is_c ())
15001547{
15011548wchar_t * workspace ;
1502- text * in_text ;
1503- text * out_text ;
1504- int i ;
1549+ int curr_char = 0 ;
1550+
1551+ /* At most one wide char per input byte, plus a terminator; read only nbytes of buff */
1552+ workspace = (wchar_t * )palloc ((nbytes + 1 )* sizeof (wchar_t ));
15051553
1506- in_text = cstring_to_text (buff );
1507- workspace = texttowcs (in_text );
1554+ char2wchar (workspace ,nbytes + 1 ,buff ,nbytes );
15081555
1509- for (i = 0 ;workspace [i ]!= 0 ;i ++ )
1556+ for (curr_char = 0 ;workspace [curr_char ]!= 0 ;curr_char ++ )
15101557{
15111558if (wasalnum )
1512- workspace [i ]= towlower (workspace [i ]);
1559+ workspace [curr_char ]= towlower (workspace [curr_char ]);
15131560else
1514- workspace [i ]= towupper (workspace [i ]);
1515- wasalnum = iswalnum (workspace [i ]);
1561+ workspace [curr_char ]= towupper (workspace [curr_char ]);
1562+ wasalnum = iswalnum (workspace [curr_char ]);
15161563}
15171564
1518- out_text = wcstotext ( workspace , i );
1519- result = text_to_cstring ( out_text );
1565+ /* Make result large enough; case change might change number of bytes */
1566+ result = palloc ( curr_char * MB_CUR_MAX + 1 );
15201567
1568+ wchar2char (result ,workspace ,curr_char * MB_CUR_MAX + 1 );
15211569pfree (workspace );
1522- pfree (in_text );
1523- pfree (out_text );
15241570}
15251571else
15261572#endif /* USE_WIDE_UPPER_LOWER */
15271573{
15281574char * p ;
15291575
1530- result = pstrdup (buff );
1576+ result = pnstrdup (buff , nbytes );
15311577
15321578for (p = result ;* p ;p ++ )
15331579{
@@ -1851,7 +1897,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
18511897{
18521898char * p = pstrdup (tmtcTzn (in ));
18531899
1854- strcpy (s ,str_tolower (p ));
1900+ strcpy (s ,str_tolower (p , strlen ( p ) ));
18551901pfree (p );
18561902s += strlen (s );
18571903}
@@ -1893,11 +1939,13 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
18931939if (!tm -> tm_mon )
18941940break ;
18951941if (S_TM (n -> suffix ))
1896- strcpy (s ,str_toupper (localized_full_months [tm -> tm_mon - 1 ]));
1942+ strcpy (s ,str_toupper (localized_full_months [tm -> tm_mon - 1 ],
1943+ strlen (localized_full_months [tm -> tm_mon - 1 ])));
18971944else
18981945{
18991946strcpy (workbuff ,months_full [tm -> tm_mon - 1 ]);
1900- sprintf (s ,"%*s" ,S_FM (n -> suffix ) ?0 :-9 ,str_toupper (workbuff ));
1947+ sprintf (s ,"%*s" ,S_FM (n -> suffix ) ?0 :-9 ,
1948+ str_toupper (workbuff ,strlen (workbuff )));
19011949}
19021950s += strlen (s );
19031951break ;
@@ -1906,7 +1954,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
19061954if (!tm -> tm_mon )
19071955break ;
19081956if (S_TM (n -> suffix ))
1909- strcpy (s ,str_initcap (localized_full_months [tm -> tm_mon - 1 ]));
1957+ strcpy (s ,str_initcap (localized_full_months [tm -> tm_mon - 1 ],
1958+ strlen (localized_full_months [tm -> tm_mon - 1 ])));
19101959else
19111960sprintf (s ,"%*s" ,S_FM (n -> suffix ) ?0 :-9 ,months_full [tm -> tm_mon - 1 ]);
19121961s += strlen (s );
@@ -1916,7 +1965,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
19161965if (!tm -> tm_mon )
19171966break ;
19181967if (S_TM (n -> suffix ))
1919- strcpy (s ,str_tolower (localized_full_months [tm -> tm_mon - 1 ]));
1968+ strcpy (s ,str_tolower (localized_full_months [tm -> tm_mon - 1 ],
1969+ strlen (localized_full_months [tm -> tm_mon - 1 ])));
19201970else
19211971{
19221972sprintf (s ,"%*s" ,S_FM (n -> suffix ) ?0 :-9 ,months_full [tm -> tm_mon - 1 ]);
@@ -1929,17 +1979,20 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
19291979if (!tm -> tm_mon )
19301980break ;
19311981if (S_TM (n -> suffix ))
1932- strcpy (s ,str_toupper (localized_abbrev_months [tm -> tm_mon - 1 ]));
1982+ strcpy (s ,str_toupper (localized_abbrev_months [tm -> tm_mon - 1 ],
1983+ strlen (localized_abbrev_months [tm -> tm_mon - 1 ])));
19331984else
1934- strcpy (s ,str_toupper (months [tm -> tm_mon - 1 ]));
1985+ strcpy (s ,str_toupper (months [tm -> tm_mon - 1 ],
1986+ strlen (months [tm -> tm_mon - 1 ])));
19351987s += strlen (s );
19361988break ;
19371989case DCH_Mon :
19381990INVALID_FOR_INTERVAL ;
19391991if (!tm -> tm_mon )
19401992break ;
19411993if (S_TM (n -> suffix ))
1942- strcpy (s ,str_initcap (localized_abbrev_months [tm -> tm_mon - 1 ]));
1994+ strcpy (s ,str_initcap (localized_abbrev_months [tm -> tm_mon - 1 ],
1995+ strlen (localized_abbrev_months [tm -> tm_mon - 1 ])));
19431996else
19441997strcpy (s ,months [tm -> tm_mon - 1 ]);
19451998s += strlen (s );
@@ -1949,7 +2002,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
19492002if (!tm -> tm_mon )
19502003break ;
19512004if (S_TM (n -> suffix ))
1952- strcpy (s ,str_tolower (localized_abbrev_months [tm -> tm_mon - 1 ]));
2005+ strcpy (s ,str_tolower (localized_abbrev_months [tm -> tm_mon - 1 ],
2006+ strlen (localized_abbrev_months [tm -> tm_mon - 1 ])));
19532007else
19542008{
19552009strcpy (s ,months [tm -> tm_mon - 1 ]);
@@ -1966,26 +2020,30 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
19662020case DCH_DAY :
19672021INVALID_FOR_INTERVAL ;
19682022if (S_TM (n -> suffix ))
1969- strcpy (s ,str_toupper (localized_full_days [tm -> tm_wday ]));
2023+ strcpy (s ,str_toupper (localized_full_days [tm -> tm_wday ],
2024+ strlen (localized_full_days [tm -> tm_wday ])));
19702025else
19712026{
19722027strcpy (workbuff ,days [tm -> tm_wday ]);
1973- sprintf (s ,"%*s" ,S_FM (n -> suffix ) ?0 :-9 ,str_toupper (workbuff ));
2028+ sprintf (s ,"%*s" ,S_FM (n -> suffix ) ?0 :-9 ,
2029+ str_toupper (workbuff ,strlen (workbuff )));
19742030}
19752031s += strlen (s );
19762032break ;
19772033case DCH_Day :
19782034INVALID_FOR_INTERVAL ;
19792035if (S_TM (n -> suffix ))
1980- strcpy (s ,str_initcap (localized_full_days [tm -> tm_wday ]));
2036+ strcpy (s ,str_initcap (localized_full_days [tm -> tm_wday ],
2037+ strlen (localized_full_days [tm -> tm_wday ])));
19812038else
19822039sprintf (s ,"%*s" ,S_FM (n -> suffix ) ?0 :-9 ,days [tm -> tm_wday ]);
19832040s += strlen (s );
19842041break ;
19852042case DCH_day :
19862043INVALID_FOR_INTERVAL ;
19872044if (S_TM (n -> suffix ))
1988- strcpy (s ,str_tolower (localized_full_days [tm -> tm_wday ]));
2045+ strcpy (s ,str_tolower (localized_full_days [tm -> tm_wday ],
2046+ strlen (localized_full_days [tm -> tm_wday ])));
19892047else
19902048{
19912049sprintf (s ,"%*s" ,S_FM (n -> suffix ) ?0 :-9 ,days [tm -> tm_wday ]);
@@ -1996,23 +2054,27 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
19962054case DCH_DY :
19972055INVALID_FOR_INTERVAL ;
19982056if (S_TM (n -> suffix ))
1999- strcpy (s ,str_toupper (localized_abbrev_days [tm -> tm_wday ]));
2057+ strcpy (s ,str_toupper (localized_abbrev_days [tm -> tm_wday ],
2058+ strlen (localized_abbrev_days [tm -> tm_wday ])));
20002059else
2001- strcpy (s ,str_toupper (days_short [tm -> tm_wday ]));
2060+ strcpy (s ,str_toupper (days_short [tm -> tm_wday ],
2061+ strlen (days_short [tm -> tm_wday ])));
20022062s += strlen (s );
20032063break ;
20042064case DCH_Dy :
20052065INVALID_FOR_INTERVAL ;
20062066if (S_TM (n -> suffix ))
2007- strcpy (s ,str_initcap (localized_abbrev_days [tm -> tm_wday ]));
2067+ strcpy (s ,str_initcap (localized_abbrev_days [tm -> tm_wday ],
2068+ strlen (localized_abbrev_days [tm -> tm_wday ])));
20082069else
20092070strcpy (s ,days_short [tm -> tm_wday ]);
20102071s += strlen (s );
20112072break ;
20122073case DCH_dy :
20132074INVALID_FOR_INTERVAL ;
20142075if (S_TM (n -> suffix ))
2015- strcpy (s ,str_tolower (localized_abbrev_days [tm -> tm_wday ]));
2076+ strcpy (s ,str_tolower (localized_abbrev_days [tm -> tm_wday ],
2077+ strlen (localized_abbrev_days [tm -> tm_wday ])));
20162078else
20172079{
20182080strcpy (s ,days_short [tm -> tm_wday ]);
@@ -4277,12 +4339,14 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
42774339case NUM_rn :
42784340if (IS_FILLMODE (Np -> Num ))
42794341{
4280- strcpy (Np -> inout_p ,str_tolower (Np -> number_p ));
4342+ strcpy (Np -> inout_p ,str_tolower (Np -> number_p ,
4343+ strlen (Np -> number_p )));
42814344Np -> inout_p += strlen (Np -> inout_p )- 1 ;
42824345}
42834346else
42844347{
4285- sprintf (Np -> inout_p ,"%15s" ,str_tolower (Np -> number_p ));
4348+ sprintf (Np -> inout_p ,"%15s" ,str_tolower (Np -> number_p ,
4349+ strlen (Np -> number_p )));
42864350Np -> inout_p += strlen (Np -> inout_p )- 1 ;
42874351}
42884352break ;