33 * client encoding and server internal encoding.
44 * (currently mule internal code (mic) is used)
55 * Tatsuo Ishii
6- * $Id: mbutils.c,v 1.18 2001/07 /1511 :07:36 ishii Exp $
6+ * $Id: mbutils.c,v 1.19 2001/08 /1507 :07:40 ishii Exp $
77 */
88#include "postgres.h"
99
@@ -34,67 +34,84 @@ pg_get_enc_ent(int encoding)
3434}
3535
3636/*
37- * set the client encoding. if encoding conversion between
38- * client/server encoding is not supported, returns -1
37+ * Find appropriate encoding conversion functions. If no such
38+ * functions found, returns -1.
39+ *
40+ * Arguments:
41+ *
42+ * src, dest (in): source and destination encoding ids
43+ *
44+ * src_to_mic (out): pointer to a function which converts src to
45+ * mic/unicode according to dest. if src == mic/unicode or no
46+ * appropriate function found, set to 0.
47+ *
48+ * dest_from_mic (out): pointer to a function which converts
49+ * mic/unicode to dest according to src. if dest == mic/unicode or no
50+ * appropriate function found, set to 0.
3951 */
4052int
41- pg_set_client_encoding (int encoding )
53+ pg_find_encoding_converters (int src , int dest , void ( * * src_to_mic )(), void ( * * dest_from_mic )() )
4254{
43- int current_server_encoding = GetDatabaseEncoding ();
44-
45- client_encoding = encoding ;
46-
47- if (client_encoding == current_server_encoding )
48- {/* server == client? */
49- client_to_mic = client_from_mic = 0 ;
50- server_to_mic = server_from_mic = 0 ;
55+ if (src == dest )
56+ {/* src == dest? */
57+ * src_to_mic = * dest_from_mic = 0 ;
5158}
52- else if (current_server_encoding == MULE_INTERNAL )
53- {/* server == MULE_INETRNAL? */
54- client_to_mic = pg_get_enc_ent (encoding )-> to_mic ;
55- client_from_mic = pg_get_enc_ent (encoding )-> from_mic ;
56- server_to_mic = server_from_mic = 0 ;
57- if (client_to_mic == 0 || client_from_mic == 0 )
59+ else if (src == MULE_INTERNAL )
60+ {/* src == MULE_INETRNAL? */
61+ * dest_from_mic = pg_get_enc_ent (dest )-> from_mic ;
62+ if (* dest_from_mic == 0 )
5863return (-1 );
64+ * src_to_mic = 0 ;
5965}
60- else if (encoding == MULE_INTERNAL )
61- {/* client == MULE_INETRNAL? */
62- client_to_mic = client_from_mic = 0 ;
63- server_to_mic = pg_get_enc_ent (current_server_encoding )-> to_mic ;
64- server_from_mic = pg_get_enc_ent (current_server_encoding )-> from_mic ;
65- if (server_to_mic == 0 || server_from_mic == 0 )
66+ else if (dest == MULE_INTERNAL )
67+ {/* dest == MULE_INETRNAL? */
68+ * src_to_mic = pg_get_enc_ent (src )-> to_mic ;
69+ if (* src_to_mic == 0 )
6670return (-1 );
71+ * dest_from_mic = 0 ;
6772}
68- else if (current_server_encoding == UNICODE )
69- {/* server == UNICODE? */
70- client_to_mic = pg_get_enc_ent (encoding )-> to_unicode ;
71- client_from_mic = pg_get_enc_ent (encoding )-> from_unicode ;
72- server_to_mic = server_from_mic = 0 ;
73- if (client_to_mic == 0 || client_from_mic == 0 )
73+ else if (src == UNICODE )
74+ {/* src == UNICODE? */
75+ * dest_from_mic = pg_get_enc_ent (dest )-> from_unicode ;
76+ if (* dest_from_mic == 0 )
7477return (-1 );
78+ * src_to_mic = 0 ;
7579}
76- else if (encoding == UNICODE )
77- {/* client == UNICODE? */
78- client_to_mic = client_from_mic = 0 ;
79- server_to_mic = pg_get_enc_ent (current_server_encoding )-> to_unicode ;
80- server_from_mic = pg_get_enc_ent (current_server_encoding )-> from_unicode ;
81- if (server_to_mic == 0 || server_from_mic == 0 )
80+ else if (dest == UNICODE )
81+ {/* dest == UNICODE? */
82+ * src_to_mic = pg_get_enc_ent (src )-> to_unicode ;
83+ if (* src_to_mic == 0 )
8284return (-1 );
85+ * dest_from_mic = 0 ;
8386}
8487else
8588{
86- client_to_mic = pg_get_enc_ent (encoding )-> to_mic ;
87- client_from_mic = pg_get_enc_ent (encoding )-> from_mic ;
88- server_to_mic = pg_get_enc_ent (current_server_encoding )-> to_mic ;
89- server_from_mic = pg_get_enc_ent (current_server_encoding )-> from_mic ;
90- if (client_to_mic == 0 || client_from_mic == 0 )
91- return (-1 );
92- if (server_to_mic == 0 || server_from_mic == 0 )
89+ * src_to_mic = pg_get_enc_ent (src )-> to_mic ;
90+ * dest_from_mic = pg_get_enc_ent (dest )-> from_mic ;
91+ if (* src_to_mic == 0 || * dest_from_mic == 0 )
9392return (-1 );
9493}
9594return (0 );
9695}
9796
97+ /*
98+ * set the client encoding. if encoding conversion between
99+ * client/server encoding is not supported, returns -1
100+ */
101+ int
102+ pg_set_client_encoding (int encoding )
103+ {
104+ int current_server_encoding = GetDatabaseEncoding ();
105+
106+ if (pg_find_encoding_converters (encoding ,current_server_encoding ,& client_to_mic ,& server_from_mic )< 0 )
107+ return (-1 );
108+ client_encoding = encoding ;
109+
110+ if (pg_find_encoding_converters (current_server_encoding ,encoding ,& server_to_mic ,& client_from_mic )< 0 )
111+ return (-1 );
112+ return 0 ;
113+ }
114+
98115/*
99116 * returns the current client encoding
100117 */
@@ -110,7 +127,21 @@ pg_get_client_encoding()
110127}
111128
112129/*
113- * convert client encoding to server encoding.
130+ * Convert src encoding and returns it. Actual conversion is done by
131+ * src_to_mic and dest_from_mic, which can be obtained by
132+ * pg_find_encoding_converters(). The reason we require two conversion
133+ * functions is that we have an intermediate encoding: MULE_INTERNAL
134+ * Using intermediate encodings will reduce the number of functions
135+ * doing encoding conversions. Special case is either src or dest is
136+ * the intermediate encoding itself. In this case, you don't need src
137+ * or dest (setting 0 will indicate there's no conversion
138+ * function). Another case is you have direct-conversion function from
139+ * src to dest. In this case either src_to_mic or dest_from_mic could
140+ * be set to 0 also.
141+ *
142+ * Note that if src or dest is UNICODE, we have to do
143+ * direct-conversion: since we don't support conversion between UNICODE
144+ * and MULE_INTERNAL, we cannot go through MULE_INTERNAL.
114145 *
115146 * CASE 1: if no conversion is required, then the given pointer s is returned.
116147 *
@@ -120,34 +151,138 @@ pg_get_client_encoding()
120151 * to determine whether to pfree the result or not!
121152 *
122153 * Note: we assume that conversion cannot cause more than a 4-to-1 growth
123- * in the length of the string --- is this enough?
124- */
154+ * in the length of the string --- is this enough? */
155+
unsigned char *
pg_do_encoding_conversion(unsigned char *src, int len,
						  void (*src_to_mic) (),
						  void (*dest_from_mic) ())
{
	unsigned char *current = src;	/* string to feed into the next step */
	unsigned char *converted;

	/* Step 1 (optional): source encoding -> intermediate encoding. */
	if (src_to_mic != 0)
	{
		converted = (unsigned char *) palloc(len * 4 + 1);
		(*src_to_mic) (current, converted, len);
		current = converted;
		/* the next step works on the converted string, so re-measure it */
		len = strlen((char *) current);
	}

	/* No second step: hand back either the input or the step-1 buffer. */
	if (dest_from_mic == 0)
		return current;

	/* Step 2: intermediate encoding -> destination encoding. */
	converted = (unsigned char *) palloc(len * 4 + 1);
	(*dest_from_mic) (current, converted, len);

	/* release the step-1 buffer, but never the caller's own string */
	if (current != src)
		pfree(current);

	return converted;
}
150179
180+ /*
181+ * Convert string using encoding_nanme. We assume that string's
182+ * encoding is same as DB encoding.
183+ *
184+ * TEXT convert(TEXT string, NAME encoding_name)
185+ */
186+ Datum
187+ pg_convert (PG_FUNCTION_ARGS )
188+ {
189+ text * string = PG_GETARG_TEXT_P (0 );
190+ Name s = PG_GETARG_NAME (1 );
191+ int encoding = pg_char_to_encoding (NameStr (* s ));
192+ int db_encoding = GetDatabaseEncoding ();
193+ void (* src )(), (* dest )();
194+ unsignedchar * result ;
195+ text * retval ;
196+
197+ if (encoding < 0 )
198+ elog (ERROR ,"Invalid encoding name %s" ,NameStr (* s ));
199+
200+ if (pg_find_encoding_converters (db_encoding ,encoding ,& src ,& dest )< 0 )
201+ {
202+ char * encoding_name = (char * )pg_encoding_to_char (db_encoding );
203+ elog (ERROR ,"Conversion from %s to %s is not possible" ,NameStr (* s ),encoding_name );
204+ }
205+
206+ result = pg_do_encoding_conversion (VARDATA (string ),VARSIZE (string )- VARHDRSZ ,
207+ src ,dest );
208+ if (result == NULL )
209+ elog (ERROR ,"Encoding conversion failed" );
210+
211+ retval = DatumGetTextP (DirectFunctionCall1 (textin ,CStringGetDatum (result )));
212+ if (result != (unsignedchar * )VARDATA (string ))
213+ pfree (result );
214+
215+ /* free memory if allocated by the toaster */
216+ PG_FREE_IF_COPY (string ,0 );
217+
218+ PG_RETURN_TEXT_P (retval );
219+ }
220+
221+ /*
222+ * Convert string using encoding_nanme.
223+ *
224+ * TEXT convert(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
225+ */
226+ Datum
227+ pg_convert2 (PG_FUNCTION_ARGS )
228+ {
229+ text * string = PG_GETARG_TEXT_P (0 );
230+ char * src_encoding_name = NameStr (* PG_GETARG_NAME (1 ));
231+ int src_encoding = pg_char_to_encoding (src_encoding_name );
232+ char * dest_encoding_name = NameStr (* PG_GETARG_NAME (2 ));
233+ int dest_encoding = pg_char_to_encoding (dest_encoding_name );
234+ void (* src )(), (* dest )();
235+ unsignedchar * result ;
236+ text * retval ;
237+
238+ if (src_encoding < 0 )
239+ elog (ERROR ,"Invalid source encoding name %s" ,src_encoding_name );
240+ if (dest_encoding < 0 )
241+ elog (ERROR ,"Invalid destination encoding name %s" ,dest_encoding_name );
242+
243+ if (pg_find_encoding_converters (src_encoding ,dest_encoding ,& src ,& dest )< 0 )
244+ {
245+ elog (ERROR ,"Conversion from %s to %s is not possible" ,
246+ src_encoding_name ,dest_encoding_name );
247+ }
248+
249+ result = pg_do_encoding_conversion (VARDATA (string ),VARSIZE (string )- VARHDRSZ ,
250+ src ,dest );
251+ if (result == NULL )
252+ elog (ERROR ,"Encoding conversion failed" );
253+
254+ retval = DatumGetTextP (DirectFunctionCall1 (textin ,CStringGetDatum (result )));
255+ if (result != (unsignedchar * )VARDATA (string ))
256+ pfree (result );
257+
258+ /* free memory if allocated by the toaster */
259+ PG_FREE_IF_COPY (string ,0 );
260+
261+ PG_RETURN_TEXT_P (retval );
262+ }
263+
264+ /*
265+ * convert client encoding to server encoding.
266+ *
267+ * CASE 1: if no conversion is required, then the given pointer s is returned.
268+ *
269+ * CASE 2: if conversion is required, a palloc'd string is returned.
270+ *
271+ * Callers must check whether return value differs from passed value
272+ * to determine whether to pfree the result or not!
273+ *
274+ * Note: we assume that conversion cannot cause more than a 4-to-1 growth
275+ * in the length of the string --- is this enough?
276+ */
277+ unsignedchar *
278+ pg_client_to_server (unsignedchar * s ,int len )
279+ {
280+ if (client_encoding == GetDatabaseEncoding ())
281+ return s ;
282+
283+ return pg_do_encoding_conversion (s ,len ,client_to_mic ,server_from_mic );
284+ }
285+
151286/*
152287 * convert server encoding to client encoding.
153288 *
@@ -164,27 +299,10 @@ pg_client_to_server(unsigned char *s, int len)
164299unsignedchar *
165300pg_server_to_client (unsignedchar * s ,int len )
166301{
167- unsignedchar * result = s ;
168- unsignedchar * buf ;
169-
170302if (client_encoding == GetDatabaseEncoding ())
171- return result ;
172- if (server_to_mic )
173- {
174- buf = (unsignedchar * )palloc (len * 4 + 1 );
175- (* server_to_mic ) (result ,buf ,len );
176- result = buf ;
177- len = strlen (result );
178- }
179- if (client_from_mic )
180- {
181- buf = (unsignedchar * )palloc (len * 4 + 1 );
182- (* client_from_mic ) (result ,buf ,len );
183- if (result != s )
184- pfree (result );/* release first buffer */
185- result = buf ;
186- }
187- return result ;
303+ return s ;
304+
305+ return pg_do_encoding_conversion (s ,len ,server_to_mic ,client_from_mic );
188306}
189307
190308/* convert a multi-byte string to a wchar */