Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6cddecd

Browse files
committed
Avoid breaking SJIS encoding while de-backslashing Windows paths.
When running on Windows, canonicalize_path() converts '\' to '/' to prevent confusing the Windows command processor. It was doing that in a non-encoding-aware fashion; but in SJIS there are valid two-byte characters whose second byte matches '\'. So encoding corruption ensues if such a character is used in the path. We can fairly easily fix this if we know which encoding is in use, but a lot of our utilities don't have much of a clue about that. After some discussion we decided we'd settle for fixing this only in psql, and assuming that its value of client_encoding matches what the user is typing. It seems hopeless to get the server to deal with the problematic characters in database path names, so we'll just declare that case to be unsupported. That means nothing need be done in the server, nor in utility programs whose only contact with file path names is for database paths. But psql frequently deals with client-side file paths, so it'd be good if it didn't mess those up. Bug: #18735 Reported-by: Koichi Suzuki <koichi.suzuki@enterprisedb.com> Author: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Koichi Suzuki <koichi.suzuki@enterprisedb.com> Discussion: https://postgr.es/m/18735-4acdb3998bb9f2b1@postgresql.org Backpatch-through: 13
1 parent f6ff75f commit 6cddecd

File tree

4 files changed

+97
-19
lines changed

4 files changed

+97
-19
lines changed

‎src/bin/psql/command.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,7 +1224,7 @@ exec_command_edit(PsqlScanState scan_state, bool active_branch,
12241224
expand_tilde(&fname);
12251225
if (fname)
12261226
{
1227-
canonicalize_path(fname);
1227+
canonicalize_path_enc(fname,pset.encoding);
12281228
/* Always clear buffer if the file isn't modified */
12291229
discard_on_quit= true;
12301230
}
@@ -2864,7 +2864,7 @@ exec_command_write(PsqlScanState scan_state, bool active_branch,
28642864
}
28652865
else
28662866
{
2867-
canonicalize_path(fname);
2867+
canonicalize_path_enc(fname,pset.encoding);
28682868
fd=fopen(fname,"w");
28692869
}
28702870
if (!fd)
@@ -4479,7 +4479,7 @@ process_file(char *filename, bool use_relative_path)
44794479
}
44804480
elseif (strcmp(filename,"-")!=0)
44814481
{
4482-
canonicalize_path(filename);
4482+
canonicalize_path_enc(filename,pset.encoding);
44834483

44844484
/*
44854485
* If we were asked to resolve the pathname relative to the location
@@ -4493,7 +4493,7 @@ process_file(char *filename, bool use_relative_path)
44934493
strlcpy(relpath,pset.inputfile,sizeof(relpath));
44944494
get_parent_directory(relpath);
44954495
join_path_components(relpath,relpath,filename);
4496-
canonicalize_path(relpath);
4496+
canonicalize_path_enc(relpath,pset.encoding);
44974497

44984498
filename=relpath;
44994499
}

‎src/bin/psql/copy.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ do_copy(const char *args)
280280

281281
/* prepare to read or write the target file */
282282
if (options->file&& !options->program)
283-
canonicalize_path(options->file);
283+
canonicalize_path_enc(options->file,pset.encoding);
284284

285285
if (options->from)
286286
{

‎src/include/port.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ extern char *first_path_var_separator(const char *pathlist);
5353
externvoidjoin_path_components(char*ret_path,
5454
constchar*head,constchar*tail);
5555
externvoidcanonicalize_path(char*path);
56+
externvoidcanonicalize_path_enc(char*path,intencoding);
5657
externvoidmake_native_path(char*filename);
5758
externvoidcleanup_path(char*path);
5859
externboolpath_contains_parent_reference(constchar*path);

‎src/port/path.c

Lines changed: 91 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include<unistd.h>
3737
#endif
3838

39+
#include"mb/pg_wchar.h"
3940
#include"pg_config_paths.h"
4041

4142

@@ -45,6 +46,10 @@
4546
#defineIS_PATH_VAR_SEP(ch) ((ch) == ';')
4647
#endif
4748

49+
#ifdefWIN32
50+
staticvoiddebackslash_path(char*path,intencoding);
51+
staticintpg_sjis_mblen(constunsignedchar*s);
52+
#endif
4853
staticvoidmake_relative_path(char*ret_path,constchar*target_path,
4954
constchar*bin_path,constchar*my_exec_path);
5055
staticchar*trim_directory(char*path);
@@ -149,10 +154,73 @@ last_dir_separator(const char *filename)
149154
}
150155

151156

157+
#ifdefWIN32
158+
159+
/*
160+
* Convert '\' to '/' within the given path, assuming the path
161+
* is in the specified encoding.
162+
*/
163+
staticvoid
164+
debackslash_path(char*path,intencoding)
165+
{
166+
char*p;
167+
168+
/*
169+
* Of the supported encodings, only Shift-JIS has multibyte characters
170+
* that can include a byte equal to '\' (0x5C). So rather than implement
171+
* a fully encoding-aware conversion, we special-case SJIS. (Invoking the
172+
* general encoding-aware logic in wchar.c is impractical here for
173+
* assorted reasons.)
174+
*/
175+
if (encoding==PG_SJIS)
176+
{
177+
for (p=path;*p;p+=pg_sjis_mblen((constunsignedchar*)p))
178+
{
179+
if (*p=='\\')
180+
*p='/';
181+
}
182+
}
183+
else
184+
{
185+
for (p=path;*p;p++)
186+
{
187+
if (*p=='\\')
188+
*p='/';
189+
}
190+
}
191+
}
192+
152193
/*
153-
*make_native_path - on WIN32, change / to \ in the path
194+
* SJIS character length
154195
*
155-
*This effectively undoes canonicalize_path.
196+
* This must match the behavior of
197+
*pg_encoding_mblen_bounded(PG_SJIS, s)
198+
* In particular, unlike the version of pg_sjis_mblen in src/common/wchar.c,
199+
* do not allow caller to accidentally step past end-of-string.
200+
*/
201+
staticint
202+
pg_sjis_mblen(constunsignedchar*s)
203+
{
204+
intlen;
205+
206+
if (*s >=0xa1&&*s <=0xdf)
207+
len=1;/* 1 byte kana? */
208+
elseif (IS_HIGHBIT_SET(*s)&&s[1]!='\0')
209+
len=2;/* kanji? */
210+
else
211+
len=1;/* should be ASCII */
212+
returnlen;
213+
}
214+
215+
#endif/* WIN32 */
216+
217+
218+
/*
219+
*make_native_path - on WIN32, change '/' to '\' in the path
220+
*
221+
*This reverses the '\'-to-'/' transformation of debackslash_path.
222+
*We need not worry about encodings here, since '/' does not appear
223+
*as a byte of a multibyte character in any supported encoding.
156224
*
157225
*This is required because WIN32 COPY is an internal CMD.EXE
158226
*command and doesn't process forward slashes in the same way
@@ -182,13 +250,14 @@ make_native_path(char *filename)
182250
* on Windows. We need them to use filenames without spaces, for which a
183251
* short filename is the safest equivalent, eg:
184252
*C:/Progra~1/
253+
*
254+
* Presently, this is only used on paths that we can assume are in a
255+
* server-safe encoding, so there's no need for an encoding-aware variant.
185256
*/
186257
void
187258
cleanup_path(char*path)
188259
{
189260
#ifdefWIN32
190-
char*ptr;
191-
192261
/*
193262
* GetShortPathName() will fail if the path does not exist, or short names
194263
* are disabled on this file system. In both cases, we just return the
@@ -198,11 +267,8 @@ cleanup_path(char *path)
198267
GetShortPathName(path,path,MAXPGPATH-1);
199268

200269
/* Replace '\' with '/' */
201-
for (ptr=path;*ptr;ptr++)
202-
{
203-
if (*ptr=='\\')
204-
*ptr='/';
205-
}
270+
/* All server-safe encodings are alike here, so just use PG_SQL_ASCII */
271+
debackslash_path(path,PG_SQL_ASCII);
206272
#endif
207273
}
208274

@@ -253,16 +319,29 @@ typedef enum
253319
}canonicalize_state;
254320

255321
/*
322+
* canonicalize_path()
323+
*
256324
*Clean up path by:
257325
*o make Win32 path use Unix slashes
258326
*o remove trailing quote on Win32
259327
*o remove trailing slash
260328
*o remove duplicate (adjacent) separators
261329
*o remove '.' (unless path reduces to only '.')
262330
*o process '..' ourselves, removing it if possible
331+
*Modifies path in-place.
332+
*
333+
* This comes in two variants: encoding-aware and not. The non-aware version
334+
* is only safe to use on strings that are in a server-safe encoding.
263335
*/
264336
void
265337
canonicalize_path(char*path)
338+
{
339+
/* All server-safe encodings are alike here, so just use PG_SQL_ASCII */
340+
canonicalize_path_enc(path,PG_SQL_ASCII);
341+
}
342+
343+
void
344+
canonicalize_path_enc(char*path,intencoding)
266345
{
267346
char*p,
268347
*to_p;
@@ -278,17 +357,15 @@ canonicalize_path(char *path)
278357
/*
279358
* The Windows command processor will accept suitably quoted paths with
280359
* forward slashes, but barfs badly with mixed forward and back slashes.
360+
* Hence, start by converting all back slashes to forward slashes.
281361
*/
282-
for (p=path;*p;p++)
283-
{
284-
if (*p=='\\')
285-
*p='/';
286-
}
362+
debackslash_path(path,encoding);
287363

288364
/*
289365
* In Win32, if you do: prog.exe "a b" "\c\d\" the system will pass \c\d"
290366
* as argv[2], so trim off trailing quote.
291367
*/
368+
p=path+strlen(path);
292369
if (p>path&&*(p-1)=='"')
293370
*(p-1)='/';
294371
#endif

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp