|
9 | 9 |
|
10 | 10 | #include"postgres_fe.h"
|
11 | 11 |
|
| 12 | +#include"mb/pg_wchar.h" |
12 | 13 | #include"pg_upgrade.h"
|
13 | 14 |
|
14 | 15 |
|
15 | 16 | staticvoidset_locale_and_encoding(ClusterInfo*cluster);
|
16 | 17 | staticvoidcheck_new_cluster_is_empty(void);
|
17 | 18 | staticvoidcheck_locale_and_encoding(ControlData*oldctrl,
|
18 | 19 | ControlData*newctrl);
|
| 20 | +staticboolequivalent_locale(constchar*loca,constchar*locb); |
| 21 | +staticboolequivalent_encoding(constchar*chara,constchar*charb); |
19 | 22 | staticvoidcheck_is_super_user(ClusterInfo*cluster);
|
20 | 23 | staticvoidcheck_for_prepared_transactions(ClusterInfo*cluster);
|
21 | 24 | staticvoidcheck_for_isn_and_int8_passing_mismatch(ClusterInfo*cluster);
|
@@ -397,27 +400,80 @@ set_locale_and_encoding(ClusterInfo *cluster)
|
397 | 400 | /*
|
398 | 401 | * check_locale_and_encoding()
|
399 | 402 | *
|
400 |
| - *locale is not in pg_controldata in 8.4 and later so |
401 |
| - *we probably had to get via a database query. |
| 403 | + * Check that old and new locale and encoding match. Even though the backend |
| 404 | + * tries to canonicalize stored locale names, the platform often doesn't |
| 405 | + * cooperate, so it's entirely possible that one DB thinks its locale is |
| 406 | + * "en_US.UTF-8" while the other says "en_US.utf8". Try to be forgiving. |
402 | 407 | */
|
403 | 408 | staticvoid
|
404 | 409 | check_locale_and_encoding(ControlData*oldctrl,
|
405 | 410 | ControlData*newctrl)
|
406 | 411 | {
|
407 |
| -/* |
408 |
| - * These are often defined with inconsistent case, so use pg_strcasecmp(). |
409 |
| - * They also often use inconsistent hyphenation, which we cannot fix, e.g. |
410 |
| - * UTF-8 vs. UTF8, so at least we display the mismatching values. |
411 |
| - */ |
412 |
| -if (pg_strcasecmp(oldctrl->lc_collate,newctrl->lc_collate)!=0) |
| 412 | +if (!equivalent_locale(oldctrl->lc_collate,newctrl->lc_collate)) |
413 | 413 | pg_fatal("lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
|
414 |
| -oldctrl->lc_collate,newctrl->lc_collate); |
415 |
| -if (pg_strcasecmp(oldctrl->lc_ctype,newctrl->lc_ctype)!=0) |
| 414 | +oldctrl->lc_collate,newctrl->lc_collate); |
| 415 | +if (!equivalent_locale(oldctrl->lc_ctype,newctrl->lc_ctype)) |
416 | 416 | pg_fatal("lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
|
417 |
| -oldctrl->lc_ctype,newctrl->lc_ctype); |
418 |
| -if (pg_strcasecmp(oldctrl->encoding,newctrl->encoding)!=0) |
| 417 | +oldctrl->lc_ctype,newctrl->lc_ctype); |
| 418 | +if (!equivalent_encoding(oldctrl->encoding,newctrl->encoding)) |
419 | 419 | pg_fatal("encoding cluster values do not match: old \"%s\", new \"%s\"\n",
|
420 |
| -oldctrl->encoding,newctrl->encoding); |
| 420 | +oldctrl->encoding,newctrl->encoding); |
| 421 | +} |
| 422 | + |
/*
 * equivalent_locale()
 *
 * Best effort locale-name comparison.  Return false if we are not 100% sure
 * the locales are equivalent.
 *
 * Two names are considered equivalent when either
 *	 - they are identical apart from letter case, or
 *	 - both contain a '.', the parts after the last '.' are equivalent
 *	   according to equivalent_encoding(), and the parts before it have the
 *	   same length and are identical apart from letter case.
 *
 * loca, locb: NUL-terminated locale names; must not be NULL.
 */
static bool
equivalent_locale(const char *loca, const char *locb)
{
	const char *chara;
	const char *charb;
	size_t		lencmp;

	/*
	 * Identical names (ignoring case) are equivalent no matter what follows
	 * the last dot.  This must be tested first: that suffix is not always
	 * something equivalent_encoding() will accept (it might be a bare
	 * code-page number, or an encoding followed by an "@modifier"), and two
	 * clusters reporting the very same string must not be treated as a
	 * mismatch just because the suffix could not be validated.
	 */
	if (pg_strcasecmp(loca, locb) == 0)
		return true;

	chara = strrchr(loca, '.');
	charb = strrchr(locb, '.');

	/*
	 * If they don't both contain an encoding part there is nothing more to
	 * try; the whole-name comparison above has already failed.
	 */
	if (!chara || !charb)
		return false;

	/* Compare the encoding parts. */
	if (!equivalent_encoding(chara + 1, charb + 1))
		return false;

	/*
	 * OK, compare the locale identifiers (e.g. en_US part of en_US.utf8).
	 *
	 * It's tempting to ignore non-alphanumeric chars here, but for now it's
	 * not clear that that's necessary; just do case-insensitive comparison.
	 */
	lencmp = (size_t) (chara - loca);
	if (lencmp != (size_t) (charb - locb))
		return false;

	return (pg_strncasecmp(loca, locb, lencmp) == 0);
}
| 456 | + |
/*
 * equivalent_encoding()
 *
 * Best effort encoding-name comparison.  Both names are looked up with
 * pg_valid_server_encoding(); the result is true only when neither lookup
 * yields a negative value and both lookups yield the same value.
 *
 * Because the lookup in pg_valid_server_encoding() does case folding and
 * ignores non-alphanumeric characters, many popular variant spellings are
 * recognized as equivalent, eg "utf8" and "UTF-8" will match.
 */
static bool
equivalent_encoding(const char *chara, const char *charb)
{
	const int	id_a = pg_valid_server_encoding(chara);
	const int	id_b = pg_valid_server_encoding(charb);

	/* A negative id means the name was not accepted; that never matches. */
	return (id_a >= 0 && id_b >= 0 && id_a == id_b);
}
|
422 | 478 |
|
423 | 479 |
|
|