@@ -16,6 +16,8 @@ static void set_locale_and_encoding(ClusterInfo *cluster);
16
16
static void check_new_cluster_is_empty (void );
17
17
static void check_locale_and_encoding (ControlData * oldctrl ,
18
18
ControlData * newctrl );
19
+ static bool equivalent_locale (int category ,const char * loca ,const char * locb );
20
+ static bool equivalent_encoding (const char * chara ,const char * charb );
19
21
static void check_is_super_user (ClusterInfo * cluster );
20
22
static void check_for_prepared_transactions (ClusterInfo * cluster );
21
23
static void check_for_isn_and_int8_passing_mismatch (ClusterInfo * cluster );
@@ -361,23 +363,8 @@ set_locale_and_encoding(ClusterInfo *cluster)
361
363
i_datcollate = PQfnumber (res ,"datcollate" );
362
364
i_datctype = PQfnumber (res ,"datctype" );
363
365
364
- if (GET_MAJOR_VERSION (cluster -> major_version )< 902 )
365
- {
366
- /*
367
- * Pre-9.2 did not canonicalize the supplied locale names to match
368
- * what the system returns, while 9.2+ does, so convert pre-9.2 to
369
- * match.
370
- */
371
- ctrl -> lc_collate = get_canonical_locale_name (LC_COLLATE ,
372
- pg_strdup (PQgetvalue (res ,0 ,i_datcollate )));
373
- ctrl -> lc_ctype = get_canonical_locale_name (LC_CTYPE ,
374
- pg_strdup (PQgetvalue (res ,0 ,i_datctype )));
375
- }
376
- else
377
- {
378
- ctrl -> lc_collate = pg_strdup (PQgetvalue (res ,0 ,i_datcollate ));
379
- ctrl -> lc_ctype = pg_strdup (PQgetvalue (res ,0 ,i_datctype ));
380
- }
366
+ ctrl -> lc_collate = pg_strdup (PQgetvalue (res ,0 ,i_datcollate ));
367
+ ctrl -> lc_ctype = pg_strdup (PQgetvalue (res ,0 ,i_datctype ));
381
368
382
369
PQclear (res );
383
370
}
@@ -407,23 +394,84 @@ static void
407
394
check_locale_and_encoding(ControlData *oldctrl,
						  ControlData *newctrl)
{
	const char *old_collate = oldctrl->lc_collate;
	const char *new_collate = newctrl->lc_collate;
	const char *old_ctype = oldctrl->lc_ctype;
	const char *new_ctype = newctrl->lc_ctype;
	const char *old_encoding = oldctrl->encoding;
	const char *new_encoding = newctrl->encoding;

	/*
	 * Compare the old and new cluster settings one at a time, in the order
	 * collation, ctype, encoding.  The first pair that is not considered
	 * equivalent is reported through pg_fatal(), showing both values so the
	 * user can see exactly what differs.
	 */
	if (!equivalent_locale(LC_COLLATE, old_collate, new_collate))
	{
		pg_fatal("lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
				 old_collate, new_collate);
	}

	if (!equivalent_locale(LC_CTYPE, old_ctype, new_ctype))
	{
		pg_fatal("lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
				 old_ctype, new_ctype);
	}

	/* Locale comparison ignores the encoding part, so check it on its own. */
	if (!equivalent_encoding(old_encoding, new_encoding))
	{
		pg_fatal("encoding cluster values do not match: old \"%s\", new \"%s\"\n",
				 old_encoding, new_encoding);
	}
}
407
+
408
/*
 * equivalent_locale()
 *
 * Best-effort comparison of two locale names for the given locale category
 * (LC_COLLATE, LC_CTYPE, ...).  Returns true only if we are confident that
 * both names denote the same locale; any doubt results in false.
 *
 * Note: the encoding part of each name (everything from the last '.' on) is
 * ignored.  This function is used to compare locale names stored in
 * pg_database, which carries a separate encoding field; that field is
 * compared directly in check_locale_and_encoding().
 */
static bool
equivalent_locale(int category, const char *loca, const char *locb)
{
	const char *dota;
	const char *dotb;
	char	   *canona;
	char	   *canonb;
	size_t		lena;
	size_t		lenb;
	bool		result;

	/*
	 * If the names are equal (ignoring case), the locales are equivalent.
	 * Checking this first avoids calling setlocale() in the common case that
	 * the names are equal.  That's a good thing if setlocale() is buggy, for
	 * example.
	 */
	if (pg_strcasecmp(loca, locb) == 0)
		return true;

	/*
	 * Not identical.  Canonicalize both names, strip the encoding parts, and
	 * compare what is left.
	 */
	canona = get_canonical_locale_name(category, loca);
	dota = strrchr(canona, '.');
	lena = dota ? (size_t) (dota - canona) : strlen(canona);

	canonb = get_canonical_locale_name(category, locb);
	dotb = strrchr(canonb, '.');
	lenb = dotb ? (size_t) (dotb - canonb) : strlen(canonb);

	result = (lena == lenb && pg_strncasecmp(canona, canonb, lena) == 0);

	/*
	 * The canonical names are allocated copies handed to us by
	 * get_canonical_locale_name(); release them so that repeated comparisons
	 * do not leak memory.
	 */
	pg_free(canona);
	pg_free(canonb);

	return result;
}
454
+
455
/*
 * equivalent_encoding()
 *
 * Best-effort comparison of two encoding names.  The result is true only
 * when both names resolve to a valid server-side encoding and the two
 * resolve to the same one.
 *
 * The lookup in pg_valid_server_encoding() does case folding and ignores
 * non-alphanumeric characters, so many popular variant spellings are
 * recognized as equivalent, e.g. "utf8" and "UTF-8" will match.
 */
static bool
equivalent_encoding(const char *chara, const char *charb)
{
	int			id_a = pg_valid_server_encoding(chara);
	int			id_b = pg_valid_server_encoding(charb);
	bool		both_valid = (id_a >= 0 && id_b >= 0);

	/* A negative id means the name is not a valid server encoding. */
	return both_valid && id_a == id_b;
}
428
476
429
477