Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4ab5dae

Browse files
committed
Use TRUNCATE to preserve relfilenode for pg_largeobject + index.
Commit 9a974cb arranged to preserve the relfilenode of user tables across pg_upgrade, but failed to notice that pg_upgrade treats pg_largeobject as a user table and thus it needs the same treatment. Otherwise, large objects will appear to vanish after a pg_upgrade.

Commit d498e05 fixed this problem by teaching pg_dump to UPDATE pg_class.relfilenode for pg_largeobject and its index. However, because an UPDATE on the catalog rows doesn't change anything on disk, this can leave stray files behind in the new cluster. They will normally be empty, but it's a little bit untidy.

Hence, this commit arranges to do the same thing using DDL. Specifically, it makes TRUNCATE work for the pg_largeobject catalog when in binary-upgrade mode, and it then uses that command in binary-upgrade dumps as a way of setting pg_class.relfilenode for pg_largeobject and its index. That way, the old files are removed from the new cluster.

Discussion: http://postgr.es/m/CA+TgmoYYMXGUJO5GZk1-MByJGu_bB8CbOL6GJQC8=Bzt6x6vDg@mail.gmail.com
1 parent 02e5c27, commit 4ab5dae

File tree

4 files changed

+120
-15
lines changed

4 files changed

+120
-15
lines changed

‎src/backend/commands/tablecmds.c

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
4040
#include "catalog/pg_depend.h"
4141
#include "catalog/pg_foreign_table.h"
4242
#include "catalog/pg_inherits.h"
43+
#include "catalog/pg_largeobject.h"
4344
#include "catalog/pg_namespace.h"
4445
#include "catalog/pg_opclass.h"
4546
#include "catalog/pg_statistic_ext.h"
@@ -2181,7 +2182,15 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
21812182
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
21822183
errmsg("\"%s\" is not a table", relname)));
21832184

2184-
if (!allowSystemTableMods && IsSystemClass(relid, reltuple))
2185+
/*
2186+
* Most system catalogs can't be truncated at all, or at least not unless
2187+
* allow_system_table_mods=on. As an exception, however, we allow
2188+
* pg_largeobject to be truncated as part of pg_upgrade, because we need
2189+
* to change its relfilenode to match the old cluster, and allowing a
2190+
* TRUNCATE command to be executed is the easiest way of doing that.
2191+
*/
2192+
if (!allowSystemTableMods && IsSystemClass(relid, reltuple)
2193+
&& (!IsBinaryUpgrade || relid != LargeObjectRelationId))
21852194
ereport(ERROR,
21862195
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
21872196
errmsg("permission denied: \"%s\" is a system catalog",

‎src/backend/storage/smgr/md.c

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -319,6 +319,7 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
319319
{
320320
char*path;
321321
intret;
322+
BlockNumbersegno=0;
322323

323324
path=relpath(rnode,forkNum);
324325

@@ -353,8 +354,22 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
353354
/* Prevent other backends' fds from holding on to the disk space */
354355
ret=do_truncate(path);
355356

356-
/* Register request to unlink first segment later */
357-
register_unlink_segment(rnode,forkNum,0/* first seg */ );
357+
/*
358+
* Except during a binary upgrade, register request to unlink first
359+
* segment later, rather than now.
360+
*
361+
* If we're performing a binary upgrade, the dangers described in the
362+
* header comments for mdunlink() do not exist, since after a crash
363+
* or even a simple ERROR, the upgrade fails and the whole new cluster
364+
* must be recreated from scratch. And, on the other hand, it is
365+
* important to remove the files from disk immediately, because we
366+
* may be about to reuse the same relfilenode.
367+
*/
368+
if (!IsBinaryUpgrade)
369+
{
370+
register_unlink_segment(rnode,forkNum,0/* first seg */ );
371+
++segno;
372+
}
358373
}
359374

360375
/*
@@ -363,15 +378,17 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
363378
if (ret >=0)
364379
{
365380
char*segpath= (char*)palloc(strlen(path)+12);
366-
BlockNumbersegno;
367381

368382
/*
369383
* Note that because we loop until getting ENOENT, we will correctly
370384
* remove all inactive segments as well as active ones.
371385
*/
372-
for (segno=1;;segno++)
386+
for (;;segno++)
373387
{
374-
sprintf(segpath,"%s.%u",path,segno);
388+
if (segno==0)
389+
strcpy(segpath,path);
390+
else
391+
sprintf(segpath,"%s.%u",path,segno);
375392

376393
if (!RelFileNodeBackendIsTemp(rnode))
377394
{

‎src/backend/utils/cache/relcache.c

Lines changed: 61 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@
4141
#include"access/tupdesc_details.h"
4242
#include"access/xact.h"
4343
#include"access/xlog.h"
44+
#include"catalog/binary_upgrade.h"
4445
#include"catalog/catalog.h"
4546
#include"catalog/indexing.h"
4647
#include"catalog/namespace.h"
@@ -3707,9 +3708,36 @@ RelationSetNewRelfilenode(Relation relation, char persistence)
37073708
TransactionIdfreezeXid=InvalidTransactionId;
37083709
RelFileNodenewrnode;
37093710

3710-
/* Allocate a new relfilenode */
3711-
newrelfilenode=GetNewRelFileNode(relation->rd_rel->reltablespace,NULL,
3712-
persistence);
3711+
if (!IsBinaryUpgrade)
3712+
{
3713+
/* Allocate a new relfilenode */
3714+
newrelfilenode=GetNewRelFileNode(relation->rd_rel->reltablespace,
3715+
NULL,persistence);
3716+
}
3717+
elseif (relation->rd_rel->relkind==RELKIND_INDEX)
3718+
{
3719+
if (!OidIsValid(binary_upgrade_next_index_pg_class_relfilenode))
3720+
ereport(ERROR,
3721+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3722+
errmsg("index relfilenode value not set when in binary upgrade mode")));
3723+
3724+
newrelfilenode=binary_upgrade_next_index_pg_class_relfilenode;
3725+
binary_upgrade_next_index_pg_class_relfilenode=InvalidOid;
3726+
}
3727+
elseif (relation->rd_rel->relkind==RELKIND_RELATION)
3728+
{
3729+
if (!OidIsValid(binary_upgrade_next_heap_pg_class_relfilenode))
3730+
ereport(ERROR,
3731+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3732+
errmsg("heap relfilenode value not set when in binary upgrade mode")));
3733+
3734+
newrelfilenode=binary_upgrade_next_heap_pg_class_relfilenode;
3735+
binary_upgrade_next_heap_pg_class_relfilenode=InvalidOid;
3736+
}
3737+
else
3738+
ereport(ERROR,
3739+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3740+
errmsg("unexpected request for new relfilenode in binary upgrade mode")));
37133741

37143742
/*
37153743
* Get a writable copy of the pg_class tuple for the given relation.
@@ -3724,9 +3752,37 @@ RelationSetNewRelfilenode(Relation relation, char persistence)
37243752
classform= (Form_pg_class)GETSTRUCT(tuple);
37253753

37263754
/*
3727-
* Schedule unlinking of the old storage at transaction commit.
3755+
* Schedule unlinking of the old storage at transaction commit, except
3756+
* when performing a binary upgrade, when we must do it immediately.
37283757
*/
3729-
RelationDropStorage(relation);
3758+
if (IsBinaryUpgrade)
3759+
{
3760+
SMgrRelationsrel;
3761+
3762+
/*
3763+
* During a binary upgrade, we use this code path to ensure that
3764+
* pg_largeobject and its index have the same relfilenode values as in
3765+
* the old cluster. This is necessary because pg_upgrade treats
3766+
* pg_largeobject like a user table, not a system table. It is however
3767+
* possible that a table or index may need to end up with the same
3768+
* relfilenode in the new cluster as what it had in the old cluster.
3769+
* Hence, we can't wait until commit time to remove the old storage.
3770+
*
3771+
* In general, this function needs to have transactional semantics,
3772+
* and removing the old storage before commit time surely isn't.
3773+
* However, it doesn't really matter, because if a binary upgrade
3774+
* fails at this stage, the new cluster will need to be recreated
3775+
* anyway.
3776+
*/
3777+
srel=smgropen(relation->rd_node,relation->rd_backend);
3778+
smgrdounlinkall(&srel,1, false);
3779+
smgrclose(srel);
3780+
}
3781+
else
3782+
{
3783+
/* Not a binary upgrade, so just schedule it to happen later. */
3784+
RelationDropStorage(relation);
3785+
}
37303786

37313787
/*
37323788
* Create storage for the main fork of the new relfilenode. If it's a

‎src/bin/pg_dump/pg_dump.c

Lines changed: 27 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3141,6 +3141,7 @@ dumpDatabase(Archive *fout)
31413141
PGresult *lo_res;
31423142
PQExpBuffer loFrozenQry = createPQExpBuffer();
31433143
PQExpBuffer loOutQry = createPQExpBuffer();
3144+
PQExpBuffer loVacQry = createPQExpBuffer();
31443145
inti_relfrozenxid,
31453146
i_relfilenode,
31463147
i_oid,
@@ -3167,15 +3168,36 @@ dumpDatabase(Archive *fout)
31673168
i_relfilenode = PQfnumber(lo_res, "relfilenode");
31683169
i_oid = PQfnumber(lo_res, "oid");
31693170

3170-
appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, preserve values for pg_largeobject and its index\n");
3171+
appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, set pg_largeobject relfrozenxid and relminmxid\n");
3172+
appendPQExpBufferStr(loVacQry, "\n-- For binary upgrade, preserve pg_largeobject and index relfilenodes\n");
31713173
for (int i = 0; i < PQntuples(lo_res); ++i)
3174+
{
3175+
Oidoid;
3176+
Oidrelfilenode;
3177+
31723178
appendPQExpBuffer(loOutQry, "UPDATE pg_catalog.pg_class\n"
3173-
"SET relfrozenxid = '%u', relminmxid = '%u', relfilenode = '%u'\n"
3179+
"SET relfrozenxid = '%u', relminmxid = '%u'\n"
31743180
"WHERE oid = %u;\n",
31753181
atooid(PQgetvalue(lo_res, i, i_relfrozenxid)),
31763182
atooid(PQgetvalue(lo_res, i, i_relminmxid)),
3177-
atooid(PQgetvalue(lo_res, i, i_relfilenode)),
3178-
atooid(PQgetvalue(lo_res, i, i_oid)));
3183+
atooid(PQgetvalue(lo_res, i, i_relfilenode)));
3184+
3185+
oid = atooid(PQgetvalue(lo_res, i, i_oid));
3186+
relfilenode = atooid(PQgetvalue(lo_res, i, i_relfilenode));
3187+
3188+
if (oid == LargeObjectRelationId)
3189+
appendPQExpBuffer(loVacQry,
3190+
"SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n",
3191+
relfilenode);
3192+
else if (oid == LargeObjectLOidPNIndexId)
3193+
appendPQExpBuffer(loVacQry,
3194+
"SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n",
3195+
relfilenode);
3196+
}
3197+
3198+
appendPQExpBufferStr(loVacQry,
3199+
"TRUNCATE pg_catalog.pg_largeobject;\n");
3200+
appendPQExpBufferStr(loOutQry, loVacQry->data);
31793201

31803202
ArchiveEntry(fout, nilCatalogId, createDumpId(),
31813203
ARCHIVE_OPTS(.tag = "pg_largeobject",
@@ -3187,6 +3209,7 @@ dumpDatabase(Archive *fout)
31873209

31883210
destroyPQExpBuffer(loFrozenQry);
31893211
destroyPQExpBuffer(loOutQry);
3212+
destroyPQExpBuffer(loVacQry);
31903213
}
31913214

31923215
PQclear(res);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp