Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit2ce64ca

Browse files
committed
Fix bugs in vacuum of shared rels, by keeping their relcache entries current.
When vacuum processes a relation it uses the corresponding relcacheentry's relfrozenxid / relminmxid as a cutoff for when to removetuples etc. Unfortunately for nailed relations (i.e. critical systemcatalogs) bugs could frequently lead to the corresponding relcacheentry being stale.This set of bugs could cause actual data corruption as vacuum wouldpotentially not remove the correct row versions, potentially revivingthem at a later point. After699bf7d some corruptions in this veinwere prevented, but the additional error checks could also triggerspuriously. Examples of such errors are: ERROR: found xmin ... from before relfrozenxid ...and ERROR: found multixact ... from before relminmxid ...To be caused by this bug the errors have to occur on system catalogtables.The two bugs are:1) Invalidations for nailed relations were ignored, based on the theory that the relcache entry for such tables doesn't change. Which is largely true, except for fields like relfrozenxid etc. This means that changes to relations vacuumed in other sessions weren't picked up by already existing sessions. Luckily autovacuum doesn't have particularly longrunning sessions.2) For shared *and* nailed relations, the shared relcache init file was never invalidated while running. That means that for such tables (e.g. pg_authid, pg_database) it's not just already existing sessions that are affected, but even new connections are as well. That explains why the reports usually were about pg_authid et. al.To fix 1), revalidate the rd_rel portion of a relcache entry wheninvalid. This implies a bit of extra complexity to deal withbootstrapping, but it's not too bad. The fix for 2) is simpler,simply always remove both the shared and local init files.Author: Andres FreundReviewed-By: Alvaro HerreraDiscussion:https://postgr.es/m/20180525203736.crkbg36muzxrjj5e@alap3.anarazel.dehttps://postgr.es/m/CAMa1XUhKSJd98JW4o9StWPrfS=11bPgG+_GDMxe25TvUY4Sugg@mail.gmail.comhttps://postgr.es/m/CAKMFJucqbuoDRfxPDX39WhA3vJyxweRg_zDVXzncr6+5wOguWA@mail.gmail.comhttps://postgr.es/m/CAGewt-ujGpMLQ09gXcUFMZaZsGJC98VXHEFbF-tpPB0fB13K+A@mail.gmail.comBackpatch: 9.3-
1 parentb10edaf commit2ce64ca

File tree

3 files changed

+146
-74
lines changed

3 files changed

+146
-74
lines changed

‎src/backend/utils/cache/inval.c

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -521,12 +521,12 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
521521
(void)GetCurrentCommandId(true);
522522

523523
/*
524-
* If the relation being invalidated is one of those cached in the local
525-
* relcache init file, mark that we need to zap that file at commit. Same
526-
* is true when we are invalidating whole relcache.
524+
* If the relation being invalidated is one of those cached in a relcache
525+
* init file, mark that we need to zap that file at commit. For simplicity
526+
* invalidations for a specific database always invalidate the shared file
527+
* as well. Also zap when we are invalidating whole relcache.
527528
*/
528-
if (OidIsValid(dbId)&&
529-
(RelationIdIsInInitFile(relId)||relId==InvalidOid))
529+
if (relId==InvalidOid||RelationIdIsInInitFile(relId))
530530
transInvalInfo->RelcacheInitFileInval= true;
531531
}
532532

@@ -881,18 +881,26 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
881881

882882
if (RelcacheInitFileInval)
883883
{
884+
elog(trace_recovery(DEBUG4),"removing relcache init files for database %u",
885+
dbid);
886+
884887
/*
885-
* RelationCacheInitFilePreInvalidate requires DatabasePath to be set,
886-
* but we should not use SetDatabasePath during recovery, since it is
888+
* RelationCacheInitFilePreInvalidate, when the invalidation message
889+
* is for a specific database, requires DatabasePath to be set, but we
890+
* should not use SetDatabasePath during recovery, since it is
887891
* intended to be used only once by normal backends. Hence, a quick
888892
* hack: set DatabasePath directly then unset after use.
889893
*/
890-
DatabasePath=GetDatabasePath(dbid,tsid);
891-
elog(trace_recovery(DEBUG4),"removing relcache init file in \"%s\"",
892-
DatabasePath);
894+
if (OidIsValid(dbid))
895+
DatabasePath=GetDatabasePath(dbid,tsid);
896+
893897
RelationCacheInitFilePreInvalidate();
894-
pfree(DatabasePath);
895-
DatabasePath=NULL;
898+
899+
if (OidIsValid(dbid))
900+
{
901+
pfree(DatabasePath);
902+
DatabasePath=NULL;
903+
}
896904
}
897905

898906
SendSharedInvalidMessages(msgs,nmsgs);

‎src/backend/utils/cache/relcache.c

Lines changed: 125 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
248248
staticvoidRelationClearRelation(Relationrelation,boolrebuild);
249249

250250
staticvoidRelationReloadIndexInfo(Relationrelation);
251+
staticvoidRelationReloadNailed(Relationrelation);
251252
staticvoidRelationFlushRelation(Relationrelation);
252253
staticvoidRememberToFreeTupleDescAtEOX(TupleDesctd);
253254
staticvoidAtEOXact_cleanup(Relationrelation,boolisCommit);
@@ -285,7 +286,7 @@ static void IndexSupportInitialize(oidvector *indclass,
285286
staticOpClassCacheEnt*LookupOpclassInfo(OidoperatorClassOid,
286287
StrategyNumbernumSupport);
287288
staticvoidRelationCacheInitFileRemoveInDir(constchar*tblspcpath);
288-
staticvoidunlink_initfile(constchar*initfilename);
289+
staticvoidunlink_initfile(constchar*initfilename,intelevel);
289290
staticboolequalPartitionDescs(PartitionKeykey,PartitionDescpartdesc1,
290291
PartitionDescpartdesc2);
291292

@@ -2052,7 +2053,16 @@ RelationIdGetRelation(Oid relationId)
20522053
RelationReloadIndexInfo(rd);
20532054
else
20542055
RelationClearRelation(rd, true);
2055-
Assert(rd->rd_isvalid);
2056+
2057+
/*
2058+
* Normally entries need to be valid here, but before the relcache
2059+
* has been initialized, not enough infrastructure exists to
2060+
* perform pg_class lookups. The structure of such entries doesn't
2061+
* change, but we still want to update the rd_rel entry. So
2062+
* rd_isvalid = false is left in place for a later lookup.
2063+
*/
2064+
Assert(rd->rd_isvalid||
2065+
(rd->rd_isnailed&& !criticalRelcachesBuilt));
20562066
}
20572067
returnrd;
20582068
}
@@ -2255,6 +2265,81 @@ RelationReloadIndexInfo(Relation relation)
22552265
relation->rd_isvalid= true;
22562266
}
22572267

2268+
/*
2269+
* RelationReloadNailed - reload minimal information for nailed relations.
2270+
*
2271+
* The structure of a nailed relation can never change (which is good, because
2272+
* we rely on knowing their structure to be able to read catalog content). But
2273+
* some parts, e.g. pg_class.relfrozenxid, are still important to have
2274+
* accurate content for. Therefore those need to be reloaded after the arrival
2275+
* of invalidations.
2276+
*/
2277+
staticvoid
2278+
RelationReloadNailed(Relationrelation)
2279+
{
2280+
Assert(relation->rd_isnailed);
2281+
2282+
/*
2283+
* Redo RelationInitPhysicalAddr in case it is a mapped relation whose
2284+
* mapping changed.
2285+
*/
2286+
RelationInitPhysicalAddr(relation);
2287+
2288+
/* flag as needing to be revalidated */
2289+
relation->rd_isvalid= false;
2290+
2291+
/*
2292+
* Can only reread catalog contents if in a transaction. If the relation
2293+
* is currently open (not counting the nailed refcount), do so
2294+
* immediately. Otherwise we've already marked the entry as possibly
2295+
* invalid, and it'll be fixed when next opened.
2296+
*/
2297+
if (!IsTransactionState()||relation->rd_refcnt <=1)
2298+
return;
2299+
2300+
if (relation->rd_rel->relkind==RELKIND_INDEX)
2301+
{
2302+
/*
2303+
* If it's a nailed-but-not-mapped index, then we need to re-read the
2304+
* pg_class row to see if its relfilenode changed.
2305+
*/
2306+
RelationReloadIndexInfo(relation);
2307+
}
2308+
else
2309+
{
2310+
/*
2311+
* Reload a non-index entry. We can't easily do so if relcaches
2312+
* aren't yet built, but that's fine because at that stage the
2313+
* attributes that need to be current (like relfrozenxid) aren't yet
2314+
* accessed. To ensure the entry will later be revalidated, we leave
2315+
* it in invalid state, but allow use (cf. RelationIdGetRelation()).
2316+
*/
2317+
if (criticalRelcachesBuilt)
2318+
{
2319+
HeapTuplepg_class_tuple;
2320+
Form_pg_classrelp;
2321+
2322+
/*
2323+
* NB: Mark the entry as valid before starting to scan, to avoid
2324+
* self-recursion when re-building pg_class.
2325+
*/
2326+
relation->rd_isvalid= true;
2327+
2328+
pg_class_tuple=ScanPgRelation(RelationGetRelid(relation),
2329+
true, false);
2330+
relp= (Form_pg_class)GETSTRUCT(pg_class_tuple);
2331+
memcpy(relation->rd_rel,relp,CLASS_TUPLE_SIZE);
2332+
heap_freetuple(pg_class_tuple);
2333+
2334+
/*
2335+
* Again mark as valid, to protect against concurrently arriving
2336+
* invalidations.
2337+
*/
2338+
relation->rd_isvalid= true;
2339+
}
2340+
}
2341+
}
2342+
22582343
/*
22592344
* RelationDestroyRelation
22602345
*
@@ -2368,26 +2453,12 @@ RelationClearRelation(Relation relation, bool rebuild)
23682453
RelationCloseSmgr(relation);
23692454

23702455
/*
2371-
* Never, never ever blow away a nailed-in system relation, because we'd
2372-
* be unable to recover. However, we must redo RelationInitPhysicalAddr
2373-
* in case it is a mapped relation whose mapping changed.
2374-
*
2375-
* If it's a nailed-but-not-mapped index, then we need to re-read the
2376-
* pg_class row to see if its relfilenode changed. We do that immediately
2377-
* if we're inside a valid transaction and the relation is open (not
2378-
* counting the nailed refcount). Otherwise just mark the entry as
2379-
* possibly invalid, and it'll be fixed when next opened.
2456+
* Treat nailed-in system relations separately, they always need to be
2457+
* accessible, so we can't blow them away.
23802458
*/
23812459
if (relation->rd_isnailed)
23822460
{
2383-
RelationInitPhysicalAddr(relation);
2384-
2385-
if (relation->rd_rel->relkind==RELKIND_INDEX)
2386-
{
2387-
relation->rd_isvalid= false;/* needs to be revalidated */
2388-
if (relation->rd_refcnt>1&&IsTransactionState())
2389-
RelationReloadIndexInfo(relation);
2390-
}
2461+
RelationReloadNailed(relation);
23912462
return;
23922463
}
23932464

@@ -5904,24 +5975,26 @@ write_item(const void *data, Size len, FILE *fp)
59045975

59055976
/*
59065977
* Determine whether a given relation (identified by OID) is one of the ones
5907-
* we should store inthe local relcache init file.
5978+
* we should store ina relcache init file.
59085979
*
59095980
* We must cache all nailed rels, and for efficiency we should cache every rel
59105981
* that supports a syscache. The former set is almost but not quite a subset
5911-
* of the latter. Currently, we must special-case TriggerRelidNameIndexId,
5912-
* which RelationCacheInitializePhase3 chooses to nail for efficiency reasons,
5913-
* but which does not support any syscache.
5914-
*
5915-
* Note: this function is currently never called for shared rels. If it were,
5916-
* we'd probably also need a special case for DatabaseNameIndexId, which is
5917-
* critical but does not support a syscache.
5982+
* of the latter. The special cases are relations where
5983+
* RelationCacheInitializePhase2/3 chooses to nail for efficiency reasons, but
5984+
* which do not support any syscache.
59185985
*/
59195986
bool
59205987
RelationIdIsInInitFile(OidrelationId)
59215988
{
5922-
if (relationId==TriggerRelidNameIndexId)
5989+
if (relationId==SharedSecLabelRelationId||
5990+
relationId==TriggerRelidNameIndexId||
5991+
relationId==DatabaseNameIndexId||
5992+
relationId==SharedSecLabelObjectIndexId)
59235993
{
5924-
/* If this Assert fails, we don't need this special case anymore. */
5994+
/*
5995+
* If this Assert fails, we don't need the applicable special case
5996+
* anymore.
5997+
*/
59255998
Assert(!RelationSupportsSysCache(relationId));
59265999
return true;
59276000
}
@@ -5991,38 +6064,30 @@ RelationHasUnloggedIndex(Relation rel)
59916064
* We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
59926065
* then release the lock in RelationCacheInitFilePostInvalidate. Caller must
59936066
* send any pending SI messages between those calls.
5994-
*
5995-
* Notice this deals only with the local init file, not the shared init file.
5996-
* The reason is that there can never be a "significant" change to the
5997-
* relcache entry of a shared relation; the most that could happen is
5998-
* updates of noncritical fields such as relpages/reltuples. So, while
5999-
* it's worth updating the shared init file from time to time, it can never
6000-
* be invalid enough to make it necessary to remove it.
60016067
*/
60026068
void
60036069
RelationCacheInitFilePreInvalidate(void)
60046070
{
6005-
charinitfilename[MAXPGPATH];
6071+
charlocalinitfname[MAXPGPATH];
6072+
charsharedinitfname[MAXPGPATH];
60066073

6007-
snprintf(initfilename,sizeof(initfilename),"%s/%s",
6008-
DatabasePath,RELCACHE_INIT_FILENAME);
6074+
if (DatabasePath)
6075+
snprintf(localinitfname,sizeof(localinitfname),"%s/%s",
6076+
DatabasePath,RELCACHE_INIT_FILENAME);
6077+
snprintf(sharedinitfname,sizeof(sharedinitfname),"global/%s",
6078+
RELCACHE_INIT_FILENAME);
60096079

60106080
LWLockAcquire(RelCacheInitLock,LW_EXCLUSIVE);
60116081

6012-
if (unlink(initfilename)<0)
6013-
{
6014-
/*
6015-
* The file might not be there if no backend has been started since
6016-
* the last removal. But complain about failures other than ENOENT.
6017-
* Fortunately, it's not too late to abort the transaction if we can't
6018-
* get rid of the would-be-obsolete init file.
6019-
*/
6020-
if (errno!=ENOENT)
6021-
ereport(ERROR,
6022-
(errcode_for_file_access(),
6023-
errmsg("could not remove cache file \"%s\": %m",
6024-
initfilename)));
6025-
}
6082+
/*
6083+
* The files might not be there if no backend has been started since the
6084+
* last removal. But complain about failures other than ENOENT with
6085+
* ERROR. Fortunately, it's not too late to abort the transaction if we
6086+
* can't get rid of the would-be-obsolete init file.
6087+
*/
6088+
if (DatabasePath)
6089+
unlink_initfile(localinitfname,ERROR);
6090+
unlink_initfile(sharedinitfname,ERROR);
60266091
}
60276092

60286093
void
@@ -6048,13 +6113,9 @@ RelationCacheInitFileRemove(void)
60486113
structdirent*de;
60496114
charpath[MAXPGPATH+10+sizeof(TABLESPACE_VERSION_DIRECTORY)];
60506115

6051-
/*
6052-
* We zap the shared cache file too. In theory it can't get out of sync
6053-
* enough to be a problem, but in data-corruption cases, who knows ...
6054-
*/
60556116
snprintf(path,sizeof(path),"global/%s",
60566117
RELCACHE_INIT_FILENAME);
6057-
unlink_initfile(path);
6118+
unlink_initfile(path,LOG);
60586119

60596120
/* Scan everything in the default tablespace */
60606121
RelationCacheInitFileRemoveInDir("base");
@@ -6106,20 +6167,23 @@ RelationCacheInitFileRemoveInDir(const char *tblspcpath)
61066167
/* Try to remove the init file in each database */
61076168
snprintf(initfilename,sizeof(initfilename),"%s/%s/%s",
61086169
tblspcpath,de->d_name,RELCACHE_INIT_FILENAME);
6109-
unlink_initfile(initfilename);
6170+
unlink_initfile(initfilename,LOG);
61106171
}
61116172
}
61126173

61136174
FreeDir(dir);
61146175
}
61156176

61166177
staticvoid
6117-
unlink_initfile(constchar*initfilename)
6178+
unlink_initfile(constchar*initfilename,intelevel)
61186179
{
61196180
if (unlink(initfilename)<0)
61206181
{
61216182
/* It might not be there, but log any error other than ENOENT */
61226183
if (errno!=ENOENT)
6123-
elog(LOG,"could not remove cache file \"%s\": %m",initfilename);
6184+
ereport(elevel,
6185+
(errcode_for_file_access(),
6186+
errmsg("could not remove cache file \"%s\": %m",
6187+
initfilename)));
61246188
}
61256189
}

‎src/include/storage/standbydefs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ typedef struct xl_invalidations
6464
{
6565
OiddbId;/* MyDatabaseId */
6666
OidtsId;/* MyDatabaseTableSpace */
67-
boolrelcacheInitFileInval;/* invalidate relcache initfile */
67+
boolrelcacheInitFileInval;/* invalidate relcache initfiles */
6868
intnmsgs;/* number of shared inval msgs */
6969
SharedInvalidationMessagemsgs[FLEXIBLE_ARRAY_MEMBER];
7070
}xl_invalidations;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp