Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c424c75

Browse files
committed
Prevent excess SimpleLruTruncate() deletion.
Every core SLRU wraps around. With the exception of pg_notify, the wrap point can fall in the middle of a page. Account for this in the PagePrecedes callback specification and in SimpleLruTruncate()'s use of said callback. Update each callback implementation to fit the new specification. This changes SerialPagePrecedesLogically() from the style of asyncQueuePagePrecedes() to the style of CLOGPagePrecedes(). (Whereas pg_clog and pg_serial share a key space, pg_serial is nothing like pg_notify.) The bug fixed here has the same symptoms and user followup steps as 592a589. Back-patch to 9.5 (all supported versions). Reviewed by Andrey Borodin and (in earlier versions) by Tom Lane. Discussion: https://postgr.es/m/20190202083822.GC32531@gust.leadboat.com
1 parent 3934543 commit c424c75

File tree

8 files changed

+312
-79
lines changed

8 files changed

+312
-79
lines changed

‎src/backend/access/transam/clog.c

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,7 @@ CLOGShmemInit(void)
464464
ClogCtl->PagePrecedes=CLOGPagePrecedes;
465465
SimpleLruInit(ClogCtl,"CLOG Ctl",CLOGShmemBuffers(),CLOG_LSNS_PER_PAGE,
466466
CLogControlLock,"pg_clog");
467+
SlruPagePrecedesUnitTests(ClogCtl,CLOG_XACTS_PER_PAGE);
467468
}
468469

469470
/*
@@ -676,13 +677,22 @@ TruncateCLOG(TransactionId oldestXact)
676677

677678

678679
/*
679-
* Decide which of two CLOG page numbers is "older" for truncation purposes.
680+
* Decide whether a CLOG page number is "older" for truncation purposes.
680681
*
681682
* We need to use comparison of TransactionIds here in order to do the right
682-
* thing with wraparound XID arithmetic. However, if we are asked about
683-
* page number zero, we don't want to hand InvalidTransactionId to
684-
* TransactionIdPrecedes: it'll get weird about permanent xact IDs. So,
685-
* offset both xids by FirstNormalTransactionId to avoid that.
683+
* thing with wraparound XID arithmetic. However, TransactionIdPrecedes()
684+
* would get weird about permanent xact IDs. So, offset both such that xid1,
685+
* xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset
686+
* is relevant to page 0 and to the page preceding page 0.
687+
*
688+
* The page containing oldestXact-2^31 is the important edge case. The
689+
* portion of that page equaling or following oldestXact-2^31 is expendable,
690+
* but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is
691+
* the first XID of a page and segment, the entire page and segment is
692+
* expendable, and we could truncate the segment. Recognizing that case would
693+
* require making oldestXact, not just the page containing oldestXact,
694+
* available to this callback. The benefit would be rare and small, so we
695+
* don't optimize that edge case.
686696
*/
687697
staticbool
688698
CLOGPagePrecedes(intpage1,intpage2)
@@ -691,11 +701,12 @@ CLOGPagePrecedes(int page1, int page2)
691701
TransactionIdxid2;
692702

693703
xid1= ((TransactionId)page1)*CLOG_XACTS_PER_PAGE;
694-
xid1+=FirstNormalTransactionId;
704+
xid1+=FirstNormalTransactionId+1;
695705
xid2= ((TransactionId)page2)*CLOG_XACTS_PER_PAGE;
696-
xid2+=FirstNormalTransactionId;
706+
xid2+=FirstNormalTransactionId+1;
697707

698-
returnTransactionIdPrecedes(xid1,xid2);
708+
return (TransactionIdPrecedes(xid1,xid2)&&
709+
TransactionIdPrecedes(xid1,xid2+CLOG_XACTS_PER_PAGE-1));
699710
}
700711

701712

‎src/backend/access/transam/commit_ts.c

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,7 @@ CommitTsShmemInit(void)
493493
CommitTsCtl->PagePrecedes=CommitTsPagePrecedes;
494494
SimpleLruInit(CommitTsCtl,"CommitTs Ctl",CommitTsShmemBuffers(),0,
495495
CommitTsControlLock,"pg_commit_ts");
496+
SlruPagePrecedesUnitTests(CommitTsCtl,COMMIT_TS_XACTS_PER_PAGE);
496497

497498
commitTsShared=ShmemInitStruct("CommitTs shared",
498499
sizeof(CommitTimestampShared),
@@ -869,13 +870,27 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact)
869870

870871

871872
/*
872-
* Decide which of two CLOG page numbers is "older" for truncation purposes.
873+
* Decide whether a commitTS page number is "older" for truncation purposes.
874+
* Analogous to CLOGPagePrecedes().
873875
*
874-
* We need to use comparison of TransactionIds here in order to do the right
875-
* thing with wraparound XID arithmetic. However, if we are asked about
876-
* page number zero, we don't want to hand InvalidTransactionId to
877-
* TransactionIdPrecedes: it'll get weird about permanent xact IDs. So,
878-
* offset both xids by FirstNormalTransactionId to avoid that.
876+
* At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128. This
877+
* introduces differences compared to CLOG and the other SLRUs having (1 <<
878+
* 31) % per_page == 0. This function never tests exactly
879+
* TransactionIdPrecedes(x-2^31, x). When the system reaches xidStopLimit,
880+
* there are two possible counts of page boundaries between oldestXact and the
881+
* latest XID assigned, depending on whether oldestXact is within the first
882+
* 128 entries of its page. Since this function doesn't know the location of
883+
* oldestXact within page2, it returns false for one page that actually is
884+
* expendable. This is a wider (yet still negligible) version of the
885+
* truncation opportunity that CLOGPagePrecedes() cannot recognize.
886+
*
887+
* For the sake of a worked example, number entries with decimal values such
888+
* that page1==1 entries range from 1.0 to 1.999. Let N+0.15 be the number of
889+
* pages that 2^31 entries will span (N is an integer). If oldestXact=N+2.1,
890+
* then the final safe XID assignment leaves newestXact=1.95. We keep page 2,
891+
* because entry=2.85 is the border that toggles whether entries precede the
892+
* last entry of the oldestXact page. While page 2 is expendable at
893+
* oldestXact=N+2.1, it would be precious at oldestXact=N+2.9.
879894
*/
880895
staticbool
881896
CommitTsPagePrecedes(intpage1,intpage2)
@@ -884,11 +899,12 @@ CommitTsPagePrecedes(int page1, int page2)
884899
TransactionIdxid2;
885900

886901
xid1= ((TransactionId)page1)*COMMIT_TS_XACTS_PER_PAGE;
887-
xid1+=FirstNormalTransactionId;
902+
xid1+=FirstNormalTransactionId+1;
888903
xid2= ((TransactionId)page2)*COMMIT_TS_XACTS_PER_PAGE;
889-
xid2+=FirstNormalTransactionId;
904+
xid2+=FirstNormalTransactionId+1;
890905

891-
returnTransactionIdPrecedes(xid1,xid2);
906+
return (TransactionIdPrecedes(xid1,xid2)&&
907+
TransactionIdPrecedes(xid1,xid2+COMMIT_TS_XACTS_PER_PAGE-1));
892908
}
893909

894910

‎src/backend/access/transam/multixact.c

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1839,9 +1839,11 @@ MultiXactShmemInit(void)
18391839
SimpleLruInit(MultiXactOffsetCtl,
18401840
"MultiXactOffset Ctl",NUM_MXACTOFFSET_BUFFERS,0,
18411841
MultiXactOffsetControlLock,"pg_multixact/offsets");
1842+
SlruPagePrecedesUnitTests(MultiXactOffsetCtl,MULTIXACT_OFFSETS_PER_PAGE);
18421843
SimpleLruInit(MultiXactMemberCtl,
18431844
"MultiXactMember Ctl",NUM_MXACTMEMBER_BUFFERS,0,
18441845
MultiXactMemberControlLock,"pg_multixact/members");
1846+
/* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
18451847

18461848
/* Initialize our shared state struct */
18471849
MultiXactState=ShmemInitStruct("Shared MultiXact State",
@@ -2975,6 +2977,14 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
29752977
* truncate the members SLRU. So we first scan the directory to determine
29762978
* the earliest offsets page number that we can read without error.
29772979
*
2980+
* When nextMXact is less than one segment away from multiWrapLimit,
2981+
* SlruScanDirCbFindEarliest can find some early segment other than the
2982+
* actual earliest. (MultiXactOffsetPagePrecedes(EARLIEST, LATEST)
2983+
* returns false, because not all pairs of entries have the same answer.)
2984+
* That can also arise when an earlier truncation attempt failed unlink()
2985+
* or returned early from this function. The only consequence is
2986+
* returning early, which wastes space that we could have liberated.
2987+
*
29782988
* NB: It's also possible that the page that oldestMulti is on has already
29792989
* been truncated away, and we crashed before updating oldestMulti.
29802990
*/
@@ -3089,15 +3099,11 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
30893099
}
30903100

30913101
/*
3092-
* Decide which of two MultiXactOffset page numbers is "older" for truncation
3093-
* purposes.
3094-
*
3095-
* We need to use comparison of MultiXactId here in order to do the right
3096-
* thing with wraparound. However, if we are asked about page number zero, we
3097-
* don't want to hand InvalidMultiXactId to MultiXactIdPrecedes: it'll get
3098-
* weird. So, offset both multis by FirstMultiXactId to avoid that.
3099-
* (Actually, the current implementation doesn't do anything weird with
3100-
* InvalidMultiXactId, but there's no harm in leaving this code like this.)
3102+
* Decide whether a MultiXactOffset page number is "older" for truncation
3103+
* purposes. Analogous to CLOGPagePrecedes().
3104+
*
3105+
* Offsetting the values is optional, because MultiXactIdPrecedes() has
3106+
* translational symmetry.
31013107
*/
31023108
staticbool
31033109
MultiXactOffsetPagePrecedes(intpage1,intpage2)
@@ -3106,15 +3112,17 @@ MultiXactOffsetPagePrecedes(int page1, int page2)
31063112
MultiXactIdmulti2;
31073113

31083114
multi1= ((MultiXactId)page1)*MULTIXACT_OFFSETS_PER_PAGE;
3109-
multi1+=FirstMultiXactId;
3115+
multi1+=FirstMultiXactId+1;
31103116
multi2= ((MultiXactId)page2)*MULTIXACT_OFFSETS_PER_PAGE;
3111-
multi2+=FirstMultiXactId;
3117+
multi2+=FirstMultiXactId+1;
31123118

3113-
returnMultiXactIdPrecedes(multi1,multi2);
3119+
return (MultiXactIdPrecedes(multi1,multi2)&&
3120+
MultiXactIdPrecedes(multi1,
3121+
multi2+MULTIXACT_OFFSETS_PER_PAGE-1));
31143122
}
31153123

31163124
/*
3117-
* Decide which of two MultiXactMember page numbers is "older" for truncation
3125+
* Decide whether a MultiXactMember page number is "older" for truncation
31183126
* purposes. There is no "invalid offset number" so use the numbers verbatim.
31193127
*/
31203128
staticbool
@@ -3126,7 +3134,9 @@ MultiXactMemberPagePrecedes(int page1, int page2)
31263134
offset1= ((MultiXactOffset)page1)*MULTIXACT_MEMBERS_PER_PAGE;
31273135
offset2= ((MultiXactOffset)page2)*MULTIXACT_MEMBERS_PER_PAGE;
31283136

3129-
returnMultiXactOffsetPrecedes(offset1,offset2);
3137+
return (MultiXactOffsetPrecedes(offset1,offset2)&&
3138+
MultiXactOffsetPrecedes(offset1,
3139+
offset2+MULTIXACT_MEMBERS_PER_PAGE-1));
31303140
}
31313141

31323142
/*

‎src/backend/access/transam/slru.c

Lines changed: 127 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,11 +1156,6 @@ SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
11561156
SlruSharedshared=ctl->shared;
11571157
intslotno;
11581158

1159-
/*
1160-
* The cutoff point is the start of the segment containing cutoffPage.
1161-
*/
1162-
cutoffPage-=cutoffPage %SLRU_PAGES_PER_SEGMENT;
1163-
11641159
/*
11651160
* Scan shared memory and remove any pages preceding the cutoff page, to
11661161
* ensure we won't rewrite them later. (Since this is normally called in
@@ -1173,9 +1168,7 @@ restart:;
11731168

11741169
/*
11751170
* While we are holding the lock, make an important safety check: the
1176-
* planned cutoff point must be <= the current endpoint page. Otherwise we
1177-
* have already wrapped around, and proceeding with the truncation would
1178-
* risk removing the current segment.
1171+
* current endpoint page must not be eligible for removal.
11791172
*/
11801173
if (ctl->PagePrecedes(shared->latest_page_number,cutoffPage))
11811174
{
@@ -1207,8 +1200,11 @@ restart:;
12071200
* Hmm, we have (or may have) I/O operations acting on the page, so
12081201
* we've got to wait for them to finish and then start again. This is
12091202
* the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1210-
* wouldn't it be OK to just discard it without writing it? For now,
1211-
* keep the logic the same as it was.)
1203+
* wouldn't it be OK to just discard it without writing it?
1204+
* SlruMayDeleteSegment() uses a stricter qualification, so we might
1205+
* not delete this page in the end; even if we don't delete it, we
1206+
* won't have cause to read its data again. For now, keep the logic
1207+
* the same as it was.)
12121208
*/
12131209
if (shared->page_status[slotno]==SLRU_PAGE_VALID)
12141210
SlruInternalWritePage(ctl,slotno,NULL);
@@ -1298,19 +1294,134 @@ SlruDeleteSegment(SlruCtl ctl, int segno)
12981294
LWLockRelease(shared->ControlLock);
12991295
}
13001296

1297+
/*
1298+
* Determine whether a segment is okay to delete.
1299+
*
1300+
* segpage is the first page of the segment, and cutoffPage is the oldest (in
1301+
* PagePrecedes order) page in the SLRU containing still-useful data. Since
1302+
* every core PagePrecedes callback implements "wrap around", check the
1303+
* segment's first and last pages:
1304+
*
1305+
* first<cutoff && last<cutoff: yes
1306+
* first<cutoff && last>=cutoff: no; cutoff falls inside this segment
1307+
* first>=cutoff && last<cutoff: no; wrap point falls inside this segment
1308+
* first>=cutoff && last>=cutoff: no; every page of this segment is too young
1309+
*/
1310+
staticbool
1311+
SlruMayDeleteSegment(SlruCtlctl,intsegpage,intcutoffPage)
1312+
{
1313+
intseg_last_page=segpage+SLRU_PAGES_PER_SEGMENT-1;
1314+
1315+
Assert(segpage %SLRU_PAGES_PER_SEGMENT==0);
1316+
1317+
return (ctl->PagePrecedes(segpage,cutoffPage)&&
1318+
ctl->PagePrecedes(seg_last_page,cutoffPage));
1319+
}
1320+
1321+
#ifdefUSE_ASSERT_CHECKING
1322+
staticvoid
1323+
SlruPagePrecedesTestOffset(SlruCtlctl,intper_page,uint32offset)
1324+
{
1325+
TransactionIdlhs,
1326+
rhs;
1327+
intnewestPage,
1328+
oldestPage;
1329+
TransactionIdnewestXact,
1330+
oldestXact;
1331+
1332+
/*
1333+
* Compare an XID pair having undefined order (see RFC 1982), a pair at
1334+
* "opposite ends" of the XID space. TransactionIdPrecedes() treats each
1335+
* as preceding the other. If RHS is oldestXact, LHS is the first XID we
1336+
* must not assign.
1337+
*/
1338+
lhs=per_page+offset;/* skip first page to avoid non-normal XIDs */
1339+
rhs=lhs+ (1U <<31);
1340+
Assert(TransactionIdPrecedes(lhs,rhs));
1341+
Assert(TransactionIdPrecedes(rhs,lhs));
1342+
Assert(!TransactionIdPrecedes(lhs-1,rhs));
1343+
Assert(TransactionIdPrecedes(rhs,lhs-1));
1344+
Assert(TransactionIdPrecedes(lhs+1,rhs));
1345+
Assert(!TransactionIdPrecedes(rhs,lhs+1));
1346+
Assert(!TransactionIdFollowsOrEquals(lhs,rhs));
1347+
Assert(!TransactionIdFollowsOrEquals(rhs,lhs));
1348+
Assert(!ctl->PagePrecedes(lhs /per_page,lhs /per_page));
1349+
Assert(!ctl->PagePrecedes(lhs /per_page,rhs /per_page));
1350+
Assert(!ctl->PagePrecedes(rhs /per_page,lhs /per_page));
1351+
Assert(!ctl->PagePrecedes((lhs-per_page) /per_page,rhs /per_page));
1352+
Assert(ctl->PagePrecedes(rhs /per_page, (lhs-3*per_page) /per_page));
1353+
Assert(ctl->PagePrecedes(rhs /per_page, (lhs-2*per_page) /per_page));
1354+
Assert(ctl->PagePrecedes(rhs /per_page, (lhs-1*per_page) /per_page)
1355+
|| (1U <<31) %per_page!=0);/* See CommitTsPagePrecedes() */
1356+
Assert(ctl->PagePrecedes((lhs+1*per_page) /per_page,rhs /per_page)
1357+
|| (1U <<31) %per_page!=0);
1358+
Assert(ctl->PagePrecedes((lhs+2*per_page) /per_page,rhs /per_page));
1359+
Assert(ctl->PagePrecedes((lhs+3*per_page) /per_page,rhs /per_page));
1360+
Assert(!ctl->PagePrecedes(rhs /per_page, (lhs+per_page) /per_page));
1361+
1362+
/*
1363+
* GetNewTransactionId() has assigned the last XID it can safely use, and
1364+
* that XID is in the *LAST* page of the second segment. We must not
1365+
* delete that segment.
1366+
*/
1367+
newestPage=2*SLRU_PAGES_PER_SEGMENT-1;
1368+
newestXact=newestPage*per_page+offset;
1369+
Assert(newestXact /per_page==newestPage);
1370+
oldestXact=newestXact+1;
1371+
oldestXact-=1U <<31;
1372+
oldestPage=oldestXact /per_page;
1373+
Assert(!SlruMayDeleteSegment(ctl,
1374+
(newestPage-
1375+
newestPage %SLRU_PAGES_PER_SEGMENT),
1376+
oldestPage));
1377+
1378+
/*
1379+
* GetNewTransactionId() has assigned the last XID it can safely use, and
1380+
* that XID is in the *FIRST* page of the second segment. We must not
1381+
* delete that segment.
1382+
*/
1383+
newestPage=SLRU_PAGES_PER_SEGMENT;
1384+
newestXact=newestPage*per_page+offset;
1385+
Assert(newestXact /per_page==newestPage);
1386+
oldestXact=newestXact+1;
1387+
oldestXact-=1U <<31;
1388+
oldestPage=oldestXact /per_page;
1389+
Assert(!SlruMayDeleteSegment(ctl,
1390+
(newestPage-
1391+
newestPage %SLRU_PAGES_PER_SEGMENT),
1392+
oldestPage));
1393+
}
1394+
1395+
/*
1396+
* Unit-test a PagePrecedes function.
1397+
*
1398+
* This assumes every uint32 >= FirstNormalTransactionId is a valid key. It
1399+
* assumes each value occupies a contiguous, fixed-size region of SLRU bytes.
1400+
* (MultiXactMemberCtl separates flags from XIDs. AsyncCtl has
1401+
* variable-length entries, no keys, and no random access. These unit tests
1402+
* do not apply to them.)
1403+
*/
1404+
void
1405+
SlruPagePrecedesUnitTests(SlruCtlctl,intper_page)
1406+
{
1407+
/* Test first, middle and last entries of a page. */
1408+
SlruPagePrecedesTestOffset(ctl,per_page,0);
1409+
SlruPagePrecedesTestOffset(ctl,per_page,per_page /2);
1410+
SlruPagePrecedesTestOffset(ctl,per_page,per_page-1);
1411+
}
1412+
#endif
1413+
13011414
/*
13021415
* SlruScanDirectory callback
1303-
* This callback reports true if there's any segment prior to the one
1304-
* containing the page passed as "data".
1416+
* This callback reports true if there's any segment wholly prior to the
1417+
* one containing the page passed as "data".
13051418
*/
13061419
bool
13071420
SlruScanDirCbReportPresence(SlruCtlctl,char*filename,intsegpage,void*data)
13081421
{
13091422
intcutoffPage=*(int*)data;
13101423

1311-
cutoffPage-=cutoffPage %SLRU_PAGES_PER_SEGMENT;
1312-
1313-
if (ctl->PagePrecedes(segpage,cutoffPage))
1424+
if (SlruMayDeleteSegment(ctl,segpage,cutoffPage))
13141425
return true;/* found one; don't iterate any more */
13151426

13161427
return false;/* keep going */
@@ -1325,7 +1436,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
13251436
{
13261437
intcutoffPage=*(int*)data;
13271438

1328-
if (ctl->PagePrecedes(segpage,cutoffPage))
1439+
if (SlruMayDeleteSegment(ctl,segpage,cutoffPage))
13291440
SlruInternalDeleteSegment(ctl,filename);
13301441

13311442
return false;/* keep going */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp