1111 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
1212 * Portions Copyright (c) 1994, Regents of the University of California
1313 *
14- * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.43 2006/03/31 23:32:06 tgl Exp $
14+ * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.44 2006/04/14 20:27:24 tgl Exp $
1515 *
1616 *-------------------------------------------------------------------------
1717 */
2424#include "utils/hsearch.h"
2525
2626
27+ /*
28+ * During XLOG replay, we may see XLOG records for incremental updates of
29+ * pages that no longer exist, because their relation was later dropped or
30+ * truncated. (Note: this is only possible when full_page_writes = OFF,
31+ * since when it's ON, the first reference we see to a page should always
32+ * be a full-page rewrite not an incremental update.) Rather than simply
33+ * ignoring such records, we make a note of the referenced page, and then
34+ * complain if we don't actually see a drop or truncate covering the page
35+ * later in replay.
36+ */
37+ typedef struct xl_invalid_page_key
38+ {
39+ RelFileNode node ;/* the relation */
40+ BlockNumber blkno ;/* the page */
41+ }xl_invalid_page_key ;
42+
43+ typedef struct xl_invalid_page
44+ {
45+ xl_invalid_page_key key ;/* hash key ... must be first */
46+ bool present ;/* page existed but contained zeroes */
47+ }xl_invalid_page ;
48+
49+ static HTAB * invalid_page_tab = NULL ;
50+
51+
52+ /* Log a reference to an invalid page */
53+ static void
54+ log_invalid_page (RelFileNode node ,BlockNumber blkno ,bool present )
55+ {
56+ xl_invalid_page_key key ;
57+ xl_invalid_page * hentry ;
58+ bool found ;
59+
60+ /*
61+ * Log references to invalid pages at DEBUG1 level. This allows some
62+ * tracing of the cause (note the elog context mechanism will tell us
63+ * something about the XLOG record that generated the reference).
64+ */
65+ if (present )
66+ elog (DEBUG1 ,"page %u of relation %u/%u/%u is uninitialized" ,
67+ blkno ,node .spcNode ,node .dbNode ,node .relNode );
68+ else
69+ elog (DEBUG1 ,"page %u of relation %u/%u/%u does not exist" ,
70+ blkno ,node .spcNode ,node .dbNode ,node .relNode );
71+
72+ if (invalid_page_tab == NULL )
73+ {
74+ /* create hash table when first needed */
75+ HASHCTL ctl ;
76+
77+ memset (& ctl ,0 ,sizeof (ctl ));
78+ ctl .keysize = sizeof (xl_invalid_page_key );
79+ ctl .entrysize = sizeof (xl_invalid_page );
80+ ctl .hash = tag_hash ;
81+
82+ invalid_page_tab = hash_create ("XLOG invalid-page table" ,
83+ 100 ,
84+ & ctl ,
85+ HASH_ELEM |HASH_FUNCTION );
86+ }
87+
88+ /* we currently assume xl_invalid_page_key contains no padding */
89+ key .node = node ;
90+ key .blkno = blkno ;
91+ hentry = (xl_invalid_page * )
92+ hash_search (invalid_page_tab , (void * )& key ,HASH_ENTER ,& found );
93+
94+ if (!found )
95+ {
96+ /* hash_search already filled in the key */
97+ hentry -> present = present ;
98+ }
99+ else
100+ {
101+ /* repeat reference ... leave "present" as it was */
102+ }
103+ }
104+
105+ /* Forget any invalid pages >= minblkno, because they've been dropped */
106+ static void
107+ forget_invalid_pages (RelFileNode node ,BlockNumber minblkno )
108+ {
109+ HASH_SEQ_STATUS status ;
110+ xl_invalid_page * hentry ;
111+
112+ if (invalid_page_tab == NULL )
113+ return ;/* nothing to do */
114+
115+ hash_seq_init (& status ,invalid_page_tab );
116+
117+ while ((hentry = (xl_invalid_page * )hash_seq_search (& status ))!= NULL )
118+ {
119+ if (RelFileNodeEquals (hentry -> key .node ,node )&&
120+ hentry -> key .blkno >=minblkno )
121+ {
122+ elog (DEBUG2 ,"page %u of relation %u/%u/%u has been dropped" ,
123+ hentry -> key .blkno ,hentry -> key .node .spcNode ,
124+ hentry -> key .node .dbNode ,hentry -> key .node .relNode );
125+
126+ if (hash_search (invalid_page_tab ,
127+ (void * )& hentry -> key ,
128+ HASH_REMOVE ,NULL )== NULL )
129+ elog (ERROR ,"hash table corrupted" );
130+ }
131+ }
132+ }
133+
134+ /* Forget any invalid pages in a whole database */
135+ static void
136+ forget_invalid_pages_db (Oid dbid )
137+ {
138+ HASH_SEQ_STATUS status ;
139+ xl_invalid_page * hentry ;
140+
141+ if (invalid_page_tab == NULL )
142+ return ;/* nothing to do */
143+
144+ hash_seq_init (& status ,invalid_page_tab );
145+
146+ while ((hentry = (xl_invalid_page * )hash_seq_search (& status ))!= NULL )
147+ {
148+ if (hentry -> key .node .dbNode == dbid )
149+ {
150+ elog (DEBUG2 ,"page %u of relation %u/%u/%u has been dropped" ,
151+ hentry -> key .blkno ,hentry -> key .node .spcNode ,
152+ hentry -> key .node .dbNode ,hentry -> key .node .relNode );
153+
154+ if (hash_search (invalid_page_tab ,
155+ (void * )& hentry -> key ,
156+ HASH_REMOVE ,NULL )== NULL )
157+ elog (ERROR ,"hash table corrupted" );
158+ }
159+ }
160+ }
161+
162+ /* Complain about any remaining invalid-page entries */
163+ void
164+ XLogCheckInvalidPages (void )
165+ {
166+ HASH_SEQ_STATUS status ;
167+ xl_invalid_page * hentry ;
168+ bool foundone = false;
169+
170+ if (invalid_page_tab == NULL )
171+ return ;/* nothing to do */
172+
173+ hash_seq_init (& status ,invalid_page_tab );
174+
175+ /*
176+ * Our strategy is to emit WARNING messages for all remaining entries
177+ * and only PANIC after we've dumped all the available info.
178+ */
179+ while ((hentry = (xl_invalid_page * )hash_seq_search (& status ))!= NULL )
180+ {
181+ if (hentry -> present )
182+ elog (WARNING ,"page %u of relation %u/%u/%u was uninitialized" ,
183+ hentry -> key .blkno ,hentry -> key .node .spcNode ,
184+ hentry -> key .node .dbNode ,hentry -> key .node .relNode );
185+ else
186+ elog (WARNING ,"page %u of relation %u/%u/%u did not exist" ,
187+ hentry -> key .blkno ,hentry -> key .node .spcNode ,
188+ hentry -> key .node .dbNode ,hentry -> key .node .relNode );
189+ foundone = true;
190+ }
191+
192+ if (foundone )
193+ elog (PANIC ,"WAL contains references to invalid pages" );
194+ }
195+
196+
27197/*
28198 * XLogReadBuffer
29199 *Read a page during XLOG replay
40210 * the page being "new" (all zeroes).
41211 *
42212 * If "init" is false then the caller needs the page to be valid already.
43- * If the page doesn't exist or contains zeroes, we report failure.
44- *
45- * If the return value is InvalidBuffer (only possible when init = false),
46- * the caller should silently skip the update on this page. This currently
47- * never happens, but we retain it as part of the API spec for possible future
48- * use.
213+ * If the page doesn't exist or contains zeroes, we return InvalidBuffer.
214+ * In this case the caller should silently skip the update on this page.
215+ * (In this situation, we expect that the page was later dropped or truncated.
216+ * If we don't see evidence of that later in the WAL sequence, we'll complain
217+ * at the end of WAL replay.)
49218 */
50219Buffer
51220XLogReadBuffer (Relation reln ,BlockNumber blkno ,bool init )
@@ -64,9 +233,10 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
64233{
65234/* hm, page doesn't exist in file */
66235if (!init )
67- elog (PANIC ,"block %u of relation %u/%u/%u does not exist" ,
68- blkno ,reln -> rd_node .spcNode ,
69- reln -> rd_node .dbNode ,reln -> rd_node .relNode );
236+ {
237+ log_invalid_page (reln -> rd_node ,blkno , false);
238+ return InvalidBuffer ;
239+ }
70240/* OK to extend the file */
71241/* we do this in recovery only - no rel-extension lock needed */
72242Assert (InRecovery );
@@ -89,9 +259,11 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
89259Page page = (Page )BufferGetPage (buffer );
90260
91261if (PageIsNew ((PageHeader )page ))
92- elog (PANIC ,"block %u of relation %u/%u/%u is uninitialized" ,
93- blkno ,reln -> rd_node .spcNode ,
94- reln -> rd_node .dbNode ,reln -> rd_node .relNode );
262+ {
263+ UnlockReleaseBuffer (buffer );
264+ log_invalid_page (reln -> rd_node ,blkno , true);
265+ return InvalidBuffer ;
266+ }
95267}
96268
97269return buffer ;
195367XLogInitRelationCache (void )
196368{
197369_xl_init_rel_cache ();
370+ invalid_page_tab = NULL ;
198371}
199372
200373void
@@ -300,22 +473,26 @@ XLogOpenRelation(RelFileNode rnode)
300473 *
301474 * Currently, we don't bother to physically remove the relation from the
302475 * cache, we just let it age out normally.
476+ *
477+ * This also takes care of removing any open "invalid-page" records for
478+ * the relation.
303479 */
304480void
305481XLogDropRelation (RelFileNode rnode )
306482{
307- XLogRelDesc * rdesc ;
308483XLogRelCacheEntry * hentry ;
309484
310485hentry = (XLogRelCacheEntry * )
311486hash_search (_xlrelcache , (void * )& rnode ,HASH_FIND ,NULL );
312487
313- if (!hentry )
314- return ;/* not in cache so no work */
488+ if (hentry )
489+ {
490+ XLogRelDesc * rdesc = hentry -> rdesc ;
315491
316- rdesc = hentry -> rdesc ;
492+ RelationCloseSmgr (& (rdesc -> reldata ));
493+ }
317494
318- RelationCloseSmgr ( & ( rdesc -> reldata ) );
495+ forget_invalid_pages ( rnode , 0 );
319496}
320497
321498/*
@@ -338,4 +515,18 @@ XLogDropDatabase(Oid dbid)
338515if (hentry -> rnode .dbNode == dbid )
339516RelationCloseSmgr (& (rdesc -> reldata ));
340517}
518+
519+ forget_invalid_pages_db (dbid );
520+ }
521+
522+ /*
523+ * Truncate a relation during XLOG replay
524+ *
525+ * We don't need to do anything to the fake relcache, but we do need to
526+ * clean up any open "invalid-page" records for the dropped pages.
527+ */
528+ void
529+ XLogTruncateRelation (RelFileNode rnode ,BlockNumber nblocks )
530+ {
531+ forget_invalid_pages (rnode ,nblocks );
341532}