Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7087166

Browse files
committed
pg_upgrade: Convert old visibility map format to new format.
Commit a892234 added a second bit per page to the visibility map, but pg_upgrade has been unaware of it up until now. Therefore, a pg_upgrade from an earlier major release of PostgreSQL to any commit preceding this one and following the one mentioned above would result in invalid visibility map contents on the new cluster, very possibly leading to data corruption. This plugs that hole. Masahiko Sawada, reviewed by Jeff Janes, Bruce Momjian, Simon Riggs, Michael Paquier, Andres Freund, me, and others.
1 parent 9118d03 commit 7087166

File tree

3 files changed

+197
-11
lines changed

3 files changed

+197
-11
lines changed

‎src/bin/pg_upgrade/file.c

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,16 @@
99

1010
#include"postgres_fe.h"
1111

12+
#include"access/visibilitymap.h"
1213
#include"pg_upgrade.h"
14+
#include"storage/bufpage.h"
15+
#include"storage/checksum.h"
16+
#include"storage/checksum_impl.h"
1317

18+
#include<sys/stat.h>
1419
#include<fcntl.h>
1520

21+
#defineBITS_PER_HEAPBLOCK_OLD 1
1622

1723

1824
#ifndefWIN32
@@ -138,6 +144,154 @@ copy_file(const char *srcfile, const char *dstfile, bool force)
138144
#endif
139145

140146

147+
/*
148+
* rewriteVisibilityMap()
149+
*
150+
* In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
151+
* visibility map included one bit per heap page; it now includes two.
152+
* When upgrading a cluster from before that time to a current PostgreSQL
153+
* version, we could refuse to copy visibility maps from the old cluster
154+
* to the new cluster; the next VACUUM would recreate them, but at the
155+
* price of scanning the entire table. So, instead, we rewrite the old
156+
* visibility maps in the new format. That way, the all-visible bit
157+
* remains set for the pages for which it was set previously. The
158+
* all-frozen bit is never set by this conversion; we leave that to
159+
* VACUUM.
160+
*/
161+
constchar*
162+
rewriteVisibilityMap(constchar*fromfile,constchar*tofile,boolforce)
163+
{
164+
intsrc_fd=0;
165+
intdst_fd=0;
166+
charbuffer[BLCKSZ];
167+
ssize_tbytesRead;
168+
ssize_tsrc_filesize;
169+
intrewriteVmBytesPerPage;
170+
BlockNumbernew_blkno=0;
171+
structstatstatbuf;
172+
173+
/* Compute we need how many old page bytes to rewrite a new page */
174+
rewriteVmBytesPerPage= (BLCKSZ-SizeOfPageHeaderData) /2;
175+
176+
if ((fromfile==NULL)|| (tofile==NULL))
177+
return"Invalid old file or new file";
178+
179+
if ((src_fd=open(fromfile,O_RDONLY,0))<0)
180+
returngetErrorText();
181+
182+
if (fstat(src_fd,&statbuf)!=0)
183+
{
184+
close(src_fd);
185+
returngetErrorText();
186+
}
187+
188+
if ((dst_fd=open(tofile,O_RDWR |O_CREAT | (force ?0 :O_EXCL),S_IRUSR |S_IWUSR))<0)
189+
{
190+
close(src_fd);
191+
returngetErrorText();
192+
}
193+
194+
/* Save old file size */
195+
src_filesize=statbuf.st_size;
196+
197+
/*
198+
* Turn each visibility map page into 2 pages one by one. Each new page
199+
* has the same page header as the old one. If the last section of last
200+
* page is empty, we skip it, mostly to avoid turning one-page visibility
201+
* maps for small relations into two pages needlessly.
202+
*/
203+
while ((bytesRead=read(src_fd,buffer,BLCKSZ))==BLCKSZ)
204+
{
205+
char*old_cur;
206+
char*old_break;
207+
char*old_blkend;
208+
PageHeaderDatapageheader;
209+
boolold_lastblk= ((BLCKSZ* (new_blkno+1))==src_filesize);
210+
211+
/* Save the page header data */
212+
memcpy(&pageheader,buffer,SizeOfPageHeaderData);
213+
214+
/*
215+
* These old_* variables point to old visibility map page. old_cur
216+
* points to current position on old page. old_blkend points to end of
217+
* old block. old_break points to old page break position for
218+
* rewriting a new page. After wrote a new page, old_break proceeds
219+
* rewriteVmBytesPerPage bytes.
220+
*/
221+
old_cur=buffer+SizeOfPageHeaderData;
222+
old_blkend=buffer+bytesRead;
223+
old_break=old_cur+rewriteVmBytesPerPage;
224+
225+
while (old_blkend >=old_break)
226+
{
227+
charnew_vmbuf[BLCKSZ];
228+
char*new_cur=new_vmbuf;
229+
boolempty= true;
230+
boolold_lastpart;
231+
232+
/* Copy page header in advance */
233+
memcpy(new_vmbuf,&pageheader,SizeOfPageHeaderData);
234+
235+
/* Rewrite the last part of the old page? */
236+
old_lastpart=old_lastblk&& (old_blkend==old_break);
237+
238+
new_cur+=SizeOfPageHeaderData;
239+
240+
/* Process old page bytes one by one, and turn it into new page. */
241+
while (old_break>old_cur)
242+
{
243+
uint16new_vmbits=0;
244+
inti;
245+
246+
/* Generate new format bits while keeping old information */
247+
for (i=0;i<BITS_PER_BYTE;i++)
248+
{
249+
uint8byte=*(uint8*)old_cur;
250+
251+
if (byte& (1 << (BITS_PER_HEAPBLOCK_OLD*i)))
252+
{
253+
empty= false;
254+
new_vmbits |=1 << (BITS_PER_HEAPBLOCK*i);
255+
}
256+
}
257+
258+
/* Copy new visibility map bit to new format page */
259+
memcpy(new_cur,&new_vmbits,BITS_PER_HEAPBLOCK);
260+
261+
old_cur+=BITS_PER_HEAPBLOCK_OLD;
262+
new_cur+=BITS_PER_HEAPBLOCK;
263+
}
264+
265+
/* If the last part of the old page is empty, skip to write it */
266+
if (old_lastpart&&empty)
267+
break;
268+
269+
/* Set new checksum for a visibility map page (if enabled) */
270+
if (old_cluster.controldata.data_checksum_version!=0&&
271+
new_cluster.controldata.data_checksum_version!=0)
272+
((PageHeader)new_vmbuf)->pd_checksum=
273+
pg_checksum_page(new_vmbuf,new_blkno);
274+
275+
if (write(dst_fd,new_vmbuf,BLCKSZ)!=BLCKSZ)
276+
{
277+
close(dst_fd);
278+
close(src_fd);
279+
returngetErrorText();
280+
}
281+
282+
old_break+=rewriteVmBytesPerPage;
283+
new_blkno++;
284+
}
285+
}
286+
287+
/* Close files */
288+
close(dst_fd);
289+
close(src_fd);
290+
291+
returnNULL;
292+
293+
}
294+
141295
void
142296
check_hard_link(void)
143297
{

‎src/bin/pg_upgrade/pg_upgrade.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ extern char *output_files[];
109109
*/
110110
#defineVISIBILITY_MAP_CRASHSAFE_CAT_VER 201107031
111111

112+
/*
113+
* The format of the visibility map changed with this 9.6 commit.
114+
*/
115+
#defineVISIBILITY_MAP_FROZEN_BIT_CAT_VER 201603011
112116
/*
113117
* pg_multixact format changed in 9.3 commit 0ac5ad5134f2769ccbaefec73844f85,
114118
* ("Improve concurrency of foreign key locking") which also updated catalog
@@ -365,6 +369,8 @@ boolpid_lock_file_exists(const char *datadir);
365369

366370
constchar*copyFile(constchar*src,constchar*dst,boolforce);
367371
constchar*linkFile(constchar*src,constchar*dst);
372+
constchar*rewriteVisibilityMap(constchar*fromfile,constchar*tofile,
373+
boolforce);
368374

369375
voidcheck_hard_link(void);
370376
FILE*fopen_priv(constchar*path,constchar*mode);

‎src/bin/pg_upgrade/relfilenode.c

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@
1111

1212
#include"pg_upgrade.h"
1313

14+
#include<sys/stat.h>
1415
#include"catalog/pg_class.h"
1516
#include"access/transam.h"
1617

1718

1819
staticvoidtransfer_single_new_db(FileNameMap*maps,intsize,char*old_tablespace);
19-
staticvoidtransfer_relfile(FileNameMap*map,constchar*suffix);
20+
staticvoidtransfer_relfile(FileNameMap*map,constchar*suffix,boolvm_must_add_frozenbit);
2021

2122

2223
/*
@@ -132,6 +133,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
132133
{
133134
intmapnum;
134135
boolvm_crashsafe_match= true;
136+
boolvm_must_add_frozenbit= false;
135137

136138
/*
137139
* Do the old and new cluster disagree on the crash-safetiness of the vm
@@ -141,23 +143,30 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
141143
new_cluster.controldata.cat_ver >=VISIBILITY_MAP_CRASHSAFE_CAT_VER)
142144
vm_crashsafe_match= false;
143145

146+
/*
147+
* Do we need to rewrite visibilitymap?
148+
*/
149+
if (old_cluster.controldata.cat_ver<VISIBILITY_MAP_FROZEN_BIT_CAT_VER&&
150+
new_cluster.controldata.cat_ver >=VISIBILITY_MAP_FROZEN_BIT_CAT_VER)
151+
vm_must_add_frozenbit= true;
152+
144153
for (mapnum=0;mapnum<size;mapnum++)
145154
{
146155
if (old_tablespace==NULL||
147156
strcmp(maps[mapnum].old_tablespace,old_tablespace)==0)
148157
{
149158
/* transfer primary file */
150-
transfer_relfile(&maps[mapnum],"");
159+
transfer_relfile(&maps[mapnum],"",vm_must_add_frozenbit);
151160

152161
/* fsm/vm files added in PG 8.4 */
153162
if (GET_MAJOR_VERSION(old_cluster.major_version) >=804)
154163
{
155164
/*
156165
* Copy/link any fsm and vm files, if they exist
157166
*/
158-
transfer_relfile(&maps[mapnum],"_fsm");
167+
transfer_relfile(&maps[mapnum],"_fsm",vm_must_add_frozenbit);
159168
if (vm_crashsafe_match)
160-
transfer_relfile(&maps[mapnum],"_vm");
169+
transfer_relfile(&maps[mapnum],"_vm",vm_must_add_frozenbit);
161170
}
162171
}
163172
}
@@ -167,17 +176,19 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
167176
/*
168177
* transfer_relfile()
169178
*
170-
* Copy or link file from old cluster to new one.
179+
* Copy or link file from old cluster to new one. If vm_must_add_frozenbit
180+
* is true, visibility map forks are converted and rewritten, even in link
181+
* mode.
171182
*/
172183
staticvoid
173-
transfer_relfile(FileNameMap*map,constchar*type_suffix)
184+
transfer_relfile(FileNameMap*map,constchar*type_suffix,boolvm_must_add_frozenbit)
174185
{
175186
constchar*msg;
176187
charold_file[MAXPGPATH];
177188
charnew_file[MAXPGPATH];
178-
intfd;
179189
intsegno;
180190
charextent_suffix[65];
191+
structstatstatbuf;
181192

182193
/*
183194
* Now copy/link any related segments as well. Remember, PG breaks large
@@ -210,7 +221,7 @@ transfer_relfile(FileNameMap *map, const char *type_suffix)
210221
if (type_suffix[0]!='\0'||segno!=0)
211222
{
212223
/* Did file open fail? */
213-
if ((fd=open(old_file,O_RDONLY,0))==-1)
224+
if (stat(old_file,&statbuf)!=0)
214225
{
215226
/* File does not exist? That's OK, just return */
216227
if (errno==ENOENT)
@@ -220,7 +231,10 @@ transfer_relfile(FileNameMap *map, const char *type_suffix)
220231
map->nspname,map->relname,old_file,new_file,
221232
getErrorText());
222233
}
223-
close(fd);
234+
235+
/* If file is empty, just return */
236+
if (statbuf.st_size==0)
237+
return;
224238
}
225239

226240
unlink(new_file);
@@ -232,15 +246,27 @@ transfer_relfile(FileNameMap *map, const char *type_suffix)
232246
{
233247
pg_log(PG_VERBOSE,"copying \"%s\" to \"%s\"\n",old_file,new_file);
234248

235-
if ((msg=copyFile(old_file,new_file, true))!=NULL)
249+
/* Rewrite visibility map if needed */
250+
if (vm_must_add_frozenbit&& (strcmp(type_suffix,"_vm")==0))
251+
msg=rewriteVisibilityMap(old_file,new_file, true);
252+
else
253+
msg=copyFile(old_file,new_file, true);
254+
255+
if (msg)
236256
pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
237257
map->nspname,map->relname,old_file,new_file,msg);
238258
}
239259
else
240260
{
241261
pg_log(PG_VERBOSE,"linking \"%s\" to \"%s\"\n",old_file,new_file);
242262

243-
if ((msg=linkFile(old_file,new_file))!=NULL)
263+
/* Rewrite visibility map if needed */
264+
if (vm_must_add_frozenbit&& (strcmp(type_suffix,"_vm")==0))
265+
msg=rewriteVisibilityMap(old_file,new_file, true);
266+
else
267+
msg=linkFile(old_file,new_file);
268+
269+
if (msg)
244270
pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
245271
map->nspname,map->relname,old_file,new_file,msg);
246272
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp