Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitd8cd0c6

Browse files
committed
Remove the restriction that the relmap must be 512 bytes.
Instead of relying on the ability to atomically overwrite theentire relmap file in one shot, write a new one and durablyrename it into place. Removing the struct padding and thecalculation showing why the map is exactly 512 bytes, and changethe maximum number of entries to a nearby round number.Patch by me, reviewed by Andres Freund and Dilip Kumar.Discussion:http://postgr.es/m/CA+TgmoZq5%3DLWDK7kHaUbmWXxcaTuw_QwafgG9dr-BaPym_U8WQ%40mail.gmail.comDiscussion:http://postgr.es/m/CAFiTN-ttOXLX75k_WzRo9ar=VvxFhrHi+rJxns997F+yvkm==A@mail.gmail.com
1 parente530be2 commitd8cd0c6

File tree

4 files changed

+58
-46
lines changed

4 files changed

+58
-46
lines changed

‎doc/src/sgml/monitoring.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,8 +1409,8 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
14091409
<entry>Waiting for a read of the relation map file.</entry>
14101410
</row>
14111411
<row>
1412-
<entry><literal>RelationMapSync</literal></entry>
1413-
<entry>Waiting forthe relation map file to reach durable storage.</entry>
1412+
<entry><literal>RelationMapReplace</literal></entry>
1413+
<entry>Waiting fordurable replacement of a relation map file.</entry>
14141414
</row>
14151415
<row>
14161416
<entry><literal>RelationMapWrite</literal></entry>

‎src/backend/utils/activity/wait_event.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -633,8 +633,8 @@ pgstat_get_wait_io(WaitEventIO w)
633633
caseWAIT_EVENT_RELATION_MAP_READ:
634634
event_name="RelationMapRead";
635635
break;
636-
caseWAIT_EVENT_RELATION_MAP_SYNC:
637-
event_name="RelationMapSync";
636+
caseWAIT_EVENT_RELATION_MAP_REPLACE:
637+
event_name="RelationMapReplace";
638638
break;
639639
caseWAIT_EVENT_RELATION_MAP_WRITE:
640640
event_name="RelationMapWrite";

‎src/backend/utils/cache/relmapper.c

Lines changed: 53 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -60,21 +60,26 @@
6060
/*
6161
* The map file is critical data: we have no automatic method for recovering
6262
* from loss or corruption of it. We use a CRC so that we can detect
63-
* corruption. To minimize the risk of failed updates, the map file should
64-
* be kept to no more than one standard-size disk sector (ie 512 bytes),
65-
* and we use overwrite-in-place rather than playing renaming games.
66-
* The struct layout below is designed to occupy exactly 512 bytes, which
67-
* might make filesystem updates a bit more efficient.
63+
* corruption. Since the file might be more than one standard-size disk
64+
* sector in size, we cannot rely on overwrite-in-place. Instead, we generate
65+
* a new file and rename it into place, atomically replacing the original file.
6866
*
6967
* Entries in the mappings[] array are in no particular order. We could
7068
* speed searching by insisting on OID order, but it really shouldn't be
7169
* worth the trouble given the intended size of the mapping sets.
7270
*/
7371
#defineRELMAPPER_FILENAME"pg_filenode.map"
72+
#defineRELMAPPER_TEMP_FILENAME"pg_filenode.map.tmp"
7473

7574
#defineRELMAPPER_FILEMAGIC0x592717/* version ID value */
7675

77-
#defineMAX_MAPPINGS62/* 62 * 8 + 16 = 512 */
76+
/*
77+
* There's no need for this constant to have any particular value, and we
78+
* can raise it as necessary if we end up with more mapped relations. For
79+
* now, we just pick a round number that is modestly larger than the expected
80+
* number of mappings.
81+
*/
82+
#defineMAX_MAPPINGS64
7883

7984
typedefstructRelMapping
8085
{
@@ -88,7 +93,6 @@ typedef struct RelMapFile
8893
int32num_mappings;/* number of valid RelMapping entries */
8994
RelMappingmappings[MAX_MAPPINGS];
9095
pg_crc32ccrc;/* CRC of all above */
91-
int32pad;/* to make the struct size be 512 exactly */
9296
}RelMapFile;
9397

9498
/*
@@ -877,6 +881,7 @@ write_relmap_file(RelMapFile *newmap, bool write_wal, bool send_sinval,
877881
{
878882
intfd;
879883
charmapfilename[MAXPGPATH];
884+
charmaptempfilename[MAXPGPATH];
880885

881886
/*
882887
* Fill in the overhead fields and update CRC.
@@ -890,17 +895,47 @@ write_relmap_file(RelMapFile *newmap, bool write_wal, bool send_sinval,
890895
FIN_CRC32C(newmap->crc);
891896

892897
/*
893-
*Open the targetfile. We prefer todo this before entering the
894-
*critical section, so that an open() failure need not force PANIC.
898+
*Construct filenames -- a temporaryfile that we'll create towrite the
899+
*data initially, and then the permanent name to which we will rename it.
895900
*/
896901
snprintf(mapfilename,sizeof(mapfilename),"%s/%s",
897902
dbpath,RELMAPPER_FILENAME);
898-
fd=OpenTransientFile(mapfilename,O_WRONLY |O_CREAT |PG_BINARY);
903+
snprintf(maptempfilename,sizeof(maptempfilename),"%s/%s",
904+
dbpath,RELMAPPER_TEMP_FILENAME);
905+
906+
/*
907+
* Open a temporary file. If a file already exists with this name, it must
908+
* be left over from a previous crash, so we can overwrite it. Concurrent
909+
* calls to this function are not allowed.
910+
*/
911+
fd=OpenTransientFile(maptempfilename,
912+
O_WRONLY |O_CREAT |O_TRUNC |PG_BINARY);
899913
if (fd<0)
900914
ereport(ERROR,
901915
(errcode_for_file_access(),
902916
errmsg("could not open file \"%s\": %m",
903-
mapfilename)));
917+
maptempfilename)));
918+
919+
/* Write new data to the file. */
920+
pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_WRITE);
921+
if (write(fd,newmap,sizeof(RelMapFile))!=sizeof(RelMapFile))
922+
{
923+
/* if write didn't set errno, assume problem is no disk space */
924+
if (errno==0)
925+
errno=ENOSPC;
926+
ereport(ERROR,
927+
(errcode_for_file_access(),
928+
errmsg("could not write file \"%s\": %m",
929+
maptempfilename)));
930+
}
931+
pgstat_report_wait_end();
932+
933+
/* And close the file. */
934+
if (CloseTransientFile(fd)!=0)
935+
ereport(ERROR,
936+
(errcode_for_file_access(),
937+
errmsg("could not close file \"%s\": %m",
938+
maptempfilename)));
904939

905940
if (write_wal)
906941
{
@@ -924,40 +959,17 @@ write_relmap_file(RelMapFile *newmap, bool write_wal, bool send_sinval,
924959
XLogFlush(lsn);
925960
}
926961

927-
errno=0;
928-
pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_WRITE);
929-
if (write(fd,newmap,sizeof(RelMapFile))!=sizeof(RelMapFile))
930-
{
931-
/* if write didn't set errno, assume problem is no disk space */
932-
if (errno==0)
933-
errno=ENOSPC;
934-
ereport(ERROR,
935-
(errcode_for_file_access(),
936-
errmsg("could not write file \"%s\": %m",
937-
mapfilename)));
938-
}
939-
pgstat_report_wait_end();
940-
941962
/*
942-
* We choose to fsync the data to disk before considering the task done.
943-
* It would be possible to relax this if it turns out to be a performance
944-
* issue, but it would complicate checkpointing --- see notes for
945-
* CheckPointRelationMap.
963+
* durable_rename() does all the hard work of making sure that we rename
964+
* the temporary file into place in a crash-safe manner.
965+
*
966+
* NB: Although we instruct durable_rename() to use ERROR, we will often
967+
* be in a critical section at this point; if so, ERROR will become PANIC.
946968
*/
947-
pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_SYNC);
948-
if (pg_fsync(fd)!=0)
949-
ereport(data_sync_elevel(ERROR),
950-
(errcode_for_file_access(),
951-
errmsg("could not fsync file \"%s\": %m",
952-
mapfilename)));
969+
pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_REPLACE);
970+
durable_rename(maptempfilename,mapfilename,ERROR);
953971
pgstat_report_wait_end();
954972

955-
if (CloseTransientFile(fd)!=0)
956-
ereport(ERROR,
957-
(errcode_for_file_access(),
958-
errmsg("could not close file \"%s\": %m",
959-
mapfilename)));
960-
961973
/*
962974
* Now that the file is safely on disk, send sinval message to let other
963975
* backends know to re-read it. We must do this inside the critical

‎src/include/utils/wait_event.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ typedef enum
194194
WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE,
195195
WAIT_EVENT_LOGICAL_REWRITE_WRITE,
196196
WAIT_EVENT_RELATION_MAP_READ,
197-
WAIT_EVENT_RELATION_MAP_SYNC,
197+
WAIT_EVENT_RELATION_MAP_REPLACE,
198198
WAIT_EVENT_RELATION_MAP_WRITE,
199199
WAIT_EVENT_REORDER_BUFFER_READ,
200200
WAIT_EVENT_REORDER_BUFFER_WRITE,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp