Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a25ca5c

Browse files
author
Sokolov Yura
committed
cfs: if many pages were modified between gc passes, do whole gc under lock
1 parent 90757f9 commit a25ca5c

File tree

2 files changed

+122
-91
lines changed

2 files changed

+122
-91
lines changed

‎src/backend/storage/file/cfs.c

Lines changed: 121 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -770,6 +770,65 @@ typedef enum {
770770
CFS_IMPLICIT
771771
}GC_CALL_KIND;
772772

773+
staticboolcfs_copy_inodes(inode_t**inodes,intn_nodes,intfd,intfd2,uint32*writeback,uint32*offset,constchar*file_path,constchar*file_bck_path)
774+
{
775+
charblock[BLCKSZ];
776+
uint32size,offs;
777+
inti;
778+
off_tsoff=-1;
779+
780+
/* sort inodes by offset to improve read locality */
781+
qsort(inodes,n_nodes,sizeof(inode_t*),cfs_cmp_page_offs);
782+
for (i=0;i<n_nodes;i++)
783+
{
784+
size=CFS_INODE_SIZE(*inodes[i]);
785+
if (size!=0)
786+
{
787+
offs=CFS_INODE_OFFS(*inodes[i]);
788+
Assert(size <=BLCKSZ);
789+
if (soff!= (off_t)offs)
790+
{
791+
soff=lseek(fd,offs,SEEK_SET);
792+
Assert(soff==offs);
793+
}
794+
795+
if (!cfs_read_file(fd,block,size))
796+
{
797+
elog(WARNING,"CFS GC failed to read block %u of file %s at position %u size %u: %m",
798+
i,file_path,offs,size);
799+
return false;
800+
}
801+
soff+=size;
802+
803+
if (!cfs_write_file(fd2,block,size))
804+
{
805+
elog(WARNING,"CFS failed to write file %s: %m",file_bck_path);
806+
return false;
807+
}
808+
cfs_state->gc_stat.processedBytes+=size;
809+
cfs_state->gc_stat.processedPages+=1;
810+
811+
offs=*offset;
812+
*offset+=size;
813+
*inodes[i]=CFS_INODE(size,offs);
814+
815+
/* xfs doesn't like if writeback performed closer than 128k to
816+
* file end */
817+
if (*writeback+16*1024*1024<*offset)
818+
{
819+
uint32newwb= (*offset-128*1024)& ~(128*1024-1);
820+
pg_flush_data(fd2,*writeback,newwb-*writeback);
821+
*writeback=newwb;
822+
}
823+
}
824+
else
825+
{
826+
*inodes[i]=CFS_INODE(0,0);
827+
}
828+
}
829+
return true;
830+
}
831+
773832
/*
774833
* Perform garbage collection (if required) on the file
775834
* @param map_path - path to the map file (*.cfm).
@@ -868,20 +927,20 @@ static bool cfs_gc_file(char* map_path, GC_CALL_KIND background)
868927
/* do we need to perform defragmentation? */
869928
if (physSize>CFS_IMPLICIT_GC_THRESHOLD|| (uint64)(physSize-usedSize)*100> (uint64)physSize*cfs_gc_threshold)
870929
{
871-
charblock[BLCKSZ];
872930
FileMap*newMap= (FileMap*)palloc0(sizeof(FileMap));
873931
uint32newSize=0;
874932
uint32writeback=0;
875933
uint32newUsed=0;
876934
uint32second_pass=0;
935+
uint32second_pass_bytes=0;
877936
inode_t**inodes= (inode_t**)palloc(RELSEG_SIZE*sizeof(inode_t*));
878937
boolremove_backups= true;
879-
intn_pages;
938+
boolsecond_pass_whole= false;
939+
intn_pages,n_pages1;
880940
TimestampTzstartTime,secondTime,endTime;
881941
longsecs,secs2;
882942
intusecs,usecs2;
883943
inti,size;
884-
uint32offs;
885944
pg_atomic_uint32*lock;
886945
off_trcPG_USED_FOR_ASSERTS_ONLY;
887946

@@ -918,6 +977,13 @@ static bool cfs_gc_file(char* map_path, GC_CALL_KIND background)
918977
gotoCleanup;
919978
}
920979

980+
fd=open(file_path,O_RDONLY|PG_BINARY,0);
981+
if (fd<0)
982+
{
983+
elog(WARNING,"CFS failed to open file %s: %m",map_bck_path);
984+
gotoCleanup;
985+
}
986+
921987
/* temporary lock file for fetching map snapshot */
922988
cfs_gc_lock(lock);
923989

@@ -934,62 +1000,12 @@ static bool cfs_gc_file(char* map_path, GC_CALL_KIND background)
9341000
/* may unlock until second phase */
9351001
cfs_gc_unlock(lock);
9361002

937-
/* sort inodes by offset to improve read locality */
938-
qsort(inodes,n_pages,sizeof(inode_t*),cfs_cmp_page_offs);
939-
940-
fd=open(file_path,O_RDONLY|PG_BINARY,0);
941-
if (fd<0)
942-
{
943-
elog(WARNING,"CFS failed to open file %s: %m",map_bck_path);
944-
gotoCleanup;
945-
}
946-
9471003
cfs_state->gc_stat.processedFiles+=1;
9481004
cfs_gc_processed_segments+=1;
9491005

950-
for (i=0;i<n_pages;i++)
951-
{
952-
size=CFS_INODE_SIZE(*inodes[i]);
953-
if (size!=0)
954-
{
955-
offs=CFS_INODE_OFFS(*inodes[i]);
956-
Assert(size <=BLCKSZ);
957-
rc=lseek(fd,offs,SEEK_SET);
958-
Assert(rc==offs);
959-
960-
if (!cfs_read_file(fd,block,size))
961-
{
962-
elog(WARNING,"CFS GC failed to read block %u of file %s at position %u size %u: %m",
963-
i,file_path,offs,size);
964-
gotoCleanup;
965-
}
966-
967-
if (!cfs_write_file(fd2,block,size))
968-
{
969-
elog(WARNING,"CFS failed to write file %s: %m",file_bck_path);
970-
gotoCleanup;
971-
}
972-
cfs_state->gc_stat.processedBytes+=size;
973-
cfs_state->gc_stat.processedPages+=1;
974-
975-
offs=newSize;
976-
newSize+=size;
977-
*inodes[i]=CFS_INODE(size,offs);
978-
979-
/* xfs doesn't like if writeback performed closer than 128k to
980-
* file end */
981-
if (writeback+16*1024*1024<newSize)
982-
{
983-
uint32newwb= (newSize-128*1024)& ~(128*1024-1);
984-
pg_flush_data(fd2,writeback,newwb-writeback);
985-
writeback=newwb;
986-
}
987-
}
988-
else
989-
{
990-
*inodes[i]=CFS_INODE(0,0);
991-
}
992-
}
1006+
if (!cfs_copy_inodes(inodes,n_pages,fd,fd2,&writeback,&newSize,
1007+
file_path,file_bck_path))
1008+
gotoCleanup;
9931009
newUsed=newSize;
9941010

9951011
/* Persist bigger part of copy to not do it under lock */
@@ -1009,6 +1025,7 @@ static bool cfs_gc_file(char* map_path, GC_CALL_KIND background)
10091025
cfs_gc_lock(lock);
10101026

10111027
/* Reread variables after locking file */
1028+
n_pages1=n_pages;
10121029
virtSize=pg_atomic_read_u32(&map->hdr.virtSize);
10131030
n_pages=virtSize /BLCKSZ;
10141031

@@ -1025,46 +1042,60 @@ static bool cfs_gc_file(char* map_path, GC_CALL_KIND background)
10251042
}
10261043
newUsed-=CFS_INODE_SIZE(nnode);
10271044
newUsed+=size;
1028-
if (size!=0)
1045+
newMap->inodes[i]=onode;
1046+
inodes[second_pass]=&newMap->inodes[i];
1047+
second_pass_bytes+=size;
1048+
second_pass++;
1049+
}
1050+
1051+
if (n_pages1>n_pages)
1052+
{
1053+
/* if file were truncated (vacuum???), clean a bit */
1054+
for (i=n_pages;i<n_pages1;i++)
10291055
{
1030-
second_pass++;
1031-
offs=CFS_INODE_OFFS(onode);
1056+
inode_tnnode=newMap->inodes[i];
1057+
if (CFS_INODE_SIZE(nnode)!=0) {
1058+
newUsed-=CFS_INODE_SIZE(nnode);
1059+
newMap->inodes[i]=CFS_INODE(0,0);
1060+
}
1061+
}
1062+
}
10321063

1033-
rc=lseek(fd,offs,SEEK_SET);
1034-
Assert(rc== (off_t)offs);
1064+
if ((uint64)(newSize+second_pass_bytes-newUsed)*100>
1065+
(uint64)(newSize+second_pass_bytes)*cfs_gc_threshold)
1066+
{
1067+
/* there were too many modified pages between passes, so it is
1068+
* better to do whole copy again */
1069+
newUsed=0;
1070+
newSize=0;
1071+
writeback=0;
1072+
second_pass_whole= true;
1073+
memset(newMap->inodes,0,sizeof(newMap->inodes));
1074+
for (i=0;i<n_pages;i++)
1075+
{
1076+
newMap->inodes[i]=map->inodes[i];
1077+
newUsed+=CFS_INODE_SIZE(map->inodes[i]);
1078+
inodes[i]=&newMap->inodes[i];
1079+
}
1080+
second_pass=n_pages;
1081+
second_pass_bytes=newUsed;
1082+
}
10351083

1036-
if (!cfs_read_file(fd,block,size))
1037-
{
1038-
elog(WARNING,"CFS GC failed to read block %u of file %s at position %u size %u: %m",
1039-
i,file_path,offs,size);
1040-
gotoCleanup;
1041-
}
1084+
if (!cfs_copy_inodes(inodes,second_pass,fd,fd2,&writeback,&newSize,
1085+
file_path,file_bck_path))
1086+
gotoCleanup;
10421087

1043-
/* copy it without sorting */
1044-
offs=newSize;
1045-
newSize+=size;
1046-
if (!cfs_write_file(fd2,block,size))
1047-
{
1048-
elog(WARNING,"CFS failed to write file %s: %m",file_bck_path);
1049-
gotoCleanup;
1050-
}
1051-
newMap->inodes[i]=CFS_INODE(size,offs);
1088+
pg_flush_data(fd2,writeback,newSize);
10521089

1053-
if (writeback+16*1024*1024<newSize)
1054-
{
1055-
uint32newwb= (newSize-128*1024)& ~(128*1024-1);
1056-
pg_flush_data(fd2,writeback,newwb-writeback);
1057-
writeback=newwb;
1058-
}
1059-
}
1060-
else
1090+
if (second_pass_whole)
1091+
{
1092+
/* truncate file to copied size */
1093+
if (ftruncate(fd2,newSize))
10611094
{
1062-
newMap->inodes[i]=CFS_INODE(0,0);
1095+
elog(WARNING,"CFS failed to truncate file %s: %m",file_bck_path);
1096+
gotoCleanup;
10631097
}
1064-
cfs_state->gc_stat.processedBytes+=size;
1065-
cfs_state->gc_stat.processedPages+=1;
10661098
}
1067-
pg_flush_data(fd2,writeback,newSize);
10681099

10691100
if (close(fd)<0)
10701101
{
@@ -1235,10 +1266,10 @@ static bool cfs_gc_file(char* map_path, GC_CALL_KIND background)
12351266

12361267
if (succeed)
12371268
{
1238-
elog(LOG,"CFS GC worker %d: defragment file %s: old size %u, new size %u, logical size %u, used %u, compression ratio %f, time %ld usec; second pass: pages %u, time %ld"
1269+
elog(LOG,"CFS GC worker %d: defragment file %s: old size %u, new size %u, logical size %u, used %u, compression ratio %f, time %ld usec; second pass: pages %u,bytes %u,time %ld"
12391270
,
12401271
MyProcPid,file_path,physSize,newSize,virtSize,usedSize, (double)virtSize/newSize,
1241-
secs*USECS_PER_SEC+usecs,second_pass,
1272+
secs*USECS_PER_SEC+usecs,second_pass,second_pass_bytes,
12421273
secs2*USECS_PER_SEC+usecs2);
12431274
}
12441275

‎src/backend/storage/file/fd.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2325,7 +2325,7 @@ FileTruncate(File file, off_t offset)
23252325
for (i=offset /BLCKSZ;i<RELSEG_SIZE;i++)
23262326
{
23272327
released+=CFS_INODE_SIZE(map->inodes[i]);
2328-
map->inodes[i]=0;
2328+
map->inodes[i]=CFS_INODE(0,0);
23292329
}
23302330

23312331
pg_atomic_write_u32(&map->hdr.virtSize,offset);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp