Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commita72d35d

Browse files
committed
Reimplement CFS GC lock
1 parentb88747c commita72d35d

File tree

2 files changed

+102
-112
lines changed

2 files changed

+102
-112
lines changed

‎src/backend/storage/file/cfs.c

Lines changed: 101 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -540,86 +540,72 @@ static bool cfs_write_file(int fd, void const* data, uint32 size)
540540
voidcfs_lock_file(FileMap*map,charconst*file_path)
541541
{
542542
longdelay=CFS_LOCK_MIN_TIMEOUT;
543-
intn_attempts=0;
544543

545544
while (true)
546545
{
547-
uint64count=pg_atomic_fetch_add_u32(&map->lock,1);
548-
boolrevokeLock= false;
546+
uint32count=pg_atomic_fetch_add_u32(&map->lock,1);
549547

550548
if (count<CFS_GC_LOCK)
551-
break;
552-
553-
if (InRecovery)
554-
{
555-
revokeLock= true;
556-
}
557-
else
558549
{
559-
if (pg_atomic_unlocked_test_flag(&cfs_state->gc_started))
560-
{
561-
if (++n_attempts>MAX_LOCK_ATTEMPTS)
562-
{
563-
/* So there is GC lock, but no active GC process during MAX_LOCK_ATTEMPTS.
564-
* Most likely it means that GC is crashed (may be together with other postgres processes or even OS)
565-
* without releasing lock. And for some reasons recovery was not performed and this page left locked.
566-
* We should revoke the the lock to allow access to this segment.
567-
*/
568-
revokeLock= true;
569-
elog(WARNING,"CFS revokes lock on file %s\n",file_path);
570-
}
571-
}
572-
else
573-
{
574-
n_attempts=0;/* Reset counter of attempts because GC is in progress */
575-
}
550+
/* No GC is active for this segment */
551+
break;
576552
}
577-
if (revokeLock
578-
/* use gc_started flag to prevent race condition with other backends and GC */
579-
&&pg_atomic_test_set_flag(&cfs_state->gc_started))
580-
{
581-
/* Ugggh... looks like last GC was interrupted.
582-
* Try to recover the file.
583-
*/
584-
char*map_bck_path=psprintf("%s.cfm.bck",file_path);
585-
char*file_bck_path=psprintf("%s.bck",file_path);
586553

587-
elog(WARNING,"CFS indicates that GC of %s was interrupted: try to perform recovery",file_path);
554+
if (pg_atomic_read_u32(&cfs_state->n_active_gc)==0)
555+
{
556+
/* There is no active GC, so lock is set by crashed GC */
588557

589-
if (access(file_bck_path,R_OK)!=0)
590-
{
591-
/* There is no backup file: new map should be constructed */
592-
intmd2=open(map_bck_path,O_RDWR|PG_BINARY,0);
593-
if (md2 >=0)
594-
{
595-
/* Recover map. */
596-
if (!cfs_read_file(md2,map,sizeof(FileMap)))
597-
elog(WARNING,"CFS failed to read file %s: %m",map_bck_path);
558+
LWLockAcquire(CfsGcLock,LW_EXCLUSIVE);/* Prevent race condition with GC */
598559

599-
close(md2);
600-
}
601-
}
602-
else
560+
/* Recheck under CfsGcLock that map->lock was not released */
561+
if (pg_atomic_read_u32(&map->lock) >=CFS_GC_LOCK)
603562
{
604-
/* Presence of backup file means that we still have
605-
* unchanged data and map files. Just remove backup files and
606-
* revoke GC lock.
563+
/* Uhhh... looks like last GC was interrupted.
564+
* Try to recover the file.
607565
*/
608-
unlink(file_bck_path);
609-
unlink(map_bck_path);
566+
char*map_bck_path=psprintf("%s.cfm.bck",file_path);
567+
char*file_bck_path=psprintf("%s.bck",file_path);
568+
569+
elog(WARNING,"CFS indicates that GC of %s was interrupted: try to perform recovery",file_path);
570+
571+
if (access(file_bck_path,R_OK)!=0)
572+
{
573+
/* There is no backup file: new map should be constructed */
574+
intmd2=open(map_bck_path,O_RDWR|PG_BINARY,0);
575+
if (md2 >=0)
576+
{
577+
/* Recover map. */
578+
if (!cfs_read_file(md2,map,sizeof(FileMap)))
579+
elog(WARNING,"CFS failed to read file %s: %m",map_bck_path);
580+
581+
close(md2);
582+
}
583+
}
584+
else
585+
{
586+
/* Presence of backup file means that we still have
587+
* unchanged data and map files. Just remove backup files and
588+
* revoke GC lock.
589+
*/
590+
unlink(file_bck_path);
591+
unlink(map_bck_path);
592+
}
593+
594+
count=pg_atomic_fetch_sub_u32(&map->lock,CFS_GC_LOCK);/* revoke GC lock */
595+
Assert((int)count>0);
596+
pfree(file_bck_path);
597+
pfree(map_bck_path);
610598
}
611-
612-
pg_atomic_clear_flag(&cfs_state->gc_started);
613-
count=pg_atomic_fetch_sub_u32(&map->lock,CFS_GC_LOCK);/* revoke GC lock */
614-
Assert((int)count>0);
615-
pfree(file_bck_path);
616-
pfree(map_bck_path);
599+
LWLockRelease(CfsGcLock);
617600
break;
618-
}
601+
}
602+
/* Wait until GC of segment is completed */
619603
pg_atomic_fetch_sub_u32(&map->lock,1);
620604
pg_usleep(delay);
621605
if (delay<CFS_LOCK_MAX_TIMEOUT)
606+
{
622607
delay *=2;
608+
}
623609
}
624610

625611
if (IsUnderPostmaster&&cfs_gc_workers!=0
@@ -649,11 +635,11 @@ static int cfs_cmp_page_offs(void const* p1, void const* p2)
649635
/*
650636
* Perform garbage collection (if required) on the file
651637
* @param map_path - path to the map file (*.cfm).
652-
* @paramnoerror -surpress error message (when this function is calledbycfs_gc_relation until there are available segments)
638+
* @parambacground -GC is performed in backgroundbyBGW: surpress error message and set CfsGcLock
653639
*/
654-
staticboolcfs_gc_file(char*map_path,boolnoerror)
640+
staticboolcfs_gc_file(char*map_path,boolbackground)
655641
{
656-
intmd=open(map_path,O_RDWR|PG_BINARY,0);
642+
intmd;
657643
FileMap*map;
658644
uint32physSize;
659645
uint32usedSize;
@@ -663,29 +649,33 @@ static bool cfs_gc_file(char* map_path, bool noerror)
663649
intfd2=-1;
664650
intmd2=-1;
665651
boolsucceed= false;
652+
intrc;
653+
666654

667655
pg_atomic_fetch_add_u32(&cfs_state->n_active_gc,1);
668656

669-
while (!cfs_state->gc_enabled)
657+
if (background)
670658
{
671-
intrc;
672-
673-
pg_atomic_fetch_sub_u32(&cfs_state->n_active_gc,1);
674-
675-
rc=WaitLatch(MyLatch,
676-
WL_TIMEOUT |WL_POSTMASTER_DEATH,
677-
CFS_DISABLE_TIMEOUT/* ms */);
678-
if (cfs_gc_stop|| (rc&WL_POSTMASTER_DEATH))
679-
exit(1);
659+
while (!cfs_state->gc_enabled)
660+
{
661+
pg_atomic_fetch_sub_u32(&cfs_state->n_active_gc,1);
662+
663+
rc=WaitLatch(MyLatch,
664+
WL_TIMEOUT |WL_POSTMASTER_DEATH,
665+
CFS_DISABLE_TIMEOUT/* ms */);
666+
if (cfs_gc_stop|| (rc&WL_POSTMASTER_DEATH))
667+
exit(1);
668+
669+
pg_atomic_fetch_add_u32(&cfs_state->n_active_gc,1);
670+
}
680671

681-
pg_atomic_fetch_add_u32(&cfs_state->n_active_gc,1);
672+
LWLockAcquire(CfsGcLock,LW_SHARED);/* avoid race condition with cfs_file_lock */
682673
}
683674

675+
md=open(map_path,O_RDWR|PG_BINARY,0);
684676
if (md<0)
685677
{
686-
if (!noerror) {
687-
elog(WARNING,"CFS failed to open map file %s: %m",map_path);
688-
}
678+
elog(DEBUG1,"CFS failed to open map file %s: %m",map_path);
689679
gotoFinishGC;
690680
}
691681

@@ -1032,7 +1022,12 @@ static bool cfs_gc_file(char* map_path, bool noerror)
10321022
}
10331023

10341024
FinishGC:
1025+
if (background)
1026+
{
1027+
LWLockRelease(CfsGcLock);
1028+
}
10351029
pg_atomic_fetch_sub_u32(&cfs_state->n_active_gc,1);
1030+
10361031
returnsucceed;
10371032
}
10381033

@@ -1066,7 +1061,7 @@ static bool cfs_gc_directory(int worker_id, char const* path)
10661061
strcmp(file_path+len-4,".cfm")==0)
10671062
{
10681063
if (entry->d_ino %cfs_state->n_workers==worker_id
1069-
&& !cfs_gc_file(file_path,false))
1064+
&& !cfs_gc_file(file_path,true))
10701065
{
10711066
success= false;
10721067
break;
@@ -1395,31 +1390,35 @@ Datum cfs_fragmentation(PG_FUNCTION_ARGS)
13951390

13961391
Datumcfs_gc_relation(PG_FUNCTION_ARGS)
13971392
{
1398-
cfs_gc_processed_segments=0;
1399-
1400-
if (cfs_gc_workers==0&&pg_atomic_test_set_flag(&cfs_state->gc_started))
1393+
Oidoid=PG_GETARG_OID(0);
1394+
Relationrel=try_relation_open(oid,AccessShareLock);
1395+
intprocessed_segments=0;
1396+
1397+
if (rel!=NULL)
14011398
{
1402-
Oidoid=PG_GETARG_OID(0);
1403-
Relationrel=try_relation_open(oid,AccessShareLock);
1404-
1405-
if (rel!=NULL)
1406-
{
1407-
char*path=relpathbackend(rel->rd_node,rel->rd_backend,MAIN_FORKNUM);
1408-
char*map_path= (char*)palloc(strlen(path)+16);
1409-
inti=0;
1410-
sprintf(map_path,"%s.cfm",path);
1399+
char*path;
1400+
char*map_path;
1401+
inti=0;
1402+
1403+
LWLockAcquire(CfsGcLock,LW_EXCLUSIVE);/* Prevent interaction with background GC */
1404+
1405+
processed_segments=cfs_gc_processed_segments;
1406+
1407+
path=relpathbackend(rel->rd_node,rel->rd_backend,MAIN_FORKNUM);
1408+
map_path= (char*)palloc(strlen(path)+16);
1409+
sprintf(map_path,"%s.cfm",path);
14111410

1412-
while (true)
1413-
{
1414-
if (!cfs_gc_file(map_path, true))
1415-
break;
1416-
sprintf(map_path,"%s.%u.cfm",path,++i);
1417-
}
1418-
pfree(path);
1419-
pfree(map_path);
1420-
relation_close(rel,AccessShareLock);
1411+
while (cfs_gc_file(map_path, false))
1412+
{
1413+
sprintf(map_path,"%s.%u.cfm",path,++i);
14211414
}
1422-
pg_atomic_clear_flag(&cfs_state->gc_started);
1415+
pfree(path);
1416+
pfree(map_path);
1417+
relation_close(rel,AccessShareLock);
1418+
1419+
processed_segments-=cfs_gc_processed_segments;
1420+
1421+
LWLockRelease(CfsGcLock);
14231422
}
14241423
PG_RETURN_INT32(cfs_gc_processed_segments);
14251424
}

‎src/include/storage/cfs.h

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#include"port/atomics.h"
77
#include"storage/rijndael.h"
88

9-
#defineCFS_VERSION "0.24"
9+
#defineCFS_VERSION "0.25"
1010

1111
#defineCFS_GC_LOCK 0x10000000
1212

@@ -16,15 +16,6 @@
1616
#defineCFS_RETRY_TIMEOUT 10000/* milliseconds: delay between GC iterations in case of error */
1717
#defineCFS_ESTIMATE_PROBES 10
1818

19-
/*
20-
* Maximal number of attempts backend should perform to lock file segment.
21-
* If during all this attempts gc_started flag is not set (it means that no GC is active at this moment),
22-
* then it means that for some reasons GC process was crashed (may be together with other postgres processes or even OS)
23-
* without releasing lock. And for some reasons recovery was not performed and this page left locked. Цe should revoke this lock
24-
* to allow access to this database segment.
25-
*/
26-
#defineMAX_LOCK_ATTEMPTS 100
27-
2819
/* Maximal size of buffer for compressing (size) bytes where (size) is equal to PostgreSQL page size.
2920
* Some compression algorithms requires to provide buffer large enough for worst case and immediately returns error is buffer is not enough.
3021
* Accurate calculation of required buffer size is not needed here and doubling buffer size works for all used compression algorithms. */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp