Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit8a6deb1

Browse files
committed
chore: purge file cache entries on error
1 parentf44969b commit8a6deb1

File tree

1 file changed

+130
-96
lines changed

1 file changed

+130
-96
lines changed

‎coderd/files/cache.go

Lines changed: 130 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -25,60 +25,61 @@ type FileAcquirer interface {
2525

2626
// New returns a file cache that will fetch files from a database
2727
funcNew(registerer prometheus.Registerer,authz rbac.Authorizer)*Cache {
28-
return (&Cache{
29-
lock: sync.Mutex{},
30-
data:make(map[uuid.UUID]*cacheEntry),
31-
authz:authz,
32-
}).registerMetrics(registerer)
28+
return&Cache{
29+
lock: sync.Mutex{},
30+
data:make(map[uuid.UUID]*cacheEntry),
31+
authz:authz,
32+
cacheMetrics:newCacheMetrics(registerer),
33+
}
3334
}
3435

35-
func(c*Cache)registerMetrics(registerer prometheus.Registerer)*Cache {
36+
funcnewCacheMetrics(registerer prometheus.Registerer)cacheMetrics {
3637
subsystem:="file_cache"
3738
f:=promauto.With(registerer)
3839

39-
c.currentCacheSize=f.NewGauge(prometheus.GaugeOpts{
40-
Namespace:"coderd",
41-
Subsystem:subsystem,
42-
Name:"open_files_size_bytes_current",
43-
Help:"The current amount of memory of all files currently open in the file cache.",
44-
})
45-
46-
c.totalCacheSize=f.NewCounter(prometheus.CounterOpts{
47-
Namespace:"coderd",
48-
Subsystem:subsystem,
49-
Name:"open_files_size_bytes_total",
50-
Help:"The total amount of memory ever opened in the file cache. This number never decrements.",
51-
})
52-
53-
c.currentOpenFiles=f.NewGauge(prometheus.GaugeOpts{
54-
Namespace:"coderd",
55-
Subsystem:subsystem,
56-
Name:"open_files_current",
57-
Help:"The count of unique files currently open in the file cache.",
58-
})
59-
60-
c.totalOpenedFiles=f.NewCounter(prometheus.CounterOpts{
61-
Namespace:"coderd",
62-
Subsystem:subsystem,
63-
Name:"open_files_total",
64-
Help:"The total count of unique files ever opened in the file cache.",
65-
})
66-
67-
c.currentOpenFileReferences=f.NewGauge(prometheus.GaugeOpts{
68-
Namespace:"coderd",
69-
Subsystem:subsystem,
70-
Name:"open_file_refs_current",
71-
Help:"The count of file references currently open in the file cache. Multiple references can be held for the same file.",
72-
})
73-
74-
c.totalOpenFileReferences=f.NewCounterVec(prometheus.CounterOpts{
75-
Namespace:"coderd",
76-
Subsystem:subsystem,
77-
Name:"open_file_refs_total",
78-
Help:"The total number of file references ever opened in the file cache. The 'hit' label indicates if the file was loaded from the cache.",
79-
}, []string{"hit"})
80-
81-
returnc
40+
returncacheMetrics{
41+
currentCacheSize:f.NewGauge(prometheus.GaugeOpts{
42+
Namespace:"coderd",
43+
Subsystem:subsystem,
44+
Name:"open_files_size_bytes_current",
45+
Help:"The current amount of memory of all files currently open in the file cache.",
46+
}),
47+
48+
totalCacheSize:f.NewCounter(prometheus.CounterOpts{
49+
Namespace:"coderd",
50+
Subsystem:subsystem,
51+
Name:"open_files_size_bytes_total",
52+
Help:"The total amount of memory ever opened in the file cache. This number never decrements.",
53+
}),
54+
55+
currentOpenFiles:f.NewGauge(prometheus.GaugeOpts{
56+
Namespace:"coderd",
57+
Subsystem:subsystem,
58+
Name:"open_files_current",
59+
Help:"The count of unique files currently open in the file cache.",
60+
}),
61+
62+
totalOpenedFiles:f.NewCounter(prometheus.CounterOpts{
63+
Namespace:"coderd",
64+
Subsystem:subsystem,
65+
Name:"open_files_total",
66+
Help:"The total count of unique files ever opened in the file cache.",
67+
}),
68+
69+
currentOpenFileReferences:f.NewGauge(prometheus.GaugeOpts{
70+
Namespace:"coderd",
71+
Subsystem:subsystem,
72+
Name:"open_file_refs_current",
73+
Help:"The count of file references currently open in the file cache. Multiple references can be held for the same file.",
74+
}),
75+
76+
totalOpenFileReferences:f.NewCounterVec(prometheus.CounterOpts{
77+
Namespace:"coderd",
78+
Subsystem:subsystem,
79+
Name:"open_file_refs_total",
80+
Help:"The total number of file references ever opened in the file cache. The 'hit' label indicates if the file was loaded from the cache.",
81+
}, []string{"hit"}),
82+
}
8283
}
8384

8485
// Cache persists the files for template versions, and is used by dynamic
@@ -106,18 +107,21 @@ type cacheMetrics struct {
106107
totalCacheSize prometheus.Counter
107108
}
108109

110+
typecacheEntrystruct {
111+
// refCount must only be accessed while the cacheEntry lock is held.
112+
lock sync.Mutex
113+
refCountint
114+
value*lazy.ValueWithError[CacheEntryValue]
115+
116+
closefunc()
117+
}
118+
109119
typeCacheEntryValuestruct {
110120
fs.FS
111121
Object rbac.Object
112122
Sizeint64
113123
}
114124

115-
typecacheEntrystruct {
116-
// refCount must only be accessed while the Cache lock is held.
117-
refCountint
118-
value*lazy.ValueWithError[CacheEntryValue]
119-
}
120-
121125
var_ fs.FS= (*CloseFS)(nil)
122126

123127
// CloseFS is a wrapper around fs.FS that implements io.Closer. The Close()
@@ -142,93 +146,116 @@ func (c *Cache) Acquire(ctx context.Context, db database.Store, fileID uuid.UUID
142146
// mutex has been released, or we would continue to hold the lock until the
143147
// entire file has been fetched, which may be slow, and would prevent other
144148
// files from being fetched in parallel.
145-
it,err:=c.prepare(ctx,db,fileID).Load()
149+
e:=c.prepare(ctx,db,fileID)
150+
ev,err:=e.value.Load()
146151
iferr!=nil {
147-
c.release(fileID)
152+
c.purge(fileID)
153+
returnnil,err
154+
}
155+
156+
// We always run the fetch under a system context and actor, so we need to check the caller's
157+
// context manually before returning.
158+
159+
// Check if the caller's context was canceled
160+
iferr:=ctx.Err();err!=nil {
148161
returnnil,err
149162
}
150163

164+
// Check that the caller is authorized to access the file
151165
subject,ok:=dbauthz.ActorFromContext(ctx)
152166
if!ok {
153167
returnnil,dbauthz.ErrNoActor
154168
}
155-
// Always check the caller can actually read the file.
156-
iferr:=c.authz.Authorize(ctx,subject,policy.ActionRead,it.Object);err!=nil {
157-
c.release(fileID)
169+
iferr:=c.authz.Authorize(ctx,subject,policy.ActionRead,ev.Object);err!=nil {
170+
e.close()
158171
returnnil,err
159172
}
160173

161-
varonce sync.Once
174+
varcloseOnce sync.Once
162175
return&CloseFS{
163-
FS:it.FS,
176+
FS:ev.FS,
164177
close:func() {
165178
// sync.Once makes the Close() idempotent, so we can call it
166179
// multiple times without worrying about double-releasing.
167-
once.Do(func() {c.release(fileID) })
180+
closeOnce.Do(func() {
181+
e.close()
182+
})
168183
},
169184
},nil
170185
}
171186

172-
func (c*Cache)prepare(ctx context.Context,db database.Store,fileID uuid.UUID)*lazy.ValueWithError[CacheEntryValue] {
187+
func (c*Cache)prepare(ctx context.Context,db database.Store,fileID uuid.UUID)*cacheEntry {
173188
c.lock.Lock()
174189
deferc.lock.Unlock()
175190

176191
hitLabel:="true"
177192
entry,ok:=c.data[fileID]
178193
if!ok {
179-
value:=lazy.NewWithError(func() (CacheEntryValue,error) {
180-
val,err:=fetch(ctx,db,fileID)
194+
hitLabel="false"
181195

182-
// Always add to the cache size the bytes of the file loaded.
183-
iferr==nil {
196+
varreleaseOnce sync.Once
197+
entry=&cacheEntry{
198+
refCount:0,
199+
value:lazy.NewWithError(func() (CacheEntryValue,error) {
200+
val,err:=fetch(db,fileID)
201+
iferr!=nil {
202+
// Force future calls to Acquire to trigger a new fetch as soon as
203+
// a fetch has failed, even if references are still held.
204+
delete(c.data,fileID)
205+
returnval,err
206+
}
207+
208+
// Add the size of the file to the cache size metrics.
184209
c.currentCacheSize.Add(float64(val.Size))
185210
c.totalCacheSize.Add(float64(val.Size))
186-
}
187211

188-
returnval,err
189-
})
212+
returnval,err
213+
}),
190214

191-
entry=&cacheEntry{
192-
value:value,
193-
refCount:0,
215+
close:func() {
216+
entry.lock.Lock()
217+
deferentry.lock.Unlock()
218+
219+
entry.refCount--
220+
c.currentOpenFileReferences.Dec()
221+
222+
ifentry.refCount==0 {
223+
releaseOnce.Do(func() {
224+
c.purge(fileID)
225+
})
226+
}
227+
},
194228
}
195229
c.data[fileID]=entry
230+
196231
c.currentOpenFiles.Inc()
197232
c.totalOpenedFiles.Inc()
198-
hitLabel="false"
199233
}
200234

235+
entry.lock.Lock()
236+
deferentry.lock.Unlock()
201237
c.currentOpenFileReferences.Inc()
202238
c.totalOpenFileReferences.WithLabelValues(hitLabel).Inc()
203239
entry.refCount++
204-
returnentry.value
240+
returnentry
205241
}
206242

207-
// release decrements the reference count for the given fileID, and frees the
208-
// backing data if there are no further references being held.
209-
//
210-
// release should only be called after a successful call to Acquire using the Release()
211-
// method on the returned *CloseFS.
212-
func (c*Cache)release(fileID uuid.UUID) {
243+
// purge immediately removes an entry from the cache. It should be called
244+
func (c*Cache)purge(fileID uuid.UUID) {
213245
c.lock.Lock()
214246
deferc.lock.Unlock()
215247

216248
entry,ok:=c.data[fileID]
217249
if!ok {
218-
// If we land here, it's almost certainly because a bug already happened,
219-
// and we're freeing something that's already been freed, or we're calling
220-
// this function with an incorrect ID. Should this function return an error?
221-
return
222-
}
223-
224-
c.currentOpenFileReferences.Dec()
225-
entry.refCount--
226-
ifentry.refCount>0 {
250+
// If we land here, it's probably because of a fetch attempt that
251+
// resulted in an error, and got purged already. It may also be an
252+
// erroneous extra close, but we can't really distinguish between those
253+
// two cases currently.
227254
return
228255
}
229256

257+
// Purge the file from the cache.
230258
c.currentOpenFiles.Dec()
231-
232259
ev,err:=entry.value.Load()
233260
iferr==nil {
234261
c.currentCacheSize.Add(-1*float64(ev.Size))
@@ -246,11 +273,18 @@ func (c *Cache) Count() int {
246273
returnlen(c.data)
247274
}
248275

249-
funcfetch(ctx context.Context,store database.Store,fileID uuid.UUID) (CacheEntryValue,error) {
250-
// Make sure the read does not fail due to authorization issues.
251-
// Authz is checked on the Acquire call, so this is safe.
276+
funcfetch(store database.Store,fileID uuid.UUID) (CacheEntryValue,error) {
277+
// Because many callers can be waiting on the same file fetch concurrently, we
278+
// want to prevent any failures that would cause them all to receive errors
279+
// because the caller who initiated the fetch would fail.
280+
// - We always run the fetch with an uncancelable context, and then check
281+
// context cancellation for each acquirer afterwards.
282+
// - We always run the fetch as a system user, and then check authorization
283+
// for each acquirer afterwards.
284+
// This prevents a canceled context or an unauthorized user from "holding up
285+
// the queue".
252286
//nolint:gocritic
253-
file,err:=store.GetFileByID(dbauthz.AsFileReader(ctx),fileID)
287+
file,err:=store.GetFileByID(dbauthz.AsFileReader(context.Background()),fileID)
254288
iferr!=nil {
255289
returnCacheEntryValue{},xerrors.Errorf("failed to read file from database: %w",err)
256290
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp