Skip to content

Commit

Permalink
Do not return ENOMEM to readers by default, allow them to exceed the …
Browse files Browse the repository at this point in the history
…memory limit. The old behavior can be restored with --use-enomem
  • Loading branch information
vitalif committed Mar 7, 2025
1 parent cfb618d commit 4190aa0
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
jobs:

build:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2

Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,12 @@ GeeseFS uses RAM for two purposes:

However, that means that more than 10 processes trying to read large files
at the same time may exceed the memory limit by requesting more than 1000 MB
of buffers and in that case GeeseFS will return ENOMEM errors to some of them.
of buffers. Starting with v0.43.0, GeeseFS allows readers to exceed the memory
limit in this situation. You can turn this behavior off with the mount option
`--use-enomem`, which makes GeeseFS enforce the memory limit strictly and
return ENOMEM errors to some of the processes instead.

You can overcome this problem by either raising `--memory-limit` (for example
to 4 GB) or lowering `--read-ahead-large` (for example to 20 MB).
Alternatively, you can overcome this problem by raising `--memory-limit` (for
example to 4 GB) or reducing `--read-ahead-large` (for example to 20 MB).

## Maximizing Throughput

Expand Down
1 change: 1 addition & 0 deletions core/cfg/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type FlagStorage struct {

// Tuning
MemoryLimit uint64
UseEnomem bool
EntryLimit int
GCInterval uint64
Cheap bool
Expand Down
6 changes: 6 additions & 0 deletions core/cfg/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,11 @@ MISC OPTIONS:
Value: 1000,
},

cli.BoolFlag{
Name: "use-enomem",
Usage: "Return ENOMEM errors to applications when trying to read too many large files in parallel",
},

cli.IntFlag{
Name: "entry-limit",
Usage: "Maximum metadata entries to cache in memory (1 entry uses ~1 KB of memory)",
Expand Down Expand Up @@ -824,6 +829,7 @@ func PopulateFlags(c *cli.Context) (ret *FlagStorage) {

// Tuning,
MemoryLimit: uint64(1024 * 1024 * c.Int("memory-limit")),
UseEnomem: c.Bool("use-enomem"),
EntryLimit: c.Int("entry-limit"),
GCInterval: uint64(1024 * 1024 * c.Int("gc-interval")),
Cheap: c.Bool("cheap"),
Expand Down
42 changes: 26 additions & 16 deletions core/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,16 +146,20 @@ func (fh *FileHandle) WriteFile(offset int64, data []byte, copyData bool) (err e
}

// Try to reserve space without the inode lock
err = fh.inode.fs.bufferPool.Use(int64(len(data)), false)
if err != nil {
return err
if fh.inode.fs.flags.UseEnomem {
err = fh.inode.fs.bufferPool.Use(int64(len(data)), false)
if err != nil {
return err
}
}

fh.inode.mu.Lock()

if fh.inode.CacheState == ST_DELETED || fh.inode.CacheState == ST_DEAD {
// Oops, it's a deleted file. We don't support changing invisible files
fh.inode.fs.bufferPool.Use(-int64(len(data)), false)
if fh.inode.fs.flags.UseEnomem {
fh.inode.fs.bufferPool.Use(-int64(len(data)), false)
}
fh.inode.mu.Unlock()
return syscall.ENOENT
}
Expand Down Expand Up @@ -187,7 +191,9 @@ func (fh *FileHandle) WriteFile(offset int64, data []byte, copyData bool) (err e
fh.inode.mu.Unlock()

// Correct memory usage
if allocated != int64(len(data)) {
if !fh.inode.fs.flags.UseEnomem {
fh.inode.fs.bufferPool.Use(allocated, true)
} else if allocated != int64(len(data)) {
err = fh.inode.fs.bufferPool.Use(allocated-int64(len(data)), true)
}

Expand Down Expand Up @@ -330,15 +336,17 @@ func (inode *Inode) LoadRange(offset, size uint64, readAheadSize uint64, ignoreM

func (inode *Inode) retryRead(cloud StorageBackend, key string, offset, size uint64, ignoreMemoryLimit bool) {
// Maybe free some buffers first
err := inode.fs.bufferPool.Use(int64(size), ignoreMemoryLimit)
if err != nil {
log.Errorf("Error reading %v +%v of %v: %v", offset, size, key, err)
inode.mu.Lock()
inode.readError = err
inode.buffers.RemoveLoading(offset, size)
inode.mu.Unlock()
inode.readCond.Broadcast()
return
if inode.fs.flags.UseEnomem {
err := inode.fs.bufferPool.Use(int64(size), ignoreMemoryLimit)
if err != nil {
log.Errorf("Error reading %v +%v of %v: %v", offset, size, key, err)
inode.mu.Lock()
inode.readError = err
inode.buffers.RemoveLoading(offset, size)
inode.mu.Unlock()
inode.readCond.Broadcast()
return
}
}
inode.mu.Lock()
inode.LockRange(offset, size, false)
Expand All @@ -348,7 +356,7 @@ func (inode *Inode) retryRead(cloud StorageBackend, key string, offset, size uin
// is temporarily unavailable (err would be io.EOF in that case)
allocated := int64(0)
curOffset, curSize := offset, size
err = ReadBackoff(inode.fs.flags, func(attempt int) error {
err := ReadBackoff(inode.fs.flags, func(attempt int) error {
alloc, done, err := inode.sendRead(cloud, key, curOffset, curSize)
if err != nil && shouldRetry(err) {
s3Log.Warnf("Error reading %v +%v of %v (attempt %v): %v", curOffset, curSize, key, attempt, err)
Expand All @@ -358,7 +366,9 @@ func (inode *Inode) retryRead(cloud StorageBackend, key string, offset, size uin
allocated += alloc
return err
})
if allocated != int64(size) {
if !inode.fs.flags.UseEnomem {
inode.fs.bufferPool.Use(int64(allocated), true)
} else if allocated != int64(size) {
inode.fs.bufferPool.Use(int64(allocated)-int64(size), true)
}
inode.mu.Lock()
Expand Down

0 comments on commit 4190aa0

Please sign in to comment.