diff --git a/packages/orchestrator/internal/sandbox/uffd/eagain.go b/packages/orchestrator/internal/sandbox/uffd/eagain.go new file mode 100644 index 0000000000..f9575f21ea --- /dev/null +++ b/packages/orchestrator/internal/sandbox/uffd/eagain.go @@ -0,0 +1,57 @@ +package uffd + +import ( + "time" + + "go.uber.org/zap" +) + +type eagainCounter struct { + count uint64 + startTime time.Time + endTime time.Time + logger *zap.Logger + msg string +} + +func newEagainCounter(logger *zap.Logger, msg string) *eagainCounter { + return &eagainCounter{ + count: 0, + startTime: time.Time{}, + endTime: time.Time{}, + logger: logger, + msg: msg, + } +} + +func (c *eagainCounter) Increase() { + if c.count == 0 { + c.startTime = time.Now() + } + + c.count++ + + c.endTime = time.Now() +} + +func (c *eagainCounter) log(closing bool) { + if c.count > 0 { + c.logger.Debug( + c.msg, + zap.Uint64("count", c.count), + zap.Time("start", c.startTime), + zap.Time("end", c.endTime), + zap.Bool("closing", closing), + ) + + c.count = 0 + } +} + +func (c *eagainCounter) Close() { + c.log(true) +} + +func (c *eagainCounter) Log() { + c.log(false) +} diff --git a/packages/orchestrator/internal/sandbox/uffd/serve.go b/packages/orchestrator/internal/sandbox/uffd/serve.go index c5866b52dd..f493087220 100644 --- a/packages/orchestrator/internal/sandbox/uffd/serve.go +++ b/packages/orchestrator/internal/sandbox/uffd/serve.go @@ -43,6 +43,9 @@ func Serve( missingPagesBeingHandled := map[int64]struct{}{} + eagainCounter := newEagainCounter(logger, "uffd: eagain during fd read (accumulated)") + defer eagainCounter.Close() + outerLoop: for { if _, err := unix.Poll( @@ -56,7 +59,7 @@ outerLoop: } if err == unix.EAGAIN { - logger.Debug("uffd: eagain during polling, going back to polling") + logger.Debug("uffd: eagain during fd polling, going back to polling") continue } @@ -97,7 +100,7 @@ outerLoop: buf := make([]byte, unsafe.Sizeof(userfaultfd.UffdMsg{})) for { - n, err := syscall.Read(uffd, buf) + _, err := syscall.Read(uffd, buf) if err == syscall.EINTR { logger.Debug("uffd: interrupted read, reading again") @@ -110,7 +113,7 @@ outerLoop: } if err == syscall.EAGAIN { - logger.Debug("uffd: eagain error, going back to polling", zap.Error(err), zap.Int("read_bytes", n)) + eagainCounter.Increase() // Continue polling the fd. continue outerLoop @@ -121,6 +124,8 @@ outerLoop: return fmt.Errorf("failed to read: %w", err) } + eagainCounter.Log() + msg := *(*userfaultfd.UffdMsg)(unsafe.Pointer(&buf[0])) if userfaultfd.GetMsgEvent(&msg) != userfaultfd.UFFD_EVENT_PAGEFAULT { logger.Error("UFFD serve unexpected event type", zap.Any("event_type", userfaultfd.GetMsgEvent(&msg)))