Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Backoff and Retry Mechanism for Image Pull #1409

Merged
merged 7 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions __tests__/cleanup-integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ integration('cleanupOldImageVersions', () => {
}, 10000)

beforeEach(async () => {
await ImageService.fetchImage(currentImage)
await ImageService.fetchImage(oldImage)
await ImageService.fetchImageWithRetry(currentImage)
await ImageService.fetchImageWithRetry(oldImage)
}, 20000)

afterEach(async () => {
Expand Down
2 changes: 1 addition & 1 deletion __tests__/container-service.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ describe('ContainerService', () => {

beforeAll(async () => {
/* We use alpine as a small, easy-to-script-for test stand-in for the updater */
await ImageService.fetchImage('alpine')
await ImageService.fetchImageWithRetry('alpine')
})

describe('when a container runs successfully', () => {
Expand Down
108 changes: 108 additions & 0 deletions __tests__/image-service.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import * as core from '@actions/core'
import Docker from 'dockerode'
import {Readable} from 'stream'
import {ImageService} from '../src/image-service'

jest.mock('@actions/core')

describe('ImageService', () => {
describe('when asked to fetch non-GitHub hosted images', () => {
test('it raises an error', async () => {
Expand All @@ -11,3 +16,106 @@ describe('ImageService', () => {
})
})
})

describe('ImageService.fetchImageWithRetry', () => {
let pullMock: jest.Mock
let docker: Docker
let modemMock: any
let getImageMock: jest.Mock

const MAX_RETRIES = 5

beforeEach(() => {
pullMock = jest.fn().mockResolvedValue(
new Readable({
read() {} // Empty read function to avoid stream errors
})
)

getImageMock = jest.fn().mockImplementation(() => ({
inspect: jest.fn().mockResolvedValue({
RepoDigests: ['ghcr.io/dependabot/dependabot-updater-npm:latest']
}),
remove: jest.fn(),
history: jest.fn(),
get: jest.fn(),
tag: jest.fn()
}))

modemMock = {
followProgress: jest.fn((stream, onFinished) => onFinished(null))
}

docker = new Docker()
jest.spyOn(docker, 'pull').mockImplementation(pullMock)
jest.spyOn(docker, 'getImage').mockImplementation(getImageMock)

// Mock modem property to avoid `followProgress` errors
Object.defineProperty(docker, 'modem', {value: modemMock})

jest.spyOn(global, 'setTimeout').mockImplementation(fn => {
fn()
return {} as NodeJS.Timeout
})

jest.spyOn(core, 'info').mockImplementation(jest.fn())
jest.spyOn(core, 'warning').mockImplementation(jest.fn())
jest.spyOn(core, 'error').mockImplementation(jest.fn())
})

afterEach(() => {
jest.clearAllMocks()
})

test('it retries with jitter on 429 Too Many Requests', async () => {
pullMock
.mockRejectedValueOnce(new Error('429 Too Many Requests'))
.mockRejectedValueOnce(new Error('429 Too Many Requests'))
.mockResolvedValueOnce(
new Readable({
read() {}
})
) // Succeeds on the third attempt

await expect(
ImageService.fetchImageWithRetry(
'ghcr.io/dependabot/dependabot-updater-npm',
{},
docker
)
).resolves.not.toThrow()

expect(pullMock).toHaveBeenCalledTimes(3)
expect(core.warning).toHaveBeenCalledWith(
expect.stringContaining('Retrying in')
)
})

test('it fails after MAX_RETRIES on persistent 429 errors', async () => {
pullMock.mockRejectedValue(new Error('429 Too Many Requests')) // Always fails

await expect(
ImageService.fetchImageWithRetry(
'ghcr.io/dependabot/dependabot-updater-npm',
{},
docker
)
).rejects.toThrow('429 Too Many Requests')

expect(pullMock).toHaveBeenCalledTimes(MAX_RETRIES)
})

test('it does not retry on fatal errors', async () => {
pullMock.mockRejectedValue(new Error('500 Internal Server Error')) // Fatal error

await expect(
ImageService.fetchImageWithRetry(
'ghcr.io/dependabot/dependabot-updater-npm',
{},
docker
)
).rejects.toThrow('500 Internal Server Error')

expect(pullMock).toHaveBeenCalledTimes(1) // No retries should occur
})
})
47 changes: 40 additions & 7 deletions dist/main/index.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion dist/main/index.js.map

Large diffs are not rendered by default.

57 changes: 50 additions & 7 deletions src/image-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@ import * as core from '@actions/core'
import Docker from 'dockerode'
import {Readable} from 'stream'

const MAX_RETRIES = 5 // Maximum number of retries
const INITIAL_DELAY_MS = 2000 // Initial delay in milliseconds for backoff
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@abdulapopoola I have added the delay of 2 seconds


const sleep = async (ms: number): Promise<void> =>
new Promise(resolve => setTimeout(resolve, ms))

const endOfStream = async (docker: Docker, stream: Readable): Promise<void> => {
return new Promise((resolve, reject) => {
docker.modem.followProgress(stream, (err: Error | null) =>
Expand Down Expand Up @@ -43,18 +49,55 @@ export const ImageService = {
}

const auth = {} // Images are public so not authentication info is required
await this.fetchImage(imageName, auth, docker)
await this.fetchImageWithRetry(imageName, auth, docker)
},

/* Retrieve the imageName using the auth details provided, if any */
async fetchImage(
/* Retrieve the image using the auth details provided, if any with retry and backoff */
async fetchImageWithRetry(
imageName: string,
auth = {},
docker = new Docker()
): Promise<void> {
core.info(`Pulling image ${imageName}...`)
const stream = await docker.pull(imageName, {authconfig: auth})
await endOfStream(docker, new Readable().wrap(stream))
core.info(`Pulled image ${imageName}`)
let attempt = 0

while (attempt < MAX_RETRIES) {
try {
core.info(`Pulling image ${imageName} (attempt ${attempt + 1})...`)
const stream = await docker.pull(imageName, {authconfig: auth})
await endOfStream(docker, new Readable().wrap(stream))
core.info(`Pulled image ${imageName}`)
return // Exit on success
} catch (error) {
if (!(error instanceof Error)) throw error // Ensure error is an instance of Error

// Handle 429 Too Many Requests separately
if (
error.message.includes('429 Too Many Requests') ||
error.message.toLowerCase().includes('too many requests')
) {
attempt++ // Only increment attempt on 429
if (attempt >= MAX_RETRIES) {
core.error(
`Failed to pull image ${imageName} after ${MAX_RETRIES} attempts.`
)
throw error
}

// Add jitter to avoid synchronization issues
const baseDelay = INITIAL_DELAY_MS * Math.pow(2, attempt)
const jitter = Math.random() * baseDelay
const delay = baseDelay / 2 + jitter

core.warning(
`Received Too Many Requests error. Retrying in ${(delay / 1000).toFixed(2)} seconds...`
)
await sleep(delay)
} else {
// Non-429 errors should NOT be retried
core.error(`Fatal error pulling image ${imageName}: ${error.message}`)
throw error // Exit immediately
}
}
}
}
}
Loading