Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option for concurrent cache downloads with timeout #1484

Merged
merged 3 commits into from
Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/cache/RELEASES.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,7 @@
### 3.2.1

- Updated @azure/storage-blob to `v12.13.0`

### 3.2.2

- Add new default cache download method to improve performance and reduce hangs [#1484](https://github.com/actions/toolkit/pull/1484)
24 changes: 18 additions & 6 deletions packages/cache/__tests__/cacheHttpClient.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,24 @@
'downloadCacheStorageSDK'
)

const downloadCacheHttpClientConcurrentMock = jest.spyOn(
downloadUtils,
'downloadCacheHttpClientConcurrent'
)

const archiveLocation = 'http://foo.blob.core.windows.net/bar/baz'
const archivePath = '/foo/bar'

await downloadCache(archiveLocation, archivePath)

expect(downloadCacheStorageSDKMock).toHaveBeenCalledTimes(1)
expect(downloadCacheStorageSDKMock).toHaveBeenCalledWith(
expect(downloadCacheHttpClientConcurrentMock).toHaveBeenCalledTimes(1)
expect(downloadCacheHttpClientConcurrentMock).toHaveBeenCalledWith(
archiveLocation,
archivePath,
getDownloadOptions()
)

expect(downloadCacheStorageSDKMock).toHaveBeenCalledTimes(0)
expect(downloadCacheHttpClientMock).toHaveBeenCalledTimes(0)
})

Expand All @@ -109,20 +115,26 @@
'downloadCacheStorageSDK'
)

const downloadCacheHttpClientConcurrentMock = jest.spyOn(
downloadUtils,
'downloadCacheHttpClientConcurrent'
)

const archiveLocation = 'http://foo.blob.core.windows.net/bar/baz'
const archivePath = '/foo/bar'
const options: DownloadOptions = {downloadConcurrency: 4}

await downloadCache(archiveLocation, archivePath, options)

expect(downloadCacheStorageSDKMock).toHaveBeenCalledTimes(1)
expect(downloadCacheStorageSDKMock).toHaveBeenCalled()
expect(downloadCacheStorageSDKMock).toHaveBeenCalledWith(
expect(downloadCacheHttpClientConcurrentMock).toHaveBeenCalledTimes(1)
expect(downloadCacheHttpClientConcurrentMock).toHaveBeenCalled()
expect(downloadCacheHttpClientConcurrentMock).toHaveBeenCalledWith(
archiveLocation,
archivePath,
getDownloadOptions(options)
)

expect(downloadCacheStorageSDKMock).toHaveBeenCalledTimes(0)
expect(downloadCacheHttpClientMock).toHaveBeenCalledTimes(0)
})

Expand All @@ -138,7 +150,7 @@

const archiveLocation = 'http://foo.blob.core.windows.net/bar/baz'
const archivePath = '/foo/bar'
const options: DownloadOptions = {useAzureSdk: false}
const options: DownloadOptions = {useAzureSdk: false, concurrentBlobDownloads: false}

Check failure on line 153 in packages/cache/__tests__/cacheHttpClient.test.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Replace `useAzureSdk:·false,·concurrentBlobDownloads:·false` with `⏎····useAzureSdk:·false,⏎····concurrentBlobDownloads:·false⏎··`

Check failure on line 153 in packages/cache/__tests__/cacheHttpClient.test.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Replace `useAzureSdk:·false,·concurrentBlobDownloads:·false` with `⏎····useAzureSdk:·false,⏎····concurrentBlobDownloads:·false⏎··`

await downloadCache(archiveLocation, archivePath, options)

Expand Down
7 changes: 5 additions & 2 deletions packages/cache/__tests__/options.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ import {
getUploadOptions
} from '../src/options'

const useAzureSdk = true
const useAzureSdk = false
const concurrentBlobDownloads = true
const downloadConcurrency = 8
const timeoutInMs = 30000
const segmentTimeoutInMs = 600000
Expand All @@ -18,6 +19,7 @@ test('getDownloadOptions sets defaults', async () => {

expect(actualOptions).toEqual({
useAzureSdk,
concurrentBlobDownloads,
downloadConcurrency,
timeoutInMs,
segmentTimeoutInMs,
Expand All @@ -27,7 +29,8 @@ test('getDownloadOptions sets defaults', async () => {

test('getDownloadOptions overrides all settings', async () => {
const expectedOptions: DownloadOptions = {
useAzureSdk: false,
useAzureSdk: true,
concurrentBlobDownloads: false,
downloadConcurrency: 14,
timeoutInMs: 20000,
segmentTimeoutInMs: 3600000,
Expand Down
18 changes: 9 additions & 9 deletions packages/cache/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions packages/cache/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@actions/cache",
"version": "3.2.1",
"version": "3.2.2",
"preview": true,
"description": "Actions cache lib",
"keywords": [
Expand Down Expand Up @@ -40,7 +40,7 @@
"@actions/core": "^1.10.0",
"@actions/exec": "^1.0.1",
"@actions/glob": "^0.1.0",
"@actions/http-client": "^2.0.1",
"@actions/http-client": "^2.1.1",
"@actions/io": "^1.0.1",
"@azure/abort-controller": "^1.1.0",
"@azure/ms-rest-js": "^2.6.0",
Expand Down
22 changes: 13 additions & 9 deletions packages/cache/src/internal/cacheHttpClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
ITypedResponseWithError,
ArtifactCacheList
} from './contracts'
import {downloadCacheHttpClient, downloadCacheStorageSDK} from './downloadUtils'
import {downloadCacheHttpClient, downloadCacheHttpClientConcurrent, downloadCacheStorageSDK} from './downloadUtils'

Check failure on line 23 in packages/cache/src/internal/cacheHttpClient.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Replace `downloadCacheHttpClient,·downloadCacheHttpClientConcurrent,·downloadCacheStorageSDK` with `⏎··downloadCacheHttpClient,⏎··downloadCacheHttpClientConcurrent,⏎··downloadCacheStorageSDK⏎`

Check failure on line 23 in packages/cache/src/internal/cacheHttpClient.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Replace `downloadCacheHttpClient,·downloadCacheHttpClientConcurrent,·downloadCacheStorageSDK` with `⏎··downloadCacheHttpClient,⏎··downloadCacheHttpClientConcurrent,⏎··downloadCacheStorageSDK⏎`
import {
DownloadOptions,
UploadOptions,
Expand Down Expand Up @@ -171,15 +171,19 @@
const archiveUrl = new URL(archiveLocation)
const downloadOptions = getDownloadOptions(options)

if (
downloadOptions.useAzureSdk &&
archiveUrl.hostname.endsWith('.blob.core.windows.net')
) {
// Use Azure storage SDK to download caches hosted on Azure to improve speed and reliability.
await downloadCacheStorageSDK(archiveLocation, archivePath, downloadOptions)
if (archiveUrl.hostname.endsWith('.blob.core.windows.net')) {
if (downloadOptions.useAzureSdk) {
// Use Azure storage SDK to download caches hosted on Azure to improve speed and reliability.
await downloadCacheStorageSDK(archiveLocation, archivePath, downloadOptions)

Check failure on line 177 in packages/cache/src/internal/cacheHttpClient.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Replace `archiveLocation,·archivePath,·downloadOptions` with `⏎········archiveLocation,⏎········archivePath,⏎········downloadOptions⏎······`

Check failure on line 177 in packages/cache/src/internal/cacheHttpClient.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Replace `archiveLocation,·archivePath,·downloadOptions` with `⏎········archiveLocation,⏎········archivePath,⏎········downloadOptions⏎······`
} else if (downloadOptions.concurrentBlobDownloads) {
// Use concurrent implementation with HttpClient to work around blob SDK issue
await downloadCacheHttpClientConcurrent(archiveLocation, archivePath, downloadOptions)

Check failure on line 180 in packages/cache/src/internal/cacheHttpClient.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Replace `archiveLocation,·archivePath,·downloadOptions` with `⏎········archiveLocation,⏎········archivePath,⏎········downloadOptions⏎······`

Check failure on line 180 in packages/cache/src/internal/cacheHttpClient.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Replace `archiveLocation,·archivePath,·downloadOptions` with `⏎········archiveLocation,⏎········archivePath,⏎········downloadOptions⏎······`
} else {
// Otherwise, download using the Actions http-client.
await downloadCacheHttpClient(archiveLocation, archivePath)
}
} else {
// Otherwise, download using the Actions http-client.
await downloadCacheHttpClient(archiveLocation, archivePath)
await downloadCacheHttpClient(archiveLocation, archivePath)

Check failure on line 186 in packages/cache/src/internal/cacheHttpClient.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Delete `··`

Check failure on line 186 in packages/cache/src/internal/cacheHttpClient.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Delete `··`
}
}

Expand Down
155 changes: 151 additions & 4 deletions packages/cache/src/internal/downloadUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,153 @@
}
}

/**
* Download the cache using the Actions toolkit http-client concurrently
*
* @param archiveLocation the URL for the cache
* @param archivePath the local path where the cache is saved
*/
export async function downloadCacheHttpClientConcurrent(
archiveLocation: string,
archivePath: fs.PathLike,
options: DownloadOptions
): Promise<void> {
const archiveDescriptor = await fs.promises.open(archivePath, 'w')
const httpClient = new HttpClient('actions/cache', undefined, {
socketTimeout: options.timeoutInMs,
keepAlive: true
})
try {
const res = await retryHttpClientResponse(
'downloadCacheMetadata',
async () => await httpClient.request('HEAD', archiveLocation, null, {})
)

const lengthHeader = res.message.headers['content-length']
if (lengthHeader === undefined || lengthHeader === null) {
throw new Error('Content-Length not found on blob response')
}

const length = parseInt(lengthHeader)
if (Number.isNaN(length)) {
throw new Error(`Could not interpret Content-Length: ${length}`)
}

const downloads: {
offset: number
promiseGetter: () => Promise<DownloadSegment>
}[] = []
const blockSize = 4 * 1024 * 1024

for (let offset = 0; offset < length; offset += blockSize) {
const count = Math.min(blockSize, length - offset)
downloads.push({
offset,
promiseGetter: async () => {
return await downloadSegmentRetry(
httpClient,
archiveLocation,
offset,
count
)
}
})
}

// reverse to use .pop instead of .shift
downloads.reverse()
let actives = 0
let bytesDownloaded = 0
const progress = new DownloadProgress(length)
progress.startDisplayTimer()
const progressFn = progress.onProgress()

const activeDownloads: {[offset: number]: Promise<DownloadSegment>} = []
let nextDownload:
| {offset: number; promiseGetter: () => Promise<DownloadSegment>}
| undefined

const waitAndWrite: () => Promise<void> = async () => {
const segment = await Promise.race(Object.values(activeDownloads))
await archiveDescriptor.write(segment.buffer, 0, segment.count, segment.offset);

Check failure on line 274 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Replace `segment.buffer,·0,·segment.count,·segment.offset);` with `⏎········segment.buffer,⏎········0,⏎········segment.count,⏎········segment.offset⏎······)`

Check failure on line 274 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Extra semicolon

Check failure on line 274 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Replace `segment.buffer,·0,·segment.count,·segment.offset);` with `⏎········segment.buffer,⏎········0,⏎········segment.count,⏎········segment.offset⏎······)`

Check failure on line 274 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Extra semicolon
chkimes marked this conversation as resolved.
Show resolved Hide resolved
actives--
delete activeDownloads[segment.offset]
bytesDownloaded += segment.count
progressFn({ loadedBytes: bytesDownloaded })

Check failure on line 278 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Replace `·loadedBytes:·bytesDownloaded·` with `loadedBytes:·bytesDownloaded`

Check failure on line 278 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Replace `·loadedBytes:·bytesDownloaded·` with `loadedBytes:·bytesDownloaded`
}

while ((nextDownload = downloads.pop())) {
activeDownloads[nextDownload.offset] = nextDownload.promiseGetter()
actives++

if (actives >= (options.downloadConcurrency ?? 10)) {
await waitAndWrite()
}
}

while (actives > 0) {
await waitAndWrite()
}
} finally {
httpClient.dispose()
await archiveDescriptor.close()
}
}

async function downloadSegmentRetry(
httpClient: HttpClient,
archiveLocation: string,
offset: number,
count: number
): Promise<DownloadSegment> {
const retries = 5
let failures = 0

while (true) {

Check failure on line 308 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Unexpected constant condition

Check failure on line 308 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Unexpected constant condition
vmjoseph marked this conversation as resolved.
Show resolved Hide resolved
try {
const timeout = 30000
const result = await promiseWithTimeout(timeout, downloadSegment(httpClient, archiveLocation, offset, count))

Check failure on line 311 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Replace `timeout,·downloadSegment(httpClient,·archiveLocation,·offset,·count)` with `⏎········timeout,⏎········downloadSegment(httpClient,·archiveLocation,·offset,·count)⏎······`

Check failure on line 311 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Replace `timeout,·downloadSegment(httpClient,·archiveLocation,·offset,·count)` with `⏎········timeout,⏎········downloadSegment(httpClient,·archiveLocation,·offset,·count)⏎······`
if (typeof result === 'string') {
throw new Error('downloadSegmentRetry failed due to timeout')
}

return result
} catch (err) {
if (failures >= retries) {
throw err
}

failures++
}
}
}

async function downloadSegment(
httpClient: HttpClient,
archiveLocation: string,
offset: number,
count: number
): Promise<DownloadSegment> {
const partRes = await retryHttpClientResponse(
'downloadCachePart',
async () =>
await httpClient.get(archiveLocation, {
Range: `bytes=${offset}-${offset + count - 1}`
})
)
return {
offset,
count,
buffer: await partRes.readBodyBuffer!()

Check warning on line 343 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (ubuntu-latest)

Forbidden non-null assertion

Check warning on line 343 in packages/cache/src/internal/downloadUtils.ts

View workflow job for this annotation

GitHub Actions / Build (macos-latest)

Forbidden non-null assertion
}
}

declare class DownloadSegment {
offset: number
count: number
buffer: Buffer
}

/**
* Download the cache using the Azure Storage SDK. Only call this method if the
* URL points to an Azure Storage endpoint.
Expand Down Expand Up @@ -287,12 +434,12 @@
}
}

const promiseWithTimeout = async (
const promiseWithTimeout = async<T> (
timeoutMs: number,
promise: Promise<Buffer>
): Promise<unknown> => {
promise: Promise<T>
): Promise<T | string> => {
let timeoutHandle: NodeJS.Timeout
const timeoutPromise = new Promise(resolve => {
const timeoutPromise = new Promise<string>(resolve => {
timeoutHandle = setTimeout(() => resolve('timeout'), timeoutMs)
})

Expand Down