From 21229dc09e551e6da18e72e4e454ee145709c713 Mon Sep 17 00:00:00 2001 From: Daniel Kennedy Date: Wed, 11 Mar 2026 09:30:15 -0400 Subject: [PATCH] Artifact: support downloading artifacts with CJK characters in their name (#2341) * Artifact: support downloading artifacts with CJK characters in their name * Fix some linting/PR comments * One more linting fix --- packages/artifact/RELEASES.md | 4 + .../__tests__/download-artifact.test.ts | 120 ++++++++++++++++++ packages/artifact/package.json | 2 +- .../internal/download/download-artifact.ts | 14 +- 4 files changed, 135 insertions(+), 5 deletions(-) diff --git a/packages/artifact/RELEASES.md b/packages/artifact/RELEASES.md index 9b368e35..730413d9 100644 --- a/packages/artifact/RELEASES.md +++ b/packages/artifact/RELEASES.md @@ -1,5 +1,9 @@ # @actions/artifact Releases +## 6.2.1 + +- Support the RFC 5987 `filename*` field in the `content-disposition` header. This allows us to correctly download files and artifacts with Chinese/Japanese/Korean (among other) characters in their name. + ## 6.2.0 - Support uploading single un-archived files (not zipped). Direct uploads are only supported for artifacts version 7+ (based on the major version of `actions/upload-artifact`). Callers must pass the `skipArchive` option to `uploadArtifact`. Only single files can be uploaded at a time right now. Default behavior should remain unchanged if `skipArchive = false`. When `skipArchive = true`, the name of the file is used as the name of the artifact for consistency with the downloads: you upload `artifact.txt`, you download `artifact.txt`. diff --git a/packages/artifact/__tests__/download-artifact.test.ts b/packages/artifact/__tests__/download-artifact.test.ts index e4bb9319..1e4d8d1e 100644 --- a/packages/artifact/__tests__/download-artifact.test.ts +++ b/packages/artifact/__tests__/download-artifact.test.ts @@ -977,5 +977,125 @@ describe('download-artifact', () => { ) expect(fs.existsSync(maliciousPath)).toBe(false) }) + + it('should correctly handle Content-Disposition with filename* parameter (RFC 5987)', async () => { + const rawFileContent = 'content with rfc5987 encoding' + const expectedFileName = '报告-土-x.txt' + const asciiFileName = '__-_-x.txt' + + const mockGetRfc5987File = jest.fn(() => { + const message = new http.IncomingMessage(new net.Socket()) + message.statusCode = 200 + message.headers['content-type'] = 'text/plain' + // Server sends both: filename with _ fallbacks, filename* with UTF-8 encoding + message.headers['content-disposition'] = + `attachment; filename="${asciiFileName}"; filename*=UTF-8''${encodeURIComponent(expectedFileName)}` + message.push(Buffer.from(rawFileContent, 'utf8')) + message.push(null) + return { + message + } + }) + + const mockHttpClient = (HttpClient as jest.Mock).mockImplementation( + () => { + return { + get: mockGetRfc5987File + } + } + ) + + await streamExtractExternal( + fixtures.blobStorageUrl, + fixtures.workspaceDir + ) + + expect(mockHttpClient).toHaveBeenCalledWith(getUserAgentString()) + const savedFilePath = path.join(fixtures.workspaceDir, expectedFileName) + expect(fs.existsSync(savedFilePath)).toBe(true) + expect(fs.readFileSync(savedFilePath, 'utf8')).toBe(rawFileContent) + }) + + it('should handle zip artifacts with Chinese characters in the artifact name', async () => { + // Simulate Azure Blob Storage URL with rscd containing Chinese filename + const chineseArtifactName = 'probe-土-x' + const asciiArtifactName = 'probe-_-x' + const blobUrlWithChineseName = `https://blob-storage.local/artifact.zip?rscd=${encodeURIComponent(`attachment; filename="${asciiArtifactName}.zip"; filename*=UTF-8''${encodeURIComponent(`${chineseArtifactName}.zip`)}`)}&rsct=application%2Fzip&sig=abc123` + + const mockGetZip = jest.fn(() => { + const message = new http.IncomingMessage(new net.Socket()) + message.statusCode = 200 + message.headers['content-type'] = 'application/zip' + message.headers['content-disposition'] = + `attachment; filename="${asciiArtifactName}.zip"; filename*=UTF-8''${encodeURIComponent(`${chineseArtifactName}.zip`)}` + message.push(fs.readFileSync(fixtures.exampleArtifact.path)) + message.push(null) + return { + message + } + }) + + const mockHttpClient = (HttpClient as jest.Mock).mockImplementation( + () => { + return { + get: mockGetZip + } + } + ) + + await streamExtractExternal(blobUrlWithChineseName, fixtures.workspaceDir) + + expect(mockHttpClient).toHaveBeenCalledWith(getUserAgentString()) + // Zip should be extracted normally regardless of Chinese artifact name + await expectExtractedArchive(fixtures.workspaceDir) + }) + + it.each([ + ['土', '_'], // U+571F - known to cause 400 errors + ['日', '_'], // U+65E5 - reported to work fine + ['中文测试', '____'], // multiple Chinese characters + ['文件-2026年', '__-2026_'], // mixed Chinese and numbers + ['データ', '___'], // Japanese katakana + ['테스트', '___'] // Korean characters + ])( + 'should prefer filename* over filename for non-ASCII character %s (%s)', + async (chars, asciiReplacement) => { + const rawFileContent = `content for ${chars}` + const expectedFileName = `artifact-${chars}.txt` + const asciiFileName = `artifact-${asciiReplacement}.txt` + + const mockGetFile = jest.fn(() => { + const message = new http.IncomingMessage(new net.Socket()) + message.statusCode = 200 + message.headers['content-type'] = 'text/plain' + // Server sends filename with _ replacing non-ASCII, filename* with proper encoding + message.headers['content-disposition'] = + `attachment; filename="${asciiFileName}"; filename*=UTF-8''${encodeURIComponent(expectedFileName)}` + message.push(Buffer.from(rawFileContent, 'utf8')) + message.push(null) + return { + message + } + }) + + const mockHttpClient = (HttpClient as jest.Mock).mockImplementation( + () => { + return { + get: mockGetFile + } + } + ) + + await streamExtractExternal( + fixtures.blobStorageUrl, + fixtures.workspaceDir + ) + + expect(mockHttpClient).toHaveBeenCalledWith(getUserAgentString()) + const savedFilePath = path.join(fixtures.workspaceDir, expectedFileName) + expect(fs.existsSync(savedFilePath)).toBe(true) + expect(fs.readFileSync(savedFilePath, 'utf8')).toBe(rawFileContent) + } + ) }) }) diff --git a/packages/artifact/package.json b/packages/artifact/package.json index 9197d1d9..3ee0d8ab 100644 --- a/packages/artifact/package.json +++ b/packages/artifact/package.json @@ -1,6 +1,6 @@ { "name": "@actions/artifact", - "version": "6.2.0", + "version": "6.2.1", "preview": true, "description": "Actions artifact lib", "keywords": [ diff --git a/packages/artifact/src/internal/download/download-artifact.ts b/packages/artifact/src/internal/download/download-artifact.ts index 81c52e9a..9e853470 100644 --- a/packages/artifact/src/internal/download/download-artifact.ts +++ b/packages/artifact/src/internal/download/download-artifact.ts @@ -93,16 +93,22 @@ export async function streamExtractExternal( urlEndsWithZip // Extract filename from Content-Disposition header + // Prefer filename* (RFC 5987) which supports UTF-8 encoded filenames, + // fall back to filename which may contain ASCII-only replacements const contentDisposition = response.message.headers['content-disposition'] || '' let fileName = 'artifact' - const filenameMatch = contentDisposition.match( - /filename\*?=['"]?(?:UTF-\d['"]*)?([^;\r\n"']*)['"]?/i + const filenameStar = contentDisposition.match( + /filename\*\s*=\s*UTF-8''([^;\r\n]*)/i ) - if (filenameMatch && filenameMatch[1]) { + const filenamePlain = contentDisposition.match( + /(?