Merge pull request #88 from actions/sgoedecke/force-exit-once-inference-finishes

Force exit once inference finishes
package
2025-08-06 11:01:14 +10:00 · 2025-08-06 00:54:19 +00:00 · 2025-08-06 10:41:02 +10:00 · 2025-08-05 22:21:28 +00:00 · 2025-08-05 22:06:49 +00:00 · 2025-08-05 21:42:07 +00:00
7 changed files with 312 additions and 15 deletions
@@ -95,6 +95,49 @@ describe('inference.ts', () => {
      expect(result).toBeNull()
      expect(core.info).toHaveBeenCalledWith('Model response: No response content')
    })
+
+    it('includes response format when specified', async () => {
+      const requestWithResponseFormat = {
+        ...mockRequest,
+        responseFormat: {
+          type: 'json_schema' as const,
+          json_schema: {type: 'object'},
+        },
+      }
+
+      const mockResponse = {
+        choices: [
+          {
+            message: {
+              content: '{"result": "success"}',
+            },
+          },
+        ],
+      }
+
+      mockCreate.mockResolvedValue(mockResponse)
+
+      const result = await simpleInference(requestWithResponseFormat)
+
+      expect(result).toBe('{"result": "success"}')
+
+      // Verify response format was included in the request
+      expect(mockCreate).toHaveBeenCalledWith({
+        messages: [
+          {
+            role: 'system',
+            content: 'You are a test assistant',
+          },
+          {
+            role: 'user',
+            content: 'Hello, AI!',
+          },
+        ],
+        max_tokens: 100,
+        model: 'gpt-4',
+        response_format: requestWithResponseFormat.responseFormat,
+      })
+    })
  })

  describe('mcpInference', () => {
@@ -140,6 +183,7 @@ describe('inference.ts', () => {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const callArgs = mockCreate.mock.calls[0][0] as any
      expect(callArgs.tools).toEqual(mockMcpClient.tools)
+      expect(callArgs.response_format).toBeUndefined()
      expect(callArgs.model).toBe('gpt-4')
      expect(callArgs.max_tokens).toBe(100)
    })
@@ -315,5 +359,191 @@ describe('inference.ts', () => {

      expect(result).toBe('Second message')
    })
+
+    it('makes additional loop with response format when no tool calls are made', async () => {
+      const requestWithResponseFormat = {
+        ...mockRequest,
+        responseFormat: {
+          type: 'json_schema' as const,
+          json_schema: {type: 'object'},
+        },
+      }
+
+      // First response without tool calls
+      const firstResponse = {
+        choices: [
+          {
+            message: {
+              content: 'First response',
+              tool_calls: null,
+            },
+          },
+        ],
+      }
+
+      // Second response with response format applied
+      const secondResponse = {
+        choices: [
+          {
+            message: {
+              content: '{"result": "formatted response"}',
+              tool_calls: null,
+            },
+          },
+        ],
+      }
+
+      mockCreate.mockResolvedValueOnce(firstResponse).mockResolvedValueOnce(secondResponse)
+
+      const result = await mcpInference(requestWithResponseFormat, mockMcpClient)
+
+      expect(result).toBe('{"result": "formatted response"}')
+      expect(mockCreate).toHaveBeenCalledTimes(2)
+      expect(core.info).toHaveBeenCalledWith('Making one more MCP loop with the requested response format...')
+
+      // First call should have tools but no response format
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const firstCall = mockCreate.mock.calls[0][0] as any
+      expect(firstCall.tools).toEqual(mockMcpClient.tools)
+      expect(firstCall.response_format).toBeUndefined()
+
+      // Second call should have response format but no tools
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const secondCall = mockCreate.mock.calls[1][0] as any
+      expect(secondCall.tools).toBeUndefined()
+      expect(secondCall.response_format).toEqual(requestWithResponseFormat.responseFormat)
+
+      // Second call should include the user message requesting JSON format
+      expect(secondCall.messages).toHaveLength(5) // system, user, assistant, user, assistant. NOTE(review): presumably the mock keeps a live reference to the messages array, so the last assistant reply (added after this call) is counted - confirm
+      expect(secondCall.messages[3].role).toBe('user')
+      expect(secondCall.messages[3].content).toContain('Please provide your response in the exact')
+    })
+
+    it('uses response format only on final iteration after tool calls', async () => {
+      const requestWithResponseFormat = {
+        ...mockRequest,
+        responseFormat: {
+          type: 'json_schema' as const,
+          json_schema: {type: 'object'},
+        },
+      }
+
+      const toolCalls = [
+        {
+          id: 'call-123',
+          function: {
+            name: 'test-tool',
+            arguments: '{"param": "value"}',
+          },
+        },
+      ]
+
+      const toolResults = [
+        {
+          tool_call_id: 'call-123',
+          role: 'tool',
+          name: 'test-tool',
+          content: 'Tool result',
+        },
+      ]
+
+      // First response with tool calls
+      const firstResponse = {
+        choices: [
+          {
+            message: {
+              content: 'Using tool',
+              tool_calls: toolCalls,
+            },
+          },
+        ],
+      }
+
+      // Second response without tool calls, but should trigger final message loop
+      const secondResponse = {
+        choices: [
+          {
+            message: {
+              content: 'Intermediate result',
+              tool_calls: null,
+            },
+          },
+        ],
+      }
+
+      // Third response with response format
+      const thirdResponse = {
+        choices: [
+          {
+            message: {
+              content: '{"final": "result"}',
+              tool_calls: null,
+            },
+          },
+        ],
+      }
+
+      mockCreate
+        .mockResolvedValueOnce(firstResponse)
+        .mockResolvedValueOnce(secondResponse)
+        .mockResolvedValueOnce(thirdResponse)
+
+      mockExecuteToolCalls.mockResolvedValue(toolResults)
+
+      const result = await mcpInference(requestWithResponseFormat, mockMcpClient)
+
+      expect(result).toBe('{"final": "result"}')
+      expect(mockCreate).toHaveBeenCalledTimes(3)
+
+      // First call: tools but no response format
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const firstCall = mockCreate.mock.calls[0][0] as any
+      expect(firstCall.tools).toEqual(mockMcpClient.tools)
+      expect(firstCall.response_format).toBeUndefined()
+
+      // Second call: tools but no response format (after tool execution)
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const secondCall = mockCreate.mock.calls[1][0] as any
+      expect(secondCall.tools).toEqual(mockMcpClient.tools)
+      expect(secondCall.response_format).toBeUndefined()
+
+      // Third call: response format but no tools (final message)
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const thirdCall = mockCreate.mock.calls[2][0] as any
+      expect(thirdCall.tools).toBeUndefined()
+      expect(thirdCall.response_format).toEqual(requestWithResponseFormat.responseFormat)
+    })
+
+    it('returns after a single response-format iteration instead of looping again', async () => {
+      const requestWithResponseFormat = {
+        ...mockRequest,
+        responseFormat: {
+          type: 'json_schema' as const,
+          json_schema: {type: 'object'},
+        },
+      }
+
+      // Every call resolves to this same response, which never requests tool calls
+      const mockResponse = {
+        choices: [
+          {
+            message: {
+              content: '{"immediate": "result"}',
+              tool_calls: null,
+            },
+          },
+        ],
+      }
+
+      mockCreate.mockResolvedValue(mockResponse)
+
+      // finalMessage is local to mcpInference and cannot be preset here: the first call flips it to
+      // true, and the formatted second call must then return rather than schedule yet another loop
+      const result = await mcpInference(requestWithResponseFormat, mockMcpClient)
+
+      // Exactly two calls: one normal, then one with response format (a third would mean a runaway loop)
+      expect(mockCreate).toHaveBeenCalledTimes(2)
+      expect(result).toBe('{"immediate": "result"}')
+    })
  })
 })
@@ -34,6 +34,12 @@ vi.mock('../src/mcp.js', () => ({

 vi.mock('@actions/core', () => core)

+// Mock process.exit to prevent it from actually exiting during tests
+const mockProcessExit = vi.spyOn(process, 'exit').mockImplementation(() => {
+  // Prevent actual exit, but don't throw - just return
+  return undefined as never
+})
+
 // The module being tested should be imported dynamically. This ensures that the
 // mocks are used in place of any actual dependencies.
 const {run} = await import('../src/main.js')
@@ -41,6 +47,7 @@ const {run} = await import('../src/main.js')
 describe('main.ts - prompt.yml integration', () => {
  beforeEach(() => {
    vi.clearAllMocks()
+    mockProcessExit.mockClear()

    // Mock environment variables
    process.env['GITHUB_TOKEN'] = 'test-token'
@@ -103,8 +110,12 @@ model: openai/gpt-4o
      }
    })

+    // Expect the run function to complete successfully
    await run()

+    // Verify process.exit was called with code 0 (success)
+    expect(mockProcessExit).toHaveBeenCalledWith(0)
+
    // Verify simpleInference was called with the correct message structure
    expect(mockSimpleInference).toHaveBeenCalledWith(
      expect.objectContaining({
@@ -171,6 +182,9 @@ model: openai/gpt-4o
        messages: [{role: 'user', content: 'Here is the data: FILE_CONTENTS'}],
      }),
    )
+
+    // Verify process.exit was called with code 0 (success)
+    expect(mockProcessExit).toHaveBeenCalledWith(0)
  })

  it('should fall back to legacy format when not using prompt YAML', async () => {
@@ -215,5 +229,8 @@ model: openai/gpt-4o
        token: 'test-token',
      }),
    )
+
+    // Verify process.exit was called with code 0 (success)
+    expect(mockProcessExit).toHaveBeenCalledWith(0)
  })
 })
@@ -96,7 +96,8 @@ vi.mock('@actions/core', () => core)

 // Mock process.exit to prevent it from actually exiting during tests
 const mockProcessExit = vi.spyOn(process, 'exit').mockImplementation(() => {
-  throw new Error('process.exit called')
+  // Prevent actual exit, but don't throw - just return
+  return undefined as never
 })

 // The module being tested should be imported dynamically. This ensures that the
@@ -127,6 +128,7 @@ describe('main.ts', () => {

    expect(core.setOutput).toHaveBeenCalled()
    verifyStandardResponse()
+    expect(mockProcessExit).toHaveBeenCalledWith(0)
  })

  it('Sets a failed status when no prompt is set', async () => {
@@ -135,8 +137,7 @@ describe('main.ts', () => {
      'prompt-file': '',
    })

-    // Expect the run function to throw due to process.exit being mocked
-    await expect(run()).rejects.toThrow('process.exit called')
+    await run()

    expect(core.setFailed).toHaveBeenCalledWith('Neither prompt-file nor prompt was set')
    expect(mockProcessExit).toHaveBeenCalledWith(1)
@@ -165,6 +166,7 @@ describe('main.ts', () => {
    expect(mockConnectToGitHubMCP).not.toHaveBeenCalled()
    expect(mockMcpInference).not.toHaveBeenCalled()
    verifyStandardResponse()
+    expect(mockProcessExit).toHaveBeenCalledWith(0)
  })

  it('uses MCP inference when enabled and connection succeeds', async () => {
@@ -197,6 +199,7 @@ describe('main.ts', () => {
    )
    expect(mockSimpleInference).not.toHaveBeenCalled()
    verifyStandardResponse()
+    expect(mockProcessExit).toHaveBeenCalledWith(0)
  })

  it('falls back to simple inference when MCP connection fails', async () => {
@@ -215,6 +218,7 @@ describe('main.ts', () => {
    expect(mockMcpInference).not.toHaveBeenCalled()
    expect(core.warning).toHaveBeenCalledWith('MCP connection failed, falling back to simple inference')
    verifyStandardResponse()
+    expect(mockProcessExit).toHaveBeenCalledWith(0)
  })

  it('properly integrates with loadContentFromFileOrInput', async () => {
@@ -248,6 +252,7 @@ describe('main.ts', () => {
      responseFormat: undefined,
    })
    verifyStandardResponse()
+    expect(mockProcessExit).toHaveBeenCalledWith(0)
  })

  it('handles non-existent prompt-file with an error', async () => {
@@ -259,8 +264,7 @@ describe('main.ts', () => {
      'prompt-file': promptFile,
    })

-    // Expect the run function to throw due to process.exit being mocked
-    await expect(run()).rejects.toThrow('process.exit called')
+    await run()

    expect(core.setFailed).toHaveBeenCalledWith(`File for prompt-file was not found: ${promptFile}`)
    expect(mockProcessExit).toHaveBeenCalledWith(1)
@@ -48678,6 +48678,9 @@ async function mcpInference(request, githubMcpClient) {
    const messages = [...request.messages];
    let iterationCount = 0;
    const maxIterations = 5; // Prevent infinite loops
+    // We want to use response_format (e.g. JSON) on the last iteration only, so the model can output
+    // the final result in the expected format without interfering with tool calls
+    let finalMessage = false;
    while (iterationCount < maxIterations) {
        iterationCount++;
        coreExports.info(`MCP inference iteration ${iterationCount}`);
@@ -48685,13 +48688,15 @@ async function mcpInference(request, githubMcpClient) {
            messages: messages,
            max_tokens: request.maxTokens,
            model: request.modelName,
-            tools: githubMcpClient.tools,
        };
-        // Add response format if specified (only on first iteration to avoid conflicts)
-        if (iterationCount === 1 && request.responseFormat) {
+        // Add response format if specified (only on final iteration to avoid conflicts with tool calls)
+        if (finalMessage && request.responseFormat) {
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            chatCompletionRequest.response_format = request.responseFormat;
        }
+        else {
+            chatCompletionRequest.tools = githubMcpClient.tools;
+        }
        try {
            const response = await client.chat.completions.create(chatCompletionRequest);
            if (!('choices' in response)) {
@@ -48708,7 +48713,22 @@ async function mcpInference(request, githubMcpClient) {
            });
            if (!toolCalls || toolCalls.length === 0) {
                coreExports.info('No tool calls requested, ending GitHub MCP inference loop');
-                return modelResponse || null;
+                // If we have a response format set and we haven't explicitly run one final message iteration,
+                // do another loop with the response format set
+                if (request.responseFormat && !finalMessage) {
+                    coreExports.info('Making one more MCP loop with the requested response format...');
+                    // Add a user message requesting JSON format and try again
+                    messages.push({
+                        role: 'user',
+                        content: `Please provide your response in the exact ${request.responseFormat.type} format specified.`,
+                    });
+                    finalMessage = true;
+                    // Continue the loop to get a properly formatted response
+                    continue;
+                }
+                else {
+                    return modelResponse || null;
+                }
            }
            coreExports.info(`Model requested ${toolCalls.length} tool calls`);
            // Execute all tool calls via GitHub MCP
@@ -51790,6 +51810,8 @@ async function run() {
        // Force exit to prevent hanging on open connections
        process.exit(1);
    }
+    // Force exit to prevent hanging on open connections
+    process.exit(0);
 }
 function tempDir() {
    const tempDirectory = process.env['RUNNER_TEMP'] || require$$0.tmpdir();
@@ -89,6 +89,9 @@ export async function mcpInference(

  let iterationCount = 0
  const maxIterations = 5 // Prevent infinite loops
+  // We want to use response_format (e.g. JSON) on the last iteration only, so the model can output
+  // the final result in the expected format without interfering with tool calls
+  let finalMessage = false

  while (iterationCount < maxIterations) {
    iterationCount++
@@ -98,13 +101,14 @@ export async function mcpInference(
      messages: messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[],
      max_tokens: request.maxTokens,
      model: request.modelName,
-      tools: githubMcpClient.tools as OpenAI.Chat.Completions.ChatCompletionTool[],
    }

-    // Add response format if specified (only on first iteration to avoid conflicts)
-    if (iterationCount === 1 && request.responseFormat) {
+    // Add response format if specified (only on final iteration to avoid conflicts with tool calls)
+    if (finalMessage && request.responseFormat) {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      chatCompletionRequest.response_format = request.responseFormat as any
+    } else {
+      chatCompletionRequest.tools = githubMcpClient.tools as OpenAI.Chat.Completions.ChatCompletionTool[]
    }

    try {
@@ -128,7 +132,25 @@ export async function mcpInference(

      if (!toolCalls || toolCalls.length === 0) {
        core.info('No tool calls requested, ending GitHub MCP inference loop')
-        return modelResponse || null
+
+        // If we have a response format set and we haven't explicitly run one final message iteration,
+        // do another loop with the response format set
+        if (request.responseFormat && !finalMessage) {
+          core.info('Making one more MCP loop with the requested response format...')
+
+          // Add a user message requesting JSON format and try again
+          messages.push({
+            role: 'user',
+            content: `Please provide your response in the exact ${request.responseFormat.type} format specified.`,
+          })
+
+          finalMessage = true
+
+          // Continue the loop to get a properly formatted response
+          continue
+        } else {
+          return modelResponse || null
+        }
      }

      core.info(`Model requested ${toolCalls.length} tool calls`)
@@ -105,10 +105,12 @@ export async function run(): Promise<void> {
    } else {
      core.setFailed(`An unexpected error occurred: ${JSON.stringify(error, null, 2)}`)
    }
-
    // Force exit to prevent hanging on open connections
    process.exit(1)
  }
+
+  // Force exit to prevent hanging on open connections
+  process.exit(0)
 }

 function tempDir(): string {
Author	SHA1	Message	Date
Sean Goedecke	b81b2afb83	Merge pull request #88 from actions/sgoedecke/force-exit-once-inference-finishes Force exit once inference finishes	2025-08-06 11:01:14 +10:00
Sean Goedecke	9133f81330	package	2025-08-06 00:54:19 +00:00
Sean Goedecke	7923b92ef8	Merge pull request #89 from actions/sgoedecke/ensure-mcp-loops-output-desired-response-format Ensure MCP loops output the right response format	2025-08-06 10:41:02 +10:00
Sean Goedecke	e44da102bf	fixup format parsing	2025-08-05 22:21:28 +00:00
Sean Goedecke	866ae2b5d7	Ensure MCP loops output the right response format In a tool loop, you can't set response_format because the model needs to be able to think in plain English. But you still need the final response to be in the desired format, so we add response_format only on the last iteration.	2025-08-05 22:06:49 +00:00
Sean Goedecke	4685e0dcd4	Force exit once inference finishes in case we are holding any connections open	2025-08-05 21:42:07 +00:00