This commit is contained in:
Sean Goedecke
2025-08-06 00:54:19 +00:00
4 changed files with 281 additions and 9 deletions
+230
View File
@@ -95,6 +95,49 @@ describe('inference.ts', () => {
expect(result).toBeNull()
expect(core.info).toHaveBeenCalledWith('Model response: No response content')
})
it('includes response format when specified', async () => {
const requestWithResponseFormat = {
...mockRequest,
responseFormat: {
type: 'json_schema' as const,
json_schema: {type: 'object'},
},
}
const mockResponse = {
choices: [
{
message: {
content: '{"result": "success"}',
},
},
],
}
mockCreate.mockResolvedValue(mockResponse)
const result = await simpleInference(requestWithResponseFormat)
expect(result).toBe('{"result": "success"}')
// Verify response format was included in the request
expect(mockCreate).toHaveBeenCalledWith({
messages: [
{
role: 'system',
content: 'You are a test assistant',
},
{
role: 'user',
content: 'Hello, AI!',
},
],
max_tokens: 100,
model: 'gpt-4',
response_format: requestWithResponseFormat.responseFormat,
})
})
})
describe('mcpInference', () => {
@@ -140,6 +183,7 @@ describe('inference.ts', () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const callArgs = mockCreate.mock.calls[0][0] as any
expect(callArgs.tools).toEqual(mockMcpClient.tools)
expect(callArgs.response_format).toBeUndefined()
expect(callArgs.model).toBe('gpt-4')
expect(callArgs.max_tokens).toBe(100)
})
@@ -315,5 +359,191 @@ describe('inference.ts', () => {
expect(result).toBe('Second message')
})
it('makes additional loop with response format when no tool calls are made', async () => {
const requestWithResponseFormat = {
...mockRequest,
responseFormat: {
type: 'json_schema' as const,
json_schema: {type: 'object'},
},
}
// First response without tool calls
const firstResponse = {
choices: [
{
message: {
content: 'First response',
tool_calls: null,
},
},
],
}
// Second response with response format applied
const secondResponse = {
choices: [
{
message: {
content: '{"result": "formatted response"}',
tool_calls: null,
},
},
],
}
mockCreate.mockResolvedValueOnce(firstResponse).mockResolvedValueOnce(secondResponse)
const result = await mcpInference(requestWithResponseFormat, mockMcpClient)
expect(result).toBe('{"result": "formatted response"}')
expect(mockCreate).toHaveBeenCalledTimes(2)
expect(core.info).toHaveBeenCalledWith('Making one more MCP loop with the requested response format...')
// First call should have tools but no response format
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const firstCall = mockCreate.mock.calls[0][0] as any
expect(firstCall.tools).toEqual(mockMcpClient.tools)
expect(firstCall.response_format).toBeUndefined()
// Second call should have response format but no tools
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const secondCall = mockCreate.mock.calls[1][0] as any
expect(secondCall.tools).toBeUndefined()
expect(secondCall.response_format).toEqual(requestWithResponseFormat.responseFormat)
// Second call should include the user message requesting JSON format
expect(secondCall.messages).toHaveLength(5) // system, user, assistant, user, assistant
expect(secondCall.messages[3].role).toBe('user')
expect(secondCall.messages[3].content).toContain('Please provide your response in the exact')
})
it('uses response format only on final iteration after tool calls', async () => {
const requestWithResponseFormat = {
...mockRequest,
responseFormat: {
type: 'json_schema' as const,
json_schema: {type: 'object'},
},
}
const toolCalls = [
{
id: 'call-123',
function: {
name: 'test-tool',
arguments: '{"param": "value"}',
},
},
]
const toolResults = [
{
tool_call_id: 'call-123',
role: 'tool',
name: 'test-tool',
content: 'Tool result',
},
]
// First response with tool calls
const firstResponse = {
choices: [
{
message: {
content: 'Using tool',
tool_calls: toolCalls,
},
},
],
}
// Second response without tool calls, but should trigger final message loop
const secondResponse = {
choices: [
{
message: {
content: 'Intermediate result',
tool_calls: null,
},
},
],
}
// Third response with response format
const thirdResponse = {
choices: [
{
message: {
content: '{"final": "result"}',
tool_calls: null,
},
},
],
}
mockCreate
.mockResolvedValueOnce(firstResponse)
.mockResolvedValueOnce(secondResponse)
.mockResolvedValueOnce(thirdResponse)
mockExecuteToolCalls.mockResolvedValue(toolResults)
const result = await mcpInference(requestWithResponseFormat, mockMcpClient)
expect(result).toBe('{"final": "result"}')
expect(mockCreate).toHaveBeenCalledTimes(3)
// First call: tools but no response format
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const firstCall = mockCreate.mock.calls[0][0] as any
expect(firstCall.tools).toEqual(mockMcpClient.tools)
expect(firstCall.response_format).toBeUndefined()
// Second call: tools but no response format (after tool execution)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const secondCall = mockCreate.mock.calls[1][0] as any
expect(secondCall.tools).toEqual(mockMcpClient.tools)
expect(secondCall.response_format).toBeUndefined()
// Third call: response format but no tools (final message)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const thirdCall = mockCreate.mock.calls[2][0] as any
expect(thirdCall.tools).toBeUndefined()
expect(thirdCall.response_format).toEqual(requestWithResponseFormat.responseFormat)
})
it('returns immediately when response format is set and finalMessage is already true', async () => {
const requestWithResponseFormat = {
...mockRequest,
responseFormat: {
type: 'json_schema' as const,
json_schema: {type: 'object'},
},
}
// Response without tool calls on what would be the final message iteration
const mockResponse = {
choices: [
{
message: {
content: '{"immediate": "result"}',
tool_calls: null,
},
},
],
}
mockCreate.mockResolvedValue(mockResponse)
// We need to test a scenario where finalMessage would already be true
// This happens when we're already in the final iteration
const result = await mcpInference(requestWithResponseFormat, mockMcpClient)
// The function should make two calls: one normal, then one with response format
expect(mockCreate).toHaveBeenCalledTimes(2)
expect(result).toBe('{"immediate": "result"}')
})
})
})
Generated Vendored
+24 -4
View File
@@ -48678,6 +48678,9 @@ async function mcpInference(request, githubMcpClient) {
const messages = [...request.messages];
let iterationCount = 0;
const maxIterations = 5; // Prevent infinite loops
// We want to use response_format (e.g. JSON) on the last iteration only, so the model can output
// the final result in the expected format without interfering with tool calls
let finalMessage = false;
while (iterationCount < maxIterations) {
iterationCount++;
coreExports.info(`MCP inference iteration ${iterationCount}`);
@@ -48685,13 +48688,15 @@ async function mcpInference(request, githubMcpClient) {
messages: messages,
max_tokens: request.maxTokens,
model: request.modelName,
tools: githubMcpClient.tools,
};
// Add response format if specified (only on first iteration to avoid conflicts)
if (iterationCount === 1 && request.responseFormat) {
// Add response format if specified (only on final iteration to avoid conflicts with tool calls)
if (finalMessage && request.responseFormat) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
chatCompletionRequest.response_format = request.responseFormat;
}
else {
chatCompletionRequest.tools = githubMcpClient.tools;
}
try {
const response = await client.chat.completions.create(chatCompletionRequest);
if (!('choices' in response)) {
@@ -48708,7 +48713,22 @@ async function mcpInference(request, githubMcpClient) {
});
if (!toolCalls || toolCalls.length === 0) {
coreExports.info('No tool calls requested, ending GitHub MCP inference loop');
return modelResponse || null;
// If we have a response format set and we haven't explicitly run one final message iteration,
// do another loop with the response format set
if (request.responseFormat && !finalMessage) {
coreExports.info('Making one more MCP loop with the requested response format...');
// Add a user message requesting JSON format and try again
messages.push({
role: 'user',
content: `Please provide your response in the exact ${request.responseFormat.type} format specified.`,
});
finalMessage = true;
// Continue the loop to get a properly formatted response
continue;
}
else {
return modelResponse || null;
}
}
coreExports.info(`Model requested ${toolCalls.length} tool calls`);
// Execute all tool calls via GitHub MCP
Generated Vendored
+1 -1
View File
File diff suppressed because one or more lines are too long
+26 -4
View File
@@ -89,6 +89,9 @@ export async function mcpInference(
let iterationCount = 0
const maxIterations = 5 // Prevent infinite loops
// We want to use response_format (e.g. JSON) on the last iteration only, so the model can output
// the final result in the expected format without interfering with tool calls
let finalMessage = false
while (iterationCount < maxIterations) {
iterationCount++
@@ -98,13 +101,14 @@ export async function mcpInference(
messages: messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[],
max_tokens: request.maxTokens,
model: request.modelName,
tools: githubMcpClient.tools as OpenAI.Chat.Completions.ChatCompletionTool[],
}
// Add response format if specified (only on first iteration to avoid conflicts)
if (iterationCount === 1 && request.responseFormat) {
// Add response format if specified (only on final iteration to avoid conflicts with tool calls)
if (finalMessage && request.responseFormat) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
chatCompletionRequest.response_format = request.responseFormat as any
} else {
chatCompletionRequest.tools = githubMcpClient.tools as OpenAI.Chat.Completions.ChatCompletionTool[]
}
try {
@@ -128,7 +132,25 @@ export async function mcpInference(
if (!toolCalls || toolCalls.length === 0) {
core.info('No tool calls requested, ending GitHub MCP inference loop')
return modelResponse || null
// If we have a response format set and we haven't explicitly run one final message iteration,
// do another loop with the response format set
if (request.responseFormat && !finalMessage) {
core.info('Making one more MCP loop with the requested response format...')
// Add a user message requesting JSON format and try again
messages.push({
role: 'user',
content: `Please provide your response in the exact ${request.responseFormat.type} format specified.`,
})
finalMessage = true
// Continue the loop to get a properly formatted response
continue
} else {
return modelResponse || null
}
}
core.info(`Model requested ${toolCalls.length} tool calls`)