Upload folder using huggingface_hub
- README.md +2 -2
- package.json +0 -2
- pnpm-lock.yaml +0 -26
- src/routes/landingPageHtml.ts +3 -3
- src/routes/responses.ts +69 -67
README.md
CHANGED
@@ -112,7 +112,7 @@ Experience the API through our interactive web interface, adapted from the [open
 ```bash
 # Create demo/.env
 cat > demo/.env << EOF
-MODEL="
+MODEL="CohereLabs/c4ai-command-a-03-2025"
 OPENAI_BASE_URL=http://localhost:3000/v1
 OPENAI_API_KEY=${HF_TOKEN:-<your-huggingface-token>}
 EOF

@@ -181,7 +181,7 @@ responses.js/
 - [x] Multi-turn conversation fixes for text messages + tool calls
 - [x] Correctly return "usage" field
 - [x] MCP support (non-streaming)
-- [ ] MCP support (streaming)
+- [x] MCP support (streaming)
 - [ ] Tools execution (web search, file search, image generation, code interpreter)
 - [ ] Background mode support
 - [ ] Additional API routes (GET, DELETE, CANCEL, LIST responses)
package.json
CHANGED
@@ -58,8 +58,6 @@
   "author": "Hugging Face",
   "license": "MIT",
   "dependencies": {
-    "@huggingface/inference": "^4.3.1",
-    "@huggingface/tasks": "^0.19.22",
     "@modelcontextprotocol/sdk": "^1.15.0",
     "express": "^4.21.2",
     "openai": "^5.8.2",
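The commit drops `@huggingface/inference` and `@huggingface/tasks` in favor of the already-present `openai` SDK. A minimal sketch of what the swap looks like in calling code, assuming an `HF_TOKEN` in the environment; the model name is illustrative, not part of this commit:

```ts
// Streaming chat completions through the `openai` client pointed at the
// Hugging Face router, replacing the removed `@huggingface/inference` client.
import { OpenAI } from "openai";

const client = new OpenAI({
  baseURL: "https://router.huggingface.co/v1",
  apiKey: process.env.HF_TOKEN, // assumes a Hugging Face token in the environment
});

async function main() {
  const stream = await client.chat.completions.create({
    model: "CohereLabs/c4ai-command-a-03-2025", // any router-served model
    messages: [{ role: "user", content: "Say hello in one sentence." }],
    stream: true,
  });
  for await (const chunk of stream) {
    // Each chunk carries an incremental delta of the assistant message.
    process.stdout.write(chunk.choices[0]?.delta?.content ?? "");
  }
}

main();
```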
pnpm-lock.yaml
CHANGED
@@ -8,12 +8,6 @@ importers:
 
   .:
     dependencies:
-      '@huggingface/inference':
-        specifier: ^4.3.1
-        version: 4.3.1
-      '@huggingface/tasks':
-        specifier: ^0.19.22
-        version: 0.19.22
       '@modelcontextprotocol/sdk':
         specifier: ^1.15.0
         version: 1.15.0

@@ -258,17 +252,6 @@ packages:
     resolution: {integrity: sha512-1+WqvgNMhmlAambTvT3KPtCl/Ibr68VldY2XY40SL1CE0ZXiakFR/cbTspaF5HsnpDMvcYYoJHfl4980NBjGag==}
     engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
 
-  '@huggingface/inference@4.3.1':
-    resolution: {integrity: sha512-wn5ErcX+HTeAYfNIkgjl6pkzGvTeskKRoCFodSmEfa+SmZnMo0/YDP46Ivnz2JV6DJwMd3naOtgYH6WZVD3qoQ==}
-    engines: {node: '>=18'}
-
-  '@huggingface/jinja@0.5.0':
-    resolution: {integrity: sha512-Ptc03/jGRiYRoi0bUYKZ14MkDslsBRT24oxmsvUlfYrvQMldrxCevhPnT+hfX8awKTT8/f/0ZBBWldoeAcMHdQ==}
-    engines: {node: '>=18'}
-
-  '@huggingface/tasks@0.19.22':
-    resolution: {integrity: sha512-jtRXsJZTES01X4gJ5VOUnEm3ONyyfXUcWKObbWkr/SQmjaH/kxtWqc2zVWKaxL4QLoXqXJ+T+Pi5xupMStSudQ==}
-
   '@humanfs/core@0.19.1':
     resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
     engines: {node: '>=18.18.0'}

@@ -1833,15 +1816,6 @@ snapshots:
       '@eslint/core': 0.15.1
      levn: 0.4.1
 
-  '@huggingface/inference@4.3.1':
-    dependencies:
-      '@huggingface/jinja': 0.5.0
-      '@huggingface/tasks': 0.19.22
-
-  '@huggingface/jinja@0.5.0': {}
-
-  '@huggingface/tasks@0.19.22': {}
-
   '@humanfs/core@0.19.1': {}
 
   '@humanfs/node@0.16.6':
src/routes/landingPageHtml.ts
CHANGED
@@ -621,7 +621,7 @@ tools = [
 ]
 
 response = client.responses.create(
-    model="
+    model="meta-llama/Llama-3.3-70B-Instruct:cerebras",
     tools=tools,
     input="What is the weather like in Boston today?",
     tool_choice="auto",

@@ -645,7 +645,7 @@ class CalendarEvent(BaseModel):
     participants: list[str]
 
 response = client.responses.parse(
-    model="
+    model="meta-llama/Meta-Llama-3-70B-Instruct:novita",
     input=[
         {"role": "system", "content": "Extract the event information."},
         {

@@ -668,7 +668,7 @@ client = OpenAI(
 )
 
 response = client.responses.create(
-    model="
+    model="meta-llama/Llama-3.3-70B-Instruct:cerebras",
     input="how does tiktoken work?",
     tools=[
        {
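The updated landing-page examples pin an inference provider with a `:provider` suffix on the model id (`:cerebras`, `:novita`), matching the removal of the manual `provider@model` split from `responses.ts` below: the router now resolves the suffix itself. A hedged TypeScript sketch of the same convention; the model/provider pair is illustrative:

```ts
// "owner/model:provider" strings are resolved by the Hugging Face router,
// so callers no longer split provider and model manually.
import { OpenAI } from "openai";

const client = new OpenAI({
  baseURL: "https://router.huggingface.co/v1",
  apiKey: process.env.HF_TOKEN, // assumed Hugging Face token
});

async function main() {
  const response = await client.chat.completions.create({
    // ":cerebras" pins the provider; omit the suffix to let the router choose.
    model: "meta-llama/Llama-3.3-70B-Instruct:cerebras",
    messages: [{ role: "user", content: "What is the weather like in Boston today?" }],
  });
  console.log(response.choices[0].message.content);
}

main();
```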
src/routes/responses.ts
CHANGED
@@ -2,13 +2,7 @@ import { type Response as ExpressResponse } from "express";
 import { type ValidatedRequest } from "../middleware/validation.js";
 import type { CreateResponseParams, McpServerParams, McpApprovalRequestParams } from "../schemas.js";
 import { generateUniqueId } from "../lib/generateUniqueId.js";
-import {
-import type {
-	ChatCompletionInputMessage,
-	ChatCompletionInputMessageChunkType,
-	ChatCompletionInput,
-} from "@huggingface/tasks";
-
+import { OpenAI } from "openai";
 import type {
 	Response,
 	ResponseStreamEvent,

@@ -18,9 +12,11 @@ import type {
 	ResponseOutputItem,
 } from "openai/resources/responses/responses";
 import type {
-
-
-
+	ChatCompletionCreateParamsStreaming,
+	ChatCompletionMessageParam,
+	ChatCompletionTool,
+} from "openai/resources/chat/completions.js";
+import type { FunctionParameters } from "openai/resources/shared.js";
 import { callMcpTool, connectMcpServer } from "../mcp.js";
 
 class StreamingError extends Error {

@@ -163,7 +159,7 @@ async function* innerRunStream(
 	}
 
 	// List MCP tools from server (if required) + prepare tools for the LLM
-	let tools:
+	let tools: ChatCompletionTool[] | undefined = [];
 	const mcpToolsMapping: Record<string, McpServerParams> = {};
 	if (req.body.tools) {
 		for (const tool of req.body.tools) {

@@ -213,7 +209,7 @@ async function* innerRunStream(
 					type: "function" as const,
 					function: {
 						name: mcpTool.name,
-						parameters: mcpTool.input_schema,
+						parameters: mcpTool.input_schema as FunctionParameters,
 						description: mcpTool.description ?? undefined,
 					},
 				});

@@ -232,12 +228,8 @@ async function* innerRunStream(
 
 	// Prepare payload for the LLM
 
-	// Resolve model and provider
-	const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
-	const provider = req.body.model.includes("@") ? req.body.model.split("@")[0] : undefined;
-
 	// Format input to Chat Completion format
-	const messages:
+	const messages: ChatCompletionMessageParam[] = req.body.instructions
 		? [{ role: "system", content: req.body.instructions }]
 		: [];
 	if (Array.isArray(req.body.input)) {

@@ -247,22 +239,20 @@ async function* innerRunStream(
 				switch (item.type) {
 					case "function_call":
 						return {
-
-							role: "assistant",
-							name: `function_call ${item.name} ${item.call_id}`,
+							role: "tool" as const,
 							content: item.arguments,
+							tool_call_id: item.call_id,
 						};
 					case "function_call_output":
 						return {
-
-							role: "assistant",
-							name: `function_call_output ${item.call_id}`,
+							role: "tool" as const,
 							content: item.output,
+							tool_call_id: item.call_id,
 						};
 					case "message":
-
-
-						content
+					case undefined:
+						if (item.role === "assistant" || item.role === "user" || item.role === "system") {
+							const content =
 								typeof item.content === "string"
 									? item.content
 									: item.content

@@ -270,7 +260,7 @@ async function* innerRunStream(
 									switch (content.type) {
 										case "input_image":
 											return {
-												type: "image_url" as
+												type: "image_url" as const,
 												image_url: {
 													url: content.image_url,
 												},

@@ -278,7 +268,7 @@ async function* innerRunStream(
 										case "output_text":
 											return content.text
 												? {
-														type: "text" as
+														type: "text" as const,
 														text: content.text,
 													}
 												: undefined;

@@ -286,72 +276,80 @@ async function* innerRunStream(
 											return undefined;
 										case "input_text":
 											return {
-												type: "text" as
+												type: "text" as const,
 												text: content.text,
 											};
 									}
 								})
-								.filter((item) =>
-
+								.filter((item) => {
+									return item !== undefined;
+								});
+							return {
+								role: item.role,
+								content,
+							} as ChatCompletionMessageParam;
+						}
+						return undefined;
 					case "mcp_list_tools": {
-						// Hacky: will be dropped by filter since tools are passed as separate objects
 						return {
-							role: "
-
-
+							role: "tool" as const,
+							content: "MCP list tools. Server: '${item.server_label}'.",
+							tool_call_id: "mcp_list_tools",
 						};
 					}
 					case "mcp_call": {
 						return {
-							role: "
-							name: "mcp_call",
+							role: "tool" as const,
 							content: `MCP call (${item.id}). Server: '${item.server_label}'. Tool: '${item.name}'. Arguments: '${item.arguments}'.`,
+							tool_call_id: "mcp_call",
 						};
 					}
 					case "mcp_approval_request": {
 						return {
-							role: "
-							name: "mcp_approval_request",
+							role: "tool" as const,
 							content: `MCP approval request (${item.id}). Server: '${item.server_label}'. Tool: '${item.name}'. Arguments: '${item.arguments}'.`,
+							tool_call_id: "mcp_approval_request",
 						};
 					}
 					case "mcp_approval_response": {
 						return {
-							role: "
-							name: "mcp_approval_response",
+							role: "tool" as const,
 							content: `MCP approval response (${item.id}). Approved: ${item.approve}. Reason: ${item.reason}.`,
+							tool_call_id: "mcp_approval_response",
 						};
 					}
 				}
 			})
-			.filter(
+			.filter(
+				(message): message is NonNullable<typeof message> =>
+					message !== undefined &&
+					(typeof message.content === "string" || (Array.isArray(message.content) && message.content.length !== 0))
+			)
 	);
 	} else {
-		messages.push({ role: "user", content: req.body.input });
+		messages.push({ role: "user", content: req.body.input } as const);
 	}
 
 	// Prepare payload for the LLM
-	const payload:
+	const payload: ChatCompletionCreateParamsStreaming = {
 		// main params
-		model,
-		provider,
+		model: req.body.model,
 		messages,
-		stream:
+		stream: true,
 		// options
 		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
 		response_format: req.body.text?.format
-			?
-
-
-
-
-
-
-
-
-
-			}
+			? req.body.text.format.type === "json_schema"
+				? {
+						type: "json_schema",
+						json_schema: {
+							description: req.body.text.format.description,
+							name: req.body.text.format.name,
+							schema: req.body.text.format.schema,
+							strict: req.body.text.format.strict,
+						},
+					}
+				: { type: req.body.text.format.type }
 			: undefined,
 		temperature: req.body.temperature,
 		tool_choice:

@@ -475,11 +473,15 @@ async function* listMcpToolsStream(
  */
 async function* handleOneTurnStream(
 	apiKey: string | undefined,
-	payload:
+	payload: ChatCompletionCreateParamsStreaming,
 	responseObject: IncompleteResponse,
 	mcpToolsMapping: Record<string, McpServerParams>
 ): AsyncGenerator<ResponseStreamEvent> {
-	const
+	const client = new OpenAI({
+		baseURL: process.env.OPENAI_BASE_URL ?? "https://router.huggingface.co/v1",
+		apiKey: apiKey,
+	});
+	const stream = await client.chat.completions.create(payload);
 	let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
 	let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
 	let previousTotalTokens = responseObject.usage?.total_tokens ?? 0;

@@ -565,7 +567,7 @@ async function* handleOneTurnStream(
 		}
 
 		let currentOutputItem = responseObject.output.at(-1);
-		if (delta.tool_calls[0].function
+		if (delta.tool_calls[0].function?.name) {
 			const functionName = delta.tool_calls[0].function.name;
 			// Tool call with a name => new tool call
 			let newOutputObject:

@@ -594,7 +596,7 @@ async function* handleOneTurnStream(
 				newOutputObject = {
 					type: "function_call",
 					id: generateUniqueId("fc"),
-					call_id: delta.tool_calls[0].id,
+					call_id: delta.tool_calls[0].id ?? "",
 					name: functionName,
 					arguments: "",
 				};

@@ -618,7 +620,7 @@ async function* handleOneTurnStream(
 			}
 		}
 
-		if (delta.tool_calls[0].function
+		if (delta.tool_calls[0].function?.arguments) {
 			// Current item is necessarily a tool call
 			currentOutputItem = responseObject.output.at(-1) as
 				| ResponseOutputItem.McpCall

@@ -737,7 +739,7 @@ async function* handleOneTurnStream(
 							arguments: lastOutputItem.arguments,
 							// Hacky: type is not correct in inference.js. Will fix it but in the meantime we need to cast it.
 							// TODO: fix it in the inference.js package. Should be "arguments" and not "parameters".
-						}
+						},
 					},
 				],
 			},

@@ -775,7 +777,7 @@ async function* callApprovedMCPToolStream(
 	approvalRequest: McpApprovalRequestParams | undefined,
 	mcpToolsMapping: Record<string, McpServerParams>,
 	responseObject: IncompleteResponse,
-	payload:
+	payload: ChatCompletionCreateParamsStreaming
 ): AsyncGenerator<ResponseStreamEvent> {
 	if (!approvalRequest) {
 		throw new Error(`MCP approval request '${approval_request_id}' not found`);

@@ -842,7 +844,7 @@ async function* callApprovedMCPToolStream(
 							arguments: outputObject.arguments,
 							// Hacky: type is not correct in inference.js. Will fix it but in the meantime we need to cast it.
 							// TODO: fix it in the inference.js package. Should be "arguments" and not "parameters".
-						}
+						},
 					},
 				],
 			},
|