import { Connect, PreviewServer, ViteDevServer } from "vite";
import OpenAI from "openai";
import { Stream } from "openai/streaming.mjs";
import { RateLimiterMemory } from "rate-limiter-flexible";
import { argon2Verify } from "hash-wasm";
import { getSearchToken } from "./searchToken";
import { isVerifiedToken, addVerifiedToken } from "./verifiedTokens";

// Allow 2 requests per 10-second window per token.
const rateLimiter = new RateLimiterMemory({
  points: 2,
  duration: 10,
});

export function internalApiEndpointServerHook<
  T extends ViteDevServer | PreviewServer,
>(server: T) {
  server.middlewares.use(async (request, response, next) => {
    if (!request.url?.startsWith("/inference")) return next();

    // Extract the bearer token from the Authorization header.
    const authHeader = request.headers.authorization;
    const tokenPrefix = "Bearer ";
    const token =
      authHeader && authHeader.startsWith(tokenPrefix)
        ? authHeader.slice(tokenPrefix.length)
        : null;

    if (!token) {
      response.statusCode = 401;
      response.end("Missing or invalid Authorization header.");
      return;
    }

    // The token is an Argon2 hash of the search token. Verify it once,
    // then cache it so later requests skip the expensive hash check.
    if (!isVerifiedToken(token)) {
      let isValidToken = false;

      try {
        isValidToken = await argon2Verify({
          password: getSearchToken(),
          hash: token,
        });
      } catch (error) {
        // Malformed hashes throw; treat them as invalid.
        void error;
      }

      if (isValidToken) {
        addVerifiedToken(token);
      } else {
        response.statusCode = 401;
        response.end("Unauthorized.");
        return;
      }
    }

    // Rate-limit per token rather than per IP address.
    try {
      await rateLimiter.consume(token);
    } catch {
      response.statusCode = 429;
      response.end("Too many requests.");
      return;
    }

    const openai = new OpenAI({
      baseURL: process.env.INTERNAL_OPENAI_COMPATIBLE_API_BASE_URL,
      apiKey: process.env.INTERNAL_OPENAI_COMPATIBLE_API_KEY,
    });

    try {
      const requestBody = await getRequestBody(request);

      const completion = await openai.chat.completions.create({
        ...requestBody,
        model: process.env.INTERNAL_OPENAI_COMPATIBLE_API_MODEL as string,
        stream: true,
      });

      response.setHeader("Content-Type", "text/event-stream");
      response.setHeader("Cache-Control", "no-cache");
      response.setHeader("Connection", "keep-alive");

      // Re-emit the upstream completion as Server-Sent Events.
      const stream = OpenAIStream(completion);

      stream.pipeTo(
        new WritableStream({
          write(chunk) {
            response.write(chunk);
          },
          close() {
            response.end();
          },
        }),
      );
    } catch (error) {
      console.error("Error in internal API endpoint:", error);
      response.statusCode = 500;
      response.end(JSON.stringify({ error: "Internal server error" }));
    }
  });
}

async function getRequestBody(
  request: Connect.IncomingMessage,
): Promise<OpenAI.ChatCompletionCreateParamsStreaming> {
  return new Promise((resolve, reject) => {
    let body = "";

    request.on("data", (chunk: string) => {
      body += chunk;
    });

    request.on("end", () => {
      try {
        resolve(JSON.parse(body));
      } catch (error) {
        reject(error);
      }
    });
  });
}

function OpenAIStream(completion: Stream<OpenAI.ChatCompletionChunk>) {
  const encoder = new TextEncoder();

  return new ReadableStream<Uint8Array>({
    async start(controller) {
      for await (const chunk of completion) {
        const content = chunk.choices[0]?.delta?.content || "";

        if (content) {
          // Forward only content deltas, preserving the OpenAI chunk shape.
          const payload = {
            id: chunk.id,
            object: "chat.completion.chunk",
            created: chunk.created,
            model: chunk.model,
            choices: [
              {
                index: 0,
                delta: { content },
                finish_reason: chunk.choices[0].finish_reason,
              },
            ],
          };

          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify(payload)}\n\n`),
          );
        }
      }

      // Signal end-of-stream the same way the OpenAI API does.
      controller.enqueue(encoder.encode("data: [DONE]\n\n"));
      controller.close();
    },
  });
}
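
/*
  Usage sketch (not part of the module above): a hypothetical client call
  against this middleware. Assumptions: a browser-like environment with
  fetch and ReadableStream, and `hashedToken` holding an Argon2 hash of the
  shared search token (e.g. produced client-side with hash-wasm's argon2id),
  which is what argon2Verify() above checks against.

  const response = await fetch("/inference", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${hashedToken}`,
    },
    body: JSON.stringify({
      messages: [{ role: "user", content: "Hello!" }],
    }),
  });

  // The body arrives as text/event-stream frames: one "data: {json}\n\n"
  // per content delta, then "data: [DONE]\n\n".
  const reader = response.body!.getReader();
  const decoder = new TextDecoder();

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    console.log(decoder.decode(value));
  }
*/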