from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import gradio as gr
import os
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import requests
import nest_asyncio
import uvicorn
# ✅ Load documents and FAISS index
with open("texts.json", "r", encoding="utf-8") as f:
    texts = json.load(f)

index = faiss.read_index("faiss_index.bin")
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
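# "texts.json" and "faiss_index.bin" are expected to be built offline before the
# Space starts. A minimal sketch of that build step (assumed, not part of this
# app; chunking strategy and file names are up to you):
#
#   chunks = ["...doc chunk 1...", "...doc chunk 2..."]   # your document chunks
#   vecs = embed_model.encode(chunks, normalize_embeddings=True)
#   idx = faiss.IndexFlatIP(vecs.shape[1])                # inner product index
#   idx.add(np.asarray(vecs, dtype="float32"))
#   faiss.write_index(idx, "faiss_index.bin")
#   with open("texts.json", "w", encoding="utf-8") as f:
#       json.dump(chunks, f)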
# ✅ API keys and config
API_KEY = os.environ.get("OPENROUTER_API_KEY")
MODEL = "nousresearch/deephermes-3-llama-3-8b-preview:free"  # Updated model

app = FastAPI()
# ✅ Greeting checker with added variations
def is_greeting(text):
    greetings = [
        "hi", "hello", "hey", "good morning", "good afternoon", "good evening",
        "howdy", "yo", "sup", "greetings", "what's up", "hey there"
    ]
    # Match whole greetings only, so e.g. "hi" doesn't fire inside "this"
    normalized = text.lower().strip(" \t!?.,")
    return any(normalized == g or normalized.startswith(g + " ") for g in greetings)
# ✅ Context fetcher
def get_context(query, top_k=5, threshold=0.3):
    query_vec = embed_model.encode([query])
    # NOTE: the ">= threshold" filter assumes the index returns similarity
    # scores (e.g. inner product on normalized vectors). With an L2 index,
    # smaller distances are better and this comparison would be inverted.
    D, I = index.search(np.array(query_vec, dtype="float32"), top_k)
    matches = [(texts[i], d) for i, d in zip(I[0], D[0]) if d >= threshold]
    if not matches:
        return ""
    return "\n".join(text for text, _ in matches)
# ✅ Core chat function
def chat_fn(message, history):
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    context = get_context(message)

    # ✅ Handle greetings when no context found
    if not context.strip():
        if is_greeting(message):
            return "👋 Hey there! How can I help you today?"
        return "🤔 Hmm, I'm not sure about that. Could you rephrase your question?"
    # ✅ Construct prompt with a friendly, natural tone
    messages = [
        {
            "role": "system",
            "content": (
                "You are a friendly, helpful assistant."
                " Use the following context to answer questions naturally and clearly."
                " Be conversational and concise. Avoid saying you're using context."
                " If unsure, say: 'I'm not sure about that. Could you rephrase?'"
                "\n\nContext:\n" + context
            )
        }
    ]
    for user, assistant in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message + "\n\nKeep your reply short and natural."})
    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": 200,
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=30
        )
        response.raise_for_status()
        reply = response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        reply = f"⚠️ API error: {str(e)[:100]}"

    return reply.strip()
# ✅ POST endpoint for API usage
@app.post("/chat")  # route path assumed; pick any path not claimed by Gradio
async def chat_api(request: Request):
    body = await request.json()
    message = body.get("message", "").strip()
    history = body.get("history", [])
    if not message:
        return JSONResponse(content={"response": "⚠️ Please enter a valid message."})
    response = chat_fn(message, history)
    return JSONResponse(content={"response": response})
# ✅ Gradio interface
demo = gr.ChatInterface(
    fn=chat_fn,
    title="💬 CODEX MIRXA KAMRAN",
    description="Chat with our AI assistant based on internal knowledge. Short, natural and helpful answers!",
    theme="soft"
)

app = gr.mount_gradio_app(app, demo, path="/")
# ✅ Local run
if __name__ == "__main__":
    nest_asyncio.apply()
    uvicorn.run(app, host="0.0.0.0", port=7860)
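
# Once the app is running, the JSON endpoint can be exercised like this
# (a sketch assuming the "/chat" path registered above and the default port):
#
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "What services do you offer?", "history": []}'
#
# The response body has the shape {"response": "..."}; "history" takes
# [user, assistant] pairs matching what chat_fn iterates over.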