import os
import requests
import uvicorn
import gradio as gr
from dotenv import load_dotenv
from fastapi import FastAPI
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
# Load environment variables from .env file
load_dotenv()
# Get the Groq API key from environment variables
groq_api_key = "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"
os.environ['HF_TOKEN']=os.environ.get("HF_TOKEN")
# Define the URL for the Groq API endpoint
url = "https://api.groq.com/openai/v1/chat/completions"
# Initialize the FastAPI app
app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API server using both Hugging Face and Groq models",
)
# Initialize the LLaMA model using Hugging Face.
# Assumption: `HuggingFaceChat` is not a standard class, so this sketch uses
# LangChain's HuggingFaceEndpoint + ChatHuggingFace wrappers instead.
llama_endpoint = HuggingFaceEndpoint(
    repo_id="meta-llama/LLaMA-3-2",  # model id as given; replace with an available LLaMA 3.x repo
    huggingfacehub_api_token=hf_token,
)
llama_model = ChatHuggingFace(llm=llama_endpoint)
# Define a QnA prompt using a template
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")
# Function to choose a model based on preference.
# You might want to improve this to include more refined selection criteria.
def get_model_response(question: str, use_llama_model: bool = True) -> str:
    """Route the question to the Hugging Face LLaMA model or to the Groq API."""
    if use_llama_model:
        # Format the QnA prompt and invoke the LLaMA chat model.
        result = llama_model.invoke(qna_prompt.format_messages(question=question))
        return result.content
    # Otherwise call the Groq chat completions endpoint directly.
    headers = {"Authorization": f"Bearer {groq_api_key}"}
    payload = {
        "model": "llama-3.1-8b-instant",  # assumption: any Groq-hosted chat model id works here
        "messages": [{"role": "user", "content": question}],
    }
    groq_response = requests.post(url, headers=headers, json=payload)
    groq_response.raise_for_status()
    return groq_response.json()["choices"][0]["message"]["content"]

# Create an API endpoint
@app.post("/llm_api")
async def qna_endpoint(question: str, use_llama_model: bool = True):
    """
    Endpoint to receive a question and return a response from either the
    Hugging Face LLaMA model or the Groq API.
    """
    response = get_model_response(question, use_llama_model)
    return {"response": response}
# Run the application
if __name__ == "__main__":
    try:
        uvicorn.run(app, host="0.0.0.0", port=8000)  # Changed to IPv4
    except KeyboardInterrupt:
        print("Server stopped manually.")
    except Exception as e:
        print(f"An error occurred: {e}")