import os
import requests
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Load environment variables from .env file
load_dotenv()

# Get the Groq API key and Hugging Face token from environment variables
# (never hard-code API keys in source)
groq_api_key = os.environ.get("GROQ_API_KEY")
hf_token = os.environ.get("HF_TOKEN")
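# Expected .env contents (variable names are assumptions matching the lookups above):
#   GROQ_API_KEY=<your Groq API key>
#   HF_TOKEN=<your Hugging Face access token>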
# Define the URL for the Groq API endpoint (OpenAI-compatible chat completions)
url = "https://api.groq.com/openai/v1/chat/completions"

# Initialize the FastAPI app
app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API server using Hugging Face and Groq-hosted models"
)
# Initialize the LLaMA chat model via the langchain_huggingface integration
# (the repo_id below may need adjusting to an exact Hugging Face Hub model id)
llama_endpoint = HuggingFaceEndpoint(repo_id="meta-llama/LLaMA-3-2", huggingfacehub_api_token=hf_token)
llama_model = ChatHuggingFace(llm=llama_endpoint)

# Define a QnA prompt using a template
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")
# Choose a model based on preference; the routing below is minimal and can be refined
def get_model_response(question: str, use_llama_model: bool = False) -> str:
    if use_llama_model:
        # Run the prompt through the Hugging Face chat model
        return (qna_prompt | llama_model).invoke({"question": question}).content
    # Otherwise call the Groq chat-completions endpoint directly
    headers = {"Authorization": f"Bearer {groq_api_key}"}
    payload = {
        "model": "llama-3.1-8b-instant",  # placeholder Groq model name; adjust as needed
        "messages": [{"role": "user", "content": question}],
    }
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]

# Create an API endpoint (the "/qna" path is illustrative)
@app.post("/qna")
async def qna_endpoint(question: str, use_llama_model: bool = False):
    """Receive a question and return a response from either the Hugging Face model or the Groq API."""
    response = get_model_response(question, use_llama_model)
    return {"response": response}
# Run the application
if __name__ == "__main__":
    try:
        uvicorn.run(app, host="0.0.0.0", port=8000)  # Bind to all IPv4 interfaces
    except KeyboardInterrupt:
        print("Server stopped manually.")
    except Exception as e:
        print(f"An error occurred: {e}")
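# Example client call: a sketch assuming the server is running locally on port 8000
# and the illustrative "/qna" path defined above.
#
#   import requests
#   r = requests.post("http://localhost:8000/qna",
#                     params={"question": "What is the capital of France?"})
#   print(r.json()["response"])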