import os

import requests
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Load environment variables from .env file
load_dotenv()

# Get the Groq and Hugging Face credentials from environment variables;
# never hard-code API keys in source
groq_api_key = os.environ.get("GROQ_API_KEY")
hf_token = os.environ.get("HF_TOKEN")
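# A minimal .env sketch for the two variables read above (assumed names;
# adjust to whatever your keys are actually called):
#   GROQ_API_KEY=gsk_...
#   HF_TOKEN=hf_...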


# Define the URL for the Groq API endpoint
url = "https://api.groq.com/openai/v1/chat/completions"
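# Groq's endpoint follows the OpenAI chat completions schema; a minimal
# request body sketch (the model name is an assumption) looks like:
#   {"model": "llama-3.1-8b-instant",
#    "messages": [{"role": "user", "content": "Hello"}]}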


# Initialize the FastAPI app
app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API Server using both Hugging Face and Gemini models"
)

# Initialize the LLaMA model via the Hugging Face Inference API.
# ChatHuggingFace wraps the endpoint so it can be called like a chat model;
# the repo_id below is one of the Llama 3.2 instruct checkpoints on HF.
llama_model = ChatHuggingFace(llm=HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.2-3B-Instruct",
    huggingfacehub_api_token=hf_token,
))

# Define a QnA prompt using a template
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")
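# For example, a hedged sketch of how the template renders:
#   qna_prompt.format_messages(question="What is Groq?")
# returns a single human message whose content is
# "Answer the question: What is Groq?"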

# Function to choose a model based on preference.
# You might want to improve this to include more refined selection criteria.
def get_model_response(question: str, use_llama: bool = False):
    prompt = qna_prompt.format_messages(question=question)[0].content
    if use_llama:
        return llama_model.invoke(prompt).content  # Hugging Face LLaMA
    # Otherwise call the Groq chat completions endpoint directly
    response = requests.post(url, headers={"Authorization": f"Bearer {groq_api_key}"},
                             json={"model": "llama-3.1-8b-instant",
                                   "messages": [{"role": "user", "content": prompt}]})
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]

# Create an API endpoint
@app.post("/llm_api")
async def qna_endpoint(question: str, use_llama: bool = False):
    """
    Endpoint that receives a question and returns a response from either the
    Groq-hosted model or the Hugging Face LLaMA model.
    """
    response = get_model_response(question, use_llama)
    return {"response": response}

# Run the application
if __name__ == "__main__":
    try:
        uvicorn.run(app, host="0.0.0.0", port=8000)  # listen on all IPv4 interfaces
    except KeyboardInterrupt:
        print("Server stopped manually.")
    except Exception as e:
        print(f"An error occurred: {e}")