import os

import requests
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Load environment variables from a .env file
load_dotenv()

# Read API credentials from the environment; never hard-code secrets in source
groq_api_key = os.environ.get("GROQ_API_KEY")
hf_token = os.environ.get("HF_TOKEN")

# Define the URL for the Groq API endpoint (OpenAI-compatible chat completions)
url = "https://api.groq.com/openai/v1/chat/completions"

# Initialize the FastAPI app
app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API server backed by a Hugging Face LLaMA model with a Groq fallback",
)

# Initialize the LLaMA chat model via Hugging Face.
# Note: the original `HuggingFaceChat` class does not exist; LangChain's
# `ChatHuggingFace` wrapping a `HuggingFaceEndpoint` is used instead, and
# the repo id below is an assumption (the original "meta-llama/LLaMA-3-2"
# is not a valid Hugging Face repo id).
llama_endpoint = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.2-3B-Instruct",
    huggingfacehub_api_token=hf_token,
)
llama_model = ChatHuggingFace(llm=llama_endpoint)

# Define a QnA prompt using a template
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")


# Function to choose a model based on preference.
# You might want to improve this to include more refined selection criteria.
def get_model_response(question: str, use_llama_model: bool = True) -> str:
    """Answer via the Hugging Face LLaMA model, or the Groq API as a fallback."""
    if use_llama_model:
        # Pipe the prompt template into the chat model and extract the text
        chain = qna_prompt | llama_model
        return chain.invoke({"question": question}).content

    # Fallback: call the Groq chat completions endpoint directly.
    # The Groq model id here is an example, not specified by the original code.
    response = requests.post(
        url,
        headers={"Authorization": f"Bearer {groq_api_key}"},
        json={
            "model": "llama-3.1-8b-instant",
            "messages": [{"role": "user", "content": question}],
        },
        timeout=30,
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]


# Create an API endpoint
@app.post("/llm_api")
async def qna_endpoint(question: str):
    """
    Endpoint to receive a question and return a response from either the
    Hugging Face model or the Groq API.
    """
    response = get_model_response(question, use_llama_model=True)
    return {"response": response}


# Run the application
if __name__ == "__main__":
    try:
        uvicorn.run(app, host="0.0.0.0", port=8000)  # Bind to IPv4
    except KeyboardInterrupt:
        print("Server stopped manually.")
    except Exception as e:
        print(f"An error occurred: {e}")
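
# --- Usage sketch (assumption: the server is running locally on port 8000) ---
# FastAPI exposes the bare `question: str` parameter as a query parameter,
# so the endpoint can be exercised from another process like this:
#
#   import requests
#   r = requests.post(
#       "http://localhost:8000/llm_api",
#       params={"question": "What is FastAPI?"},
#       timeout=60,
#   )
#   print(r.json()["response"])
#
# or from the command line:
#
#   curl -X POST "http://localhost:8000/llm_api?question=What+is+FastAPI"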