import os
import requests
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Load environment variables from .env file
load_dotenv()

# Get the Groq API key and Hugging Face token from environment variables
# (never hard-code API keys in source)
groq_api_key = os.environ.get("GROQ_API_KEY")
hf_token = os.environ.get("HF_TOKEN")
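# Expected .env contents (variable names are assumptions matching the lookups above):
#   GROQ_API_KEY=<your Groq API key>
#   HF_TOKEN=<your Hugging Face access token>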
# Define the URL for the Groq API endpoint (OpenAI-compatible chat completions)
url = "https://api.groq.com/openai/v1/chat/completions"

# Initialize the FastAPI app
app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API server using Hugging Face and Groq-hosted models"
)
# Initialize the LLaMA chat model via the langchain_huggingface integration
# (the repo_id below may need adjusting to an exact Hugging Face Hub model id)
llama_endpoint = HuggingFaceEndpoint(repo_id="meta-llama/LLaMA-3-2", huggingfacehub_api_token=hf_token)
llama_model = ChatHuggingFace(llm=llama_endpoint)

# Define a QnA prompt using a template
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")
# Choose a model based on preference; the routing below is minimal and can be refined
def get_model_response(question: str, use_llama_model: bool = False) -> str:
    if use_llama_model:
        # Run the prompt through the Hugging Face chat model
        return (qna_prompt | llama_model).invoke({"question": question}).content
    # Otherwise call the Groq chat-completions endpoint directly
    headers = {"Authorization": f"Bearer {groq_api_key}"}
    payload = {
        "model": "llama-3.1-8b-instant",  # placeholder Groq model name; adjust as needed
        "messages": [{"role": "user", "content": question}],
    }
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]

# Create an API endpoint (the "/qna" path is illustrative)
@app.post("/qna")
async def qna_endpoint(question: str, use_llama_model: bool = False):
    """Receive a question and return a response from either the Hugging Face model or the Groq API."""
    response = get_model_response(question, use_llama_model)
    return {"response": response}
# Run the application
if __name__ == "__main__":
    try:
        uvicorn.run(app, host="0.0.0.0", port=8000)  # Bind to all IPv4 interfaces
    except KeyboardInterrupt:
        print("Server stopped manually.")
    except Exception as e:
        print(f"An error occurred: {e}")
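# Example client call: a sketch assuming the server is running locally on port 8000
# and the illustrative "/qna" path defined above.
#
#   import requests
#   r = requests.post("http://localhost:8000/qna",
#                     params={"question": "What is the capital of France?"})
#   print(r.json()["response"])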