File size: 1,641 Bytes
7c9ad45
2501763
7c9ad45
 
 
 
 
 
 
 
 
3e9114f
e42ca5a
2501763
7c9ad45
 
 
2501763
45ad282
2501763
 
45ad282
2501763
45ad282
2501763
 
45ad282
 
 
7c9ad45
 
2501763
 
 
 
45ad282
 
7c9ad45
45ad282
 
b121316
7c9ad45
45ad282
 
 
7c9ad45
45ad282
2501763
 
 
 
5da0a87
2501763
 
 
201fa97
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56

import os
import requests
import gradio as gr
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get the Groq API key from environment variables.
# SECURITY FIX: the key was previously hard-coded in source; secrets must
# come from the environment (.env), never from version control. The leaked
# key should be revoked.
groq_api_key = os.environ.get("GROQ_API_KEY")

# Propagate the Hugging Face token only when it is actually set —
# os.environ[...] = None raises TypeError, which crashed startup when the
# variable was missing.
_hf_token = os.environ.get("HF_TOKEN")
if _hf_token is not None:
    os.environ["HF_TOKEN"] = _hf_token


# Groq's OpenAI-compatible chat-completions endpoint.
url = "https://api.groq.com/openai/v1/chat/completions"


# Initialize the FastAPI app.
# NOTE(review): FastAPI is referenced but never imported in this file —
# 'from fastapi import FastAPI' must be added at the top of the file.
app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API Server using both Hugging Face and Gemini models"
)

# Initialize the LLaMA model using Hugging Face.
# BUG FIX: HF_TOKEN was a bare, undefined name (NameError at import time);
# read the token from the environment instead.
# NOTE(review): HuggingFaceChat is also never imported — confirm which
# package it comes from and import it at the top of the file.
llama_model = HuggingFaceChat(model="meta-llama/LLaMA-3-2", token=os.environ.get("HF_TOKEN"))




# Define a QnA prompt using a template; {question} is filled in per request.
# NOTE(review): ChatPromptTemplate is never imported in this file —
# presumably 'from langchain_core.prompts import ChatPromptTemplate'; confirm.
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")

# Function to choose model based on preference.
def get_model_response(question, use_llama_model=False):
    """Return the model's answer to *question* via the Groq API.

    BUG FIX: the original definition had no body at all, which made the
    whole file a syntax error. This implementation posts the question to
    the Groq chat-completions endpoint (``url``) using ``groq_api_key``.

    Args:
        question: The user's question text.
        use_llama_model: Backward-compatible flag (callers pass it as a
            second argument); reserved for routing to the local LLaMA
            model — currently all requests go to Groq.

    Returns:
        The assistant's reply as a string.

    Raises:
        requests.HTTPError: If the Groq API responds with an error status.
    """
    headers = {
        "Authorization": f"Bearer {groq_api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama3-8b-8192",
        "messages": [{"role": "user", "content": question}],
    }
    # timeout prevents a hung request from blocking the endpoint forever
    resp = requests.post(url, headers=headers, json=payload, timeout=30)
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]
# Create an API endpoint
@app.post("/llm_api")
async def qna_endpoint(question: str):
    """
    Endpoint to receive a question and return a response from the model.

    BUG FIX: the original passed ``use_llama_model`` — an undefined name —
    as a second positional argument, raising NameError on every request.
    Only the question is required.
    """
    response = get_model_response(question)
    return {"response": response}

# Run the application when executed as a script.
# NOTE(review): uvicorn is referenced but never imported — 'import uvicorn'
# must be added at the top of the file for this entry point to work.
if __name__ == "__main__":
    try:
        uvicorn.run(app, host="0.0.0.0", port=8000)  # bind IPv4 on all interfaces
    except KeyboardInterrupt:
        print("Server stopped manually.")
    except Exception as e:
        # Broad catch is acceptable only here, at the top-level boundary;
        # the error is reported before the process exits.
        print(f"An error occurred: {e}")