radussad committed on
Commit
3afd34e
·
verified ·
1 Parent(s): 85934af

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -0
app.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Query
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
+ import torch
4
+ from retriever import retrieve_documents
5
+
6
# Hugging Face checkpoint identifier for the Mistral 7B model used below.
MODEL_NAME = "mistralai/Mistral-7B-v0.1"

# Tokenizer and model weights are loaded once, when this module is imported.
# Weights are requested in float16, and device placement is delegated to the
# library via device_map="auto".
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Shared text-generation pipeline wrapping the model and tokenizer above.
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
)

# FastAPI application object that the route decorators attach to.
app = FastAPI()
16
+
17
@app.get("/")
def read_root():
    """Return a fixed status message for the root path."""
    status = {"message": "Mistral 7B RAG API is running!"}
    return status
20
+
21
@app.get("/generate/")
def generate_response(query: str = Query(..., title="User Query")):
    """Answer ``query`` using retrieval-augmented generation.

    The query is passed to ``retrieve_documents``; the result is embedded in
    a prompt together with the query, and a completion is sampled from the
    module-level ``generator`` pipeline.

    Args:
        query: Required ``query`` URL parameter holding the user's question.

    Returns:
        A dict with the original ``query`` and the generated ``response``
        (the completion only, without the prompt).
    """
    # NOTE(review): the return type of retrieve_documents is not visible
    # here; the value is interpolated as-is, so a list would appear in the
    # prompt in its repr form -- confirm against the retriever.
    retrieved_docs = retrieve_documents(query)

    # Format prompt for RAG
    prompt = f"Use the following information to answer:\n{retrieved_docs}\n\nUser: {query}\nAI:"

    # max_new_tokens bounds only the completion. max_length would also count
    # the prompt tokens, so a prompt carrying retrieved context could leave
    # little or no room for the answer.
    # return_full_text=False keeps the prompt (and the retrieved context) out
    # of the returned text, so "response" holds just the model's answer.
    results = generator(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    output = results[0]["generated_text"]

    return {"query": query, "response": output}