from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA


class Handler:
    def __init__(self):
        # Load the fine-tuned UPF code generator and its tokenizer.
        # device_map="auto" lets accelerate place the weights on the available GPU/CPU.
        print("Loading model and tokenizer...")
        self.model = AutoModelForCausalLM.from_pretrained(
            "anirudh248/upf_code_generator_final", device_map="auto"
        )
        self.tokenizer = AutoTokenizer.from_pretrained("anirudh248/upf_code_generator_final")

        # Load the prebuilt FAISS index with the default sentence-transformers embeddings.
        print("Loading FAISS index and embeddings...")
        self.embeddings = HuggingFaceEmbeddings()
        self.vectorstore = FAISS.load_local(
            "faiss_index", self.embeddings, allow_dangerous_deserialization=True
        )

        # Wrap the model in a text-generation pipeline. Device placement is already
        # handled by device_map="auto", so no explicit device argument is passed
        # (transformers rejects `device=` for accelerate-dispatched models).
        # do_sample=True is needed for temperature/top_p to take effect.
        print("Creating Hugging Face pipeline...")
        self.hf_pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            do_sample=True,
            temperature=0.7,
            max_new_tokens=2048,
            top_p=0.95,
            repetition_penalty=1.15,
        )

        # Expose the pipeline to LangChain and build the RetrievalQA chain
        # over the FAISS retriever.
        self.llm = HuggingFacePipeline(pipeline=self.hf_pipeline)

        self.retriever = self.vectorstore.as_retriever()
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=self.retriever,
            return_source_documents=False,
        )

    def __call__(self, request):
        try:
            prompt = request.json.get("prompt")
            if not prompt:
                return {"error": "Prompt is required"}, 400

            response = self.qa_chain.run(prompt)

            return {"response": response}

        except Exception as e:
            return {"error": str(e)}, 500
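

# A minimal local sanity check for the handler above. The serving framework is
# assumed to pass a Flask-style request object exposing a `.json` dict; the
# `_FakeRequest` class and the example prompt below are illustrative stand-ins,
# not part of the deployed service.
if __name__ == "__main__":
    class _FakeRequest:
        def __init__(self, payload):
            self.json = payload

    handler = Handler()

    # Happy path: a prompt is supplied and routed through the RetrievalQA chain.
    result = handler(_FakeRequest({"prompt": "Generate UPF for a design with two power domains."}))
    print(result)

    # Error path: a missing prompt yields the 400-style error tuple.
    print(handler(_FakeRequest({})))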