import json

from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_chroma import Chroma
from langchain.chains import RetrievalQA
from transformers import pipeline
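
# Assumed environment: these imports require the langchain, langchain-community,
# langchain-huggingface, langchain-chroma, transformers, and sentence-transformers
# packages (sentence-transformers backs the default HuggingFaceEmbeddings model).
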
def load_and_preprocess(file_path):
    # Load the file as plain text and split it into overlapping chunks so that
    # retrieval works on passage-sized pieces rather than the whole document.
    loader = TextLoader(file_path)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return text_splitter.split_documents(documents)


def create_vector_store(documents, persist_directory):
    # Embed each chunk with the default sentence-transformers model and store
    # the vectors in a persistent Chroma collection on disk.
    embeddings = HuggingFaceEmbeddings()
    vector_store = Chroma.from_documents(documents, embeddings, persist_directory=persist_directory)
    return vector_store


def initialize_llm():
    # Wrap a local Hugging Face text-generation pipeline for use with LangChain.
    # GPT-2 is small and has a 1024-token context window; a larger
    # instruction-tuned model can be swapped in for better answers.
    generator = pipeline('text-generation', model='gpt2')
    return HuggingFacePipeline(pipeline=generator)


def build_rag_pipeline(vector_store, llm):
    # Retrieve the top-3 most similar chunks and let the LLM answer from them.
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # RetrievalQA cannot be constructed directly from an LLM; use the
    # from_chain_type factory, which builds the default "stuff" documents chain.
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)


def main():
    # Load and chunk both source texts. Note: the CSV is read as plain text
    # by TextLoader rather than parsed row by row.
    gita_docs = load_and_preprocess(r'C:\LLM_summerizer\bhagavad_gita_verses.csv')
    yoga_sutras_docs = load_and_preprocess(r'C:\LLM_summerizer\yoga_raw.txt')
    documents = gita_docs + yoga_sutras_docs

    # Embed the chunks and persist them to a local Chroma store.
    vector_store = create_vector_store(documents, persist_directory='vector_store')

    # Set up the local generator and wire it to the retriever.
    llm = initialize_llm()
    rag_pipeline = build_rag_pipeline(vector_store, llm)

    # Run a sample query; invoke returns a dict with the answer under "result".
    query = "What does the Bhagavad Gita say about selfless action?"
    result = rag_pipeline.invoke({"query": query})

    output = {
        "query": query,
        "answer": result["result"]
    }

    print(json.dumps(output, indent=2))


if __name__ == "__main__":
    main()