File size: 4,580 Bytes
d356519
499d2bb
 
 
d356519
 
 
 
 
 
 
 
 
 
 
0308771
d356519
0308771
d356519
 
0308771
 
 
 
 
ac51fbb
0308771
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d356519
 
 
 
 
 
131f2e6
 
 
 
 
2a42128
d356519
e45e431
 
 
 
 
c735f38
5461856
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os

# Install the bundled AutoGPTQ wheel only when the package is missing.
# Streamlit re-executes this script on every user interaction, so an
# unconditional install would spawn pip on each rerun.
import importlib.util
import subprocess
import sys

if importlib.util.find_spec("auto_gptq") is None:
    # Run pip through the current interpreter (not whichever `pip` happens to
    # be first on PATH) and pass an argument list so no shell is involved.
    # check=False keeps this best-effort like the original os.system call; a
    # failed install surfaces at the `auto_gptq` import below.
    subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl",
        ],
        check=False,
    )
    # Make sure the import system notices the freshly installed package.
    importlib.invalidate_caches()

import streamlit as st
import torch
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer, TextStreamer, pipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from streamlit_chat import message

# Pick the compute device: the first CUDA GPU when torch reports one,
# otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

# Run the expensive setup once per browser session: st.session_state
# survives Streamlit's script reruns, so this flag guards everything below.
if "initialized" not in st.session_state:
    st.session_state.initialized = False

if not st.session_state.initialized:

    # Initialize embeddings
    embeddings = HuggingFaceInstructEmbeddings(
        model_name="hkunlp/instructor-large", model_kwargs={"device": DEVICE}
    )

    # Reopen the persisted Chroma collection first. Calling
    # Chroma.from_documents on every new session would re-embed the whole
    # book and append the same chunks again under fresh ids, so retrieval
    # would start returning duplicate passages.
    db = Chroma(persist_directory="db", embedding_function=embeddings)

    if not db.get(limit=1)["ids"]:
        # The store is empty (first run, or the "db" directory was deleted
        # to force re-indexing): load the PDF, split it and index it.
        loader = PyPDFLoader("Medical_Book.pdf")
        docs = loader.load()

        # Split documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
        texts = text_splitter.split_documents(docs)

        # Create Chroma vectorstore
        db = Chroma.from_documents(texts, embeddings, persist_directory="db")

    # Hub repository of the quantized chat model and the base name of its
    # weight file.
    model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
    model_basename = "model"

    # The tokenizer is loaded from the same repository as the weights.
    tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path,
        use_fast=True,
    )

    # Loader options gathered in one place so the call below stays short.
    gptq_load_options = {
        "revision": "gptq-4bit-128g-actorder_True",
        "model_basename": model_basename,
        "use_safetensors": True,
        "trust_remote_code": True,
        "inject_fused_attention": False,
        "device": DEVICE,
        "quantize_config": None,
    }
    model = AutoGPTQForCausalLM.from_quantized(model_name_or_path, **gptq_load_options)

    # Set system prompt
    DEFAULT_SYSTEM_PROMPT = """
    You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
    If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
    """.strip()

    def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
        """Wrap *prompt* in the Llama-2 chat template.

        The system prompt is enclosed in the ``<<SYS>>`` / ``<</SYS>>``
        markers and the whole turn in ``[INST]`` / ``[/INST]``, which is the
        layout the Llama-2 chat checkpoints are documented to expect.

        Args:
            prompt: Text of the user turn (may contain template placeholders).
            system_prompt: Instructions placed in the system section.

        Returns:
            The formatted single-turn prompt string.
        """
        # Built with explicit newlines so the indentation of this nested
        # function does not leak into the text sent to the model.
        return f"[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{prompt} [/INST]"

    # Streamer handed to the pipeline; configured to leave out the prompt
    # and special tokens from what it emits.
    streamer = TextStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)

    # Generation settings forwarded to the text-generation pipeline.
    generation_options = {
        "max_new_tokens": 1024,
        "temperature": 0,
        "top_p": 0.95,
        "repetition_penalty": 1.15,
        "streamer": streamer,
    }
    text_pipeline = pipeline(
        "text-generation",
        tokenizer=tokenizer,
        model=model,
        **generation_options,
    )

    # Expose the Hugging Face pipeline through LangChain's LLM wrapper.
    llm = HuggingFacePipeline(model_kwargs={"temperature": 0}, pipeline=text_pipeline)

    # System instruction for the QA chain: answer from the supplied context
    # and admit when the answer is unknown.
    SYSTEM_PROMPT = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."

    # User-turn body; {context} and {question} are filled in by the
    # PromptTemplate created below.
    qa_body = """
        {context}
        Question: {question}
        """
    template = generate_prompt(qa_body, system_prompt=SYSTEM_PROMPT)

    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )

    # Retriever over the vector store, asked for 2 results per query.
    retriever = db.as_retriever(search_kwargs={"k": 2})

    # "stuff" chain: the retrieved documents are passed to the LLM through
    # the custom prompt; source documents are returned with the answer.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt},
        return_source_documents=True,
    )

    # Keep the chain in the session and mark setup as done so reruns skip
    # this whole initialization branch.
    st.session_state["qa_chain"] = qa_chain
    st.session_state["initialized"] = True

st.title("Medical Chatbot")

# The conversation is kept in session state as a list of
# {"question": ..., "answer": ...} dicts; start it empty on first run.
if "history" not in st.session_state:
    st.session_state["history"] = []

# Replay every stored exchange with streamlit-chat: the user's question
# first, then the bot's answer. Keys are made unique per turn.
for turn_index, turn in enumerate(st.session_state.history):
    question_text = turn["question"]
    answer_text = turn["answer"]
    message(question_text, is_user=True, key=f"user_{turn_index}")
    message(answer_text, key=f"bot_{turn_index}")

user_input = st.chat_input(placeholder="Ask a question:", key="input")

if user_input:
    # Calling the chain object directly is deprecated in LangChain; invoke()
    # is the supported entry point and RetrievalQA reads the question from
    # the "query" key.
    result = st.session_state.qa_chain.invoke({"query": user_input})
    answer = result["result"]
    st.session_state.history.append({"question": user_input, "answer": answer})

    # The history was rendered before this answer existed, so rerun the
    # script to display it. st.experimental_rerun was superseded by
    # st.rerun; use whichever this Streamlit installation provides.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()