Spaces: Upload 3 files
- app.py +65 -0
- requirements.txt +107 -0
- utils.py +96 -0
app.py
ADDED
@@ -0,0 +1,65 @@
# Import statements
import os

from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.prompts import (
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
    MessagesPlaceholder
)
import streamlit as st
from streamlit_chat import message
from utils import find_match, get_conversation_string, query_refiner

# Streamlit setup
st.subheader("Legal Guardian")

# Session state initialization
if 'responses' not in st.session_state:
    st.session_state['responses'] = ["How can I assist you?"]

if 'requests' not in st.session_state:
    st.session_state['requests'] = []

if 'buffer_memory' not in st.session_state:
    st.session_state.buffer_memory = ConversationBufferWindowMemory(k=3, return_messages=True)

# Initialize ChatOpenAI and conversation; the API key is read from the
# environment instead of being hard-coded in the source
llm = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=os.getenv("OPENAI_API_KEY"))

system_msg_template = SystemMessagePromptTemplate.from_template("""
'Legal Guardian' is a GPT designed to assist with a broad range of legal questions related to children's issues, focusing on laws in India...
...It asks for clarification on vague questions to ensure accurate and relevant responses, and treats each query independently for focused assistance.
""")

human_msg_template = HumanMessagePromptTemplate.from_template("{input}")
prompt_template = ChatPromptTemplate.from_messages([system_msg_template, MessagesPlaceholder(variable_name="history"), human_msg_template])
conversation = ConversationChain(memory=st.session_state.buffer_memory, prompt=prompt_template, llm=llm, verbose=True)

# Streamlit UI components
response_container = st.container()
textcontainer = st.container()

# Handle user input and display conversation
with textcontainer:
    query = st.text_input("Query: ", key="input")
    if st.button("Submit"):
        with st.spinner("typing..."):
            conversation_string = get_conversation_string()
            refined_query = query_refiner(conversation_string, query)
            st.subheader("Refined Query:")
            st.write(refined_query)
            context = find_match(refined_query)
            response = conversation.predict(input=f"Context:\n {context} \n\n Query:\n{query}")
        st.session_state.requests.append(query)
        st.session_state.responses.append(response)

# Display conversation history
with response_container:
    if st.session_state['responses']:
        st.subheader("Chat History:")
        for i in range(len(st.session_state['responses'])):
            message(st.session_state['responses'][i], key=str(i))
            if i < len(st.session_state['requests']):
                message(st.session_state["requests"][i], is_user=True, key=str(i) + '_user')
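For reference, a minimal self-contained sketch of how the prompt template above expands into a concrete message list before it reaches the model; the two-turn history and the question strings are hypothetical stand-ins for what ConversationBufferWindowMemory(k=3) would supply at runtime:

from langchain.prompts import (
    SystemMessagePromptTemplate, HumanMessagePromptTemplate,
    ChatPromptTemplate, MessagesPlaceholder,
)
from langchain.schema import AIMessage, HumanMessage

# Rebuild the same template shape used in app.py
template = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template("'Legal Guardian' assists with children's legal questions under Indian law."),
    MessagesPlaceholder(variable_name="history"),
    HumanMessagePromptTemplate.from_template("{input}"),
])

# Hypothetical prior turns; the memory object would supply messages in this form
messages = template.format_messages(
    history=[
        HumanMessage(content="What is the minimum legal working age in India?"),
        AIMessage(content="Under the Child Labour (Prohibition and Regulation) Act, 1986, ..."),
    ],
    input="Does that also cover family-run businesses?",
)
for m in messages:
    print(type(m).__name__, "->", m.content[:70])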
requirements.txt
ADDED
@@ -0,0 +1,107 @@
aiohttp==3.8.6
aiosignal==1.3.1
altair==5.1.2
annotated-types==0.6.0
anyio==3.7.1
async-timeout==4.0.3
attrs==23.1.0
blinker==1.6.3
cachetools==5.3.2
certifi==2023.7.22
charset-normalizer==3.3.1
click==8.1.7
dataclasses-json==0.6.1
dnspython==2.4.2
exceptiongroup==1.1.3
filelock==3.13.0
frozenlist==1.4.0
fsspec==2023.10.0
gitdb==4.0.11
GitPython==3.1.40
greenlet==3.0.1
huggingface-hub==0.17.3
idna==3.4
importlib-metadata==6.8.0
indic-transliteration  # imported by utils.py but missing from the original pins; version left unpinned
Jinja2==3.1.2
joblib==1.3.2
jsonpatch==1.33
jsonpointer==2.4
jsonschema==4.19.1
jsonschema-specifications==2023.7.1
langchain==0.0.325
langsmith==0.0.53
loguru==0.7.2
markdown-it-py==3.0.0
MarkupSafe==2.1.3
marshmallow==3.20.1
mdurl==0.1.2
mpmath==1.3.0
multidict==6.0.4
mypy-extensions==1.0.0
networkx==3.2.1
nltk==3.8.1
numpy==1.26.1
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.18.1
nvidia-nvjitlink-cu12==12.3.52
nvidia-nvtx-cu12==12.1.105
openai==0.28.1
packaging==23.2
pandas==2.1.2
Pillow==10.1.0
pinecone-client==2.2.4
protobuf==4.24.4
pyarrow==13.0.0
pydantic==2.4.2
pydantic_core==2.10.1
pydeck==0.8.1b0
Pygments==2.16.1
python-dateutil==2.8.2
python-dotenv==1.0.0
pytz==2023.3.post1
PyYAML==6.0.1
referencing==0.30.2
regex==2023.10.3
requests==2.31.0
rich==13.6.0
rpds-py==0.10.6
safetensors==0.4.0
scikit-learn==1.3.2
scipy==1.11.3
sentence-transformers==2.2.2
sentencepiece==0.1.99
six==1.16.0
smmap==5.0.1
sniffio==1.3.0
SQLAlchemy==2.0.22
streamlit==1.28.0
streamlit-chat==0.1.1
sympy==1.12
tenacity==8.2.3
threadpoolctl==3.2.0
tokenizers==0.14.1
toml==0.10.2
toolz==0.12.0
torch==2.1.0
torchvision==0.16.0
tornado==6.3.3
tqdm==4.66.1
transformers==4.34.1
triton==2.1.0
typing-inspect==0.9.0
typing_extensions==4.8.0
tzdata==2023.3
tzlocal==5.2
urllib3==2.0.7
validators==0.22.0
watchdog==3.0.0
yarl==1.9.2
zipp==3.17.0
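These pins predate the 1.x rewrites of openai, langchain, and pinecone-client, and both app.py and utils.py depend on the legacy call styles (openai.Completion.create, pinecone.init). A small hypothetical guard that fails fast if a newer openai client is installed, using the packaging pin already listed above:

import openai
from packaging import version

# utils.py calls openai.Completion.create, which was removed in openai>=1.0,
# so refuse to start against an incompatible client
if version.parse(openai.__version__) >= version.parse("1.0.0"):
    raise RuntimeError("This Space requires the pinned openai==0.28.1 (pre-1.0 API).")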
utils.py
ADDED
@@ -0,0 +1,96 @@
from sentence_transformers import SentenceTransformer
import pinecone
import openai
import os
import streamlit as st
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate

# Set OpenAI API key from the environment instead of hard-coding it
openai.api_key = os.getenv("OPENAI_API_KEY")

# Initialize SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize Pinecone index; the API key is read from the environment
pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment='gcp-starter')
index = pinecone.Index('langchain-chatbot')

# Function to find the most relevant match in the Pinecone index
def find_match(query_text):
    input_em = model.encode(query_text).tolist()
    result = index.query(input_em, top_k=2, includeMetadata=True)
    return result['matches'][0]['metadata']['text'] + "\n" + result['matches'][1]['metadata']['text']

# Function to refine a user query using OpenAI's Completion API
def query_refiner(conversation, query):
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=f"Given the following user query and conversation log, formulate a question that would be the most relevant to provide the user with an answer from a knowledge base.\n\nCONVERSATION LOG: \n{conversation}\n\nQuery: {query}\n\nRefined Query:",
        temperature=0.7,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response['choices'][0]['text']

# Function to get the conversation string for display
def get_conversation_string():
    conversation_string = ""
    for i in range(len(st.session_state['responses']) - 1):
        conversation_string += "Human: " + st.session_state['requests'][i] + "\n"
        conversation_string += "Bot: " + st.session_state['responses'][i + 1] + "\n"
    return conversation_string

# Function to transliterate Tamil text into the Latin-script ITRANS scheme
def transliterate_tamil_to_english(text):
    return transliterate(text, sanscript.TAMIL, sanscript.ITRANS)

# Streamlit app (standalone demo when utils.py is run directly)
def main():
    st.title("LangChain Chatbot")

    # User input for the conversation in Tamil
    user_input_tamil = st.text_input("User Input (Tamil):", "")

    # Transliterate Tamil input to English for processing
    user_input_english = transliterate_tamil_to_english(user_input_tamil)

    # Retrieve refined query using OpenAI
    refined_query = query_refiner(get_conversation_string(), user_input_english)

    # Find the most relevant match using Sentence Transformers and Pinecone
    match_result = find_match(refined_query)

    # Display results
    st.text("User Input (Tamil): " + user_input_tamil)
    st.text("User Input (English): " + user_input_english)
    st.text("Refined Query: " + refined_query)
    st.text("Top Matches:")
    st.text(match_result)

# Run the Streamlit app
if __name__ == "__main__":
    main()
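Note that find_match assumes the 'langchain-chatbot' index already holds vectors whose metadata carries a 'text' field. A hedged one-off sketch (the document passages below are hypothetical placeholders) of how such an index could be populated with the same all-MiniLM-L6-v2 embeddings:

import os
import pinecone
from sentence_transformers import SentenceTransformer

pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment="gcp-starter")

# all-MiniLM-L6-v2 produces 384-dimensional embeddings
if "langchain-chatbot" not in pinecone.list_indexes():
    pinecone.create_index("langchain-chatbot", dimension=384, metric="cosine")
index = pinecone.Index("langchain-chatbot")

model = SentenceTransformer("all-MiniLM-L6-v2")
docs = [
    "Hypothetical passage about the Child Labour (Prohibition and Regulation) Act, 1986.",
    "Hypothetical passage about the Right of Children to Free and Compulsory Education Act, 2009.",
]

# Each vector stores its source text in metadata, matching what find_match reads back
index.upsert(vectors=[
    (f"doc-{i}", model.encode(text).tolist(), {"text": text})
    for i, text in enumerate(docs)
])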