shreyasiv committed on
Commit
d26ea12
·
1 Parent(s): 1f518c3

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +65 -0
  2. requirements.txt +107 -0
  3. utils.py +96 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import statements
2
+ from langchain.chat_models import ChatOpenAI
3
+ from langchain.chains import ConversationChain
4
+ from langchain.chains.conversation.memory import ConversationBufferWindowMemory
5
+ from langchain.prompts import (
6
+ SystemMessagePromptTemplate,
7
+ HumanMessagePromptTemplate,
8
+ ChatPromptTemplate,
9
+ MessagesPlaceholder
10
+ )
11
+ import streamlit as st
12
+ from streamlit_chat import message
13
+ from utils import *
14
+
15
+ # Streamlit setup
16
+ st.subheader("Legal Guardian")
17
+
18
+ # Session state initialization
19
+ if 'responses' not in st.session_state:
20
+ st.session_state['responses'] = ["How can I assist you?"]
21
+
22
+ if 'requests' not in st.session_state:
23
+ st.session_state['requests'] = []
24
+
25
+ if 'buffer_memory' not in st.session_state:
26
+ st.session_state.buffer_memory = ConversationBufferWindowMemory(k=3, return_messages=True)
27
+
28
+ # Initialize ChatOpenAI and conversation
29
+ llm = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key="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")
30
+
31
+ system_msg_template = SystemMessagePromptTemplate.from_template("""
32
+ Legal Guardian' is a GPT designed to assist with a broad range of legal questions related to children's issues, focusing on laws in India...
33
+ ...It asks for clarification on vague questions to ensure accurate and relevant responses, and treats each query independently for focused assistance.'
34
+ """)
35
+
36
+ human_msg_template = HumanMessagePromptTemplate.from_template("{input}")
37
+ prompt_template = ChatPromptTemplate.from_messages([system_msg_template, MessagesPlaceholder(variable_name="history"), human_msg_template])
38
+ conversation = ConversationChain(memory=st.session_state.buffer_memory, prompt=prompt_template, llm=llm, verbose=True)
39
+
40
+ # Streamlit UI components
41
+ response_container = st.container()
42
+ textcontainer = st.container()
43
+
44
+ # Handle user input and display conversation
45
+ with textcontainer:
46
+ query = st.text_input("Query: ", key="input")
47
+ if st.button("Submit"):
48
+ with st.spinner("typing..."):
49
+ conversation_string = get_conversation_string()
50
+ refined_query = query_refiner(conversation_string, query)
51
+ st.subheader("Refined Query:")
52
+ st.write(refined_query)
53
+ context = find_match(refined_query)
54
+ response = conversation.predict(input=f"Context:\n {context} \n\n Query:\n{query}")
55
+ st.session_state.requests.append(query)
56
+ st.session_state.responses.append(response)
57
+
58
+ # Display conversation history
59
+ with response_container:
60
+ if st.session_state['responses']:
61
+ st.subheader("Chat History:")
62
+ for i in range(len(st.session_state['responses'])):
63
+ message(st.session_state['responses'][i], key=str(i))
64
+ if i < len(st.session_state['requests']):
65
+ message(st.session_state["requests"][i], is_user=True, key=str(i) + '_user')
requirements.txt ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.6
2
+ aiosignal==1.3.1
3
+ altair==5.1.2
4
+ annotated-types==0.6.0
5
+ anyio==3.7.1
6
+ async-timeout==4.0.3
7
+ attrs==23.1.0
8
+ blinker==1.6.3
9
+ cachetools==5.3.2
10
+ certifi==2023.7.22
11
+ charset-normalizer==3.3.1
12
+ click==8.1.7
13
+ dataclasses-json==0.6.1
14
+ dnspython==2.4.2
15
+ exceptiongroup==1.1.3
16
+ filelock==3.13.0
17
+ frozenlist==1.4.0
18
+ fsspec==2023.10.0
19
+ gitdb==4.0.11
20
+ GitPython==3.1.40
21
+ greenlet==3.0.1
22
+ huggingface-hub==0.17.3
23
+ idna==3.4
24
+ importlib-metadata==6.8.0
25
+ Jinja2==3.1.2
26
+ joblib==1.3.2
27
+ jsonpatch==1.33
28
+ jsonpointer==2.4
29
+ jsonschema==4.19.1
30
+ jsonschema-specifications==2023.7.1
31
+ langchain==0.0.325
32
+ langsmith==0.0.53
33
+ loguru==0.7.2
34
+ markdown-it-py==3.0.0
35
+ MarkupSafe==2.1.3
36
+ marshmallow==3.20.1
37
+ mdurl==0.1.2
38
+ mpmath==1.3.0
39
+ multidict==6.0.4
40
+ mypy-extensions==1.0.0
41
+ networkx==3.2.1
42
+ nltk==3.8.1
43
+ numpy==1.26.1
44
+ nvidia-cublas-cu12==12.1.3.1
45
+ nvidia-cuda-cupti-cu12==12.1.105
46
+ nvidia-cuda-nvrtc-cu12==12.1.105
47
+ nvidia-cuda-runtime-cu12==12.1.105
48
+ nvidia-cudnn-cu12==8.9.2.26
49
+ nvidia-cufft-cu12==11.0.2.54
50
+ nvidia-curand-cu12==10.3.2.106
51
+ nvidia-cusolver-cu12==11.4.5.107
52
+ nvidia-cusparse-cu12==12.1.0.106
53
+ nvidia-nccl-cu12==2.18.1
54
+ nvidia-nvjitlink-cu12==12.3.52
55
+ nvidia-nvtx-cu12==12.1.105
56
+ openai==0.28.1
57
+ packaging==23.2
58
+ pandas==2.1.2
59
+ Pillow==10.1.0
60
+ pinecone-client==2.2.4
61
+ protobuf==4.24.4
62
+ pyarrow==13.0.0
63
+ pydantic==2.4.2
64
+ pydantic_core==2.10.1
65
+ pydeck==0.8.1b0
66
+ Pygments==2.16.1
67
+ python-dateutil==2.8.2
68
+ python-dotenv==1.0.0
69
+ pytz==2023.3.post1
70
+ PyYAML==6.0.1
71
+ referencing==0.30.2
72
+ regex==2023.10.3
73
+ requests==2.31.0
74
+ rich==13.6.0
75
+ rpds-py==0.10.6
76
+ safetensors==0.4.0
77
+ scikit-learn==1.3.2
78
+ scipy==1.11.3
79
+ sentence-transformers==2.2.2
80
+ sentencepiece==0.1.99
81
+ six==1.16.0
82
+ smmap==5.0.1
83
+ sniffio==1.3.0
84
+ SQLAlchemy==2.0.22
85
+ streamlit==1.28.0
86
+ streamlit-chat==0.1.1
87
+ sympy==1.12
88
+ tenacity==8.2.3
89
+ threadpoolctl==3.2.0
90
+ tokenizers==0.14.1
91
+ toml==0.10.2
92
+ toolz==0.12.0
93
+ torch==2.1.0
94
+ torchvision==0.16.0
95
+ tornado==6.3.3
96
+ tqdm==4.66.1
97
+ transformers==4.34.1
98
+ triton==2.1.0
99
+ typing-inspect==0.9.0
100
+ typing_extensions==4.8.0
101
+ tzdata==2023.3
102
+ tzlocal==5.2
103
+ urllib3==2.0.7
104
+ validators==0.22.0
105
+ watchdog==3.0.0
106
+ yarl==1.9.2
107
+ zipp==3.17.0
utils.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import pinecone
3
+ import openai
4
+ import streamlit as st
5
+ from indic_transliteration import sanscript
6
+ from indic_transliteration.sanscript import transliterate
7
+
8
+ # Set OpenAI API key
9
+ openai.api_key = "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"
10
+
11
+ # Initialize SentenceTransformer model
12
+ model = SentenceTransformer('all-MiniLM-L6-v2')
13
+
14
+ # Initialize Pinecone index
15
+ pinecone.init(api_key='14b2909a-c00c-4ff8-9b79-87eb51b9d891', environment='gcp-starter')
16
+ index = pinecone.Index('langchain-chatbot')
17
+
18
+ # Function to find the most relevant match in Pinecone index
19
+ def find_match(input):
20
+ input_em = model.encode(input).tolist()
21
+ result = index.query(input_em, top_k=2, includeMetadata=True)
22
+ return result['matches'][0]['metadata']['text'] + "\n" + result['matches'][1]['metadata']['text']
23
+
24
+ # Function to refine a user query using OpenAI's Completion API
25
+ def query_refiner(conversation, query):
26
+ response = openai.Completion.create(
27
+ model="text-davinci-003",
28
+ prompt=f"Given the following user query and conversation log, formulate a question that would be the most relevant to provide the user with an answer from a knowledge base.\n\nCONVERSATION LOG: \n{conversation}\n\nQuery: {query}\n\nRefined Query:",
29
+ temperature=0.7,
30
+ max_tokens=256,
31
+ top_p=1,
32
+ frequency_penalty=0,
33
+ presence_penalty=0
34
+ )
35
+ return response['choices'][0]['text']
36
+
37
+ # Function to get the conversation string for display
38
+ def get_conversation_string():
39
+ conversation_string = ""
40
+ for i in range(len(st.session_state['responses']) - 1):
41
+ conversation_string += "Human: " + st.session_state['requests'][i] + "\n"
42
+ conversation_string += "Bot: " + st.session_state['responses'][i + 1] + "\n"
43
+ return conversation_string
44
+
45
+ def transliterate_tamil_to_english(text):
46
+ return transliterate(text, sanscript.TAMIL, sanscript.ITRANS)
47
+
48
+ # Function to find the most relevant match in Pinecone index
49
+ def find_match(input):
50
+ input_em = model.encode(input).tolist()
51
+ result = index.query(input_em, top_k=2, includeMetadata=True)
52
+ return result['matches'][0]['metadata']['text'] + "\n" + result['matches'][1]['metadata']['text']
53
+
54
+ # ... (your existing functions)
55
+
56
+ # Streamlit app
57
+ def main():
58
+ st.title("LangChain Chatbot")
59
+
60
+ # User input for the conversation in Tamil
61
+ user_input_tamil = st.text_input("User Input (Tamil):", "")
62
+
63
+ # Transliterate Tamil input to English for processing
64
+ user_input_english = transliterate_tamil_to_english(user_input_tamil)
65
+
66
+ # Retrieve refined query using OpenAI
67
+ refined_query = query_refiner(get_conversation_string(), user_input_english)
68
+
69
+ # Find the most relevant match using Sentence Transformers and Pinecone
70
+ match_result = find_match(refined_query)
71
+
72
+ # Display results
73
+ st.text("User Input (Tamil): " + user_input_tamil)
74
+ st.text("User Input (English): " + user_input_english)
75
+ st.text("Refined Query: " + refined_query)
76
+ st.text("Top Matches:")
77
+ st.text(match_result)
78
+ st.title("LangChain Chatbot")
79
+
80
+ # User input for the conversation
81
+ user_input = st.text_input("User Input:", "")
82
+
83
+ # Retrieve refined query using OpenAI
84
+ refined_query = query_refiner(get_conversation_string(), user_input)
85
+
86
+ # Find the most relevant match using Sentence Transformers and Pinecone
87
+ match_result = find_match(refined_query)
88
+
89
+ # Display results
90
+ st.text("Refined Query: " + refined_query)
91
+ st.text("Top Matches:")
92
+ st.text(match_result)
93
+
94
+ # Run the Streamlit app
95
+ if __name__ == "__main__":
96
+ main()