AKIN-THOMAS committed on
Commit
421d1ca
·
1 Parent(s): 8f69d3e
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 👀
4
  colorFrom: indigo
5
  colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 3.38.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
4
  colorFrom: indigo
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 3.11.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
app.ipynb ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 9,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import datetime\n",
10
+ "import gradio as gr\n",
11
+ "from dotenv import load_dotenv\n",
12
+ "from langchain.vectorstores import Chroma\n",
13
+ "from langchain.embeddings.openai import OpenAIEmbeddings\n",
14
+ "from langchain.chat_models import ChatOpenAI\n",
15
+ "from langchain.prompts import PromptTemplate\n",
16
+ "from langchain.chains import RetrievalQA\n",
17
+ "from langchain.chains import ConversationalRetrievalChain\n",
18
+ "from langchain.memory import ConversationBufferMemory\n",
19
+ "\n",
20
+ "\n",
21
+ "import warnings\n",
22
+ "warnings.filterwarnings('ignore')"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 2,
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "# from langchain.memory import MemoryViewMemory\n"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 10,
37
+ "metadata": {},
38
+ "outputs": [
39
+ {
40
+ "name": "stdout",
41
+ "output_type": "stream",
42
+ "text": [
43
+ "gpt-3.5-turbo-0301\n"
44
+ ]
45
+ }
46
+ ],
47
+ "source": [
48
+ "import datetime\n",
49
+ "current_date = datetime.datetime.now().date()\n",
50
+ "if current_date < datetime.date(2023, 9, 2):\n",
51
+ " llm_name = \"gpt-3.5-turbo-0301\"\n",
52
+ "else:\n",
53
+ " llm_name = \"gpt-3.5-turbo\"\n",
54
+ "print(llm_name)"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": 11,
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "def chatWithNCAIR(question, history):\n",
64
+ " load_dotenv()\n",
65
+ "\n",
66
+ " persist_directory = 'docs/chroma/'\n",
67
+ " embedding = OpenAIEmbeddings()\n",
68
+ " vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)\n",
69
+ " llm = ChatOpenAI(model_name=llm_name, temperature=0)\n",
70
+ "\n",
71
+ " template = \"\"\"Use the following pieces of context to answer the question at the end. \n",
72
+ " If you don't know the answer, just say that you don't know, don't try to make up an answer. \n",
73
+ " Use three sentences maximum. Keep the answer as concise as possible. \n",
74
+ " Always say \"thank you for choosing NCAIR BOT!\" at the end of the answer. \n",
75
+ " {context}\n",
76
+ " Question: {question}\n",
77
+ " Helpful Answer:\"\"\"\n",
78
+ " QA_CHAIN_PROMPT = PromptTemplate(input_variables=[\"context\", \"question\"],template=template,)\n",
79
+ "\n",
80
+ " # Run chain\n",
81
+ " from langchain.chains import RetrievalQA\n",
82
+ " # question = \"Will interns go through the fabLab during the onboarding?\"\n",
83
+ " qa_chain = RetrievalQA.from_chain_type(llm,\n",
84
+ " retriever=vectordb.as_retriever(),\n",
85
+ " return_source_documents=True,\n",
86
+ " chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT})\n",
87
+ "\n",
88
+ " memory = ConversationBufferMemory(\n",
89
+ " memory_key=\"chat_history\",\n",
90
+ " return_messages=True\n",
91
+ " )\n",
92
+ " retriever=vectordb.as_retriever()\n",
93
+ " qa = ConversationalRetrievalChain.from_llm(\n",
94
+ " llm,\n",
95
+ " retriever=retriever,\n",
96
+ " memory=memory\n",
97
+ " )\n",
98
+ "\n",
99
+ "\n",
100
+ " result = qa({\"question\": question})\n",
101
+ " return result[\"answer\"]\n"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": 12,
107
+ "metadata": {},
108
+ "outputs": [
109
+ {
110
+ "data": {
111
+ "text/plain": [
112
+ "'Yes, as an intern in NCAIR, you will first undergo the onboarding session in Fablab, PCB, shopbot, 3D printing, solid work, and then go through compulsory NADIT programs.'"
113
+ ]
114
+ },
115
+ "execution_count": 12,
116
+ "metadata": {},
117
+ "output_type": "execute_result"
118
+ }
119
+ ],
120
+ "source": [
121
+ "chatWithNCAIR(\"Will interns go through the fabLab during the onboarding?\",\"\")"
122
+ ]
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "execution_count": 6,
127
+ "metadata": {},
128
+ "outputs": [],
129
+ "source": [
130
+ "# chatWithNCAIR(\"Is it compulsory?\",\"\")"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": 7,
136
+ "metadata": {},
137
+ "outputs": [],
138
+ "source": [
139
+ "# ! pip install --upgrade gradio"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": 8,
145
+ "metadata": {},
146
+ "outputs": [
147
+ {
148
+ "name": "stdout",
149
+ "output_type": "stream",
150
+ "text": [
151
+ "Running on local URL: http://127.0.0.1:7860\n",
152
+ "Running on public URL: https://0e3e0326bf1f8474c9.gradio.live\n",
153
+ "\n",
154
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
155
+ ]
156
+ },
157
+ {
158
+ "data": {
159
+ "text/html": [
160
+ "<div><iframe src=\"https://0e3e0326bf1f8474c9.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
161
+ ],
162
+ "text/plain": [
163
+ "<IPython.core.display.HTML object>"
164
+ ]
165
+ },
166
+ "metadata": {},
167
+ "output_type": "display_data"
168
+ },
169
+ {
170
+ "data": {
171
+ "text/plain": []
172
+ },
173
+ "execution_count": 8,
174
+ "metadata": {},
175
+ "output_type": "execute_result"
176
+ }
177
+ ],
178
+ "source": [
179
+ "demo = gr.ChatInterface(fn=chatWithNCAIR,\n",
180
+ " chatbot=gr.Chatbot(height=300, min_width=40),\n",
181
+ " textbox=gr.Textbox(placeholder=\"Ask me a question relating to NCAIR\"),\n",
182
+ " title=\"Chat with NCAIR💬\",\n",
183
+ " description=\"Ask NCAIR any question\",\n",
184
+ " theme=\"soft\",\n",
185
+ " cache_examples=True,\n",
186
+ " retry_btn=None,\n",
187
+ " undo_btn=\"Delete Previous\",\n",
188
+ " clear_btn=\"Clear\",)\n",
189
+ "\n",
190
+ "demo.launch(share=True)"
191
+ ]
192
+ }
193
+ ],
194
+ "metadata": {
195
+ "kernelspec": {
196
+ "display_name": "Python 3",
197
+ "language": "python",
198
+ "name": "python3"
199
+ },
200
+ "language_info": {
201
+ "codemirror_mode": {
202
+ "name": "ipython",
203
+ "version": 3
204
+ },
205
+ "file_extension": ".py",
206
+ "mimetype": "text/x-python",
207
+ "name": "python",
208
+ "nbconvert_exporter": "python",
209
+ "pygments_lexer": "ipython3",
210
+ "version": "3.10.8"
211
+ },
212
+ "orig_nbformat": 4
213
+ },
214
+ "nbformat": 4,
215
+ "nbformat_minor": 2
216
+ }
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import gradio as gr
3
+ from dotenv import load_dotenv
4
+ from langchain.vectorstores import Chroma
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from langchain.chat_models import ChatOpenAI
7
+ from langchain.prompts import PromptTemplate
8
+ from langchain.chains import RetrievalQA
9
+ from langchain.chains import ConversationalRetrievalChain
10
+ from langchain.memory import ConversationBufferMemory
11
+
12
+
13
+ import warnings
14
+ warnings.filterwarnings('ignore')
15
+
16
# Choose the OpenAI chat model from today's date: the "0301" snapshot is
# selected only before 2 September 2023, the plain "gpt-3.5-turbo" name
# from that day onwards.
current_date = datetime.date.today()
llm_name = (
    "gpt-3.5-turbo-0301"
    if current_date < datetime.date(2023, 9, 2)
    else "gpt-3.5-turbo"
)
22
+
23
+
24
def chatWithNCAIR(question, history):
    """Answer a question about NCAIR using the persisted Chroma store.

    Args:
        question: The user's latest message.
        history: Earlier turns of the conversation as ``(user, bot)`` pairs
            (presumably the list of ``[user, bot]`` lists that Gradio's
            ``ChatInterface`` passes to ``fn`` -- confirm for the pinned
            Gradio version). Any empty value (``""``, ``[]``, ``None``)
            means a fresh conversation.

    Returns:
        The answer text produced by the conversational retrieval chain.
    """
    # Pick up settings (e.g. the OpenAI API key) from a local .env file,
    # if one is present.
    load_dotenv()

    persist_directory = 'docs/chroma/'
    embedding = OpenAIEmbeddings()
    vectordb = Chroma(persist_directory=persist_directory,
                      embedding_function=embedding)
    llm = ChatOpenAI(model_name=llm_name, temperature=0)

    template = """Use the following pieces of context to answer the question at the end.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Use three sentences maximum. Keep the answer as concise as possible.
    Always say "thank you for choosing NCAIR BOT!" at the end of the answer.
    {context}
    Question: {question}
    Helpful Answer:"""
    QA_CHAIN_PROMPT = PromptTemplate(
        input_variables=["context", "question"], template=template,)

    # Attach the custom prompt to the chain that is actually invoked.
    # Previously it was only handed to a RetrievalQA chain that was built
    # and then never called, so the answering instructions had no effect.
    qa = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vectordb.as_retriever(),
        combine_docs_chain_kwargs={"prompt": QA_CHAIN_PROMPT},
    )

    # A memory object created inside this function would start empty on
    # every call, so follow-up questions would lose their context. Feed the
    # caller-supplied history to the chain instead. The chain expects
    # tuples, hence the conversion from whatever pair type is passed in.
    chat_history = [
        (str(user_msg or ""), str(bot_msg or ""))
        for user_msg, bot_msg in (history or [])
    ]

    result = qa({"question": question, "chat_history": chat_history})
    return result["answer"]
64
+
65
+
66
# Build the Gradio chat UI around chatWithNCAIR, which receives the new
# message and the running history and returns the reply text.
# NOTE(review): cache_examples=True is set although no `examples` argument
# is passed here -- confirm whether it has any effect.
# NOTE(review): retry_btn=None presumably hides the retry button, while
# undo_btn/clear_btn supply custom button labels -- verify against the
# Gradio ChatInterface documentation for the pinned sdk_version.
demo = gr.ChatInterface(fn=chatWithNCAIR,
                        chatbot=gr.Chatbot(height=300, min_width=40),
                        textbox=gr.Textbox(
                            placeholder="Ask me a question relating to NCAIR"),
                        title="Chat with NCAIR💬",
                        description="Ask NCAIR any question",
                        theme="soft",
                        cache_examples=True,
                        retry_btn=None,
                        undo_btn="Delete Previous",
                        clear_btn="Clear",)

# Start the app when this module is executed. Unlike the notebook variant,
# no share=True is requested here; inline=False presumably disables the
# embedded notebook display -- confirm in the Gradio launch() docs.
demo.launch(inline=False)
docs/chroma/chroma-collections.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0d400379140f0334717afd5ded1e939056093dcddd99f8d04303d7b4a02c829
3
+ size 557
docs/chroma/chroma-embeddings.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d68c750aa538c7a2c358f48f8753a8cdaf5818a3490d220773979d55b72f268
3
+ size 1696701
docs/chroma/index/id_to_uuid_e9f84229-87ab-426e-a3d7-e7ec6f6f5d41.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d3c04482c0178c82187bc551504affad306d4393dd174574a496362edd7e9c
3
+ size 4245
docs/chroma/index/index_e9f84229-87ab-426e-a3d7-e7ec6f6f5d41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb3e967edc62e77b399c78f0cc8e880cbed7747e6f9687cb6b5670f401c2898f
3
+ size 843572
docs/chroma/index/index_metadata_e9f84229-87ab-426e-a3d7-e7ec6f6f5d41.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c53793715df10b935e9b52ece240f4e042b4e862360889b585a4beb084dfc91
3
+ size 103
docs/chroma/index/uuid_to_id_e9f84229-87ab-426e-a3d7-e7ec6f6f5d41.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bca60c0cc265ca0df973a335e615317e610b0f352bb139209431f233f2193c
3
+ size 4974
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ typing
2
+ typing-inspect
3
+ typing_extensions
4
+ openai
5
+ langchain
6
+ chroma
7
+ chromadb
8
+ pydantic
9
+ python-dotenv
10
+ gradio
11
+ datetime