File size: 4,395 Bytes
0f046d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package punkt_tab to\n",
      "[nltk_data]     /Users/ryanrodriguez/nltk_data...\n",
      "[nltk_data]   Package punkt_tab is already up-to-date!\n",
      "[nltk_data] Downloading package averaged_perceptron_tagger_eng to\n",
      "[nltk_data]     /Users/ryanrodriguez/nltk_data...\n",
      "[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-\n",
      "[nltk_data]       date!\n"
     ]
    }
   ],
   "source": [
    "from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain.chains import create_retrieval_chain\n",
    "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
    "from backend.app.vectorstore import get_vector_db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "system_role_prompt = \"\"\"\n",
    "    You are a helpful assistant that generates questions based on a given context.\n",
    "\"\"\"\n",
    "\n",
    "user_role_prompt = \"\"\"\n",
    "    Based on the following context about {query}, generate 5 relevant and specific questions.\n",
    "    Make sure the questions can be answered using only the provided context.\n",
    "\n",
    "    Context: {context}\n",
    "\n",
    "    Generate 5 questions that test understanding of the material in the context.\n",
    "    \n",
    "    Return only a json object with the following format:\n",
    "    {{\n",
    "        \"questions\": [\"question1\", \"question2\", \"question3\", \"question4\", \"question5\"]\n",
    "    }}\n",
    "\"\"\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "chat_prompt = ChatPromptTemplate.from_messages([\n",
    "    (\"system\", system_role_prompt),\n",
    "    (\"user\", user_role_prompt)\n",
    "])\n",
    "\n",
    "openai_chat_model = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0.7)\n",
    "\n",
    "retriever = get_vector_db().as_retriever(search_kwargs={\"k\": 2})\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.runnables import RunnablePassthrough\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "\n",
    "simple_rag  = (\n",
    "    {\"context\": retriever, \"query\": RunnablePassthrough(), \"num_questions\": RunnablePassthrough()}\n",
    "    | chat_prompt\n",
    "    | openai_chat_model\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_result = simple_rag.invoke(\"RAG\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['What are the two main components of a typical RAG application?',\n",
       " 'What is the purpose of the indexing component in a RAG application?',\n",
       " \"What are the steps involved in the 'Load' phase of indexing?\",\n",
       " 'Why is splitting text into smaller chunks important in the context of RAG applications?',\n",
       " 'How does the retrieval and generation component of a RAG application process user queries?']"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import json\n",
    "result = json.loads(raw_result)\n",
    "result[\"questions\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}