Spaces:

Rsr2425
/

SimpliFi

Sleeping

File size: 4,395 Bytes

0f046d0

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package punkt_tab to\n",
      "[nltk_data]     /Users/ryanrodriguez/nltk_data...\n",
      "[nltk_data]   Package punkt_tab is already up-to-date!\n",
      "[nltk_data] Downloading package averaged_perceptron_tagger_eng to\n",
      "[nltk_data]     /Users/ryanrodriguez/nltk_data...\n",
      "[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-\n",
      "[nltk_data]       date!\n"
     ]
    }
   ],
   "source": [
    "from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain.chains import create_retrieval_chain\n",
    "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
    "from backend.app.vectorstore import get_vector_db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "system_role_prompt = \"\"\"\n",
    "    You are a helpful assistant that generates questions based on a given context.\n",
    "\"\"\"\n",
    "\n",
    "user_role_prompt = \"\"\"\n",
    "    Based on the following context about {query}, generate 5 relevant and specific questions.\n",
    "    Make sure the questions can be answered using only the provided context.\n",
    "\n",
    "    Context: {context}\n",
    "\n",
    "    Generate 5 questions that test understanding of the material in the context.\n",
    "    \n",
    "    Return only a json object with the following format:\n",
    "    {{\n",
    "        \"questions\": [\"question1\", \"question2\", \"question3\", \"question4\", \"question5\"]\n",
    "    }}\n",
    "\"\"\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "chat_prompt = ChatPromptTemplate.from_messages([\n",
    "    (\"system\", system_role_prompt),\n",
    "    (\"user\", user_role_prompt)\n",
    "])\n",
    "\n",
    "openai_chat_model = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0.7)\n",
    "\n",
    "retriever = get_vector_db().as_retriever(search_kwargs={\"k\": 2})\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.runnables import RunnablePassthrough\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "\n",
    "simple_rag  = (\n",
    "    {\"context\": retriever, \"query\": RunnablePassthrough(), \"num_questions\": RunnablePassthrough()}\n",
    "    | chat_prompt\n",
    "    | openai_chat_model\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_result = simple_rag.invoke(\"RAG\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['What are the two main components of a typical RAG application?',\n",
       " 'What is the purpose of the indexing component in a RAG application?',\n",
       " \"What are the steps involved in the 'Load' phase of indexing?\",\n",
       " 'Why is splitting text into smaller chunks important in the context of RAG applications?',\n",
       " 'How does the retrieval and generation component of a RAG application process user queries?']"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import json\n",
    "result = json.loads(raw_result)\n",
    "result[\"questions\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}