{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Retrieval-augmented Q&A over local medical text files\n",
    "\n",
    "Loads every `.txt` file in `example_medical/`, splits the text into overlapping chunks, indexes the chunks in a Chroma vector store using OpenAI embeddings, and answers a question with `gpt-4o-mini` through the `rlm/rag-prompt` prompt pulled from the LangChain hub.\n",
    "\n",
    "Environment variables are loaded from `.env.rag` in the first code cell; the OpenAI clients below presumably pick up their API key from there (assumption — confirm against your environment)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "import dotenv\n",
    "\n",
    "# Load environment variables from .env.rag before any client is constructed.\n",
    "# The return value of load_dotenv() is displayed as the cell output.\n",
    "dotenv.load_dotenv('.env.rag')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "# Chat model used for the generation step of the RAG chain below.\n",
    "llm = ChatOpenAI(model=\"gpt-4o-mini\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Mpox is a disease that spreads through close contact, including sexual contact, and can cause symptoms such as blistered skin rashes, ulcers, and flu-like symptoms. It has been present in the U.S. since 2022 and was declared a global health emergency by the WHO. Recently, a new strain of mpox, known as clade Ib, has emerged, primarily affecting regions in Africa.'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import bs4  # NOTE(review): not referenced anywhere in this notebook\n",
    "from langchain import hub\n",
    "from langchain_chroma import Chroma\n",
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.runnables import RunnablePassthrough\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "\n",
    "# Load, chunk and index the contents of the local .txt files.\n",
    "folder = 'example_medical'\n",
    "docs = []\n",
    "\n",
    "# os.listdir() returns entries in arbitrary order; sort them so the documents\n",
    "# (and the chunks derived from them) are produced in the same order on every run.\n",
    "for filename in sorted(os.listdir(folder)):\n",
    "    if filename.endswith('.txt'):\n",
    "        loader = TextLoader(os.path.join(folder, filename))\n",
    "        docs.extend(loader.load())\n",
    "\n",
    "# Stop here if nothing was loaded rather than going on to index an empty corpus.\n",
    "assert docs, f'no .txt files found in {folder!r}'\n",
    "\n",
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
    "splits = text_splitter.split_documents(docs)\n",
    "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n",
    "\n",
    "# Retrieve and generate using the relevant snippets of the indexed files.\n",
    "retriever = vectorstore.as_retriever()\n",
    "prompt = hub.pull(\"rlm/rag-prompt\")\n",
    "\n",
    "\n",
    "def format_docs(docs):\n",
    "    \"\"\"Join the page_content of every document, separated by a blank line.\"\"\"\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
    "\n",
    "\n",
    "# The retriever output is formatted into the prompt's \"context\" slot, while the\n",
    "# raw question is passed through unchanged into the \"question\" slot.\n",
    "rag_chain = (\n",
    "    {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
    "    | prompt\n",
    "    | llm\n",
    "    | StrOutputParser()\n",
    ")\n",
    "\n",
    "rag_chain.invoke(\"What is mpox?\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}