{ "cells": [
 { "cell_type": "code", "execution_count": null, "metadata": { "gather": { "logged": 1695395317315 }, "id": "RRYSu48huSUW" }, "outputs": [], "source": [
  "%pip install -q -U langchain huggingface_hub tiktoken PyPDF2 pypdf sentence_transformers together FlagEmbedding faiss-gpu openai text-generation pymupdf gradio_client"
 ] },
 { "cell_type": "markdown", "metadata": { "id": "gvIjaK53dP5l" }, "source": [
  "## RetrievalQA with LLaMA 2-70B on Together API"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "gather": { "logged": 1695395317576 }, "id": "dNA4TsHpu6OM" }, "outputs": [], "source": [
  "import os\n",
  "from getpass import getpass\n",
  "\n",
  "# Never hard-code API keys in a notebook. Read the key from the environment and\n",
  "# prompt interactively only when it is not already set.\n",
  "if not os.environ.get(\"TOGETHER_API_KEY\"):\n",
  "    os.environ[\"TOGETHER_API_KEY\"] = getpass(\"Together API key: \")"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "J-KFB7J_u_3L", "outputId": "1f31d8b5-5491-47bc-8342-b4f48aa70a9e" }, "outputs": [], "source": [
  "%pip install -q asyncpg"
 ] },
 { "cell_type": "markdown", "metadata": { "id": "HqwsGJDhvAQ5" }, "source": [
  "# Setting up Together API\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "gather": { "logged": 1695395321792 }, "id": "B3pqftc7nacA" }, "outputs": [], "source": [
  "import together\n",
  "\n",
  "# set your API key\n",
  "together.api_key = os.environ[\"TOGETHER_API_KEY\"]\n",
  "\n",
  "# list available models and descriptions\n",
  "# models = together.Models.list()"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "gather": { "logged": 1695395322559 }, "id": "mdFedq669R1D", "outputId": "8a5fc3d6-57a6-48af-eb96-058dea32dd70" }, "outputs": [], "source": [
  "# together.Models.start(\"togethercomputer/llama-2-70b-chat\")"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "gather": { "logged": 1695395325479 }, "id": "RgbLVmf-o4j7" }, "outputs": [], "source": [
  "import together\n",
  "\n",
  "import logging\n",
  "from typing import Any, Dict, List, Mapping, Optional\n",
  "\n",
  "from pydantic import Extra, Field, root_validator\n",
  "\n",
  "from langchain.callbacks.manager import CallbackManagerForLLMRun\n",
  "from langchain.llms.base import LLM\n",
  "from langchain.llms.utils import enforce_stop_tokens\n",
  "from langchain.utils import get_from_dict_or_env\n",
  "\n",
  "\n",
  "from gradio_client import Client\n",
  "\n",
  "logger = logging.getLogger(__name__)\n",
  "\n",
  "# The endpoint is a Gradio share link; allow overriding it through the\n",
  "# environment so the notebook does not have to be edited when the link changes.\n",
  "GRADIO_LLM_URL = os.environ.get(\"GRADIO_LLM_URL\", \"https://073695670dbd200693.gradio.live/\")\n",
  "\n",
  "client = Client(GRADIO_LLM_URL)\n",
  "\n",
  "class TogetherLLM(LLM):\n",
  "    \"\"\"LangChain LLM wrapper that sends prompts to the Gradio app behind `client`.\n",
  "\n",
  "    The Together completion call is kept in `_call` as commented-out code; in the\n",
  "    current form `model`, `together_api_key` and `max_tokens` are declared but are\n",
  "    not read when generating text.\n",
  "    \"\"\"\n",
  "\n",
  "    model: str = \"togethercomputer/llama-2-70b-chat\"\n",
  "    \"\"\"model endpoint to use\"\"\"\n",
  "\n",
  "    # Resolved by validate_environment() from the constructor argument or the\n",
  "    # TOGETHER_API_KEY environment variable, instead of being read (and baked in\n",
  "    # as a class default) when the class body is executed.\n",
  "    together_api_key: Optional[str] = None\n",
  "    \"\"\"Together API key\"\"\"\n",
  "\n",
  "    temperature: float = 0.7\n",
  "    \"\"\"What sampling temperature to use.\"\"\"\n",
  "\n",
  "    max_tokens: int = 512\n",
  "    \"\"\"The maximum number of tokens to generate in the completion.\"\"\"\n",
  "\n",
  "    class Config:\n",
  "        extra = Extra.forbid\n",
  "\n",
  "    @root_validator()\n",
  "    def validate_environment(cls, values: Dict) -> Dict:\n",
  "        \"\"\"Validate that the API key is set (argument first, then TOGETHER_API_KEY).\"\"\"\n",
  "        api_key = get_from_dict_or_env(\n",
  "            values, \"together_api_key\", \"TOGETHER_API_KEY\"\n",
  "        )\n",
  "        values[\"together_api_key\"] = api_key\n",
  "        return values\n",
  "\n",
  "    @property\n",
  "    def _llm_type(self) -> str:\n",
  "        \"\"\"Return type of LLM.\"\"\"\n",
  "        return \"together\"\n",
  "\n",
  "    def _call(\n",
  "        self,\n",
  "        prompt: str,\n",
  "        stop: Optional[List[str]] = None,\n",
  "        run_manager: Optional[CallbackManagerForLLMRun] = None,\n",
  "        **kwargs: Any,\n",
  "    ) -> str:\n",
  "        \"\"\"Send `prompt` to the Gradio endpoint and return its result.\n",
  "\n",
  "        Args:\n",
  "            prompt: Full prompt text.\n",
  "            stop: Optional stop sequences; when given, the result is cut at the\n",
  "                first occurrence of any of them.\n",
  "            run_manager: Accepted for LangChain interface compatibility; unused.\n",
  "            **kwargs: Extra call-time arguments; ignored.\n",
  "\n",
  "        Returns:\n",
  "            Whatever `client.predict` returns for the prompt.\n",
  "        \"\"\"\n",
  "        # together.api_key = self.together_api_key\n",
  "        # output = together.Complete.create(prompt,\n",
  "        #                                   model=self.model,\n",
  "        #                                   max_tokens=self.max_tokens,\n",
  "        #                                   temperature=self.temperature,\n",
  "        #                                   )\n",
  "        # text = output['output']['choices'][0]['text']\n",
  "        # return text\n",
  "        # Prompts contain the retrieved context and can be long: log at debug\n",
  "        # level instead of printing them on every call.\n",
  "        logger.debug(\"Prompt sent to LLM endpoint: %s\", prompt)\n",
  "        result = client.predict(\n",
  "            prompt,  # str in 'Question' Textbox component\n",
  "            0.95,  # int | float (numeric value between 0.05 and 1.0)\n",
  "            self.temperature,  # int | float (numeric value between 0.1 and 1.0)\n",
  "            50,  # int | float (numeric value between 1 and 50)\n",
  "            300,  # NOTE(review): unlabeled argument, presumably the generation length; self.max_tokens is not used - confirm against the app's API\n",
  "            fn_index=0\n",
  "        )\n",
  "        if stop:\n",
  "            result = enforce_stop_tokens(result, stop)\n",
  "        return result"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "gather": { "logged": 1692868876070 }, "id": "ZlQzln_PRonn", "outputId": "7a2b8ad7-f392-49fe-af3d-e620c4b0bf02" }, "outputs": [], "source": [
  "\n",
  "# !wget -O new_papers_2.zip https://www.dropbox.com/scl/fi/67a80h373n1z38088c9fb/new_papers_2.zip?rlkey=1azfz3w5aazd24ihotwzmol2j&dl=1\n",
  "# !unzip -q new_papers_2.zip -d new_papers"
 ] },
 { "cell_type": "markdown", "metadata": { "id": "7AnZQpL_IZZZ" }, "source": [
  "# LangChain multi-doc retriever with ChromaDB\n",
  "\n",
  "***Key Points***\n",
  "- Multiple Files - PDFs\n",
  "- ChromaDB\n",
  "- LLaMA-2 LLM\n",
  "- BGE Embeddings\n"
 ] },
 { "cell_type": "markdown", "metadata": { "id": "fgfdhZ5uRpFn" }, "source": [
  "## Setting up LangChain\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "gather": { "logged": 1692868876936 }, "id": "Y_2-HBI3RpFn" }, "outputs": [], "source": [
  "import os"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "gather": { "logged": 1692870096658 }, "id": "XHVE9uFb3Ajj", "outputId": "fea94568-9e6c-459e-8918-8f2b4f297f6e" }, "outputs": [], "source": [
  "# from langchain.vectorstores import Chroma\n",
  "from langchain.vectorstores import FAISS\n",
  "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
  "\n",
  "from langchain.chains import RetrievalQA\n",
  "from langchain.document_loaders import TextLoader\n",
  "from langchain.document_loaders import PyPDFLoader\n",
  "from langchain.document_loaders import DirectoryLoader\n",
  "\n",
  "\n",
  "# from InstructorEmbedding import INSTRUCTOR\n",
  "# from langchain.embeddings import HuggingFaceInstructEmbeddings"
] },
 { "cell_type": "markdown", "metadata": { "id": "9UcQKUId3X2M" }, "source": [
  "## Load multiple and process documents"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "gather": { "logged": 1695396081422 }, "id": "PRSeXXc_3Ypj" }, "outputs": [], "source": [
  "# # Load and process the text files\n",
  "# # loader = TextLoader('single_text_file.txt')\n",
  "# loader = DirectoryLoader('votum-ml/pdfs/', glob=\"./*.pdf\", loader_cls=PyPDFLoader)\n",
  "\n",
  "# documents = loader.load()\n",
  "\n",
  "\n",
  "import asyncio\n",
  "import asyncpg\n",
  "import os\n",
  "from getpass import getpass\n",
  "\n",
  "\n",
  "# Connection settings come from the standard PG* environment variables; the\n",
  "# password is never stored in the notebook and is prompted for when unset.\n",
  "pg_password = os.environ.get(\"PGPASSWORD\") or getpass(\"Postgres password: \")\n",
  "\n",
  "conn = await asyncpg.connect(\n",
  "    host=os.environ.get(\"PGHOST\", \"legalscraperserver.postgres.database.azure.com\"),\n",
  "    database=os.environ.get(\"PGDATABASE\", \"postgres\"),\n",
  "    user=os.environ.get(\"PGUSER\", \"tejasw\"),\n",
  "    password=pg_password,\n",
  "    port=int(os.environ.get(\"PGPORT\", \"5432\")),\n",
  ")\n",
  "try:\n",
  "    # `row` holds every fetched record (one per act); later cells iterate over it.\n",
  "    row = await conn.fetch(\n",
  "        '''SELECT sections, act_name, text\n",
  "FROM acts\n",
  "WHERE sections IS NOT NULL''')\n",
  "finally:\n",
  "    # Release the server connection even when the query fails.\n",
  "    await conn.close()\n",
  "\n",
  "# AND(act_name LIKE '%The Indian Penal Code, 1860%' OR act_name LIKE '%The Code of Criminal Procedure, 1973%' OR act_name LIKE '%Motor Vehicles Act%')\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "gather": { "logged": 1695396160144 }, "id": "vT6KgAIT_BtB", "outputId": "281aa4cb-27c3-46bf-ad2e-7c43e9dc76e4" }, "outputs": [], "source": [
  "import json\n",
  "import re\n",
  "\n",
  "\n",
  "# Phone-like numbers: optional +country code, an area code (optionally in\n",
  "# parentheses), then a 3-4 digit and a 4 digit group, i.e. at least nine digits.\n",
  "# The lookarounds stop the match from starting or ending inside a longer digit\n",
  "# run, so years and amounts such as '1860' or '10000' are left untouched.\n",
  "PHONE_PATTERN = re.compile(\n",
  "    r'(?<!\\d)(?:\\+\\d{1,3}[-\\s]?)?(?:\\(\\d{2,4}\\)|\\d{2,4})[-\\s]?\\d{3,4}[-\\s]?\\d{4}(?!\\d)'\n",
  ")\n",
  "\n",
  "\n",
  "def remove_between_periods(sentence):\n",
  "    \"\"\"Replace every shortest '.<text>.' span in `sentence` with a single '.'.\"\"\"\n",
  "    # Non-greedy so that each pair of periods is collapsed separately\n",
  "    pattern = r'\\.(.*?)\\.'\n",
  "    return re.sub(pattern, '.', sentence)\n",
  "\n",
  "\n",
  "def preprocess_text(text):\n",
  "    \"\"\"Normalise a section string before it is embedded.\n",
  "\n",
  "    Lower-cases the text, removes URLs and phone-like numbers, drops every\n",
  "    character other than ASCII letters, digits, whitespace and ``, / : .``,\n",
  "    removes the substring 'tweet' and collapses runs of whitespace.\n",
  "\n",
  "    Args:\n",
  "        text: Raw text to clean.\n",
  "\n",
  "    Returns:\n",
  "        The cleaned, single-spaced, lower-case text.\n",
  "    \"\"\"\n",
  "    text = text.lower()\n",
  "\n",
  "    # Turn non-breaking spaces into ordinary spaces; deleting them outright\n",
  "    # would glue the neighbouring words together.\n",
  "    text = text.replace('\\xa0', ' ')\n",
  "\n",
  "    # Remove URLs using regex\n",
  "    text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)\n",
  "\n",
  "    # Remove phone numbers (matches formats like +1234567890, 123-456-7890, (123) 456-7890)\n",
  "    text = PHONE_PATTERN.sub('', text)\n",
  "\n",
  "    # Remove special characters, keeping ',', '/', ':' and '.'\n",
  "    text = re.sub(r'[^a-zA-Z0-9\\s,/:\\.]', '', text)\n",
  "\n",
  "    text = text.replace('tweet','')\n",
  "\n",
  "    # Remove extra whitespace\n",
  "    return ' '.join(text.split())\n",
  "\n",
  "\n",
  "# One cleaned '<section name> of <act name> : <section text>' string per section\n",
  "data = []\n",
  "\n",
  "for act in row:\n",
  "    for section in act['sections']:\n",
  "        json_data = json.loads(section)\n",
  "        # Skip sections whose name marks them as omitted\n",
  "        if 'omitted.' in json_data['section_name'].lower():\n",
  "            continue\n",
  "        section_name = f\"{remove_between_periods(json_data['section_name'])} of {act['act_name'].replace(',','')}\"\n",
  "        data.append(preprocess_text(section_name + ' : ' + json_data['text']))\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "gather": { "logged": 1695396204431 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } }, "outputs": [], "source": [
  "len(data)"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "gather": { "logged": 1692868887177 }, "id": "3__nT0D4Fkmg", "outputId": "d77220d1-04d8-41a9-df8d-36c6e61bf1ab" }, "outputs": [], "source": [
  "# Split every section into overlapping chunks and keep all of them, so text\n",
  "# beyond the first chunk of a long section is still indexed.\n",
  "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
  "\n",
  "texts = [chunk for d in data for chunk in text_splitter.split_text(d)]\n",
  "\n",
  "len(texts)"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "gather": { "logged": 1692868888971 }, "id": "mi9VSazP0RSN", "outputId": "c15cd59b-b8e5-4ed2-d2b9-7abe5f31806b" }, "outputs": [], "source": [
  "texts[0]"
 ] },
 { "cell_type": "markdown", "metadata": { "id": "fhs0C0FYASlM" }, "source": [
  "## HF BGE Embeddings"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata":
{ "colab": { "base_uri": "https://localhost:8080/", "height": 401, "referenced_widgets": [ "72336b98bb664e598f8da434dea12bcf", "06cb9e36eb3e440aa62fa3865a0bf7cb", "1e968f495d78483ab22002cf46e7599a", "65a19b80971f4f16ae486e0b33a920d9", "e99f15fea33a4bcfb98bb21e581e77f7", "f54192aed0d8415f85a5a32cb18548ad", "0131eff774774716bb6003568076f496", "6268fe40a4b44006a43f23c48b4fd73b", "448242d61da2461abcede77139f0b740", "8d7cae1057ef41e38b8d3bd8d71127b4", "b23961b8865f44e685351b732acfd160", "8811e27cb7b24fe7a30bbef63c8b034d", "120abc9282ac4eb099153d368c81b2f3", "dd39941179714219a4fbd70898e77c36", "15efa95fc5904f41a22691b44ef8e884", "3a907db6f66941419d5d8619cd133624", "67b402cc9e3a41fc881c62764bf0b952", "3c0e004a5c5e4308a7720c779ca2c3c0", "8c53a2d79f2f4c38b47718c614707250", "65e13b52d29c4d08bafba8bda728bd5f", "11c7410fb7a64903beaacf78aed04247", "35b83316c5fa4bf589af3130e002dabe", "599e033fa98f44819af4c2d7048fbfb4", "8f60547b75a34645ad6f02f610c1ac31", "c4ca4c02ad0e451dae2cce9c2a26ec8d", "f0bc30cc5e1d4f5a869800ac835a6943", "6bfa3a255e8f4b799219b76cd6dfed0b", "db0209e961d64aba9d4c3a8991de2016", "f95edd8ea46a486dadae84f43f316eb6", "43aebf595fc0440c9500f0315f88beaa", "58002026e9ff43ed94d85bae003d2e84", "70ecd38ec9664d5f824589a02e5cfb13", "54e87d4e5d1c4d5fbb16a92270ae6c54", "1a4810e47dbf451b8a6d3ab2c816bf64", "a6ff33d70a3c401d8010d3a8f177bf82", "673c6cdc933542fc9f99cffd106ffee4", "3fea5bdf46ad46779579d51cd71a6ea7", "74b14aa171d042fe91c86cd67c150d91", "bc1b2a86311b4f21b284a175b104efe0", "9b805c8d7629453bb83ce2044bea26b0", "1e001fb312664979b6187c5c85ebee7c", "df82a6c3b7c14698b50360b440667698", "60f2eb478ee84fd8b0bcc35abd1c9179", "37421eef29c54855b289cebf13143b09", "05c10e5d60174493af2c49dd0cc55bcc", "9a963eee8e01456a9c6ff820fc9a1394", "444abec95fc340c58a63df8a8e71c1d7", "58ff9c1ea7be4ed4900ef448fe88b6cc", "9f3eb6593c4d4bfeb6d7a8f503f1d03c", "ba685441301d43a3949b99927221c2b0", "83386ff94da24656a1ae65471b55f312", "86eb2d64cbe6467180ab51a9e1b88c97", "d24ee1d8b86747748e5e737ad0b459b6", 
"2395734dcfc34fc681327354beb3a86a", "0846f8c2f3544519a51f06a903fdb912", "b9935d3733ab4538a9c6c098808bca91", "fd050344a8b7417294272972622b1e50", "5a39b11a60c246d3a18ae934055528cd", "a4066ca4095649049bdb4607dbb14cb5", "3b0fb9694f604e1e8d275e517a61d2f2", "7906c529005c4a159403444bb8e4df95", "f5ab1675368e483787ff2da9050ff2d5", "f235594eb8904996ba571ac9fef56186", "054a1877685942f8a9b0d19f0e40ea09", "1b48c1cdb6914bb19074270dab4e175f", "9a19badd95594721b3f5fef44bd3d709", "ecb2e4310050497ca02326ff23f4e67a", "377dbea85b2447208a72bf4256a974b0", "1d45e345641e4cb0a7eb54aeb455cc07", "7149671d48db4dddaf9ccd8d2dea5d80", "3f9c959a4ae24605abe9f182a4980b42", "a689fc2cfdbd44cb8b8efc9a104530e7", "0ded063225bd4930b8c3a84ac0212bed", "d12e11cb3dc54b3fa80c6f744d4eb816", "1aa15e1bdd8242dcafe1c44898e167cc", "6c7f23dfac85466685361cc5bc6d2414", "b241b6209e034e33a24b8ba7eeba8285", "1c08a7e8bae84703a5272969e507ade2", "f97f4652b3964daabd7540e8c84db808", "0fbe66c557be4183815f350a5fc54cea", "c9243985fc5c4bcfb5e4782f91bf1f08", "da04d057103740a1840ac025db099ba6", "f6ceba2c8e944e07ac37335f6bc27775", "49823e682b3249ae83c79acf807eadca", "3b609ca6ab7e497891ed09246b19fd02", "7860f2ae650f406fbd2dbf5b0b035a7b", "f59159361af34c768bc5158ad49844e1", "85b1e48e37dd439da9ab97ae84e841ee", "aa1c0efe0b5646a89cb23b6642a0dbb4", "9fc0c24783a448c48385f9b3dfde2fbf", "49ad9ddc3dca4d1d96bc12a59764571f", "c5303b4931ea44519318b9bb209f634d", "51c09b1d9214446285e103ee9282e206", "83b683c844a549ae9b0a647dc87cd2bc", "62313c10697f482590fb4eee271540bf", "3cf6dc01e2a0428d840eebe1029bb8ce", "502c36fb431241d2ae3cf12807cd5f15", "94a73b26bf0144abb64f0cf4c64d385d", "44e4249c3f0e4100aa5bb3ca53084060", "c566fb3083984a5e894584249467ae4d", "959d049d809248e49f029b5389b22a90", "b72122ea1c3f4b568972d051e6ffa5a8", "0efdf5a21c674849bf8b2718cbb1cc09", "da5d4e9910114cdfb8d1700124db3c43", "4d22677d3f384744a8297a66f636f051", "5f176465351944c7b2bc28b91594428b", "2a565e6a2c314e649fad873b012815d9", "988ad9b09380430cbf39cd3abdfbd6ff", 
"1e30c2cf9def45bd8069dc09fbfe844a", "548aba40f7314690815320c348e4223b", "33a420fc6fa44e8db19c06451c624c4e", "b7378b19e6f3469482f141b0af6a8fa7", "c890dead5ddc45e0817338a9349fd74c", "ab8775594daf4028b475709a0f24a888", "306369c675c94a1e9d374f75b5952200", "aba560edd18940c4abe09c0fa7263bb1", "94eb8f0f65c44616a083c381614b6617", "ecd8be14e0cd44679787aeab059516bf", "2a56adb0d9084371b96fc9bb8d869f80", "2090392ba65446bbb53e7c6fb3261b0d", "d1090f09758d4e7a8b3047f6adb15733", "88d16f0c12824d698be908bed4efafc9", "5f9e237c28fa46b3bc82f3cf788a952d", "2114d8772db84490a20b9aa82642f44f", "cb38a38b867846ddae5b51176dd37ef9", "a0f6807767154e328dab28d7547bbfee", "53691fa1d8a348728d859395ff2313fa", "58231fd9a4f74188877e36047e5e17a9", "3c8ecbe8cea243fc8a709fa1100110d2", "81998912dbe2489abbbb84730c5890cc", "264f84496d4144ca97ce6d5aa584dc88", "2c97617f883f41e8961a4235712cc73c" ] }, "gather": { "logged": 1692868897098 }, "id": "Emj46ATxtV9C", "outputId": "169be147-f31e-4153-cf91-9b1cea6667be" }, "outputs": [], "source": [
  "\n",
  "from langchain.embeddings import HuggingFaceBgeEmbeddings\n",
  "\n",
  "model_name = \"BAAI/bge-base-en\"\n",
  "encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity\n",
  "\n",
  "model_norm = HuggingFaceBgeEmbeddings(\n",
  "    model_name=model_name,\n",
  "    model_kwargs={'device': 'cpu'},\n",
  "    encode_kwargs=encode_kwargs\n",
  ")\n"
 ] },
 { "cell_type": "markdown", "metadata": { "id": "YsYsIy8F4cdm" }, "source": [
  "## create the DB\n",
  "\n",
  " T4 GPU"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Q_eTIZwf4Dk2", "outputId": "57072a9e-bd8f-4296-b4e1-83c098f868c7" }, "outputs": [], "source": [
  "%%time\n",
  "# Embed and store the texts.\n",
  "# Embedding is the expensive step, so reuse the index saved on disk when there\n",
  "# is one; otherwise build it from `texts` and save it for the next run.\n",
  "import os\n",
  "\n",
  "persist_directory = 'db'  # NOTE(review): not referenced by any visible cell\n",
  "faiss_index_dir = 'faiss_index'\n",
  "\n",
  "## Here is the new embeddings being used\n",
  "embedding = model_norm\n",
  "\n",
  "if os.path.isdir(faiss_index_dir):\n",
  "    vectordb = FAISS.load_local(faiss_index_dir, embeddings=embedding)\n",
  "else:\n",
  "    vectordb = FAISS.from_texts(texts,\n",
  "                                embedding=embedding)\n",
  "    vectordb.save_local(faiss_index_dir)"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# vectordb.save_local(\"faiss_index\")\n",
  "\n",
  "vectordb = FAISS.load_local('faiss_index',embeddings=model_norm)"
 ] },
 { "cell_type": "markdown", "metadata": { "id": "siLXR-XT0JoI" }, "source": [
  "## Make a retriever"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "gather": { "logged": 1692870223799 }, "id": "jVWgPJXs1yRq" }, "outputs": [], "source": [
  "retriever = vectordb.as_retriever(search_type='similarity',search_kwargs={\"k\": 5})"
 ] },
 { "cell_type": "markdown", "metadata": { "id": "4Ia-4OXa5IeP" }, "source": [
  "## Make a chain"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "import os\n",
  "import openai\n",
  "from getpass import getpass\n",
  "\n",
  "from langchain.chat_models import AzureChatOpenAI\n",
  "from langchain.schema import HumanMessage\n",
  "\n",
  "\n",
  "# Never hard-code the key in the notebook: take it from the environment and\n",
  "# prompt interactively only when it is not set.\n",
  "azure_openai_key = os.environ.get(\"AZURE_OPENAI_API_KEY\") or getpass(\"Azure OpenAI API key: \")\n",
  "\n",
  "model = AzureChatOpenAI(\n",
  "    openai_api_base=os.environ.get(\"AZURE_OPENAI_ENDPOINT\", \"https://votum.openai.azure.com/\"),\n",
  "    openai_api_version= \"2023-07-01-preview\",\n",
  "    # openai_api_version=\"2023-05-15\",\n",
  "    openai_api_key=azure_openai_key,\n",
  "    openai_api_type=\"azure\",\n",
  "    deployment_name='gpt-4'\n",
  ")"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "model(\n",
  "    [\n",
  "        HumanMessage(\n",
  "            content=\"What model are you?\"\n",
  "        )\n",
  "    ]\n",
  ")"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "gather": { "logged": 1692870227467 }, "id": "dCtX_DK9S-K0" }, "outputs": [], "source": [
  "# llm = TogetherLLM(\n",
  "#     model= \"togethercomputer/llama-2-70b-chat\",\n",
  "#     temperature = 0.5,\n",
  "#     max_tokens = 1024\n",
  "# )\n",
  "\n",
  "\n",
  "import os\n",
  "\n",
  "from langchain.llms import HuggingFaceTextGenInference\n",
  "\n",
  "# The text-generation-inference server address can be overridden with\n",
  "# TGI_SERVER_URL; the previous fixed address remains the default.\n",
  "llm = HuggingFaceTextGenInference(\n",
  "    inference_server_url=os.environ.get(\"TGI_SERVER_URL\", \"http://20.83.177.108:8080/\"),\n",
  "    max_new_tokens=512,\n",
  "    top_k=10,\n",
  "    top_p=0.95,\n",
  "    typical_p=0.95,\n",
  "    temperature=0.6,\n",
  "    # repetition_penalty=1.1,\n",
  ")\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "gather": { "logged": 1692870229949 }, "id": "MGx8XblM4shW" }, "outputs": [], "source": [
  "\n",
  "\n",
  "from langchain.prompts import PromptTemplate\n",
  "\n",
  "\n",
  "prompt_template = \"\"\"You are an expert legal assistant with extensive knowledge about Indian law. Your task is to respond to the given query in a consice and factually correct manner. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
  "\n",
  "{context}\n",
  "\n",
  "Question: {question}\n",
  "Response:\"\"\"\n",
  "\n",
  "\n",
  "PROMPT = PromptTemplate(\n",
  "    template=prompt_template, input_variables=[\"context\", \"question\"]\n",
  ")\n",
  "\n",
  "# The retrieved chunks fill {context}; the user query fills {question}\n",
  "qa_chain = RetrievalQA.from_chain_type(llm=llm,\n",
  "                                       chain_type_kwargs={\"prompt\": PROMPT},\n",
  "                                       retriever=retriever,\n",
  "                                       return_source_documents=True)"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "gather": { "logged": 1692873421219 }, "id": "wKfX4vX-5RFT", "outputId": "2a22808f-7c7e-4ccc-fcfd-026b0065c201" }, "outputs": [], "source": [
  "import textwrap\n",
  "\n",
  "def wrap_text_preserve_newlines(text, width=110):\n",
  "    \"\"\"Wrap each line of `text` to at most `width` characters, keeping the line breaks.\"\"\"\n",
  "    # Split the input text into lines based on newline characters\n",
  "    lines = text.split('\\n')\n",
  "\n",
  "    # Wrap each line individually\n",
  "    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]\n",
  "\n",
  "    # Join the wrapped lines back together using newline characters\n",
  "    wrapped_text = '\\n'.join(wrapped_lines)\n",
  "\n",
  "    return wrapped_text\n",
  "\n",
  "def process_llm_response(llm_response):\n",
  "    \"\"\"Print the 'result' entry of a chain response dict.\"\"\"\n",
  "    # print(wrap_text_preserve_newlines(llm_response['result']))\n",
  "    print(llm_response['result'])\n",
  "    # print('\\n\\nSources:')\n",
  "    # for source in llm_response[\"source_documents\"]:\n",
  "    #     print(source.metadata['source'])\n",
  "\n",
  "\n",
  "#, in case you can't find any relevant statutes respond with 'i don't know' rather than providing incorrect answer.\n",
  "query = \"\"\"{user_text}\"\"\"\n",
  "\n",
  "\n",
  "llm_response = qa_chain(query.format(user_text='How much alcohol can i legally consume before driving'))\n",
  "print(llm_response)\n",
  "# process_llm_response(llm_response)"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "%pip install -q langchain openai"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "from langchain.llms import OpenAI\n",
  "from langchain.agents import initialize_agent, Tool\n",
  "from langchain.agents import AgentType\n",
  "from langchain.agents.react.base import DocstoreExplorer\n",
  "\n",
  "\n",
  "# NOTE(review): `vectordb` is a FAISS vector store, not a docstore object -\n",
  "# confirm that docstore.search(term) / docstore.lookup(term) work with it.\n",
  "docstore = DocstoreExplorer(vectordb)\n",
  "tools = [\n",
  "    Tool(\n",
  "        name=\"Search\",\n",
  "        func=docstore.search,\n",
  "        description=\"useful for when you need to ask with search\",\n",
  "    ),\n",
  "    Tool(\n",
  "        name=\"Lookup\",\n",
  "        func=docstore.lookup,\n",
  "        description=\"useful for when you need to ask with lookup\",\n",
  "    ),\n",
  "]\n",
  "\n",
  "import langchain\n",
  "langchain.verbose= True\n",
  "# NOTE(review): this rebinds `llm` (until now the HuggingFaceTextGenInference\n",
  "# client); the agent below is built with `model`, not with `llm`.\n",
  "llm = OpenAI(temperature=0)\n",
  "\n",
  "react = initialize_agent(tools, model, agent=AgentType.REACT_DOCSTORE, verbose=True)"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from langchain.agents import create_pandas_dataframe_agent\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.agents.agent_types import AgentType\n", "from langchain.tools import tool\n", "import pandas as pd\n", "\n", "html = \"\"\"\n", "
Financial Year | Cost Inflation Index (CII) |
2001-02 (Base year) | 100 |
2002-03 | 105 |
2003-04 | 109 |
2004-05 | 113 |
2005-06 | 117 |
2006-07 | 122 |
2007-08 | 129 |
2008-09 | 137 |
2009-10 | 148 |
2010-11 | 167 |
2011-12 | 184 |
2012-13 | 200 |
2013-14 | 220 |
2014-15 | 240 |
2015-16 | 254 |
2016-17 | 264 |
2017-18 | 272 |
2018-19 | 280 |
2019-20 | 289 |
2020-21 | 301 |
2021-22 | 317 |
2022-23 | 331 |
2023-24 | 348 |