{ "cells": [ { "cell_type": "code", "execution_count": 53, "id": "408df710-efb3-45e0-94e1-5c4bdac72c06", "metadata": {}, "outputs": [], "source": [ "from langchain_community.document_loaders import TextLoader\n", "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.vectorstores import FAISS\n", "from langchain.embeddings import OllamaEmbeddings\n", "from langchain.chains import RetrievalQA\n", "from langchain.chat_models import ChatOllama\n", "\n" ] }, { "cell_type": "code", "execution_count": 54, "id": "b0c1e9a7-85c6-48fb-bc81-b5903b67c044", "metadata": {}, "outputs": [], "source": [ "from langchain.schema import Document\n", "with open(\"untitled.txt\", 'r') as f:\n", " doc = f.read()\n", "\n", "docs = [Document(page_content=doc)]\n" ] }, { "cell_type": "code", "execution_count": 55, "id": "b15e44da-6beb-489c-b847-d3276915ce8d", "metadata": {}, "outputs": [], "source": [ "splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)\n", "chunks = splitter.split_documents(docs)" ] }, { "cell_type": "code", "execution_count": 56, "id": "4f6a18b4-cb85-481a-b43f-f38c60959155", "metadata": {}, "outputs": [], "source": [ "embeddings = OllamaEmbeddings(model='llama3.2')" ] }, { "cell_type": "code", "execution_count": 57, "id": "7a7709bc-48a3-4771-af21-5c48e5ae9296", "metadata": {}, "outputs": [], "source": [ "vector_store = []\n", "for i in range(len(chunks)):\n", " em = embeddings.embed_query(chunks[i].page_content)\n", " vector_store.append(em)\n", " " ] }, { "cell_type": "code", "execution_count": 96, "id": "f95b010f-c384-4111-af98-43ba51568b08", "metadata": {}, "outputs": [], "source": [ "def similarity_search(te):\n", " result_list = [] # create a list to store all the embeddings\n", " emb = embeddings.embed_query(te) # create an embedding for our \"te\"\n", " for i in range(8): # we have created 8 chunks\n", " result = 0 # initialize the result for each chunk\n", " for j in range(3072): # we have 3072 dimentional vector as a representation for each chunk and out text\n", " result += emb[j] * vector_store[i][j]# we then take the dot product\n", " result = result / 55.42\n", " result_list.append({f'chunk {i+1}':result}) # and finally append the dot product in our return_list\n", " return result_list\n", " " ] }, { "cell_type": "code", "execution_count": 97, "id": "e63c7e94-56a7-4333-804d-e7c2b8697c77", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'chunk 1': 84.98757149361835}, {'chunk 2': 75.60246478296749}, {'chunk 3': 79.17318761328006}, {'chunk 4': 80.69328472997623}, {'chunk 5': 68.01708598133246}, {'chunk 6': 67.64770328462416}, {'chunk 7': 87.4843210948032}, {'chunk 8': 78.75659878926277}]\n" ] } ], "source": [ "print(similarity_search(\"Do not share passwords or access credentials. 7. Performance Reviews Formal reviews conducted every 6 months.\"))" ] }, { "cell_type": "code", "execution_count": 84, "id": "5418bed3-8cd4-403d-ab6b-8f476cac7f41", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Use company devices for official work only.\\n\\nKeep systems updated and report any security incidents.\\n\\nDo not share passwords or access credentials.\\n\\n7. Performance Reviews\\n\\nFormal reviews conducted every 6 months.\\n\\nFocus on personal growth, goals, and team contributions.\\n\\n8. 
Learning & Development'" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chunks[5].page_content\n", " " ] }, { "cell_type": "code", "execution_count": null, "id": "fe2342b3-3f82-4ffa-9ad4-c19f12dded21", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 5 }