{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6fdb2a6f",
   "metadata": {},
   "source": [
    "## INFERENCE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a9f7c9cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "from flair.models import SequenceTagger\n",
    "from flair.data import Sentence\n",
    "\n",
    "from tokenizer import StatsTokenizer\n",
    "from hypothesis import HypothesisTest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a391d4d9",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/vinay/opt/anaconda3/envs/lab/lib/python3.9/site-packages/huggingface_hub/file_download.py:629: FutureWarning: `cached_download` is the legacy way to download files from the HF hub, please consider upgrading to `hf_hub_download`\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2023-04-20 14:07:50,024 loading file /Users/vinay/.flair/models/stats-ner/36efff182e4649faa516d32ec20e0e565b874997ed6ee16de9cf7f4009a56ee3.09a8678c0f280ee4b018f8d418135a2f149e0ce74d4f61859f150e53b022dd29\n",
      "2023-04-20 14:07:50,368 SequenceTagger predicts: Dictionary with 11 tags: O, S-T, B-T, E-T, I-T, S-P, B-P, E-P, I-P, <START>, <STOP>\n"
     ]
    }
   ],
   "source": [
    "# Load the fine-tuned statistics NER tagger from the Hugging Face hub.\n",
    "# The repo id matches the cache folder shown in this cell's recorded output\n",
    "# (.flair/models/stats-ner).\n",
    "model = SequenceTagger.load(\"VinayNR/stats-ner\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "d4e55cb5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the sample document to tag (point `file_path` at your own file).\n",
    "# The encoding is pinned to UTF-8 so decoding does not depend on the\n",
    "# platform's default locale encoding.\n",
    "file_path = \"../sample.txt\"\n",
    "with open(file_path, encoding=\"utf-8\") as f:\n",
    "    fileStr = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "fdb6df57",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Prediction begin\n",
      "Prediction done\n"
     ]
    }
   ],
   "source": [
    "# Wrap the whole document (newlines flattened to spaces) in a single Sentence\n",
    "# using the project tokenizer, then tag it.\n",
    "# The predictions are read back from `sentence` in the cells below, so the\n",
    "# return value of predict() is not bound to a name.\n",
    "sentence = Sentence(fileStr.replace('\\n',' '), use_tokenizer=StatsTokenizer())\n",
    "print(\"Prediction begin\")\n",
    "model.predict(sentence)\n",
    "print(\"Prediction done\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "847cabce",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract the statistical tests reported in the tagged sentence.\n",
    "# The result is iterated in the next cell, where each item is printed and\n",
    "# asked for calculate_p_val().\n",
    "reported_tests = HypothesisTest.get_reported_stat_tests(sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "b38b8ab6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reported Statistics in the text above....\n",
      "--------------------\n",
      "Test Type : t | Test Stat : 1.45 | DF : 23 | Rep P-val : <0.01\n",
      "Calculated p-value :  0.08027960035102566\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Print every extracted test followed by the p-value from calculate_p_val()\n",
    "print('Reported Statistics in the text above....', '--------------------', sep='\\n')\n",
    "for reported_test in reported_tests:\n",
    "    print(reported_test)\n",
    "    calculated_p_val = reported_test.calculate_p_val()\n",
    "    print('Calculated p-value : ', calculated_p_val)\n",
    "    print('\\n')  # two newlines: leaves a blank gap between entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "93b24840",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Span[1:9]: \"t ( df = 23 ) = 1.45\" → T (0.9833)\n",
      "Span[14:17]: \"p < 0.01\" → P (0.8529)\n"
     ]
    }
   ],
   "source": [
    "# List each 'ner' label attached to the sentence, one per line\n",
    "for ner_label in sentence.get_labels('ner'):\n",
    "    print(ner_label)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}