Mdkaif2782 committed on
Commit 9b47fda · verified · 1 Parent(s): 7d317a2

Upload kuet_preli_prblem_1 (1).ipynb

Files changed (1)
  1. kuet_preli_prblem_1 (1).ipynb +307 -0
kuet_preli_prblem_1 (1).ipynb ADDED
@@ -0,0 +1,307 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "T4"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Installing the required libraries\n"
+ ],
+ "metadata": {
+ "id": "IhtNWaiM0V3D"
+ }
+ },
+ {
+ "source": [
+ "!pip install datasets==2.14.5\n",
+ "!pip install transformers==4.28.0\n",
+ "!pip install protobuf==3.20.*"
+ ],
+ "cell_type": "code",
+ "metadata": {
+ "collapsed": true,
+ "id": "cxFRfDCoLJzH"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Importing the dataset from Hugging Face and splitting it into training and validation sets"
+ ],
+ "metadata": {
+ "id": "W27dIock0c5K"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true,
+ "id": "XR0cgTdaKWAC"
+ },
+ "outputs": [],
+ "source": [
+ "from datasets import load_dataset\n",
+ "\n",
+ "dataset = load_dataset(\"SKNahin/bengali-transliteration-data\")\n",
+ "\n",
+ "split_dataset = dataset['train'].train_test_split(test_size=0.2, seed=42)\n",
+ "\n",
+ "train_dataset = split_dataset['train']\n",
+ "val_dataset = split_dataset['test']\n",
+ "\n",
+ "print(f\"Training samples: {len(train_dataset)}, Validation samples: {len(val_dataset)}\")\n"
+ ]
+ },
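+ {
+ "cell_type": "markdown",
+ "source": [
+ "Editor's note: a minimal sanity check, not part of the original run. The preprocessing below assumes the dataset exposes a romanized column \"rm\" and a Bangla column \"bn\"; this sketch just prints the column names and one raw pair to confirm that assumption."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Sanity check (a sketch): confirm the \"rm\"/\"bn\" columns assumed by preprocess() below\n",
+ "print(train_dataset.column_names)\n",
+ "print(train_dataset[0])"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },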
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tokenizing the data and training the model"
+ ],
+ "metadata": {
+ "id": "o75NKyHh0lD0"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from transformers import MBartForConditionalGeneration, MBart50TokenizerFast, Trainer, TrainingArguments\n",
+ "import torch\n",
+ "\n",
+ "model_name = \"facebook/mbart-large-50\"\n",
+ "tokenizer = MBart50TokenizerFast.from_pretrained(model_name)\n",
+ "model = MBartForConditionalGeneration.from_pretrained(model_name)\n",
+ "\n",
+ "tokenizer.src_lang = \"en_XX\"\n",
+ "tokenizer.tgt_lang = \"bn_IN\"\n",
+ "\n",
+ "def preprocess(batch):\n",
+ "    inputs = tokenizer(batch[\"rm\"], max_length=128, truncation=True, padding=\"max_length\")\n",
+ "    targets = tokenizer(batch[\"bn\"], max_length=128, truncation=True, padding=\"max_length\")\n",
+ "    inputs[\"labels\"] = targets[\"input_ids\"]\n",
+ "    return inputs\n",
+ "\n",
+ "train_dataset = train_dataset.map(preprocess, batched=True)\n",
+ "val_dataset = val_dataset.map(preprocess, batched=True)\n",
+ "\n",
+ "train_dataset.set_format(type=\"torch\", columns=[\"input_ids\", \"attention_mask\", \"labels\"])\n",
+ "val_dataset.set_format(type=\"torch\", columns=[\"input_ids\", \"attention_mask\", \"labels\"])\n",
+ "\n",
+ "training_args = TrainingArguments(\n",
+ "    output_dir=\"./mbart_results\",\n",
+ "    evaluation_strategy=\"epoch\",\n",
+ "    learning_rate=3e-5,\n",
+ "    per_device_train_batch_size=2,\n",
+ "    per_device_eval_batch_size=2,\n",
+ "    num_train_epochs=5,\n",
+ "    weight_decay=0.01,\n",
+ "    save_total_limit=2,\n",
+ "    logging_dir=\"./mbart_logs\",\n",
+ "    logging_steps=10,\n",
+ "    save_steps=500,\n",
+ "    fp16=torch.cuda.is_available(),\n",
+ ")\n",
+ "\n",
+ "trainer = Trainer(\n",
+ "    model=model,\n",
+ "    args=training_args,\n",
+ "    train_dataset=train_dataset,\n",
+ "    eval_dataset=val_dataset,\n",
+ "    tokenizer=tokenizer,\n",
+ ")\n",
+ "\n",
+ "trainer.train()\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 339
+ },
+ "outputId": "0af79106-6873-472c-8d6a-6d385d2d151b",
+ "id": "06Q9XzHVg8v6",
+ "collapsed": true
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "error",
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m<ipython-input-3-3ccb4aa8eee1>\u001b[0m in \u001b[0;36m<cell line: 54>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;31m# Train the model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 54\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1660\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_inner_training_loop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train_batch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_find_batch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1661\u001b[0m )\n\u001b[0;32m-> 1662\u001b[0;31m return inner_training_loop(\n\u001b[0m\u001b[1;32m 1663\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1664\u001b[0m \u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2004\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_step_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2005\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2006\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_log_save_evaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_keys_for_eval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2007\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2008\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_substep_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2289\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2290\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshould_save\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2291\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_save_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2292\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_save\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2293\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_save_checkpoint\u001b[0;34m(self, model, trial, metrics)\u001b[0m\n\u001b[1;32m 2346\u001b[0m \u001b[0mrun_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_output_dir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrial\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtrial\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2347\u001b[0m \u001b[0moutput_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcheckpoint_folder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2348\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_internal_call\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2349\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdeepspeed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2350\u001b[0m \u001b[0;31m# under zero3 model file itself doesn't get saved since it's bogus! Unless deepspeed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36msave_model\u001b[0;34m(self, output_dir, _internal_call)\u001b[0m\n\u001b[1;32m 2828\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2829\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshould_save\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2830\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_save\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2831\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2832\u001b[0m \u001b[0;31m# Push to the Hub when `save_model` is called by the user.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_save\u001b[0;34m(self, output_dir, state_dict)\u001b[0m\n\u001b[1;32m 2884\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mWEIGHTS_NAME\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2885\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2886\u001b[0;31m self.model.save_pretrained(\n\u001b[0m\u001b[1;32m 2887\u001b[0m \u001b[0moutput_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstate_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstate_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msafe_serialization\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_safetensors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2888\u001b[0m )\n",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py\u001b[0m in \u001b[0;36msave_pretrained\u001b[0;34m(self, save_directory, is_main_process, state_dict, save_function, push_to_hub, max_shard_size, safe_serialization, variant, **kwargs)\u001b[0m\n\u001b[1;32m 1841\u001b[0m \u001b[0msafe_save_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshard\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msave_directory\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshard_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetadata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"format\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"pt\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1842\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1843\u001b[0;31m \u001b[0msave_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshard\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msave_directory\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshard_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1844\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1845\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36msave\u001b[0;34m(obj, f, pickle_module, pickle_protocol, _use_new_zipfile_serialization, _disable_byteorder_record)\u001b[0m\n\u001b[1;32m 848\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_use_new_zipfile_serialization\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 849\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_open_zipfile_writer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 850\u001b[0;31m _save(\n\u001b[0m\u001b[1;32m 851\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 852\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_save\u001b[0;34m(obj, zip_file, pickle_module, pickle_protocol, _disable_byteorder_record)\u001b[0m\n\u001b[1;32m 1112\u001b[0m \u001b[0mstorage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1113\u001b[0m \u001b[0;31m# Now that it is on the CPU we can directly copy it into the zip file\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1114\u001b[0;31m \u001b[0mzip_file\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_record\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_bytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1116\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ]
+ },
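+ {
+ "cell_type": "markdown",
+ "source": [
+ "Editor's note: a hedged variant of preprocess(), not the run recorded above. Tokenizing the targets via text_target= tags the labels with the target language (bn_IN) rather than en_XX, and replacing pad tokens with -100 keeps padding out of the loss."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Sketch: target-aware tokenization with loss masking (assumes the same tokenizer and \"rm\"/\"bn\" columns)\n",
+ "def preprocess_with_masked_labels(batch):\n",
+ "    model_inputs = tokenizer(batch[\"rm\"], max_length=128, truncation=True, padding=\"max_length\")\n",
+ "    # text_target= routes the Bangla side through target-language tokenization\n",
+ "    labels = tokenizer(text_target=batch[\"bn\"], max_length=128, truncation=True, padding=\"max_length\")\n",
+ "    # -100 is ignored by the cross-entropy loss, so padded positions do not contribute\n",
+ "    model_inputs[\"labels\"] = [\n",
+ "        [tok if tok != tokenizer.pad_token_id else -100 for tok in seq]\n",
+ "        for seq in labels[\"input_ids\"]\n",
+ "    ]\n",
+ "    return model_inputs"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },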
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Evaluating the model and generating predictions"
+ ],
+ "metadata": {
+ "id": "N2KBMAZi2PwO"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import torch\n",
+ "\n",
+ "sample = val_dataset.select(range(10))\n",
+ "inputs = sample[\"input_ids\"]\n",
+ "\n",
+ "if torch.cuda.is_available():\n",
+ "    inputs = inputs.cuda()\n",
+ "\n",
+ "preds = model.generate(inputs)\n",
+ "\n",
+ "decoded_preds = [tokenizer.decode(pred, skip_special_tokens=True, clean_up_tokenization_spaces=True) for pred in preds]\n",
+ "decoded_labels = [tokenizer.decode(label, skip_special_tokens=True, clean_up_tokenization_spaces=True) for label in sample[\"labels\"]]\n",
+ "\n",
+ "for i, (pred, label) in enumerate(zip(decoded_preds, decoded_labels)):\n",
+ "    print(f\"Sample {i + 1}\")\n",
+ "    print(f\"Prediction: {pred}\")\n",
+ "    print(f\"Label: {label}\\n\")\n"
+ ],
+ "metadata": {
+ "collapsed": true,
+ "id": "bVnn2zoxQFxc"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
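+ {
+ "cell_type": "markdown",
+ "source": [
+ "Editor's note: a rough, dependency-free quality check over the ten decoded pairs above — character error rate via a plain Levenshtein distance. A sketch, not part of the original run."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Character error rate over the decoded samples (a rough sketch, pure Python)\n",
+ "def levenshtein(a, b):\n",
+ "    prev = list(range(len(b) + 1))\n",
+ "    for i, ca in enumerate(a, 1):\n",
+ "        cur = [i]\n",
+ "        for j, cb in enumerate(b, 1):\n",
+ "            cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (ca != cb)))\n",
+ "        prev = cur\n",
+ "    return prev[-1]\n",
+ "\n",
+ "total_edits = sum(levenshtein(p, l) for p, l in zip(decoded_preds, decoded_labels))\n",
+ "total_chars = max(1, sum(len(l) for l in decoded_labels))\n",
+ "print(f\"Approximate CER over {len(decoded_preds)} samples: {total_edits / total_chars:.3f}\")"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },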
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Saving the fine-tuned model"
+ ],
+ "metadata": {
+ "id": "G2lVyL663QgH"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "model.save_pretrained(\"./banglish-to-bangla\")\n",
+ "tokenizer.save_pretrained(\"./banglish-to-bangla\")"
+ ],
+ "metadata": {
+ "id": "c-4-GqLRZT-C",
+ "collapsed": true
+ },
+ "execution_count": null,
+ "outputs": []
+ },
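+ {
+ "cell_type": "markdown",
+ "source": [
+ "Editor's note: a quick round-trip check, added as a sketch — reload the checkpoint just written to ./banglish-to-bangla to confirm the directory is loadable."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Reload the saved checkpoint to verify it round-trips (a sketch)\n",
+ "reloaded_model = MBartForConditionalGeneration.from_pretrained(\"./banglish-to-bangla\")\n",
+ "reloaded_tokenizer = MBart50TokenizerFast.from_pretrained(\"./banglish-to-bangla\")\n",
+ "print(f\"Reloaded {type(reloaded_model).__name__} with {reloaded_model.num_parameters():,} parameters\")"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },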
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Taking custom input from the user to check the model"
+ ],
+ "metadata": {
+ "id": "2nA9BzIT3Tmb"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import torch\n",
+ "\n",
+ "def translate_banglish_to_bangla(model, tokenizer, banglish_input):\n",
+ "    inputs = tokenizer(banglish_input, return_tensors=\"pt\", padding=True, truncation=True, max_length=128)\n",
+ "\n",
+ "    if torch.cuda.is_available():\n",
+ "        inputs = {key: value.cuda() for key, value in inputs.items()}\n",
+ "        model = model.cuda()\n",
+ "\n",
+ "    translated_tokens = model.generate(**inputs, decoder_start_token_id=tokenizer.lang_code_to_id[\"bn_IN\"])\n",
+ "    translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]\n",
+ "\n",
+ "    return translated_text\n",
+ "\n",
+ "print(\"Enter your Banglish text (type 'exit' to quit):\")\n",
+ "while True:\n",
+ "    banglish_text = input(\"Banglish: \")\n",
+ "    if banglish_text.lower() == \"exit\":\n",
+ "        break\n",
+ "\n",
+ "    translated_text = translate_banglish_to_bangla(model, tokenizer, banglish_text)\n",
+ "    print(f\"Translated Bangla: {translated_text}\\n\")\n"
+ ],
+ "metadata": {
+ "id": "uQ-HtJ7ledXW"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
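+ {
+ "cell_type": "markdown",
+ "source": [
+ "Editor's note: a generation variant, sketched as an alternative to the greedy loop above. Beam search usually helps seq2seq quality, and forced_bos_token_id is the documented way to pin MBart-50 output to a target language; num_beams=5 and the sample input are illustrative choices, not part of the original run."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Beam-search variant of the translation call (a sketch; parameters are illustrative)\n",
+ "def translate_with_beams(model, tokenizer, text, num_beams=5):\n",
+ "    inputs = tokenizer(text, return_tensors=\"pt\", truncation=True, max_length=128)\n",
+ "    if torch.cuda.is_available():\n",
+ "        inputs = {k: v.cuda() for k, v in inputs.items()}\n",
+ "        model = model.cuda()\n",
+ "    out = model.generate(\n",
+ "        **inputs,\n",
+ "        num_beams=num_beams,\n",
+ "        forced_bos_token_id=tokenizer.lang_code_to_id[\"bn_IN\"],  # pin the output language\n",
+ "        max_length=128,\n",
+ "    )\n",
+ "    return tokenizer.batch_decode(out, skip_special_tokens=True)[0]\n",
+ "\n",
+ "print(translate_with_beams(model, tokenizer, \"ami tomake bhalobashi\"))"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },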
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Exporting the model as a .zip archive"
+ ],
+ "metadata": {
+ "id": "RoOeyvDa3b_y"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import os\n",
+ "import zipfile\n",
+ "from google.colab import files\n",
+ "\n",
+ "def zipdir(path, ziph):\n",
+ "    # ziph is a zipfile handle; walk the directory and add every file\n",
+ "    for root, dirs, filenames in os.walk(path):\n",
+ "        for filename in filenames:\n",
+ "            ziph.write(os.path.join(root, filename))\n",
+ "\n",
+ "if not os.path.exists(\"./banglish-to-bangla\"):\n",
+ "    print(\"Directory ./banglish-to-bangla not found. Please run the training code first.\")\n",
+ "else:\n",
+ "    zipf = zipfile.ZipFile('banglish-to-bangla.zip', 'w', zipfile.ZIP_DEFLATED)\n",
+ "    zipdir('./banglish-to-bangla', zipf)\n",
+ "    zipf.close()\n",
+ "    files.download('banglish-to-bangla.zip')"
+ ],
+ "metadata": {
+ "id": "cP8HldTAaHqo"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+ }