Upload 42 files
Browse files- .gitattributes +9 -35
- LICENSE +21 -0
- README.md +37 -13
- UIT-ViCoV19QA/README.md +52 -0
- UIT-ViCoV19QA/UIT-ViCoV19-QA_main.ipynb +1 -0
- UIT-ViCoV19QA/dataset/1_ans/UIT-ViCoV19QA_test.csv +3 -0
- UIT-ViCoV19QA/dataset/1_ans/UIT-ViCoV19QA_train.csv +3 -0
- UIT-ViCoV19QA/dataset/1_ans/UIT-ViCoV19QA_val.csv +3 -0
- UIT-ViCoV19QA/dataset/2_ans/UIT-ViCoV19QA_test.csv +3 -0
- UIT-ViCoV19QA/dataset/2_ans/UIT-ViCoV19QA_train.csv +3 -0
- UIT-ViCoV19QA/dataset/2_ans/UIT-ViCoV19QA_val.csv +3 -0
- UIT-ViCoV19QA/dataset/3_ans/UIT-ViCoV19QA_test.csv +3 -0
- UIT-ViCoV19QA/dataset/3_ans/UIT-ViCoV19QA_train.csv +3 -0
- UIT-ViCoV19QA/dataset/3_ans/UIT-ViCoV19QA_val.csv +3 -0
- UIT-ViCoV19QA/dataset/4_ans/UIT-ViCoV19QA_test.csv +3 -0
- UIT-ViCoV19QA/dataset/4_ans/UIT-ViCoV19QA_train.csv +3 -0
- UIT-ViCoV19QA/dataset/4_ans/UIT-ViCoV19QA_val.csv +3 -0
- UIT-ViCoV19QA/dataset/UIT-ViCoV19QA.csv +3 -0
- UIT-ViCoV19QA/models/cnn.py +274 -0
- UIT-ViCoV19QA/models/layers.py +68 -0
- UIT-ViCoV19QA/models/rnn1.py +241 -0
- UIT-ViCoV19QA/models/rnn2.py +206 -0
- UIT-ViCoV19QA/models/seq2seq.py +39 -0
- UIT-ViCoV19QA/models/transformer.py +271 -0
- app.py +81 -0
- dataset/1_ans/UIT-ViCoV19QA_test.csv +3 -0
- dataset/1_ans/UIT-ViCoV19QA_train.csv +3 -0
- dataset/1_ans/UIT-ViCoV19QA_val.csv +3 -0
- dataset/2_ans/UIT-ViCoV19QA_test.csv +3 -0
- dataset/2_ans/UIT-ViCoV19QA_train.csv +3 -0
- dataset/2_ans/UIT-ViCoV19QA_val.csv +3 -0
- dataset/3_ans/UIT-ViCoV19QA_test.csv +3 -0
- dataset/3_ans/UIT-ViCoV19QA_train.csv +3 -0
- dataset/3_ans/UIT-ViCoV19QA_val.csv +3 -0
- dataset/4_ans/UIT-ViCoV19QA_test.csv +3 -0
- dataset/4_ans/UIT-ViCoV19QA_train.csv +3 -0
- dataset/4_ans/UIT-ViCoV19QA_val.csv +3 -0
- dataset/UIT-ViCoV19QA.csv +3 -0
- models/README.md +3 -0
- models/vi-medical-t5-finetune-qa/tokenizer_config.json +3 -0
- notebooks/vi-medical-t5-finetune-qa.ipynb +0 -0
- requirements.txt +11 -0
.gitattributes
CHANGED
@@ -1,35 +1,9 @@
|
|
1 |
-
*.
|
2 |
-
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
|
5 |
-
*.
|
6 |
-
*.
|
7 |
-
*.
|
8 |
-
|
9 |
-
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
*.csv filter=lfs diff=lfs merge=lfs -text
|
2 |
+
models/* filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
models/vi-medical-t5-finetune-qa/* filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.safetensorsin filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
8 |
+
models/vi-medical-t5-finetune-qa/checkpoint-75969/*.safetensors filter=lfs diff=lfs merge=lfs -text
|
9 |
+
models/vi-medical-t5-finetune-qa/model.safetensors filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2025 Danh Tran
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1,13 +1,37 @@
|
|
1 |
-
---
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
title: Vietnamese Medical T5 Finetune Question and Answer
|
4 |
+
sdk: gradio
|
5 |
+
colorFrom: green
|
6 |
+
colorTo: green
|
7 |
+
python_version: 3.10.12
|
8 |
+
sdk_version: 5.23.3
|
9 |
+
---
|
10 |
+
# Vietnamese Medical T5 Finetune Question and Answer
|
11 |
+
|
12 |
+
## Dataset
|
13 |
+
You can download the dataset at this URL: https://github.com/triet2397/UIT-ViCoV19QA
|
14 |
+
## Metrics
|
15 |
+
- Loss:
|
16 |
+
- Training Set: 0.306100.
|
17 |
+
- Validation Set: 0.322764.
|
18 |
+
|
19 |
+
## Demo
|
20 |
+
You can try this project demo at:
|
21 |
+
|
22 |
+
## Usage
|
23 |
+
- Install Dependencies:
|
24 |
+
```bash
|
25 |
+
pip install -r requirements.txt
|
26 |
+
```
|
27 |
+
- Download the 'danhtran2mind/vi-medical-t5-finetune-qa' model from Hugging Face using the following commands:
|
28 |
+
```bash
|
29 |
+
cd models
|
30 |
+
git lfs clone https://huggingface.co/danhtran2mind/vi-medical-t5-finetune-qa
|
31 |
+
cd ..
|
32 |
+
```
|
33 |
+
- Run Gradio app:
|
34 |
+
```bash
|
35 |
+
python app.py
|
36 |
+
```
|
37 |
+
- Your app will run at `localhost:7860`
|
UIT-ViCoV19QA/README.md
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# UIT-ViCoV19QA: A Dataset for COVID-19 Community-based Question Answering on Vietnamese Language
|
2 |
+
Authors: Triet Minh Thai, Ngan Ha-Thao Chu, Anh Tuan Vo, and Son T. Luu.
|
3 |
+
|
4 |
+
Description: The UIT-ViCoV19QA dataset comprises 4,500 Vietnamese question-answer pairs about the COVID-19 pandemic collected from trusted medical FAQ sources. Each question has at least one answer and at most four unique paraphrased answers.
|
5 |
+
|
6 |
+
The statistics of the dataset are shown in the table below.
|
7 |
+
|
8 |
+
|No. | Stats. | Train | Dev. | Test | All |
|
9 |
+
| :-------- | :------------------------------- | ------: | ------: | ------: | ------: |
|
10 |
+
| Answer 1 | Number of question-answer pairs | 3500 | 500 | 500 | 4500 |
|
11 |
+
| | Average question length | 31.44 | 33.66 | 32.32 | 31.79 |
|
12 |
+
| | Average answer length | 120.53 | 116.04 | 118.11 | 119.76 |
|
13 |
+
| | Question vocabulary size | 4396 | 1869 | 1770 | 4924 |
|
14 |
+
| | Answer vocabulary size | 8537 | 3689 | 3367 | 9411 |
|
15 |
+
|Answer 2 | Number of question-answer pairs | 1390 | 209 | 201 | 1800 |
|
16 |
+
| | Average question length | 35.56 | 39.22 | 39.72 | 36.45 |
|
17 |
+
| | Average answer length | 40.54 | 39.25 | 42.73 | 40.64 |
|
18 |
+
| | Question vocabulary size | 2883 | 1269 | 1207 | 3305 |
|
19 |
+
| | Answer vocabulary size | 2632 | 1098 | 1129 | 2949 |
|
20 |
+
| Answer 3 | Number of question-answer pairs | 542 | 79 | 79 | 700 |
|
21 |
+
| | Average question length | 34.77 | 36.7 | 39.28 | 35.49 |
|
22 |
+
| | Average answer length | 28.68 | 26.43 | 30.89 | 28.67 |
|
23 |
+
| | Question vocabulary size | 1836 | 717 | 693 | 2111 |
|
24 |
+
| | Answer vocabulary size | 1554 | 503 | 585 | 1753 |
|
25 |
+
| Answer 4 | Number of question-answer pairs | 272 | 39 | 39 | 350 |
|
26 |
+
| | Average question length | 36.57 | 37.59 | 42.15 | 37.1 |
|
27 |
+
| | Average answer length | 29.75 | 29.03 | 35.72 | 30.25 |
|
28 |
+
| | Question vocabulary size | 1315 | 470 | 460 | 1519 |
|
29 |
+
| | Answer vocabulary size | 924 | 353 | 374 | 1075 |
|
30 |
+
|
31 |
+
Link to publication: https://aclanthology.org/2022.paclic-1.88/.
|
32 |
+
|
33 |
+
The dataset is used only for research purposes.
|
34 |
+
|
35 |
+
Some parts of the source code were inherited from the publication at https://github.com/barshana-banerjee/ParaQA_Experiments.git.
|
36 |
+
|
37 |
+
# Contact information
|
38 |
+
Mr. Triet Minh Thai: [email protected]
|
39 |
+
Mr. Son T. Luu: [email protected]
|
40 |
+
|
41 |
+
# Citation
|
42 |
+
@inproceedings{thai-etal-2022-uit,
|
43 |
+
title = "{UIT}-{V}i{C}o{V}19{QA}: A Dataset for {COVID}-19 Community-based Question Answering on {V}ietnamese Language",
|
44 |
+
author = "Thai, Triet and Thao-Ha, Ngan Chu and Vo, Anh and Luu, Son",
|
45 |
+
booktitle = "Proceedings of the 36th Pacific Asia Conference on Language, Information and Computation",
|
46 |
+
month = oct,
|
47 |
+
year = "2022",
|
48 |
+
address = "Manila, Philippines",
|
49 |
+
publisher = "De La Salle University",
|
50 |
+
url = "https://aclanthology.org/2022.paclic-1.88",
|
51 |
+
pages = "801--810",
|
52 |
+
}
|
UIT-ViCoV19QA/UIT-ViCoV19-QA_main.ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Import Packages and libraries","metadata":{}},{"cell_type":"code","source":"%%capture\n!pip install numpy==1.17.4\n!pip install nltk==3.4.5\n!pip install torchtext==0.4.0\n!pip install scikit_learn==0.23.2\n!pip install spacy==2.3.5\n!pip install textblob==0.15.3\n!pip install torch==1.6.0 \n!pip install torchvision==0.7.0\n!pip install tqdm\n!pip install underthesea==1.3.3\n!pip install rouge_score","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:54:40.390494Z","iopub.execute_input":"2023-02-12T04:54:40.390942Z","iopub.status.idle":"2023-02-12T04:56:23.604920Z","shell.execute_reply.started":"2023-02-12T04:54:40.390903Z","shell.execute_reply":"2023-02-12T04:56:23.603544Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"import nltk\nnltk.download('wordnet')\n\nimport os\nimport math\nimport random\nimport argparse\nfrom pathlib import Path\nimport re\nimport numpy as np\nimport pandas as pd\nimport time\nimport gc\nimport sys\n\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torchtext.data import Field, Example, Dataset\nfrom torchtext.data import BucketIterator\nimport torch.nn.functional as F\n\nfrom sklearn.model_selection import train_test_split\n\nfrom tqdm import tqdm\nfrom tqdm.notebook import tqdm_notebook\n\n%rm -rf ./UIT-ViCoV19QA\n!git clone https://github.com/minhtriet2397/UIT-ViCoV19QA.git\n\nsys.path.insert(0, '..')\nsys.path.insert(0, '/kaggle/working/UIT-ViCoV19QA/models')\n%cd /kaggle/working/UIT-ViCoV19QA/models\n%pwd\n\nsys.argv=['']\ndel 
sys","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:56:23.607682Z","iopub.execute_input":"2023-02-12T04:56:23.608107Z","iopub.status.idle":"2023-02-12T04:56:27.462112Z","shell.execute_reply.started":"2023-02-12T04:56:23.608063Z","shell.execute_reply":"2023-02-12T04:56:27.460470Z"},"trusted":true},"execution_count":10,"outputs":[{"name":"stderr","text":"[nltk_data] Downloading package wordnet to /usr/share/nltk_data...\n[nltk_data] Package wordnet is already up-to-date!\n","output_type":"stream"},{"name":"stdout","text":"Cloning into 'UIT-ViCoV19QA'...\nremote: Enumerating objects: 62, done.\u001b[K\nremote: Counting objects: 100% (62/62), done.\u001b[K\nremote: Compressing objects: 100% (43/43), done.\u001b[K\nremote: Total 62 (delta 20), reused 51 (delta 15), pack-reused 0\u001b[K\nUnpacking objects: 100% (62/62), 1.72 MiB | 1.50 MiB/s, done.\n/kaggle/working/UIT-ViCoV19QA/models\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Main","metadata":{}},{"cell_type":"code","source":"\"\"\"Constants for the baseline models\"\"\"\nSEED = 42\nQUESTION = 'question'\n\nRNN_NAME = 'rnn'\nCNN_NAME = 'cnn'\nTRANSFORMER_NAME = 'transformer'\n\nATTENTION_1 = 'bahdanau'\nATTENTION_2 = 'luong'\n\nGPU = 'gpu'\nCPU = 'cpu'\nCUDA = 'cuda'\n\nCHECKPOINT_PATH = '/model/'\n\nANSWER_TOKEN = '<ans>'\nENTITY_TOKEN = '<ent>'\nEOS_TOKEN = '<eos>'\nSOS_TOKEN = '<sos>'\nPAD_TOKEN = '<pad>'\n\nSRC_NAME = 'src'\nTRG_NAME = 'trg'\n\npath = '/kaggle/working/'","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:56:27.464159Z","iopub.execute_input":"2023-02-12T04:56:27.464596Z","iopub.status.idle":"2023-02-12T04:56:27.471151Z","shell.execute_reply.started":"2023-02-12T04:56:27.464553Z","shell.execute_reply":"2023-02-12T04:56:27.470168Z"},"trusted":true},"execution_count":11,"outputs":[]},{"cell_type":"code","source":"import random\nimport numpy as np\n\ndef parse_args():\n \"\"\"Add arguments to parser\"\"\"\n parser = 
argparse.ArgumentParser(description='Verbalization dataset baseline models.')\n parser.add_argument('--model', default=RNN_NAME, type=str,\n choices=[RNN_NAME, CNN_NAME, TRANSFORMER_NAME], help='model to train the dataset')\n parser.add_argument('--input', default=QUESTION, type=str,\n choices=[QUESTION], help='use question as input')\n parser.add_argument('--attention', default=ATTENTION_2, type=str,\n choices=[ATTENTION_1, ATTENTION_2], help='attention layer for rnn model')\n parser.add_argument('--batch_size', default=8, type=int, help='batch size')\n parser.add_argument('--epochs_num', default=30, type=int, help='number of epochs')\n parser.add_argument('--answer_num', default=1, type=int, \n choices=[1,2,3,4], help='number of answer')\n args = parser.parse_args()\n return args\n\ndef set_SEED():\n SEED = 42\n random.seed(SEED)\n np.random.seed(SEED)\n torch.manual_seed(SEED)\n torch.cuda.manual_seed(SEED)\n torch.cuda.manual_seed_all(SEED)\n torch.backends.cudnn.enabled = False\n torch.backends.cudnn.benchmark = False\n torch.backends.cudnn.deterministic = True\n\nclass Checkpoint(object):\n \"\"\"Checkpoint class\"\"\"\n @staticmethod\n def save(model,cell, path):\n \"\"\"Save model using name\"\"\"\n name_tmp = model.name+\"_\"+ cell if model.name==RNN_NAME else model.name\n name = f'{name_tmp}.pt'\n torch.save(model.state_dict(), path+name)\n\n @staticmethod\n def load(model,path, name):\n \"\"\"Load model using name\"\"\"\n #name = f'{model.name}.pt'\n model.load_state_dict(torch.load(path+name))\n return model","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:56:27.474094Z","iopub.execute_input":"2023-02-12T04:56:27.474784Z","iopub.status.idle":"2023-02-12T04:56:27.487154Z","shell.execute_reply.started":"2023-02-12T04:56:27.474749Z","shell.execute_reply":"2023-02-12T04:56:27.486131Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"markdown","source":"## Import data and create torchtext 
dataset","metadata":{}},{"cell_type":"code","source":"from underthesea import word_tokenize\n\nclass VerbalDataset(object):\n \"\"\"VerbalDataset class\"\"\"\n \n def __init__(self,train,val,test):\n self.train = train\n self.val = val\n self.test = test\n self.train_data = None\n self.valid_data = None\n self.test_data = None\n self.src_field = None\n self.trg_field = None\n\n def _make_torchtext_dataset(self, data, fields):\n examples = [Example.fromlist(i, fields) for i in tqdm_notebook(data)]\n return Dataset(examples, fields)\n\n def load_data_and_fields(self, ):\n \"\"\"\n Load verbalization data\n Create source and target fields\n \"\"\"\n train, test, val = self.train, self.test, self.val\n \n train = train.melt(id_vars=['id',\"Question\"],value_name=\"Answer\")\n train = train[train['Answer'].astype(bool)].drop(['id','variable'],axis=1).values\n \n test = test.melt(id_vars=['id',\"Question\"],value_name=\"Answer\")\n test = test[test['Answer'].astype(bool)].drop(['id','variable'],axis=1).values\n \n val = val.melt(id_vars=['id',\"Question\"],value_name=\"Answer\")\n val = val[val['Answer'].astype(bool)].drop(['id','variable'],axis=1).values\n\n # create fields\n self.src_field = Field(tokenize=word_tokenize,\n init_token=SOS_TOKEN,\n eos_token=EOS_TOKEN,\n lower=True,\n include_lengths=True,\n batch_first=True)\n \n self.trg_field = Field(tokenize=word_tokenize,\n init_token=SOS_TOKEN,\n eos_token=EOS_TOKEN,\n lower=True,\n batch_first=True)\n\n fields_tuple = [(SRC_NAME, self.src_field), (TRG_NAME, self.trg_field)]\n\n # create toechtext datasets\n self.train_data = self._make_torchtext_dataset(train, fields_tuple)\n self.valid_data = self._make_torchtext_dataset(val, fields_tuple)\n self.test_data = self._make_torchtext_dataset(test, fields_tuple)\n\n # build vocabularies\n self.src_field.build_vocab(self.train_data, min_freq=1)\n self.trg_field.build_vocab(self.train_data, min_freq=1)\n print(\"i am field tuple\",fields_tuple)\n\n def get_data(self):\n 
\"\"\"Return train, validation and test data objects\"\"\"\n return self.train_data, self.valid_data, self.test_data\n\n def get_fields(self):\n \"\"\"Return source and target field objects\"\"\"\n return self.src_field, self.trg_field\n\n def get_vocabs(self):\n \"\"\"Return source and target vocabularies\"\"\"\n #print('self, trg field vocab: ', self.trg_field.vocab)\n return self.src_field.vocab, self.trg_field.vocab","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:56:27.488581Z","iopub.execute_input":"2023-02-12T04:56:27.488938Z","iopub.status.idle":"2023-02-12T04:56:27.506628Z","shell.execute_reply.started":"2023-02-12T04:56:27.488904Z","shell.execute_reply":"2023-02-12T04:56:27.505736Z"},"trusted":true},"execution_count":13,"outputs":[]},{"cell_type":"code","source":"set_SEED()\nargs = parse_args()\nanswer_num = args.answer_num\n\nset_ = ['train','val','test']\ntrain = pd.read_csv(f'{path}UIT-ViCoV19QA/dataset/{answer_num}_ans/UIT-ViCoV19QA_train.csv',na_filter=False,delimiter='|')\nval = pd.read_csv(f'{path}UIT-ViCoV19QA/dataset/{answer_num}_ans/UIT-ViCoV19QA_val.csv',na_filter=False,delimiter='|')\ntest = pd.read_csv(f'{path}UIT-ViCoV19QA/dataset/{answer_num}_ans/UIT-ViCoV19QA_test.csv',na_filter=False,delimiter='|')\n\ndataset = VerbalDataset(train,val,test)\ndataset.load_data_and_fields()\nsrc_vocab, trg_vocab = dataset.get_vocabs()\ntrain_data, valid_data, test_data = dataset.get_data()\n\nprint('--------------------------------')\nprint(f\"Training data: {len(train_data.examples)}\")\nprint(f\"Evaluation data: {len(valid_data.examples)}\")\nprint(f\"Testing data: {len(test_data.examples)}\")\nprint('--------------------------------')\nprint(f'Question example: {train_data.examples[2].src}\\n')\nprint(f'Answer example: {train_data.examples[2].trg}')\nprint('--------------------------------')\nprint(f\"Unique tokens in questions vocabulary: {len(src_vocab)}\")\nprint(f\"Unique tokens in answers vocabulary: 
{len(trg_vocab)}\")\nprint('--------------------------------')","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:56:27.508078Z","iopub.execute_input":"2023-02-12T04:56:27.508549Z","iopub.status.idle":"2023-02-12T04:57:40.844030Z","shell.execute_reply.started":"2023-02-12T04:56:27.508515Z","shell.execute_reply":"2023-02-12T04:57:40.842961Z"},"trusted":true},"execution_count":14,"outputs":[{"output_type":"display_data","data":{"text/plain":" 0%| | 0/3500 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"05a9e8b8ae224b9da9a92c5869ecf8b1"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/500 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5d4189ed992b40a9be50fe21799441c1"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/500 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b9457f3414b14bbfb685897ff5fa8aeb"}},"metadata":{}},{"name":"stdout","text":"i am field tuple [('src', <torchtext.data.field.Field object at 0x7f928e083990>), ('trg', <torchtext.data.field.Field object at 0x7f928e083050>)]\n--------------------------------\nTraining data: 3500\nEvaluation data: 500\nTesting data: 500\n--------------------------------\nQuestion example: ['tôi', 'đang', 'cho', 'con', 'bú', '(', '10', 'tháng', 'tuổi', ')', 'có', 'được', 'chủng', 'ngừa', 'vaccine', 'covid-19', 'không', '?', 'trẻ nhỏ', 'bao nhiêu', 'tháng', 'tuổi', 'mới', 'chủng', 'ngừa', 'được', 'vaccine', 'covid-19', 'ạ', '?', 'xin', 'cảm ơn', '!']\n\nAnswer example: ['chào', 'chị', ',', 'theo', 'hướng dẫn', 'của', 'bộ', 'y tế', ',', 'phụ nữ', 'đang', 'cho', 'con', 'bú', 'sẽ', 'hoãn', 'tiêm', 'vaccine', 'covid-19', 'trong', 'thời gian', 'này', '.', 'hiện nay', ',', 'mỗi', 'loại', 'vaccine', 'sẽ', 'chỉ định', 'ở', 'những', 'đối tượng', 'khác', 'nhau', 'như', 'vaccine 
covid-19', 'của', 'astrazeneca', 'chỉ định', 'tiêm chủng', 'cho', 'người', 'từ', '18', 'tuổi', 'trở lên', ',', 'vaccine', 'của', 'pfizer', '/', 'biontech', 'chỉ định', 'cho', 'trẻ', 'từ', '12', 'tuổi', 'trở lên', ',', 'chưa', 'có', 'vắc xin', 'nào', 'chỉ định', 'cho', 'trẻ', 'nhỏ', 'dưới', '12', 'tuổi', '.', 'cảm ơn', 'câu', 'hỏi', 'của', 'chị', '.', 'cảm ơn', 'chị', '.']\n--------------------------------\nUnique tokens in questions vocabulary: 4396\nUnique tokens in answers vocabulary: 8537\n--------------------------------\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## Define MODEL","metadata":{}},{"cell_type":"code","source":"class Seq2Seq(nn.Module):\n def __init__(self, encoder, decoder, name):\n super().__init__()\n self.encoder = encoder\n self.decoder = decoder\n self.name = name\n\n def forward(self, src_tokens, src_lengths, trg_tokens, teacher_forcing_ratio=0.5):\n encoder_out = self.encoder(src_tokens, \n src_lengths=src_lengths)\n \n decoder_out = self.decoder(trg_tokens, encoder_out,\n src_tokens=src_tokens,\n teacher_forcing_ratio=teacher_forcing_ratio)\n return decoder_out","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:57:40.845634Z","iopub.execute_input":"2023-02-12T04:57:40.846239Z","iopub.status.idle":"2023-02-12T04:57:40.853175Z","shell.execute_reply.started":"2023-02-12T04:57:40.846199Z","shell.execute_reply":"2023-02-12T04:57:40.852098Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"code","source":"# Choose model here\nargs.model = CNN_NAME # CNN and Transformers don't apply Attention_1, Attention_2\nargs.attention = ATTENTION_1\ncell_name = 'gru'\n\nif args.model == RNN_NAME and args.attention == ATTENTION_1:\n from rnn1 import Encoder, Decoder\nelif args.model == RNN_NAME and args.attention == ATTENTION_2:\n from rnn2 import Encoder, Decoder\nelif args.model == CNN_NAME:\n from cnn import Encoder, Decoder\nelif args.model == TRANSFORMER_NAME:\n from transformer import Encoder, Decoder, 
NoamOpt","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:57:40.854778Z","iopub.execute_input":"2023-02-12T04:57:40.855209Z","iopub.status.idle":"2023-02-12T04:57:40.870049Z","shell.execute_reply.started":"2023-02-12T04:57:40.855164Z","shell.execute_reply":"2023-02-12T04:57:40.869129Z"},"trusted":true},"execution_count":16,"outputs":[]},{"cell_type":"code","source":"set_SEED()\nDEVICE = torch.device(CUDA if torch.cuda.is_available() else CPU)\n\nif args.model == RNN_NAME and args.attention == ATTENTION_2:\n encoder = Encoder(src_vocab, DEVICE, cell_name)\n decoder = Decoder(trg_vocab, DEVICE, cell_name)\nelse:\n encoder = Encoder(src_vocab, DEVICE)\n decoder = Decoder(trg_vocab, DEVICE)\nmodel = Seq2Seq(encoder, decoder, args.model).to(DEVICE)\n\nparameters_num = sum(p.numel() for p in model.parameters() if p.requires_grad)\n\nprint('--------------------------------')\nprint(f'Model: {args.model}')\nprint(f'Model input: {args.input}')\nif args.model == RNN_NAME:\n print(f'Attention: {args.attention}')\n print('Cell name: ',cell_name)\nprint(f'The model has {parameters_num:,} trainable parameters')\nprint('--------------------------------')","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:57:40.871571Z","iopub.execute_input":"2023-02-12T04:57:40.872416Z","iopub.status.idle":"2023-02-12T04:57:43.496740Z","shell.execute_reply.started":"2023-02-12T04:57:40.872379Z","shell.execute_reply":"2023-02-12T04:57:43.495579Z"},"trusted":true},"execution_count":17,"outputs":[{"name":"stdout","text":"--------------------------------\nModel: cnn\nModel input: question\nThe model has 28,191,065 trainable parameters\n--------------------------------\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## Train model","metadata":{}},{"cell_type":"code","source":"class Evaluator(object):\n \"\"\"Evaluator class\"\"\"\n def __init__(self, criterion):\n self.criterion = criterion\n\n def evaluate(self, model, iterator, teacher_ratio=1.0):\n model.eval()\n 
epoch_loss = 0\n with torch.no_grad():\n for _, batch in enumerate(iterator):\n src, src_len = batch.src\n trg = batch.trg\n input_trg = trg if model.name == RNN_NAME else trg[:, :-1]\n output = model(src, src_len, input_trg, teacher_ratio)\n trg = trg.t() if model.name == RNN_NAME else trg[:, 1:]\n output = output.contiguous().view(-1, output.shape[-1])\n trg = trg.contiguous().view(-1)\n # output: (batch_size * trg_len) x output_dim\n # trg: (batch_size * trg_len)\n loss = self.criterion(output, trg)\n epoch_loss += loss.item()\n return epoch_loss / len(iterator)","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:57:43.500533Z","iopub.execute_input":"2023-02-12T04:57:43.500824Z","iopub.status.idle":"2023-02-12T04:57:43.508605Z","shell.execute_reply.started":"2023-02-12T04:57:43.500797Z","shell.execute_reply":"2023-02-12T04:57:43.507609Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"code","source":"from torch.cuda.amp import autocast, GradScaler\n\nclass Trainer(object):\n \"\"\"Trainer Class\"\"\"\n def __init__(self, optimizer, criterion, batch_size, device):\n self.optimizer = optimizer\n self.criterion = criterion\n self.batch_size = batch_size\n self.device = device\n self.evaluator = Evaluator(criterion=self.criterion)\n\n def _train_batch(self, model, iterator, teacher_ratio, clip):\n model.train()\n epoch_loss = 0\n #scaler = GradScaler()\n for _, batch in enumerate(tqdm_notebook(iterator)):\n src, src_len = batch.src\n trg = batch.trg\n self.optimizer.zero_grad()\n input_trg = trg if model.name == RNN_NAME else trg[:, :-1]\n output = model(src, src_len, input_trg, teacher_ratio)\n trg = trg.t() if model.name == RNN_NAME else trg[:, 1:]\n output = output.contiguous().view(-1, output.shape[-1])\n trg = trg.contiguous().view(-1)\n # output: (batch_size * trg_len) x output_dim\n # trg: (batch_size * trg_len)\n torch.cuda.empty_cache()\n loss = self.criterion(output, trg)\n loss.backward()\n 
torch.nn.utils.clip_grad_norm_(model.parameters(), clip)\n self.optimizer.step()\n epoch_loss += loss.item()\n \n return epoch_loss / len(iterator)\n\n def _get_iterators(self, train_data, valid_data, model_name):\n return BucketIterator.splits((train_data, valid_data),\n batch_size=self.batch_size,\n sort_within_batch=True if model_name == RNN_NAME else \\\n False,\n sort_key=lambda x: len(x.src),\n device=self.device)\n\n def _epoch_time(self, start_time, end_time):\n elapsed_time = end_time - start_time\n elapsed_mins = int(elapsed_time / 60)\n elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n return elapsed_mins, elapsed_secs\n\n def _log_epoch(self, train_loss, valid_loss, epoch, start_time, end_time):\n minutes, seconds = self._epoch_time(start_time, end_time)\n print(f'Epoch: {epoch+1:02} | Time: {minutes}m {seconds}s')\n print(f'\\tTrain Loss: {train_loss:.3f} | Train PPL: {np.exp(train_loss):7.3f}')\n print(f'\\t Val. Loss: {valid_loss:.3f} | Val. PPL: {np.exp(valid_loss):7.3f}')\n\n def _train_epoches(self, model, train_data, valid_data, path_, num_of_epochs, teacher_ratio, clip):\n best_valid_loss = float('inf')\n # pylint: disable=unbalanced-tuple-unpacking\n train_iterator, valid_iterator = self._get_iterators(train_data, valid_data, model.name)\n train_loss_list = []\n val_loss_list = []\n for epoch in range(num_of_epochs):\n start_time = time.time()\n train_loss = self._train_batch(model, train_iterator, teacher_ratio, clip)\n valid_loss = self.evaluator.evaluate(model, valid_iterator, teacher_ratio)\n \n train_loss_list.append(train_loss)\n val_loss_list.append(valid_loss)\n \n end_time = time.time()\n self._log_epoch(train_loss, valid_loss, epoch, start_time, end_time)\n if valid_loss < best_valid_loss:\n best_valid_loss = valid_loss\n Checkpoint.save(model,cell_name,path_)\n return train_loss_list, val_loss_list\n\n def train(self, model, train_data, valid_data, path_, num_of_epochs=20, teacher_ratio=1.0, clip=1):\n \"\"\"Train 
model\"\"\"\n return self._train_epoches(model, train_data, valid_data, path_, num_of_epochs, teacher_ratio, clip)","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:57:43.510381Z","iopub.execute_input":"2023-02-12T04:57:43.511005Z","iopub.status.idle":"2023-02-12T04:57:43.531297Z","shell.execute_reply.started":"2023-02-12T04:57:43.510969Z","shell.execute_reply":"2023-02-12T04:57:43.530280Z"},"trusted":true},"execution_count":19,"outputs":[]},{"cell_type":"code","source":"# create optimizer\nif args.model ==TRANSFORMER_NAME:\n for p in model.parameters():\n if p.dim() > 1:\n nn.init.xavier_uniform_(p)\n optimizer = NoamOpt(torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))\nelse:\n optimizer = optim.Adam(model.parameters(),lr=0.001)\n\nbatch_size = 8\nepochs=10\n\n# define criterion\ncriterion = nn.CrossEntropyLoss(ignore_index=trg_vocab.stoi[PAD_TOKEN])\n\ntrainer = Trainer(optimizer, criterion, batch_size, DEVICE)\ntrain_loss, val_loss = trainer.train(model, train_data, valid_data, path, num_of_epochs=epochs)","metadata":{"execution":{"iopub.status.busy":"2023-02-12T04:57:43.532550Z","iopub.execute_input":"2023-02-12T04:57:43.533240Z","iopub.status.idle":"2023-02-12T05:02:30.402067Z","shell.execute_reply.started":"2023-02-12T04:57:43.533204Z","shell.execute_reply":"2023-02-12T05:02:30.401321Z"},"trusted":true},"execution_count":20,"outputs":[{"output_type":"display_data","data":{"text/plain":" 0%| | 0/438 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8ba4c23730ce4082adc7e68434ea1782"}},"metadata":{}},{"name":"stdout","text":"Epoch: 01 | Time: 0m 28s\n\tTrain Loss: 5.281 | Train PPL: 196.623\n\t Val. Loss: 4.216 | Val. 
PPL: 67.775\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/438 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5f4d0b32027b46568806a7d354a5c1d7"}},"metadata":{}},{"name":"stdout","text":"Epoch: 02 | Time: 0m 27s\n\tTrain Loss: 4.260 | Train PPL: 70.826\n\t Val. Loss: 3.829 | Val. PPL: 46.032\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/438 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b31778e68d504eee8b47c1d222a06225"}},"metadata":{}},{"name":"stdout","text":"Epoch: 03 | Time: 0m 28s\n\tTrain Loss: 3.869 | Train PPL: 47.907\n\t Val. Loss: 3.614 | Val. PPL: 37.105\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/438 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e721a12b8c1044d592677509df1489f6"}},"metadata":{}},{"name":"stdout","text":"Epoch: 04 | Time: 0m 28s\n\tTrain Loss: 3.628 | Train PPL: 37.635\n\t Val. Loss: 3.497 | Val. PPL: 33.008\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/438 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e2cb9b7f88cd45e18fce9790587d22bb"}},"metadata":{}},{"name":"stdout","text":"Epoch: 05 | Time: 0m 28s\n\tTrain Loss: 3.461 | Train PPL: 31.849\n\t Val. Loss: 3.420 | Val. PPL: 30.578\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/438 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5e508c73ac384cb4894825dfa6479f26"}},"metadata":{}},{"name":"stdout","text":"Epoch: 06 | Time: 0m 28s\n\tTrain Loss: 3.305 | Train PPL: 27.254\n\t Val. Loss: 3.359 | Val. 
PPL: 28.772\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/438 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"83c37918834041fb8beee9e57fb328ae"}},"metadata":{}},{"name":"stdout","text":"Epoch: 07 | Time: 0m 28s\n\tTrain Loss: 3.204 | Train PPL: 24.640\n\t Val. Loss: 3.330 | Val. PPL: 27.945\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/438 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"09ad137b3f034180a265b82800e676da"}},"metadata":{}},{"name":"stdout","text":"Epoch: 08 | Time: 0m 28s\n\tTrain Loss: 3.103 | Train PPL: 22.255\n\t Val. Loss: 3.307 | Val. PPL: 27.293\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/438 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"881a973adcce47d5b08d5cf765583076"}},"metadata":{}},{"name":"stdout","text":"Epoch: 09 | Time: 0m 28s\n\tTrain Loss: 3.017 | Train PPL: 20.422\n\t Val. Loss: 3.298 | Val. PPL: 27.063\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/438 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a7403b6050274dde9704403ef6ec5160"}},"metadata":{}},{"name":"stdout","text":"Epoch: 10 | Time: 0m 28s\n\tTrain Loss: 2.958 | Train PPL: 19.267\n\t Val. Loss: 3.283 | Val. 
PPL: 26.656\n","output_type":"stream"}]},{"cell_type":"code","source":"import matplotlib.pyplot as plt\nimport os \nos.chdir(r'/kaggle/working')\nfrom IPython.display import FileLink\n\nplt.plot(train_loss)\nplt.plot(val_loss)\nplt.title('model loss')\nplt.xlabel('epoch')\nplt.legend(['train', 'val'], loc='upper left')\n\n#plt.savefig('loss.png')\nplt.show()","metadata":{"_kg_hide-input":true,"execution":{"iopub.status.busy":"2023-02-12T05:02:30.403778Z","iopub.execute_input":"2023-02-12T05:02:30.404565Z","iopub.status.idle":"2023-02-12T05:02:30.626564Z","shell.execute_reply.started":"2023-02-12T05:02:30.404525Z","shell.execute_reply":"2023-02-12T05:02:30.625678Z"},"trusted":true},"execution_count":21,"outputs":[{"output_type":"display_data","data":{"text/plain":"<Figure size 432x288 with 1 Axes>","image/png":"\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":"## Evaluation","metadata":{}},{"cell_type":"code","source":"val_ref = [list(filter(None, np.delete(i,[0,1]))) for i in val.values]\ntest_ref = [list(filter(None, np.delete(i,[0,1]))) for i in test.values]\n\nval_trg = []\ntest_trg = []\ntrg_ = [val_trg,test_trg]\nfor t in trg_:\n for i in val_ref:\n tmp=[]\n for j in i:\n s = word_tokenize(j)\n tmp.append(s)\n t.append(tmp)\n\nval_src = [i.src for i in valid_data.examples]\nnew_valid = [[val_src[i],val_trg[i]] for i in range(len(val_trg)) ]\ntest_src = [i.src for i in test_data.examples]\nnew_test = [[test_src[i],test_trg[i]] for i in range(len(test_trg))]","metadata":{"execution":{"iopub.status.busy":"2023-02-12T05:10:41.155405Z","iopub.execute_input":"2023-02-12T05:10:41.155774Z","iopub.status.idle":"2023-02-12T05:10:52.815140Z","shell.execute_reply.started":"2023-02-12T05:10:41.155744Z","shell.execute_reply":"2023-02-12T05:10:52.814150Z"},"trusted":true},"execution_count":28,"outputs":[]},{"cell_type":"code","source":"import nltk\nfrom nltk.translate.bleu_score import SmoothingFunction\nfrom rouge_score import 
rouge_scorer\n\nsmoothie = SmoothingFunction().method4\n\nclass BleuScorer(object):\n \"\"\"Blue scorer class\"\"\"\n def __init__(self):\n self.results = []\n self.results_meteor = []\n \n self.score = 0\n self.bleu_4 = 0\n self.meteor_score = 0\n self.rouge_score = 0\n \n self.instances = 0\n self.meteor_instances = 0\n\n def example_score(self, reference, hypothesis):\n \"\"\"Calculate blue score for one example\"\"\"\n bleu_1 = nltk.translate.bleu_score.sentence_bleu(reference, hypothesis,weights=(1,0,0,0),smoothing_function=SmoothingFunction().method4)\n bleu_4 = nltk.translate.bleu_score.sentence_bleu(reference, hypothesis,weights=(0.25,0.25,0.25,0.25),smoothing_function=SmoothingFunction().method4)\n return bleu_1, bleu_4\n \n def example_score_rouge(self, reference, hypothesis):\n scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=False)\n scores = []\n for i in reference:\n scores.append(scorer.score(i,hypothesis)['rougeL'][-1])\n return np.max(scores) #best\n \n \n def example_score_meteor(self, reference, hypothesis):\n \"\"\"Calculate blue score for one example\"\"\"\n return nltk.translate.meteor_score.meteor_score(reference,hypothesis)\n\n def data_score(self, data, predictor):\n \"\"\"Score complete list of data\"\"\"\n results_prelim = []\n for example in tqdm_notebook(data):\n #i = 1\n# src = [t.lower() for t in example.src]\n# reference = [t.lower() for t in example.trg]\n \n src = example[0]\n reference = [[string.lower() for string in sublist] for sublist in example[1]]\n\n #and calculate bleu score average of all hypothesis\n #hypothesis = predictor.predict(example.src)\n hypothesis = predictor.predict(src)\n bleu_1,bleu_4 = self.example_score(reference, hypothesis)\n meteor_score = self.example_score_meteor([' '.join(i) for i in reference], ' '.join(hypothesis))\n rouge_score = self.example_score_rouge([' '.join(i) for i in reference], ' '.join(hypothesis))\n \n f = open(\"result.txt\", \"a\")\n f.write('Question: '+\" 
\".join(src)+'\\n')\n for i in range(len(reference)):\n f.write('Reference_{}: '.format(i)+\" \".join(reference[i])+'\\n')\n f.write('Hypothesis: '+\" \".join(hypothesis)+'\\n')\n f.write('BLEU-1: '+ str(bleu_1*100)+'\\n')\n f.write('BLEU-4: '+str(bleu_4*100)+'\\n')\n f.write('METEOR: '+str(meteor_score*100)+'\\n')\n f.write('ROUGE-L: '+str(rouge_score*100)+'\\n\\n')\n \n f.close()\n \n \n results_prelim.append({\n 'question': '\"' + str(src) + '\"',\n 'reference': reference,\n 'hypothesis': hypothesis,\n 'bleu_1': bleu_1,\n 'bleu_4': bleu_4,\n 'meteor_score': meteor_score,\n 'rouge_score': rouge_score,\n \n })\n \n results = [max((v for v in results_prelim if v['question'] == x), key=lambda y:y['bleu_1']) for x in set(v['question'] for v in results_prelim)] \n\n with open(path+'result_output.txt', 'w') as f:\n for elem in results:\n f.write(\"%s\\n\" % elem)\n self.results.append(elem)\n self.score += elem['bleu_1']\n self.bleu_4 += elem['bleu_4']\n self.meteor_score += elem['meteor_score']\n self.rouge_score += elem['rouge_score']\n self.instances += 1\n return self.score / self.instances, self.bleu_4 / self.instances, self.meteor_score / self.instances, self.rouge_score / self.instances\n\n def average_score(self):\n \"\"\"Return bleu average score\"\"\"\n return self.score / self.instances, self.bleu_4 / self.instances\n \n def average_rouge_score(self):\n \"\"\"Return bleu average score\"\"\"\n return self.rouge_score / self.instances\n \n \n def data_meteor_score(self, data, predictor):\n \"\"\"Score complete list of data\"\"\"\n results_prelim = []\n for example in data:\n src = [t.lower() for t in example.src]\n reference = [t.lower() for t in example.trg]\n hypothesis = predictor.predict(example.src)\n meteor_score = self.example_score_meteor(' '.join(reference), ' '.join(hypothesis))\n results_prelim.append({\n 'question': '\"' + str(src) + '\"',\n 'reference': reference,\n 'hypothesis': hypothesis,\n 'meteor_score': meteor_score\n })\n results_meteor = 
[max((v for v in results_prelim if v['question'] == x), key=lambda y:y['meteor_score']) for x in set(v['question'] for v in results_prelim)] \n\n with open(path+'result_meteor_output.txt', 'w') as f:\n for elem in results_meteor:\n f.write(\"%s\\n\" % elem)\n self.results_meteor.append(elem)\n self.meteor_score += elem['meteor_score']\n self.meteor_instances += 1\n return self.meteor_score/self.meteor_instances\n \n def average_meteor_score(self):\n \"\"\"Return meteor average score\"\"\"\n return self.meteor_score/self.instances\n\n def reset(self):\n \"\"\"Reset object properties\"\"\"\n self.results = []\n self.results_meteor = []\n self.score = 0\n self.meteor_score = 0\n self.instances = 0\n self.meteor_instances = 0","metadata":{"execution":{"iopub.status.busy":"2023-02-12T05:05:50.377470Z","iopub.execute_input":"2023-02-12T05:05:50.378036Z","iopub.status.idle":"2023-02-12T05:05:50.427362Z","shell.execute_reply.started":"2023-02-12T05:05:50.378000Z","shell.execute_reply":"2023-02-12T05:05:50.426483Z"},"trusted":true},"execution_count":24,"outputs":[]},{"cell_type":"code","source":"class Predictor(object):\n \"\"\"Predictor class\"\"\"\n def __init__(self, model, src_vocab, trg_vocab, device):\n self.model = model\n self.src_vocab = src_vocab\n self.trg_vocab = trg_vocab\n self.device = device\n\n def _predict_step(self, tokens):\n self.model.eval()\n tokenized_sentence = [SOS_TOKEN] + [t.lower() for t in tokens] + [EOS_TOKEN]\n numericalized = [self.src_vocab.stoi[token] for token in tokenized_sentence]\n src_tensor = torch.LongTensor(numericalized).unsqueeze(0).to(self.device)\n\n with torch.no_grad():\n encoder_out = self.model.encoder(src_tensor)\n\n outputs = [self.trg_vocab.stoi[SOS_TOKEN]]\n\n # cnn positional embedding gives assertion error for tensor\n # of size > max_positions-1, we predict tokens for max_positions-2\n # to avoid the error\n for _ in range(self.model.decoder.max_positions-2):\n trg_tensor = 
torch.LongTensor(outputs).unsqueeze(0).to(self.device)\n\n with torch.no_grad():\n output = self.model.decoder(trg_tensor, encoder_out, src_tokens=src_tensor)\n\n prediction = output.argmax(2)[:, -1].item()\n\n if prediction == self.trg_vocab.stoi[EOS_TOKEN] or len(outputs)==500:\n break\n \n outputs.append(prediction)\n\n translation = [self.trg_vocab.itos[i] for i in outputs]\n\n return translation[1:] # , attention\n\n def _predict_rnn_step(self, tokens):\n self.model.eval()\n with torch.no_grad():\n tokenized_sentence = [SOS_TOKEN] + [t.lower() for t in tokens] + [EOS_TOKEN]\n numericalized = [self.src_vocab.stoi[t] for t in tokenized_sentence]\n\n src_len = torch.LongTensor([len(numericalized)]).to(self.device)\n tensor = torch.LongTensor(numericalized).unsqueeze(1).to(self.device)\n\n translation_tensor_logits = self.model(tensor.t(), src_len, None)\n\n translation_tensor = torch.argmax(translation_tensor_logits.squeeze(1), 1)\n translation = [self.trg_vocab.itos[t] for t in translation_tensor]\n\n return translation[1:] # , attention\n\n def predict(self, tokens):\n \"\"\"Perform prediction on given tokens\"\"\"\n return self._predict_rnn_step(tokens) if self.model.name == RNN_NAME else \\\n self._predict_step(tokens)","metadata":{"execution":{"iopub.status.busy":"2023-02-12T05:05:54.736292Z","iopub.execute_input":"2023-02-12T05:05:54.736655Z","iopub.status.idle":"2023-02-12T05:05:54.751475Z","shell.execute_reply.started":"2023-02-12T05:05:54.736624Z","shell.execute_reply":"2023-02-12T05:05:54.750546Z"},"trusted":true},"execution_count":25,"outputs":[]},{"cell_type":"code","source":"name = args.model+\"_\"+cell_name if args.model==RNN_NAME else args.model\nmodel = Checkpoint.load(model,path,'./{}.pt'.format(name))\n\nvalid_iterator, test_iterator = BucketIterator.splits(\n (valid_data, test_data),\n batch_size=8,\n sort_within_batch=True if args.model == RNN_NAME else False,\n sort_key=lambda x: len(x.src),\n device=DEVICE)\n\n# evaluate model\nvalid_loss = 
trainer.evaluator.evaluate(model, valid_iterator)\ntest_loss = trainer.evaluator.evaluate(model, test_iterator)\n\n# calculate blue score for valid and test data\npredictor = Predictor(model, src_vocab, trg_vocab, DEVICE)\n\n# # train_scorer = BleuScorer()\nvalid_scorer = BleuScorer()\ntest_scorer = BleuScorer()\n\nvalid_scorer.data_score(new_valid, predictor)\ntest_scorer.data_score(new_test, predictor)","metadata":{"execution":{"iopub.status.busy":"2023-02-12T05:05:59.035087Z","iopub.execute_input":"2023-02-12T05:05:59.035456Z","iopub.status.idle":"2023-02-12T05:07:51.964640Z","shell.execute_reply.started":"2023-02-12T05:05:59.035426Z","shell.execute_reply":"2023-02-12T05:07:51.963354Z"},"trusted":true},"execution_count":26,"outputs":[{"output_type":"display_data","data":{"text/plain":" 0%| | 0/500 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5b1e766e58d04f1eb714a08c586e5c3a"}},"metadata":{}},{"name":"stdout","text":"| Test Loss: 3.169 | Test PPL: 23.787 |\n| Test Data Average BLEU score (0.13299530736768786, 0.08383683068952133) |\n| Test Data Average METEOR score 0.14519957374057366 |\n","output_type":"stream"}]},{"cell_type":"code","source":"print(f'| Val. Loss: {valid_loss:.3f} | Test PPL: {math.exp(valid_loss):7.3f} |')\nprint(f'| Val. Data Average BLEU1, BLEU4 score {valid_scorer.average_score()} |')\nprint(f'| Val. 
Data Average METEOR score {valid_scorer.average_meteor_score()} |')\nprint(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')\nprint(f'| Test Data Average BLEU1, BLEU4 score {test_scorer.average_score()} |')\nprint(f'| Test Data Average METEOR score {test_scorer.average_meteor_score()} |')","metadata":{"execution":{"iopub.status.busy":"2023-02-12T05:02:42.325507Z","iopub.status.idle":"2023-02-12T05:02:42.325985Z","shell.execute_reply.started":"2023-02-12T05:02:42.325740Z","shell.execute_reply":"2023-02-12T05:02:42.325764Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"r = {'ppl':[round(math.exp(test_loss),3)],\n 'BLEU-1':[test_scorer.average_score()[0]*100],\n 'BLEU-4':[test_scorer.average_score()[1]*100],\n 'METEOR':[test_scorer.average_meteor_score()*100],\n 'ROUGE-L':[test_scorer.average_rouge_score()*100]}\n\ndf_result = pd.DataFrame(data=r)\n\nhtml = df_result.style.set_table_styles([{'selector': 'th', 'props': [('font-size', '15pt')]}]).set_properties(**{'font-size': '15pt'})\nhtml","metadata":{"execution":{"iopub.status.busy":"2023-02-12T05:09:12.132953Z","iopub.execute_input":"2023-02-12T05:09:12.133326Z","iopub.status.idle":"2023-02-12T05:09:12.198299Z","shell.execute_reply.started":"2023-02-12T05:09:12.133295Z","shell.execute_reply":"2023-02-12T05:09:12.197237Z"},"trusted":true},"execution_count":27,"outputs":[{"execution_count":27,"output_type":"execute_result","data":{"text/plain":"<pandas.io.formats.style.Styler at 0x7f91cc4f0150>","text/html":"<style type=\"text/css\">\n#T_c7a0a_ th {\n font-size: 15pt;\n}\n#T_c7a0a_row0_col0, #T_c7a0a_row0_col1, #T_c7a0a_row0_col2, #T_c7a0a_row0_col3, #T_c7a0a_row0_col4 {\n font-size: 15pt;\n}\n</style>\n<table id=\"T_c7a0a_\">\n <thead>\n <tr>\n <th class=\"blank level0\" > </th>\n <th class=\"col_heading level0 col0\" >ppl</th>\n <th class=\"col_heading level0 col1\" >BLEU-1</th>\n <th class=\"col_heading level0 col2\" >BLEU-4</th>\n <th 
class=\"col_heading level0 col3\" >METEOR</th>\n <th class=\"col_heading level0 col4\" >ROUGE-L</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th id=\"T_c7a0a_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n <td id=\"T_c7a0a_row0_col0\" class=\"data row0 col0\" >23.787000</td>\n <td id=\"T_c7a0a_row0_col1\" class=\"data row0 col1\" >13.299531</td>\n <td id=\"T_c7a0a_row0_col2\" class=\"data row0 col2\" >8.383683</td>\n <td id=\"T_c7a0a_row0_col3\" class=\"data row0 col3\" >14.519957</td>\n <td id=\"T_c7a0a_row0_col4\" class=\"data row0 col4\" >27.549394</td>\n </tr>\n </tbody>\n</table>\n"},"metadata":{}}]}]}
|
UIT-ViCoV19QA/dataset/1_ans/UIT-ViCoV19QA_test.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d254fafb6c9340b101bf478536af6e1dd7b1ffd516a820f2580e48df88794573
|
3 |
+
size 520161
|
UIT-ViCoV19QA/dataset/1_ans/UIT-ViCoV19QA_train.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3660454cae6ff01e7c0046708abfe46dc67357bb239ec27bb4aa7d692c941149
|
3 |
+
size 3711672
|
UIT-ViCoV19QA/dataset/1_ans/UIT-ViCoV19QA_val.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18d4db7ec783e08f8cba280109b70b029c38aeecbdd72330eedc7cc52324687b
|
3 |
+
size 520352
|
UIT-ViCoV19QA/dataset/2_ans/UIT-ViCoV19QA_test.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c42f217da35a5318dacd46f85e003ac370471fc2b3e491e4ead730fcf77d0685
|
3 |
+
size 582656
|
UIT-ViCoV19QA/dataset/2_ans/UIT-ViCoV19QA_train.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85130ab449d23fe672e6a91b1791fdd5bae3b5b72bfc5f32620c5099bf82013f
|
3 |
+
size 4122052
|
UIT-ViCoV19QA/dataset/2_ans/UIT-ViCoV19QA_val.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:109b7905010d967ade5299b0de22211606685557fd09d785179206654ec941b6
|
3 |
+
size 579852
|
UIT-ViCoV19QA/dataset/3_ans/UIT-ViCoV19QA_test.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:188b82c5487b3bb6ff94bf654a1702f344c753c79e8bf27fcaf9df03f9bb6f55
|
3 |
+
size 600605
|
UIT-ViCoV19QA/dataset/3_ans/UIT-ViCoV19QA_train.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:284958313282493c3b04c3eca2fe60c640eda64a81bd218b5a1ea0b1b07bb52a
|
3 |
+
size 4240422
|
UIT-ViCoV19QA/dataset/3_ans/UIT-ViCoV19QA_val.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3cb2fd30ac52b1ff58eee680fa0135208841da39d873ed379841b88a27b822b4
|
3 |
+
size 595439
|
UIT-ViCoV19QA/dataset/4_ans/UIT-ViCoV19QA_test.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99860bae4ae5043e869c8047558d7db6d40e76d174aed522b7f37d7931c64fc9
|
3 |
+
size 610868
|
UIT-ViCoV19QA/dataset/4_ans/UIT-ViCoV19QA_train.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e2e7f21cef6898d42eafc5ee094e9dac285e251af19f9f6b9c274d92f446881
|
3 |
+
size 4300607
|
UIT-ViCoV19QA/dataset/4_ans/UIT-ViCoV19QA_val.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfe58d5c3e89a404c2db0620e3e9c893e8e0e29bfeaab05b6650f7ca6e82946a
|
3 |
+
size 603979
|
UIT-ViCoV19QA/dataset/UIT-ViCoV19QA.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65b147559695843829a0932fcaab9e6d1415d9821c3a0a3c1aa7ff8118a6ac6f
|
3 |
+
size 5515361
|
UIT-ViCoV19QA/models/cnn.py
ADDED
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Seq2seq based: Convolutional Sequence to Sequence Learning
|
3 |
+
https://arxiv.org/abs/1705.03122
|
4 |
+
"""
|
5 |
+
import math
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
import torch.nn.functional as F
|
9 |
+
from utils.constants import PAD_TOKEN
|
10 |
+
from models.layers import Embedding, Linear, Conv1d, PositionalEmbedding, LearnedPositionalEmbedding
|
11 |
+
|
12 |
+
def extend_conv_spec(convolutions):
    """Normalise a convolution spec so every layer has an explicit residual.

    Args:
        convolutions: iterable of per-layer specs, each either
            ``(out_channels, kernel_size)`` or
            ``(out_channels, kernel_size, residual)``. Specs may be tuples
            or lists.

    Returns:
        A tuple of 3-tuples ``(out_channels, kernel_size, residual)``;
        two-element specs get the default residual of ``1``.

    Raises:
        ValueError: if a spec does not have 2 or 3 elements.
    """
    extended = []
    for spec in convolutions:
        # Accept lists as well as tuples (e.g. specs loaded from JSON/YAML);
        # `list + (1,)` would otherwise raise a TypeError.
        layer = tuple(spec)
        if len(layer) == 3:
            extended.append(layer)
        elif len(layer) == 2:
            extended.append(layer + (1,))
        else:
            raise ValueError('invalid number of parameters in convolution spec ' + str(spec) + '. expected 2 or 3')
    return tuple(extended)
|
25 |
+
|
26 |
+
class Encoder(nn.Module):
    """Convolutional encoder: token + position embeddings followed by a
    stack of gated (GLU) 1-D convolutions with residual connections."""

    def __init__(self, vocabulary, device, embed_dim=512, convolutions=((512, 3),) * 3,
                 dropout=0.5, max_positions=5000):
        """
        Args:
            vocabulary: source vocabulary; must support ``len()`` and expose
                a ``stoi`` mapping containing ``PAD_TOKEN``.
            device: stored on the module as ``self.device``.
            embed_dim: size of the token/position embeddings.
            convolutions: per-layer ``(out_channels, kernel_size[, residual])``
                specs, see ``extend_conv_spec``.
            dropout: dropout probability applied to embeddings and conv inputs.
            max_positions: number of learned position embeddings.
        """
        super().__init__()
        self.vocabulary = vocabulary
        input_dim = len(vocabulary)
        self.padding_idx = vocabulary.stoi[PAD_TOKEN]
        self.dropout = dropout
        self.device = device

        self.embed_tokens = Embedding(input_dim, embed_dim, self.padding_idx)
        self.embed_positions = PositionalEmbedding(max_positions, embed_dim, self.padding_idx)

        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        self.embed2inchannels = Linear(embed_dim, in_channels)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.residuals = []

        # layer_in_channels[-k] is the channel count of the tensor that a
        # residual reaching k layers back will pick up.
        layer_in_channels = [in_channels]
        for out_channels, kernel_size, residual in convolutions:
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            # Only project the residual when its channel count differs.
            self.projections.append(Linear(residual_dim, out_channels)
                                    if residual_dim != out_channels else None)
            # Odd kernels are padded symmetrically by the conv itself; even
            # kernels are padded explicitly in forward().
            if kernel_size % 2 == 1:
                padding = kernel_size // 2
            else:
                padding = 0
            self.convolutions.append(
                nn.Conv1d(in_channels=in_channels,
                          out_channels=out_channels * 2,  # doubled for GLU
                          kernel_size=kernel_size,
                          padding=padding)
            )
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)

        self.inchannels2embed = Linear(in_channels, embed_dim)

    @staticmethod
    def _pad_time_axis(x, kernel_size):
        """Pad the last (time) axis of a (batch, channels, len) tensor so an
        unpadded convolution with an even ``kernel_size`` keeps the length."""
        padding_l = (kernel_size - 1) // 2
        padding_r = kernel_size // 2
        # F.pad's pad tuple starts at the LAST dimension, so a 2-tuple pads
        # only the time axis. The former 6-tuple (0, 0, 0, 0, l, r) padded
        # the batch axis instead.
        return F.pad(x, (padding_l, padding_r))

    def forward(self, src_tokens, **kwargs):
        """
        Forward pass for convolutional encoder
        Args:
            src_tokens (LongTensor): (batch, src_len)
        Returns:
            conved (FloatTensor): (batch, src_len, embed_dim)
            combined (FloatTensor): (batch, src_len, embed_dim)
        """
        # embed tokens and positions
        embedded = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
        embedded = F.dropout(embedded, p=self.dropout, training=self.training)

        conv_input = self.embed2inchannels(embedded)  # (batch, src_len, in_channels)

        # used to mask padding in input
        encoder_padding_mask = src_tokens.eq(self.padding_idx)  # (batch, src_len)
        if not encoder_padding_mask.any():
            encoder_padding_mask = None

        x = conv_input.permute(0, 2, 1)  # (batch, in_channels, src_len)
        residuals = [x]
        # temporal convolutions
        for proj, conv, res_layer in zip(self.projections, self.convolutions, self.residuals):
            if res_layer > 0:
                residual = residuals[-res_layer]
                if proj is not None:
                    # Linear acts on the last axis, so move channels last for
                    # the projection and back again afterwards.
                    residual = proj(residual.permute(0, 2, 1)).permute(0, 2, 1)
            else:
                residual = None

            if encoder_padding_mask is not None:
                x = x.masked_fill(encoder_padding_mask.unsqueeze(1), 0)

            x = F.dropout(x, p=self.dropout, training=self.training)
            kernel_size = conv.kernel_size[0]
            if kernel_size % 2 == 0:
                # odd kernels get their padding from the conv itself
                x = self._pad_time_axis(x, kernel_size)
            x = conv(x)
            x = F.glu(x, dim=1)

            # apply residual connection
            if residual is not None:
                x = (x + residual) * math.sqrt(0.5)
            residuals.append(x)

        conved = self.inchannels2embed(x.permute(0, 2, 1))

        if encoder_padding_mask is not None:
            conved = conved.masked_fill(encoder_padding_mask.unsqueeze(-1), 0)

        combined = (conved + embedded) * math.sqrt(0.5)

        return conved, combined
|
128 |
+
class Attention(nn.Module):
    """Dot-product attention of decoder conv states over the encoder output."""

    def __init__(self, conv_channels, embed_dim):
        super().__init__()
        # conv space -> embedding space (for scoring) and back again
        self.linear_in = Linear(conv_channels, embed_dim)
        self.linear_out = Linear(embed_dim, conv_channels)

    def forward(self, conved, embedded, encoder_out, encoder_padding_mask):
        """
        Attend from the decoder states to the encoder output.
        Args:
            conved: (batch, conv_channels, trg_len)
            embedded: (batch, trg_len, embed_dim)
            encoder_out (encoder_conved, encoder_combined): each (batch, src_len, embed_dim)
            encoder_padding_mask: (batch, src_len), True where the source is padding
        Returns:
            attended_combined: (batch, conv_channels, trg_len)
            attention: (batch, trg_len, src_len)
        """
        keys, values = encoder_out
        scale = math.sqrt(0.5)

        # decoder state projected to embedding size, mixed with the target embedding
        projected = self.linear_in(conved.permute(0, 2, 1))  # (batch, trg_len, embed_dim)
        query = (projected + embedded) * scale  # (batch, trg_len, embed_dim)

        scores = torch.matmul(query, keys.permute(0, 2, 1))  # (batch, trg_len, src_len)

        # padded source positions receive zero attention weight
        pad_positions = encoder_padding_mask.unsqueeze(1)
        scores = scores.float().masked_fill(pad_positions, float('-inf'))

        attention = F.softmax(scores, dim=2)

        context = torch.matmul(attention, values)  # (batch, trg_len, embed_dim)
        context = self.linear_out(context)  # (batch, trg_len, conv_channels)

        # residual connection back onto the conv states
        attended_combined = (conved + context.permute(0, 2, 1)) * scale

        return attended_combined, attention
|
166 |
+
class Decoder(nn.Module):
    """Convolutional decoder: causal gated convolutions, each followed by an
    attention step over the encoder output."""

    def __init__(self, vocabulary, device, embed_dim=512, convolutions=((512, 3),) * 3,
                 dropout=0.5, max_positions=5000):
        """
        Args:
            vocabulary: target vocabulary; must support ``len()`` and expose
                a ``stoi`` mapping containing ``PAD_TOKEN``.
            device: stored on the module as ``self.device``.
            embed_dim: size of the token/position embeddings.
            convolutions: per-layer ``(out_channels, kernel_size[, residual])``
                specs, see ``extend_conv_spec``.
            dropout: dropout probability.
            max_positions: number of learned position embeddings; also
                exposed as ``self.max_positions``.
        """
        super().__init__()

        self.vocabulary = vocabulary
        self.dropout = dropout
        self.device = device
        self.max_positions = max_positions

        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        output_dim = len(vocabulary)
        self.padding_idx = vocabulary.stoi[PAD_TOKEN]

        self.embed_tokens = Embedding(output_dim, embed_dim, self.padding_idx)
        self.embed_positions = PositionalEmbedding(max_positions, embed_dim, self.padding_idx)

        self.embed2inchannels = Linear(embed_dim, in_channels)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.attention = nn.ModuleList()
        self.residuals = []

        # layer_in_channels[-k] is the channel count of the tensor that a
        # residual reaching k layers back will pick up.
        layer_in_channels = [in_channels]
        for out_channels, kernel_size, residual in convolutions:
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            # Only project the residual when its channel count differs.
            self.projections.append(Linear(residual_dim, out_channels)
                                    if residual_dim != out_channels else None)
            # No conv-level padding: forward() left-pads so the conv is causal.
            self.convolutions.append(
                nn.Conv1d(in_channels=in_channels,
                          out_channels=out_channels * 2,  # doubled for GLU
                          kernel_size=kernel_size)
            )
            self.attention.append(Attention(out_channels, embed_dim))
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)

        self.inchannels2embed = Linear(in_channels, embed_dim)
        self.linear_out = Linear(embed_dim, output_dim)

    def forward(self, trg_tokens, encoder_out, **kwargs):
        """
        Forward pass for convolutional decoder
        Args:
            trg_tokens (LongTensor): (batch, trg_len)
            encoder_out (encoder_conved, encoder_combined): (batch, src_len, embed_dim)
            src_tokens (LongTensor, keyword, required): (batch, src_len)
        Returns:
            outputs (FloatTensor): (batch, trg_len, output_dim) vocabulary logits
        Raises:
            TypeError: if ``src_tokens`` is not supplied.
        """
        src_tokens = kwargs.get('src_tokens')
        if src_tokens is None:
            # The source tokens are needed to build the attention padding mask.
            raise TypeError("Decoder.forward() requires the 'src_tokens' keyword argument")
        encoder_padding_mask = src_tokens.eq(self.padding_idx)

        # embed tokens and positions
        embedded = self.embed_tokens(trg_tokens) + self.embed_positions(trg_tokens)
        embedded = F.dropout(embedded, p=self.dropout, training=self.training)  # (batch, trg_len, embed_dim)

        conv_input = self.embed2inchannels(embedded)  # (batch, trg_len, in_channels)

        x = conv_input.permute(0, 2, 1)  # (batch, in_channels, trg_len)

        # Averaged attention is accumulated in eval mode but not returned.
        avg_attn_scores = None
        num_attn_layers = len(self.attention)
        residuals = [x]
        for proj, conv, attention, res_layer in zip(self.projections, self.convolutions, self.attention,
                                                    self.residuals):
            if res_layer > 0:
                residual = residuals[-res_layer]
                if proj is not None:
                    # Linear acts on the last axis, so move channels last for
                    # the projection and back again afterwards.
                    residual = proj(residual.permute(0, 2, 1)).permute(0, 2, 1)
            else:
                residual = None

            x = F.dropout(x, p=self.dropout, training=self.training)
            # Left-pad with kernel_size - 1 steps so position t never sees
            # later positions. new_full() inherits x's device and dtype, so
            # the concat works wherever the module's parameters live.
            # NOTE(review): the fill value is the vocabulary padding index,
            # not 0; left unchanged so existing checkpoints see the same
            # inputs -- confirm whether 0 was intended.
            padding = x.new_full((x.shape[0], x.shape[1], conv.kernel_size[0] - 1),
                                 self.padding_idx)
            x = torch.cat((padding, x), dim=2)
            x = conv(x)
            x = F.glu(x, dim=1)

            # attention
            x, attn_scores = attention(x, embedded, encoder_out, encoder_padding_mask)

            if not self.training:
                attn_scores = attn_scores / num_attn_layers
                if avg_attn_scores is None:
                    avg_attn_scores = attn_scores
                else:
                    avg_attn_scores.add_(attn_scores)

            # apply residual connection
            if residual is not None:
                x = (x + residual) * math.sqrt(0.5)
            residuals.append(x)

        conved = self.inchannels2embed(x.permute(0, 2, 1))  # (batch, trg_len, embed_dim)
        conved = F.dropout(conved, p=self.dropout, training=self.training)

        outputs = self.linear_out(conved)

        return outputs  # , avg_attn_scores
|
UIT-ViCoV19QA/models/layers.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch.nn as nn
|
2 |
+
import torch
|
3 |
+
|
4 |
+
def RNN(cell_name):
    """Return the layer factory (``LSTM`` or ``GRU``) for a cell name.

    The name is matched case-insensitively; any other name raises
    ``ValueError``.
    """
    kind = cell_name.lower()
    if kind == 'lstm':
        return LSTM
    if kind == 'gru':
        return GRU
    raise ValueError(f"Unsupported RNN Cell: {cell_name}")
|
11 |
+
|
12 |
+
def Embedding(num_embeddings, embedding_dim, padding_idx):
    """Build an ``nn.Embedding`` initialised from U(-0.1, 0.1).

    Args:
        num_embeddings: vocabulary size.
        embedding_dim: size of each embedding vector.
        padding_idx: index of the padding token, whose row is set to zero,
            or ``None`` for an embedding without a padding row.

    Returns:
        The initialised ``nn.Embedding`` module.
    """
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.uniform_(m.weight, -0.1, 0.1)
    # Guard against None: `m.weight[None]` is a view of the WHOLE matrix, so
    # zeroing it would wipe every embedding rather than a single row.
    if padding_idx is not None:
        nn.init.constant_(m.weight[padding_idx], 0)
    return m
|
18 |
+
|
19 |
+
def Linear(in_features, out_features, bias=True):
    """Build an ``nn.Linear`` whose parameters are drawn from U(-0.1, 0.1).

    Args:
        in_features: Size of each input sample.
        out_features: Size of each output sample.
        bias: Whether the layer has a bias term (also re-initialised).

    Returns:
        The initialised ``nn.Linear`` layer.
    """
    layer = nn.Linear(in_features, out_features, bias=bias)
    # Weight first, then bias, so the random stream is consumed in a fixed order.
    to_init = [layer.weight, layer.bias] if bias else [layer.weight]
    for param in to_init:
        param.data.uniform_(-0.1, 0.1)
    return layer
|
26 |
+
|
27 |
+
def LSTM(input_size, hidden_size, **kwargs):
    """Build an ``nn.LSTM`` with weights and biases drawn from U(-0.1, 0.1).

    Args:
        input_size: Number of features in each input step.
        hidden_size: Number of features in the hidden state.
        **kwargs: Forwarded unchanged to ``nn.LSTM``.

    Returns:
        The initialised ``nn.LSTM`` module.
    """
    layer = nn.LSTM(input_size, hidden_size, **kwargs)
    for param_name, tensor in layer.named_parameters():
        if not ('weight' in param_name or 'bias' in param_name):
            continue
        tensor.data.uniform_(-0.1, 0.1)
    return layer
|
34 |
+
|
35 |
+
def GRU(input_size, hidden_size, **kwargs):
    """Build an ``nn.GRU`` with weights and biases drawn from U(-0.1, 0.1).

    Args:
        input_size: Number of features in each input step.
        hidden_size: Number of features in the hidden state.
        **kwargs: Forwarded unchanged to ``nn.GRU``.

    Returns:
        The initialised ``nn.GRU`` module.
    """
    layer = nn.GRU(input_size, hidden_size, **kwargs)
    for param_name, tensor in layer.named_parameters():
        if not ('weight' in param_name or 'bias' in param_name):
            continue
        tensor.data.uniform_(-0.1, 0.1)
    return layer
|
42 |
+
|
43 |
+
def Conv1d(in_channels, out_channels, kernel_size, padding=0):
    """Build an ``nn.Conv1d`` with N(0, 0.1) weights and a zero bias.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Width of the convolution kernel.
        padding: Padding forwarded to ``nn.Conv1d``.

    Returns:
        The initialised ``nn.Conv1d`` module.
    """
    conv = nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding)
    nn.init.normal_(conv.weight, mean=0, std=0.1)
    nn.init.constant_(conv.bias, 0)
    return conv
|
49 |
+
|
50 |
+
def PositionalEmbedding(num_embeddings, embedding_dim, padding_idx):
    """Build a ``LearnedPositionalEmbedding`` with N(0, 0.1) weights.

    Args:
        num_embeddings: Number of rows in the position table.
        embedding_dim: Size of each position vector.
        padding_idx: Row reserved for padding; it is reset to zeros.

    Returns:
        The initialised ``LearnedPositionalEmbedding``.
    """
    pos_embed = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx)
    nn.init.normal_(pos_embed.weight, mean=0, std=0.1)
    nn.init.constant_(pos_embed.weight[padding_idx], 0)
    return pos_embed
|
56 |
+
|
57 |
+
class LearnedPositionalEmbedding(nn.Embedding):
    """Embedding table indexed by token position rather than token id."""

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super().__init__(num_embeddings, embedding_dim, padding_idx)

    def forward(self, input):
        """Look up position embeddings for a ``(bsz, seqlen)`` token tensor.

        Every non-padding token is replaced by its 1-based rank among the
        non-padding tokens of its row, offset by ``padding_idx``. Padding
        tokens map to ``padding_idx`` itself.
        """
        not_pad = input.ne(self.padding_idx).int()
        running_count = torch.cumsum(not_pad, dim=1).type_as(not_pad)
        # Multiplying by the mask zeroes the count at padding slots.
        positions = (running_count * not_pad).long() + self.padding_idx
        return super().forward(positions)
|
UIT-ViCoV19QA/models/rnn1.py
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Seq2seq based: Neural Machine Translation by Jointly Learning to Align and Translate
|
3 |
+
https://arxiv.org/abs/1409.0473
|
4 |
+
"""
|
5 |
+
import random
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
import torch.nn.functional as F
|
9 |
+
from utils.constants import PAD_TOKEN, EOS_TOKEN, SOS_TOKEN
|
10 |
+
from models.layers import RNN, Embedding, Linear, LSTM, GRU
|
11 |
+
|
12 |
+
class Encoder(nn.Module):
    """Recurrent encoder for the Bahdanau-style seq2seq model.

    Embeds the source tokens, runs them through a (bidirectional) GRU/LSTM
    and condenses the final forward/backward states of the top layer into a
    single initial hidden state for the decoder.

    NOTE: ``linear_out`` and the ``hidden[-2]``/``hidden[-1]`` selection
    assume ``bidirectional=True``.
    """

    def __init__(self, vocabulary, device, embed_dim=512, hidden_size=512,
                 num_layers=2, dropout=0.5, bidirectional=True, cell_name='gru'):
        super().__init__()
        input_dim = len(vocabulary)
        self.vocabulary = vocabulary
        self.pad_id = vocabulary.stoi[PAD_TOKEN]
        self.embed_dim = embed_dim
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.dropout = dropout
        self.bidirectional = bidirectional
        self.cell_name = cell_name
        self.device = device

        self.embed_tokens = Embedding(input_dim, self.embed_dim, self.pad_id)

        self.rnn_cell = RNN(cell_name)
        self.rnn = self.rnn_cell(
            input_size=self.embed_dim,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            # nn.RNN modules only apply dropout between stacked layers
            dropout=self.dropout if self.num_layers > 1 else 0.,
            bidirectional=self.bidirectional
        )
        self.linear_out = nn.Linear(hidden_size * 2, hidden_size)

    def forward(self, src_tokens, **kwargs):
        """
        Forward Encoder

        Args:
            src_tokens (LongTensor): (batch, src_len)
            src_lengths (LongTensor or list, keyword): (batch) true length
                of every source sequence; the batch need not be sorted.

        Returns:
            x (Tensor): (src_len, batch, hidden_size * num_directions)
            hidden (Tensor): (batch, hidden_size)

        Raises:
            ValueError: If ``src_lengths`` is not supplied.
        """
        src_lengths = kwargs.get('src_lengths')
        if src_lengths is None:
            raise ValueError("Encoder.forward requires the 'src_lengths' keyword argument")
        # pack_padded_sequence only accepts a CPU lengths tensor
        if torch.is_tensor(src_lengths):
            src_lengths = src_lengths.cpu()

        src_tokens = src_tokens.t()

        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout, training=self.training)  # (src_len, batch, embed_dim)

        # enforce_sorted=False lets callers pass batches in any order; outputs
        # and hidden states come back in the original batch order.
        packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths, enforce_sorted=False)

        packed_outputs, hidden = self.rnn(packed_x)  # hidden: (n_layers * num_directions, batch, hidden_size)

        x, _ = nn.utils.rnn.pad_packed_sequence(packed_outputs)
        x = F.dropout(x, p=self.dropout, training=self.training)

        # An LSTM returns (h_n, c_n); only h_n seeds the decoder.
        if isinstance(hidden, tuple):
            hidden = hidden[0]

        # input hidden for decoder is the final encoder hidden state
        # since rnn is bidirectional get last forward and backward hidden state
        last_forward = hidden[-2]
        last_backward = hidden[-1]
        hidden = torch.cat((last_forward, last_backward), dim=1)
        hidden = torch.tanh(self.linear_out(hidden))  # (batch, hidden_size)

        return x, hidden
|
73 |
+
|
74 |
+
class Attention(nn.Module):
    """Additive attention over the encoder outputs."""

    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()

        self.linear = nn.Linear((enc_hid_dim * 2) + dec_hid_dim, dec_hid_dim)
        self.v = nn.Parameter(torch.rand(dec_hid_dim))

    def forward(self, hidden, encoder_outputs, mask):
        """
        Forward Attention Layer

        Args:
            hidden (Tensor): (batch, dec_hid_dim)
            encoder_outputs (Tensor): (src_len, batch, enc_hid_dim * 2)
            mask (Tensor): (batch, src_len), zero at positions to ignore

        Returns:
            attention (Tensor): (batch, src_len), each row sums to 1
        """
        src_len, batch = encoder_outputs.shape[0], encoder_outputs.shape[1]

        # Pair the decoder state with every source position.
        tiled_hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)  # (batch, src_len, dec_hid_dim)
        enc_batch_first = encoder_outputs.permute(1, 0, 2)  # (batch, src_len, enc_hid_dim * 2)
        features = torch.cat((tiled_hidden, enc_batch_first), dim=2)

        energy = torch.tanh(self.linear(features)).permute(0, 2, 1)  # (batch, dec_hid_dim, src_len)

        v = self.v.repeat(batch, 1).unsqueeze(1)  # (batch, 1, dec_hid_dim)
        scores = torch.bmm(v, energy).squeeze(1)  # (batch, src_len)

        # Masked positions receive zero probability after the softmax.
        scores = scores.masked_fill(mask == 0, float('-inf'))
        return F.softmax(scores, dim=1)
|
110 |
+
|
111 |
+
class Decoder(nn.Module):
    """Attention-based recurrent decoder for the Bahdanau-style seq2seq model.

    ``num_layers`` is accepted for signature parity with the encoder, but
    the recurrent layer built here always has a single layer.
    """

    def __init__(self, vocabulary, device, embed_dim=512, hidden_size=512,
                 num_layers=2, dropout=0.5, max_positions=500, cell_name='gru'):
        super().__init__()
        self.vocabulary = vocabulary
        self.pad_id = vocabulary.stoi[PAD_TOKEN]
        self.sos_idx = vocabulary.stoi[SOS_TOKEN]
        self.eos_idx = vocabulary.stoi[EOS_TOKEN]

        self.embed_dim = embed_dim
        self.hidden_size = hidden_size
        self.need_attn = True
        self.output_dim = len(vocabulary)

        self.dropout = dropout
        self.max_positions = max_positions
        self.device = device
        self.cell_name = cell_name

        # suppose encoder and decoder have same hidden size
        self.attention = Attention(self.hidden_size, self.hidden_size)
        self.embed_tokens = Embedding(self.output_dim, self.embed_dim, self.pad_id)

        self.rnn_cell = RNN(cell_name)
        self.rnn = self.rnn_cell(
            input_size=(hidden_size * 2) + embed_dim,
            hidden_size=hidden_size,
        )

        self.linear_out = Linear(
            in_features=(hidden_size * 2) + hidden_size + embed_dim,
            out_features=self.output_dim
        )

    def _decoder_step(self, input, hidden, encoder_outputs, mask):
        """Run one decoding step.

        Returns the vocabulary scores ``(batch, output_dim)``, the new hidden
        state ``(batch, hidden_size)`` and the attention weights
        ``(batch, src_len)`` used for this step.
        """
        input = input.unsqueeze(0)  # (1, batch)

        x = self.embed_tokens(input)  # (1, batch, emb_dim)
        x = F.dropout(x, p=self.dropout, training=self.training)

        attn = self.attention(hidden, encoder_outputs, mask)  # (batch, src_len)
        attn = F.dropout(attn, p=self.dropout, training=self.training)

        attn = attn.unsqueeze(1)  # (batch, 1, src_len)

        encoder_outputs = encoder_outputs.permute(1, 0, 2)  # (batch, src_len, 2 * enc_hid_dim)

        # attention-weighted sum of the encoder outputs (context vector)
        weighted = torch.bmm(attn, encoder_outputs)  # (batch, 1, 2 * enc_hid_dim)
        weighted = weighted.permute(1, 0, 2)  # (1, batch, 2 * enc_hid_dim)

        rnn_input = torch.cat((x, weighted), dim=2)  # (1, batch, 2 * enc_hid_dim + embed_dim)

        output, hidden = self.rnn(rnn_input, hidden.unsqueeze(0))
        # output: (1, batch, dec_hid_dim)
        # hidden: (1, batch, dec_hid_dim)

        x = x.squeeze(0)
        output = output.squeeze(0)
        weighted = weighted.squeeze(0)

        x = torch.cat((output, weighted, x), dim=1)
        output = self.linear_out(x)  # (batch, output_dim)

        return output, hidden.squeeze(0), attn.squeeze(1)

    def forward(self, trg_tokens, encoder_out, **kwargs):
        """
        Forward Decoder

        Args:
            trg_tokens (LongTensor or None): (batch, trg_len) gold targets;
                ``None`` switches to greedy inference for at most
                ``max_positions`` steps.
            Tuple (encoder_out):
                encoder_out (Tensor): (src_len, batch, 2 * hidden_size)
                hidden (Tensor): (batch, hidden_size)
            src_tokens (LongTensor, keyword): (batch, src_len)
            teacher_forcing_ratio (float, keyword): probability of feeding the
                gold token at each step; defaults to 0 (never).

        Returns:
            outputs (Tensor): (max_len, batch, output_dim); row 0 is left as
                zeros. During inference the result is cut just before the
                step at which every sequence has produced <eos>.

        Raises:
            ValueError: If ``src_tokens`` is not supplied.
        """
        encoder_out, hidden = encoder_out
        src_tokens = kwargs.get('src_tokens')
        if src_tokens is None:
            raise ValueError("Decoder.forward requires the 'src_tokens' keyword argument")
        teacher_ratio = kwargs.get('teacher_forcing_ratio', 0.)
        src_tokens = src_tokens.t()
        batch = src_tokens.shape[1]

        if trg_tokens is None:
            teacher_ratio = 0.
            inference = True
            trg_tokens = torch.zeros((self.max_positions, batch)).long().\
                fill_(self.sos_idx).\
                to(self.device)
        else:
            trg_tokens = trg_tokens.t()
            inference = False

        max_len = trg_tokens.shape[0]

        # initialize tensors to store the outputs and attentions
        outputs = torch.zeros(max_len, batch, self.output_dim).to(self.device)
        attentions = torch.zeros(max_len, batch, src_tokens.shape[0]).to(self.device)

        # prepare decoder input(<sos> token)
        step_input = trg_tokens[0, :]

        mask = (src_tokens != self.pad_id).permute(1, 0)  # (batch, src_len)

        # Tracks which sequences have already emitted <eos>. Calling .item()
        # on the whole batch would fail for batch sizes above one.
        finished = torch.zeros(batch, dtype=torch.bool, device=outputs.device)

        for i in range(1, max_len):

            # forward through decoder using input, encoder hidden, encoder outputs and mask
            # get predictions, hidden state and attentions
            output, hidden, attention = self._decoder_step(step_input, hidden, encoder_out, mask)

            # save predictions for position i
            outputs[i] = output

            # save attention for position i
            attentions[i] = attention

            # if teacher forcing
            #   use actual next token as input for next position
            # else
            #   use highest predicted token
            step_input = trg_tokens[i] if random.random() < teacher_ratio else output.argmax(1)

            # if inference is enabled and every sequence has predicted <eos> then stop
            # and return everything till position i
            if inference:
                finished |= step_input == self.eos_idx
                if finished.all():
                    return outputs[:i]  # , attentions[:i]

        return outputs  # , attentions
|
UIT-ViCoV19QA/models/rnn2.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Seq2seq based: Effective Approaches to Attention-based Neural Machine Translation
|
3 |
+
https://arxiv.org/abs/1508.04025
|
4 |
+
"""
|
5 |
+
import random
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
import torch.nn.functional as F
|
9 |
+
from utils.constants import PAD_TOKEN, EOS_TOKEN, SOS_TOKEN
|
10 |
+
from models.layers import RNN, Embedding, Linear, LSTM, GRU
|
11 |
+
|
12 |
+
class Encoder(nn.Module):
    """Batch-first recurrent encoder for the Luong-style seq2seq model.

    Token embeddings share the recurrent layer's ``hidden_size``.
    """

    def __init__(self, vocabulary, device, cell_name, hidden_size=512, num_layers=2,
                 bidirectional=True, dropout=0.5):
        super().__init__()
        input_dim = len(vocabulary)
        self.num_layers = num_layers
        self.pad_id = vocabulary.stoi[PAD_TOKEN]
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.device = device
        self.rnn_cell = RNN(cell_name)

        self.embedding = Embedding(input_dim, self.hidden_size, self.pad_id)
        self.dropout = dropout

        self.rnn = self.rnn_cell(
            input_size=self.hidden_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
            bidirectional=self.bidirectional,
            # nn.RNN modules only apply dropout between stacked layers
            dropout=self.dropout if self.num_layers > 1 else 0.
        )

    def forward(self, src_tokens, **kwargs):
        """
        Forward Encoder

        Args:
            src_tokens (LongTensor): (batch, src_len)
            src_lengths (LongTensor or list, keyword): (batch) true length
                of every source sequence; the batch need not be sorted.

        Returns:
            output (Tensor): (batch, src_len, hidden_size * num_directions)
            hidden (Tensor or tuple of Tensors):
                (num_layers, batch, hidden_size * num_directions); a
                ``(h, c)`` tuple when the recurrent layer is an LSTM.

        Raises:
            ValueError: If ``src_lengths`` is not supplied.
        """
        src_lengths = kwargs.get('src_lengths')
        if src_lengths is None:
            raise ValueError("Encoder.forward requires the 'src_lengths' keyword argument")
        # pack_padded_sequence only accepts a CPU lengths tensor
        if torch.is_tensor(src_lengths):
            src_lengths = src_lengths.cpu()

        embedded = self.embedding(src_tokens)
        embedded = F.dropout(embedded, p=self.dropout, training=self.training)

        # enforce_sorted=False lets callers pass batches in any order; outputs
        # and hidden states come back in the original batch order.
        embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, src_lengths, batch_first=True, enforce_sorted=False)
        output, hidden = self.rnn(embedded)

        output, _ = nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
        output = F.dropout(output, p=self.dropout, training=self.training)

        if isinstance(hidden, tuple):
            hidden = tuple([self._cat_directions(h) for h in hidden])
        else:
            hidden = self._cat_directions(hidden)

        return output, hidden

    def _cat_directions(self, h):
        """
        If the encoder is bidirectional, do the following transformation.
        (#directions * #layers, #batch, hidden_size) -> (#layers, #batch, #directions * hidden_size)
        """
        if self.bidirectional:
            # even rows hold forward states, odd rows hold backward states
            h = torch.cat([h[0:h.size(0):2], h[1:h.size(0):2]], 2)
        return h
|
73 |
+
|
74 |
+
class Attention(nn.Module):
    """Multiplicative attention followed by a tanh output projection."""

    def __init__(self, input_embed, source_embed, output_embed):
        super().__init__()
        self.linear_in = Linear(input_embed, source_embed)
        self.linear_out = Linear(input_embed+source_embed, output_embed)

    def forward(self, output, context, mask):
        """
        Forward Attention

        Args:
            output (Tensor): decoder output; dimension 1 is squeezed away to
                give ``bsz x input_embed``
            context (Tensor): encoder outputs, ``bsz x srclen x source_embed``
            mask (Tensor): ``srclen x bsz``, zero at positions to ignore

        Returns:
            x (Tensor): ``bsz x output_embed`` attentional state
            attn_scores (Tensor): ``srclen x bsz`` attention weights
        """
        query = output.squeeze(1)  # bsz x input_embed
        source_hids = context.permute(1, 0, 2)  # srclen x bsz x source_embed

        projected = self.linear_in(query)

        # dot product of every source state with the projected query
        attn_scores = (source_hids * projected.unsqueeze(0)).sum(dim=2)

        # don't attend over padding
        attn_scores = attn_scores.float().masked_fill(mask == 0, float('-inf'))
        attn_scores = F.softmax(attn_scores, dim=0)  # srclen x bsz

        # attention-weighted sum of the source states
        weighted = (attn_scores.unsqueeze(2) * source_hids).sum(dim=0)

        combined = torch.cat((weighted, query), dim=1)
        return torch.tanh(self.linear_out(combined)), attn_scores
|
109 |
+
|
110 |
+
class Decoder(nn.Module):
    """Batch-first recurrent decoder with Luong-style attention.

    The encoder's concatenated bidirectional hidden state
    (``2 * hidden_size``) is projected down to ``hidden_size`` to seed the
    decoder's recurrent layer.
    """

    def __init__(self, vocabulary, device,cell_name, hidden_size=512, num_layers=2,
                 max_len=500, dropout=0.5):
        super().__init__()
        self.output_dim = len(vocabulary)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.max_length = max_len
        self.device = device
        self.eos_id = vocabulary.stoi[EOS_TOKEN]
        self.sos_id = vocabulary.stoi[SOS_TOKEN]
        self.pad_id = vocabulary.stoi[PAD_TOKEN]
        self.rnn_cell = RNN(cell_name)

        self.encoder_proj = Linear(hidden_size*2, hidden_size)

        self.embedding = Embedding(self.output_dim, self.hidden_size, self.pad_id)
        self.dropout = dropout

        self.rnn = self.rnn_cell(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
            # nn.RNN modules only apply dropout between stacked layers
            dropout=self.dropout if num_layers > 1 else 0.
        )

        self.attention = Attention(self.hidden_size, self.hidden_size*2, self.hidden_size)
        self.linear_out = Linear(self.hidden_size, self.output_dim)

    def _decoder_step(self, input_var, hidden, encoder_outputs, mask):
        """Run one decoding step.

        Returns log-probabilities over the vocabulary ``(batch, output_dim)``,
        the new recurrent state and the attention weights returned by
        ``self.attention``.
        """
        input_var = input_var.unsqueeze(1)  # (batch, 1)

        embedded = self.embedding(input_var)
        embedded = F.dropout(embedded, p=self.dropout, training=self.training)

        output, hidden = self.rnn(embedded, hidden)
        output = F.dropout(output, p=self.dropout, training=self.training)

        output, attn = self.attention(output, encoder_outputs, mask)
        output = F.dropout(output, p=self.dropout, training=self.training)

        output = self.linear_out(output)
        output = F.log_softmax(output, dim=1)

        return output, hidden, attn

    def forward(self, trg_tokens, encoder_out, **kwargs):
        """
        Forward Decoder

        Args:
            trg_tokens (LongTensor or None): (batch, trg_len) gold targets;
                ``None`` switches to greedy inference for at most
                ``max_length`` steps.
            Tuple (encoder_out):
                encoder_out (Tensor): (batch, src_len, 2 * hidden_size)
                hidden (Tensor or tuple): encoder state(s) of shape
                    (num_layers, batch, 2 * hidden_size)
            src_tokens (LongTensor, keyword): (batch, src_len)
            teacher_forcing_ratio (float, keyword): probability of feeding the
                gold token at each step; defaults to 0 (never).

        Returns:
            outputs (Tensor): (max_length, batch, output_dim) log-probabilities;
                row 0 is left as zeros. During inference the result is cut
                just before the step at which every sequence has produced
                <eos>.

        Raises:
            ValueError: If ``src_tokens`` is not supplied.
        """
        encoder_out, hidden = encoder_out
        src_tokens = kwargs.get('src_tokens')
        if src_tokens is None:
            raise ValueError("Decoder.forward requires the 'src_tokens' keyword argument")
        teacher_forcing_ratio = kwargs.get('teacher_forcing_ratio', 0.)
        batch_size, src_length = src_tokens.size()

        if trg_tokens is None:
            teacher_forcing_ratio = 0.
            inference = True
            trg_tokens = torch.zeros((batch_size, self.max_length)).long().\
                fill_(self.sos_id).\
                to(self.device)
        else:
            inference = False

        max_length = trg_tokens.shape[1]

        outputs = torch.zeros(max_length, batch_size, self.output_dim).to(self.device)
        attentions = torch.zeros(max_length, batch_size, src_length).to(self.device)

        mask = (src_tokens != self.pad_id).t()  # (src_len, batch)

        # check whether encoder has lstm or gru hidden state and
        # project their output to decoder hidden state
        if isinstance(hidden, tuple):
            # nn.LSTM documents its initial state as an (h_0, c_0) tuple
            hidden = tuple(self.encoder_proj(h) for h in hidden)
        else:
            hidden = self.encoder_proj(hidden)

        decoder_input = trg_tokens[:, 0]

        # Tracks which sequences have already emitted <eos>. Calling .item()
        # on the whole batch would fail for batch sizes above one.
        finished = torch.zeros(batch_size, dtype=torch.bool, device=outputs.device)

        # Here we miss the output for position 0
        for i in range(1, max_length):
            output, hidden, attention = self._decoder_step(decoder_input, hidden, encoder_out, mask)
            outputs[i] = output
            attentions[i] = attention.t()
            use_teacher_forcing = random.random() < teacher_forcing_ratio
            decoder_input = trg_tokens[:, i] if use_teacher_forcing else output.argmax(1)

            if inference:
                finished |= decoder_input == self.eos_id
                if finished.all():
                    return outputs[:i]  # , attentions[:i]

        return outputs  # , attentions
|
UIT-ViCoV19QA/models/seq2seq.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Main sequence to sequence class which connects
|
3 |
+
encoder-decoder model
|
4 |
+
"""
|
5 |
+
import torch.nn as nn
|
6 |
+
|
7 |
+
class Seq2Seq(nn.Module):
    """
    Thin wrapper that chains an encoder and a decoder.
    """
    def __init__(self, encoder, decoder, name):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.name = name

    def forward(self, src_tokens, src_lengths, trg_tokens, teacher_forcing_ratio=0.5):
        """
        Run the forward pass for an encoder-decoder model.

        Args:
            src_tokens (LongTensor): source token ids, passed to both the
                encoder and (as ``src_tokens=``) the decoder
            src_lengths (LongTensor): source sentence lengths, passed to the
                encoder as ``src_lengths=``
            trg_tokens (LongTensor): target token ids for teacher forcing,
                passed to the decoder as its first argument
            teacher_forcing_ratio (float): teacher forcing probability

        Returns:
            Whatever the wrapped decoder returns.
        """
        return self.decoder(
            trg_tokens,
            self.encoder(src_tokens, src_lengths=src_lengths),
            src_tokens=src_tokens,
            teacher_forcing_ratio=teacher_forcing_ratio,
        )
|
UIT-ViCoV19QA/models/transformer.py
ADDED
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Seq2seq based: Attention Is All You Need
|
3 |
+
https://arxiv.org/abs/1706.03762
|
4 |
+
"""
|
5 |
+
import math
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
import torch.onnx.operators
|
9 |
+
import torch.nn.functional as F
|
10 |
+
from torch.autograd import Variable
|
11 |
+
from utils.constants import PAD_TOKEN
|
12 |
+
|
13 |
+
class Encoder(nn.Module):
    """Transformer encoder: scaled token embeddings plus positions, then a stack of layers."""

    def __init__(self, vocabulary, device, embed_dim=512, layers=2,
                 heads=8, pf_dim=2048, dropout=0.5, max_positions=5000):
        super().__init__()
        vocab_size = len(vocabulary)
        self.padding_idx = vocabulary.stoi[PAD_TOKEN]
        self.dropout = dropout
        self.device = device

        # token embeddings are multiplied by sqrt(embed_dim) in forward()
        self.scale = math.sqrt(embed_dim)
        self.embed_tokens = nn.Embedding(vocab_size, embed_dim)
        self.embed_positions = PositionalEmbedding(embed_dim, dropout, max_positions)

        stack = [EncoderLayer(embed_dim, heads, pf_dim, dropout, device) for _ in range(layers)]
        self.layers = nn.ModuleList(stack)

    def forward(self, src_tokens, **kwargs):
        """
        Forward pass for transformer encoder

        Args:
            src_tokens (LongTensor): (batch, src_len)

        Returns:
            x (Tensor): (batch, src_len, embed_dim)
        """
        # (batch, 1, 1, src_len): True wherever the token is not padding
        not_pad = src_tokens != self.padding_idx
        src_mask = not_pad.unsqueeze(1).unsqueeze(2)

        x = self.embed_tokens(src_tokens) * self.scale
        x += self.embed_positions(src_tokens)
        x = F.dropout(x, p=self.dropout, training=self.training)

        for encoder_layer in self.layers:
            x = encoder_layer(x, src_mask)

        return x
|
47 |
+
|
48 |
+
class EncoderLayer(nn.Module):
    """One encoder layer: self-attention and feed-forward, each with residual + LayerNorm.

    The same ``LayerNorm`` instance is applied after both sub-layers.
    """

    def __init__(self, embed_dim, heads, pf_dim, dropout, device):
        super().__init__()

        self.layer_norm = nn.LayerNorm(embed_dim)
        self.self_attn = MultiHeadedAttention(embed_dim, heads, dropout, device)
        self.pos_ff = PositionwiseFeedforward(embed_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src_tokens, src_mask):
        """
        Forward pass for transformer encoder layer

        Args:
            src_tokens (Tensor): (batch, src_len, embed_dim) embedded source
            src_mask (Tensor): padding mask handed to the attention module

        Returns:
            x (Tensor): (batch, src_len, embed_dim)
        """
        # self-attention sub-layer
        attended = self.self_attn(src_tokens, src_tokens, src_tokens, src_mask)
        x = self.layer_norm(src_tokens + self.dropout(attended))

        # position-wise feed-forward sub-layer
        transformed = self.pos_ff(x)
        return self.layer_norm(x + self.dropout(transformed))
|
71 |
+
|
72 |
+
class Decoder(nn.Module):
    """Transformer decoder: embeds targets, applies the decoder layers, projects to the vocabulary."""

    def __init__(self, vocabulary, device, embed_dim=512, layers=2,
                 heads=8, pf_dim=2048, dropout=0.5, max_positions=5000):
        super().__init__()

        vocab_size = len(vocabulary)
        self.pad_id = vocabulary.stoi[PAD_TOKEN]
        self.pf_dim = pf_dim
        self.dropout = dropout
        self.device = device
        self.max_positions = max_positions

        # token embeddings are multiplied by sqrt(embed_dim) in forward()
        self.scale = math.sqrt(embed_dim)
        self.embed_tokens = nn.Embedding(vocab_size, embed_dim)
        self.embed_positions = PositionalEmbedding(embed_dim, dropout, max_positions)

        stack = [DecoderLayer(embed_dim, heads, pf_dim, dropout, device) for _ in range(layers)]
        self.layers = nn.ModuleList(stack)

        self.linear_out = nn.Linear(embed_dim, vocab_size)

    def make_masks(self, src_tokens, trg_tokens):
        """Build the source padding mask and the target padding + causal mask.

        Returns:
            src_mask (BoolTensor): (batch, 1, 1, src_len)
            trg_mask (ByteTensor): (batch, 1, trg_len, trg_len); entry
                ``[b, 0, i, j]`` is 1 when target token ``i`` is not padding
                and ``j <= i``.
        """
        src_mask = (src_tokens != self.pad_id).unsqueeze(1).unsqueeze(2)

        trg_len = trg_tokens.shape[1]
        # lower-triangular matrix: position i may only look at positions <= i
        causal = torch.tril(torch.ones((trg_len, trg_len), device=self.device)).byte()
        trg_not_pad = (trg_tokens != self.pad_id).unsqueeze(1).unsqueeze(3).byte()

        return src_mask, trg_not_pad & causal

    def forward(self, trg_tokens, encoder_out, **kwargs):
        """
        Forward pass for transformer decoder

        Args:
            trg_tokens (LongTensor): (batch, trg_len)
            encoder_out (Tensor): (batch, src_len, embed_dim)
            src_tokens (LongTensor, keyword): (batch, src_len)

        Returns:
            x (Tensor): (batch, trg_len, output_dim)
        """
        src_tokens = kwargs.get('src_tokens', '')
        src_mask, trg_mask = self.make_masks(src_tokens, trg_tokens)

        x = self.embed_tokens(trg_tokens) * self.scale
        x += self.embed_positions(trg_tokens)
        x = F.dropout(x, p=self.dropout, training=self.training)

        for decoder_layer in self.layers:
            x = decoder_layer(x, encoder_out, trg_mask, src_mask)

        return self.linear_out(x)
|
124 |
+
|
125 |
+
class DecoderLayer(nn.Module):
    """One decoder layer: self-attention, encoder attention and feed-forward.

    Each sub-layer is wrapped in dropout + residual + LayerNorm, and the same
    ``LayerNorm`` instance is applied after all three.
    """

    def __init__(self, embed_dim, heads, pf_dim, dropout, device):
        super().__init__()
        self.layer_norm = nn.LayerNorm(embed_dim)
        self.self_attn = MultiHeadedAttention(embed_dim, heads, dropout, device)
        self.src_attn = MultiHeadedAttention(embed_dim, heads, dropout, device)
        self.pos_ff = PositionwiseFeedforward(embed_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, embed_trg, embed_src, trg_mask, src_mask):
        """
        Forward pass for transformer decoder layer

        Args:
            embed_trg (Tensor): (batch, trg_len, embed_dim)
            embed_src (Tensor): (batch, src_len, embed_dim)
            trg_mask (Tensor): mask handed to the target self-attention
            src_mask (Tensor): mask handed to the encoder attention

        Returns:
            x (Tensor): (batch, trg_len, embed_dim)
        """
        # masked self-attention over the target
        self_attended = self.self_attn(embed_trg, embed_trg, embed_trg, trg_mask)
        x = self.layer_norm(embed_trg + self.dropout(self_attended))

        # attention over the encoder output
        src_attended = self.src_attn(x, embed_src, embed_src, src_mask)
        x = self.layer_norm(x + self.dropout(src_attended))

        # position-wise feed-forward
        transformed = self.pos_ff(x)
        return self.layer_norm(x + self.dropout(transformed))
|
151 |
+
|
152 |
+
class MultiHeadedAttention(nn.Module):
    """Scaled dot-product attention split across ``heads`` heads."""

    def __init__(self, embed_dim, heads, dropout, device):
        super().__init__()
        assert embed_dim % heads == 0
        self.attn_dim = embed_dim // heads
        self.heads = heads
        self.dropout = dropout

        self.linear_q = nn.Linear(embed_dim, embed_dim)
        self.linear_k = nn.Linear(embed_dim, embed_dim)
        self.linear_v = nn.Linear(embed_dim, embed_dim)

        # Registered as a buffer so that .to()/.cuda()/.half() move and cast it
        # together with the parameters. persistent=False keeps it out of
        # state_dict, so existing checkpoints still load unchanged.
        self.register_buffer(
            'scale',
            torch.sqrt(torch.FloatTensor([self.attn_dim])).to(device),
            persistent=False,
        )

        self.linear_out = nn.Linear(embed_dim, embed_dim)

    def forward(self, query, key, value, mask=None):
        """
        Forward pass for multi-headed attention

        Args:
            query (Tensor): (batch, query_len, embed_dim)
            key (Tensor): (batch, key_len, embed_dim)
            value (Tensor): (batch, key_len, embed_dim)
            mask (Tensor, optional): broadcastable to
                (batch, heads, query_len, key_len); zero entries are
                excluded from attention

        Returns:
            x (Tensor): (batch, query_len, embed_dim)
        """
        batch_size = query.shape[0]

        Q = self.linear_q(query)
        K = self.linear_k(key)
        V = self.linear_v(value)

        Q = Q.view(batch_size, -1, self.heads, self.attn_dim).permute(0, 2, 1, 3)  # (batch, heads, query_len, attn_dim)
        K = K.view(batch_size, -1, self.heads, self.attn_dim).permute(0, 2, 1, 3)  # (batch, heads, key_len, attn_dim)
        V = V.view(batch_size, -1, self.heads, self.attn_dim).permute(0, 2, 1, 3)  # (batch, heads, key_len, attn_dim)

        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale  # (batch, heads, query_len, key_len)

        if mask is not None:
            # a large negative score becomes ~0 probability after the softmax
            energy = energy.masked_fill(mask == 0, -1e10)

        attention = F.softmax(energy, dim=-1)  # (batch, heads, query_len, key_len)
        attention = F.dropout(attention, p=self.dropout, training=self.training)

        x = torch.matmul(attention, V)  # (batch, heads, query_len, attn_dim)
        x = x.permute(0, 2, 1, 3).contiguous()  # (batch, query_len, heads, attn_dim)
        x = x.view(batch_size, -1, self.heads * (self.attn_dim))  # (batch, query_len, embed_dim)
        x = self.linear_out(x)

        return x
|
204 |
+
|
205 |
+
class PositionwiseFeedforward(nn.Module):
    """Position-wise feed-forward block: Linear -> ReLU -> dropout -> Linear."""

    def __init__(self, embed_dim, pf_dim, dropout):
        super().__init__()
        self.linear_1 = nn.Linear(embed_dim, pf_dim)
        self.linear_2 = nn.Linear(pf_dim, embed_dim)
        # Stored as a probability; dropout is applied functionally in forward().
        self.dropout = dropout

    def forward(self, x):
        """
        Expand to ``pf_dim``, apply ReLU and dropout, project back.
        Args:
            x (Tensor): (batch, src_len, embed_dim)
        Returns:
            x (Tensor): (batch, src_len, embed_dim)
        """
        hidden = F.relu(self.linear_1(x))
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        projected = self.linear_2(hidden)
        return projected
|
225 |
+
|
226 |
+
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional-encoding table.

    Even feature columns hold ``sin(pos * freq)`` and odd columns hold
    ``cos(pos * freq)``. The table is registered as a buffer, so it is not
    trained but moves with the module across devices and dtypes.
    """

    def __init__(self, d_model, dropout, max_len=500):
        """
        Args:
            d_model (int): width of the encoding (even or odd)
            dropout: accepted for interface compatibility; not used here
            max_len (int): number of positions precomputed in the table
        """
        super().__init__()
        pos_embed = torch.zeros(max_len, d_model)
        position = torch.arange(0., max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0., d_model, 2) * -(math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pos_embed[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine
        # columns, so trim the angles to the number of odd-indexed slots.
        pos_embed[:, 1::2] = torch.cos(angles[:, :d_model // 2])
        pos_embed = pos_embed.unsqueeze(0)  # (1, max_len, d_model)
        self.register_buffer('pos_embed', pos_embed)

    def forward(self, x):
        """
        Return the encodings for the first ``x.size(1)`` positions.
        Args:
            x (Tensor): only its second dimension (sequence length) is read
        Returns:
            Tensor: (1, min(x.size(1), max_len), d_model); carries no gradient
        """
        # Buffers never require grad, so the deprecated Variable wrapper is not needed.
        return self.pos_embed[:, :x.size(1)]
|
241 |
+
|
242 |
+
class NoamOpt:
    """Optimizer wrapper that applies a warmup-then-decay learning rate.

    The rate is ``factor * model_size**-0.5 * min(step**-0.5, step * warmup**-1.5)``:
    it grows linearly for ``warmup`` steps, then decays with the inverse
    square root of the step number.
    """

    def __init__(self, optimizer, model_size=512, factor=1, warmup=2000):
        """
        Args:
            optimizer: wrapped optimizer; must expose ``param_groups``,
                ``step()`` and ``zero_grad()``
            model_size (int): model width used to scale the rate
            factor (float): global multiplier for the rate
            warmup (int): number of warmup steps; 0 or less disables warmup
        """
        self.optimizer = optimizer
        self._step = 0
        self.warmup = warmup
        self.factor = factor
        self.model_size = model_size
        self._rate = 0
        self.param_groups = optimizer.param_groups

    def step(self):
        """Advance one step, write the new rate to every param group, then step the optimizer."""
        self._step += 1
        rate = self.rate()
        for group in self.optimizer.param_groups:
            group['lr'] = rate
        self._rate = rate
        self.optimizer.step()

    def rate(self, step=None):
        """Return the learning rate for ``step`` (defaults to the current step).

        Returns 0.0 for ``step <= 0`` instead of dividing by zero, so the
        rate can be queried before the first call to ``step()``.
        """
        if step is None:
            step = self._step
        if step <= 0:
            return 0.0
        decay = step ** (-0.5)
        if self.warmup > 0:
            # Linear warmup caps the rate until it meets the decay curve.
            decay = min(decay, step * self.warmup ** (-1.5))
        return self.factor * (self.model_size ** (-0.5) * decay)

    def zero_grad(self):
        """Clear the gradients held by the wrapped optimizer."""
        self.optimizer.zero_grad()
|
app.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
3 |
+
import gradio as gr
|
4 |
+
import subprocess
|
5 |
+
import os
|
6 |
+
|
7 |
+
def run_shell_command(command):
    """Run ``command`` through the shell and return its standard output.

    Args:
        command (str): shell command line. It is executed with
            ``shell=True``, so it must come from a trusted source.

    Returns:
        str: the command's stdout decoded as UTF-8.

    Raises:
        RuntimeError: if the command exits with a non-zero status; the
            message contains the command and its stderr output.
    """
    # stderr has to be piped explicitly, otherwise it can never be reported.
    completed = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Use the exit status to detect failure: many tools (git included) write
    # progress to stderr even when they succeed.
    if completed.returncode != 0:
        stderr_text = completed.stderr.decode('utf-8', errors='replace')
        raise RuntimeError(f"Error running command: {command}\n{stderr_text}")
    return completed.stdout.decode('utf-8')
|
13 |
+
|
14 |
+
def load_model_and_tokenizer(model_path):
    """Load the seq2seq model and its tokenizer from ``model_path``.

    Args:
        model_path (str): value passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        tuple: ``(tokenizer, model, device)``, with the model already moved
        to ``device``.
    """
    # Load the trained tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Load the trained model
    model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

    # Move the model to the GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    return tokenizer, model, device
|
26 |
+
|
27 |
+
def generate_text(tokenizer, model, device, prompt, max_length=100,
                  num_return_sequences=1, top_p=0.95, temperature=0.7):
    """Sample one or more continuations of ``prompt`` from ``model``.

    Args:
        tokenizer: object providing ``encode`` and ``decode``.
        model: object providing ``generate``.
        device: device the encoded prompt is moved to before generation.
        prompt (str): input text.
        max_length: forwarded to ``model.generate``.
        num_return_sequences: forwarded to ``model.generate``.
        top_p: forwarded to ``model.generate``.
        temperature: forwarded to ``model.generate``.

    Returns:
        list: one decoded string per generated sequence, special tokens removed.
    """
    # Encode the prompt as a PyTorch tensor on the target device.
    encoded_prompt = tokenizer.encode(prompt, return_tensors='pt')
    input_ids = encoded_prompt.to(device)

    # Sampling configuration; the n-gram block and top-k are fixed here.
    sampling_options = {
        'max_length': max_length,
        'num_return_sequences': num_return_sequences,
        'no_repeat_ngram_size': 2,
        'top_k': 50,
        'top_p': top_p,
        'temperature': temperature,
        'do_sample': True,
    }
    sequences = model.generate(input_ids, **sampling_options)

    # Turn every generated id sequence back into text.
    generated_text = []
    for ids in sequences:
        generated_text.append(tokenizer.decode(ids, skip_special_tokens=True))
    return generated_text
|
48 |
+
|
49 |
+
def gradio_generate_text(prompt, max_length=100, num_return_sequences=1, top_p=0.95, temperature=0.7):
    """Gradio callback: run ``generate_text`` with the module-level tokenizer, model and device."""
    return generate_text(
        tokenizer,
        model,
        device,
        prompt,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        top_p=top_p,
        temperature=temperature,
    )
|
52 |
+
|
53 |
+
# Ensure the models directory exists
# NOTE: this bootstrap is disabled; the model files are expected to be present
# under models/ already.
# if not os.path.exists('models'):
#     os.makedirs('models')
# if not os.path.exists('models/vi-medical-t5-finetune-qa'):
#     # Run the Git LFS commands to clone the model
#     run_shell_command('git lfs install')
#     run_shell_command('cd models && git lfs clone https://huggingface.co/danhtran2mind/vi-medical-t5-finetune-qa && cd ..')

# Load the trained model and tokenizer
model_path = "models/vi-medical-t5-finetune-qa"
tokenizer, model, device = load_model_and_tokenizer(model_path)

# Create Gradio interface.
# Components are taken from the top-level gradio namespace with `value=`:
# the legacy `gr.inputs` / `gr.outputs` namespaces and the `default=` keyword
# are not available in current Gradio releases, and requirements.txt does not
# pin a Gradio version.
iface = gr.Interface(
    fn=gradio_generate_text,
    inputs=[
        gr.Textbox(lines=5, label="Input Prompt"),
        # step=1 keeps the two integer-valued generation settings whole.
        gr.Slider(minimum=10, maximum=500, value=100, step=1, label="Max Length"),
        gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Number of Sequences"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top-p Sampling"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Vietnamese Medical T5 Fine-Tuned Model",
    description="Generate text using a fine-tuned Vietnamese medical T5 model."
)

# Launch the Gradio interface
iface.launch()
|
dataset/1_ans/UIT-ViCoV19QA_test.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d254fafb6c9340b101bf478536af6e1dd7b1ffd516a820f2580e48df88794573
|
3 |
+
size 520161
|
dataset/1_ans/UIT-ViCoV19QA_train.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3660454cae6ff01e7c0046708abfe46dc67357bb239ec27bb4aa7d692c941149
|
3 |
+
size 3711672
|
dataset/1_ans/UIT-ViCoV19QA_val.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18d4db7ec783e08f8cba280109b70b029c38aeecbdd72330eedc7cc52324687b
|
3 |
+
size 520352
|
dataset/2_ans/UIT-ViCoV19QA_test.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c42f217da35a5318dacd46f85e003ac370471fc2b3e491e4ead730fcf77d0685
|
3 |
+
size 582656
|
dataset/2_ans/UIT-ViCoV19QA_train.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85130ab449d23fe672e6a91b1791fdd5bae3b5b72bfc5f32620c5099bf82013f
|
3 |
+
size 4122052
|
dataset/2_ans/UIT-ViCoV19QA_val.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:109b7905010d967ade5299b0de22211606685557fd09d785179206654ec941b6
|
3 |
+
size 579852
|
dataset/3_ans/UIT-ViCoV19QA_test.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:188b82c5487b3bb6ff94bf654a1702f344c753c79e8bf27fcaf9df03f9bb6f55
|
3 |
+
size 600605
|
dataset/3_ans/UIT-ViCoV19QA_train.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:284958313282493c3b04c3eca2fe60c640eda64a81bd218b5a1ea0b1b07bb52a
|
3 |
+
size 4240422
|
dataset/3_ans/UIT-ViCoV19QA_val.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3cb2fd30ac52b1ff58eee680fa0135208841da39d873ed379841b88a27b822b4
|
3 |
+
size 595439
|
dataset/4_ans/UIT-ViCoV19QA_test.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99860bae4ae5043e869c8047558d7db6d40e76d174aed522b7f37d7931c64fc9
|
3 |
+
size 610868
|
dataset/4_ans/UIT-ViCoV19QA_train.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e2e7f21cef6898d42eafc5ee094e9dac285e251af19f9f6b9c274d92f446881
|
3 |
+
size 4300607
|
dataset/4_ans/UIT-ViCoV19QA_val.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfe58d5c3e89a404c2db0620e3e9c893e8e0e29bfeaab05b6650f7ca6e82946a
|
3 |
+
size 603979
|
dataset/UIT-ViCoV19QA.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65b147559695843829a0932fcaab9e6d1415d9821c3a0a3c1aa7ff8118a6ac6f
|
3 |
+
size 5515361
|
models/README.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:152dc41e703f31f4950210d6df2c69e384952acf1e99496b051cc2a86b010715
|
3 |
+
size 323
|
models/vi-medical-t5-finetune-qa/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:131a6e62a18a252551d92684ef82c7d7f8ef9f9f750ea4d666147d954bcc6de9
|
3 |
+
size 19282
|
notebooks/vi-medical-t5-finetune-qa.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas==2.2.3
|
2 |
+
numpy==1.26.4
|
3 |
+
matplotlib==3.7.5
|
4 |
+
scikit-learn==1.2.2
|
5 |
+
gensim==4.3.3
|
6 |
+
underthesea==6.8.4
|
7 |
+
tensorflow==2.17.1
|
8 |
+
datasets==3.3.1
|
9 |
+
torch==2.5.1
|
10 |
+
transformers==4.47.0
|
11 |
+
gradio
|