antonlabate committed
Commit d758c99 · Parent(s): 13c08c5
ver 1.3
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the rest.
- gap-text2sql-main/CODE_OF_CONDUCT.md +4 -0
- gap-text2sql-main/CONTRIBUTING.md +59 -0
- gap-text2sql-main/LICENSE +175 -0
- gap-text2sql-main/NOTICE +1 -0
- gap-text2sql-main/README.md +202 -0
- gap-text2sql-main/data/preprocessed_data/bart_parser_pretrain_label_mapping.json +11 -0
- gap-text2sql-main/mrat-sql-gap/.gitignore +133 -0
- gap-text2sql-main/mrat-sql-gap/.ptignore +19 -0
- gap-text2sql-main/mrat-sql-gap/BART_large.sh +33 -0
- gap-text2sql-main/mrat-sql-gap/BERTimbau-base.sh +34 -0
- gap-text2sql-main/mrat-sql-gap/BERTimbau-large.sh +33 -0
- gap-text2sql-main/mrat-sql-gap/crash_on_ipy.py +19 -0
- gap-text2sql-main/mrat-sql-gap/data/spider/generate.sh +24 -0
- gap-text2sql-main/mrat-sql-gap/data/spider/train_spider.json.patch +49 -0
- gap-text2sql-main/mrat-sql-gap/data/sqlite_files/singer/singer.sqlite +0 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BART-large-en/gap-bart.jsonnet +106 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BART-large-en/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BERTimbau-base/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BERTimbau-base/nl2code-bertimbau-base.jsonnet +110 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BERTimbau-large/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BERTimbau-large/nl2code-bertimbau-large.jsonnet +110 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en-enr-enb/T5-v1_1.jsonnet +106 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en-enr-enb/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en-extra-3enr-1en/T5-v1_1.jsonnet +106 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en-extra-3enr-1en/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en/T5-v1_1.jsonnet +106 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-en/T5-v1_1.jsonnet +106 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-en/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-en-pt-es-fr/gap-bart.jsonnet +103 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-en-pt-es-fr/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-en/gap-bart.jsonnet +103 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-en/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-pt-en/gap-bart.jsonnet +103 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-pt-en/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-pt/gap-bart.jsonnet +103 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-pt/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-FIT-en-pt-es-fr-extra-3enr-3ptr-3esr-3frr/mT5.jsonnet +106 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-FIT-en-pt-es-fr-extra-3enr-3ptr-3esr-3frr/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-FIT-en-pt-es-fr/mT5.jsonnet +106 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-FIT-en-pt-es-fr/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en-pt-es-fr-enr-enb/mT5.jsonnet +106 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en-pt-es-fr-enr-enb/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en-pt-es-fr/gap-mT5.jsonnet +106 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en-pt-es-fr/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en/gap-mT5.jsonnet +106 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en/nl2code-base.libsonnet +109 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/spider-BART-large-en-train_en-eval.jsonnet +33 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/spider-BERTimbau-base-pt.jsonnet +33 -0
- gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/spider-BERTimbau-large-pt.jsonnet +33 -0
gap-text2sql-main/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,4 @@
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.
gap-text2sql-main/CONTRIBUTING.md
ADDED
@@ -0,0 +1,59 @@
# Contributing Guidelines

Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.

Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.


## Reporting Bugs/Feature Requests

We welcome you to use the GitHub issue tracker to report bugs or suggest features.

When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:

* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment


## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:

1. You are working against the latest source on the *main* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.

To send us a pull request, please:

1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass.
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.

GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).


## Finding contributions to work on
Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.


## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.


## Security issue notifications
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.


## Licensing

See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
gap-text2sql-main/LICENSE
ADDED
@@ -0,0 +1,175 @@

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.
gap-text2sql-main/NOTICE
ADDED
@@ -0,0 +1 @@
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
gap-text2sql-main/README.md
ADDED
@@ -0,0 +1,202 @@
# mRAT-SQL+GAP - Multilingual version of RAT-SQL+GAP
Code and model from our BRACIS 2021 [paper published in Springer Lecture Notes in Computer Science](https://link.springer.com/chapter/10.1007%2F978-3-030-91699-2_35); [here is the pre-print on arXiv](https://arxiv.org/abs/2110.03546).

Based on RAT-SQL+GAP: [GitHub](https://github.com/awslabs/gap-text2sql). Paper: [AAAI 2021 paper](https://arxiv.org/abs/2012.10309)


## Abstract

mRAT-SQL+GAP is a multilingual version of RAT-SQL+GAP, which starts with the Portuguese language. The code, dataset, and results are available here.


## Directory Structure
Go to the directory where you want to install the structure:
```bash
git clone https://github.com/C4AI/gap-text2sql
cd gap-text2sql/mrat-sql-gap
```

## Conda mtext2sql Environment Setup
```bash
conda create --name mtext2sql python=3.7
conda activate mtext2sql
conda install pytorch=1.5 cudatoolkit=10.2 -c pytorch
pip install gdown
conda install -c conda-forge jsonnet
pip install -r requirements.txt
python -c "import nltk; nltk.download('stopwords'); nltk.download('punkt')"
conda install jupyter notebook
conda install -c conda-forge jupyter_contrib_nbextensions
```


## Setup Script
Just run the script below; it will copy the datasets.
The original version of the Spider dataset is distributed under the [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/legalcode) license.
The modified versions (translated to Portuguese, Spanish, and French, plus double-size (English and Portuguese) and quad-size (English, Portuguese, Spanish, and French) versions) of train_spider.json, train_others.json, and dev.json are distributed under the [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/legalcode) license, respecting ShareAlike.

```bash
chmod +x setup.sh
./setup.sh
```

## Specific setup
The models and checkpoints are big files (gigabytes), so run all the shell scripts only if you have enough disk space. To understand how things work, run just BART_large.sh first and then run the others.
```bash
./BART_large.sh
./mBART50MtoM-large.sh
./mT5_large.sh
./BERTimbau-base.sh
./BERTimbau-large.sh
```

## Environment Test
Now the environment is ready for training (fine-tuning) and inference. Training is very slow: more than 60 hours for BART, BERTimbau, and mBART50, and more than 28 hours for mT5. Therefore I recommend testing the environment with inference first.

### Preprocess dataset
This preprocessing step is necessary both for inference and for training, and it will take some time, maybe 40 minutes.
I will use the config for BART, but you can use the others; see the directory experiments/spider-configs.
```bash
python run.py preprocess experiments/spider-configs/spider-BART-large-en-train_en-eval.jsonnet
```
You can see the processed files in the path:
`data/spider-en/nl2code-1115,output_from=true,fs=2,emb=bart,cvlink`

## Inference
I will use the config for BART again.
Note: we are making inferences using the already trained checkpoint (directory logdir) defined in
`experiments/spider-configs/spider-BART-large-en-train_en-eval.jsonnet`:
`logdir: "logdir/BART-large-en-train",` and
`eval_steps: [40300],`
```bash
python run.py eval experiments/spider-configs/spider-BART-large-en-train_en-eval.jsonnet
```

You then get the inference results and evaluation results in the paths:

`ie_dirs/BART-large-en-train/bart-large-en_run_1_true_1-step41000.infer`

and

`ie_dirs/BART-large-en-train/bart-large-en_run_1_true_1-step41000.eval`.

## Training
Execute this only if it is really necessary, i.e. if you want to fine-tune the model; it will take a long time. But if you have a good machine available and want to see different checkpoints in the logdir, do it.

```bash
python run.py train experiments/spider-configs/spider-BART-large-en-train_en-eval.jsonnet
```
You then get the training checkpoints in the path:
`logdir/BART-large-en-train`


## Checkpoints and Inferences

The checkpoints are available here (ESM - Exact Set Matching Accuracy):

Paper mRAT-SQL+GAP - Multilingual version of RAT-SQL+GAP
* BART-large trained in English [ESM all: 0.718]
  * Checkpoint: [40300](https://drive.google.com/file/d/1F4R-WkJKtJ4lFni3q4lBug6tzSo0H5Qe/view?usp=sharing)
  * Inference - English: [ESM all: 0.718 - Baseline](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/BART-large-en-train/spider_eval_match_ratsqlgap-BART-ori-step40300.txt)
* BERTimbau-base trained in Portuguese
  * Checkpoint: [24100](https://drive.google.com/file/d/1gIZS0RuIxdjmm7sNbA3R6p6--9iMJmW8/view?usp=sharing)
  * Inference - Portuguese: [ESM all: 0.417](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/BERTimbau-base-pt-train/spider_eval_match_ratsqlgap-bertimbau-base-step24100.txt)
* mBART50MtoM-large trained in English
  * Checkpoint: [23100](https://drive.google.com/file/d/16mQf1gMTVGkvONUGpzELzkjCFX5M74cO/view?usp=sharing)
  * Inference - English: [ESM all: 0.651](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mBART50MtoM-large-en-train/spider_eval_match_ratsqlgap-mBART50MtoM-large-en-ori-data-step23100.txt)
* mBART50MtoM-large trained in Portuguese
  * Checkpoint: [39100](https://drive.google.com/file/d/1fWPH4bG9-UjW-p6OgmpINWLLsnOopWLh/view?usp=sharing)
  * Inference - Portuguese: [ESM all: 0.588](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mBART50MtoM-large-pt-train/spider_eval_match_ratsqlgap-mBART50MtoM-largeSimplemmaPtEn-step39100.txt)
* mBART50MtoM-large trained in English and Portuguese (together)
  * Checkpoint: [41000](https://drive.google.com/file/d/1szb44h_2t3fK2Vc02PdaAjDqnkWqM-0U/view?usp=sharing)
    * Inference - English: [ESM all: 0.664](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mBART50MtoM-large-en-pt-train/41000/spider_eval_match_ratsqlgap-mBART50MtoM-largeSimplemmaPtEn-pt-en-Eval-en-step41000.txt)
    * Inference - Portuguese: [ESM all: 0.595 - Best inferences in Portuguese](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mBART50MtoM-large-en-pt-train/41000/spider_eval_match_ratsqlgap-mBART50MtoM-largeSimplemmaPtEn-pt-en-Eval-pt-step41000.txt)
  * Checkpoint: [21100](https://drive.google.com/file/d/1MeLkvGf9-5it1JXnUvU9AmXVnnbAAfP0/view?usp=sharing)
    * Inference - English: [ESM all: 0.678 - Best inferences in English](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mBART50MtoM-large-en-pt-train/21100/spider_eval_match_ratsqlgap-mBART50MtoM-largeSimplemmaPtEn-pt-en-Eval-en-step21100.txt)
    * Inference - Portuguese: [ESM all: 0.581](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mBART50MtoM-large-en-pt-train/21100/spider_eval_match_ratsqlgap-mBART50MtoM-largeSimplemmaPtEn-pt-en-Eval-pt-step21100.txt)

Future work of the paper mRAT-SQL+GAP
* BERTimbau-large trained in Portuguese
  * Checkpoint: [40100](https://drive.google.com/file/d/1q1NOxisOcIdkMftzGPVxBDn989LDDG3X/view?usp=sharing)
  * Inference - Portuguese: [ESM all: 0.418](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/BERTimbau-large-pt-train/spider_eval_match_ratsqlgap-BERTimbau-large-pt-train-Eval-pt-step40100.txt)
* mBART50MtoM-large trained in English, Portuguese, Spanish and French (together) - just best inferences
  * Checkpoint: [39100](https://drive.google.com/file/d/18nioEDEpZf-6CNH_sU3IMZxsSNts_a4y/view?usp=sharing)
    * Inference - English: [ESM all: 0.696](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mBART50MtoM-large-en-pt-es-fr-train/39100/spider_eval_match_ratsqlgap-mBART50MtoM-large-en-pt-es-fr-train_en-eval-step39100.txt)
  * Checkpoint: [42100](https://drive.google.com/file/d/1AmJjyVHiP9V-FzW9Q1sXge4YMWAP-srg/view?usp=sharing)
    * Inference - Portuguese: [ESM all: 0.626](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mBART50MtoM-large-en-pt-es-fr-train/42100/spider_eval_match_ratsqlgap-mBART50MtoM-large-en-pt-es-fr-train_pt-eval-step42100.txt)
    * Inference - Spanish: [ESM all: 0.628](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mBART50MtoM-large-en-pt-es-fr-train/42100/spider_eval_match_ratsqlgap-mBART50MtoM-large-en-pt-es-fr-train_es-eval-step42100.txt)
  * Checkpoint: [44100](https://drive.google.com/file/d/1P0F218tNkW42Pb7okn3uFyTT5sy4zGZR/view?usp=sharing)
    * Inference - French: [ESM all: 0.649](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mBART50MtoM-large-en-pt-es-fr-train/44100/spider_eval_match_ratsqlgap-mBART50MtoM-large-en-pt-es-fr-train_fr-eval-step44100.txt)


Paper mRAT-SQL-FIT

* mT5-large trained in English 51Ksteps
  * Checkpoint: [50100](https://drive.google.com/file/d/1BZ519XxYtXpxxO1iiBy8kSLG4eq34yEX/view?usp=sharing)
  * Inference - English: [ESM all: 0.684](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-en-train/spider_eval_match_ratsqlgap-mT5-large-en-train_en-eval-step50100.txt)

* mT5-large trained in English, Portuguese, Spanish and French (together) 51Ksteps - just best inferences
  * Checkpoint: [51100](https://drive.google.com/file/d/1GSQX0dJlsipQPBvYrBqY0SMFhFGCTW0E/view?usp=sharing)
    * Inference - English: [ESM all: 0.715](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-en-pt-es-fr-51Ksteps-train/51100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-en-pt-es-fr-train_en-eval-step51100.txt)
  * Checkpoint: [42100](https://drive.google.com/file/d/1qx2QeZhoygCstZP_QgtLkTlVfBVHzZin/view?usp=sharing)
    * Inference - Portuguese: [ESM all: 0.680](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-en-pt-es-fr-51Ksteps-train/42100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-en-pt-es-fr-train_pt-eval-step42100.txt)
  * Checkpoint: [50100](https://drive.google.com/file/d/1eDm7SHz2il1RYryGLSYVGxt-ozHugJcf/view?usp=sharing)
    * Inference - Spanish: [ESM all: 0.660](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-en-pt-es-fr-51Ksteps-train/50100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-en-pt-es-fr-train_es-eval-step50100.txt)
    * Inference - French: [ESM all: 0.672](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-en-pt-es-fr-51Ksteps-train/50100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-en-pt-es-fr-train_fr-eval-step50100.txt)

* mT5-large trained in English, Portuguese, Spanish and French (together) 120Ksteps - just best inferences
  * Checkpoint: [77500](https://drive.google.com/file/d/1eUYr_i5O9U1ldm_pBdGozmiRt_42BCh8/view?usp=sharing)
    * Inference - English: [ESM all: 0.718](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-en-pt-es-fr-120Ksteps-train//77500/spider_eval_match_ratsqlgap-mT5-large-NoGAP-en-pt-es-fr-train_Div-en-eval-step77500.txt)
  * Checkpoint: [85500](https://drive.google.com/file/d/1n55OlnyE3RDQtUXMHPwC99Za0xfQavrK/view?usp=sharing)
    * Inference - Portuguese: [ESM all: 0.675](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-en-pt-es-fr-120Ksteps-train//85500/spider_eval_match_ratsqlgap-mT5-large-NoGAP-en-pt-es-fr-train_Div-pt-eval-step85500.txt)
  * Checkpoint: [76500](https://drive.google.com/file/d/1Qs-f2gIgWTJWiWrYGsiULxTBwwpgGatc/view?usp=sharing)
    * Inference - Spanish: [ESM all: 0.675](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-en-pt-es-fr-120Ksteps-train//76500/spider_eval_match_ratsqlgap-mT5-large-NoGAP-en-pt-es-fr-train_Div-es-eval-step76500.txt)
  * Checkpoint: [67500](https://drive.google.com/file/d/1cpTEXMhJXVbJfDc8sW1nfSX91p5VSJtn/view?usp=sharing)
    * Inference - French: [ESM all: 0.681](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-en-pt-es-fr-120Ksteps-train//67500/spider_eval_match_ratsqlgap-mT5-large-NoGAP-en-pt-es-fr-train_Div-fr-eval-step67500.txt)

* mT5-large trained in English, Portuguese, Spanish and French (together) FIT 120Ksteps - just best inferences
  * Checkpoint: [105100](https://drive.google.com/file/d/1h0knsFfD6XCXxoEVSFR_I1WdYvMOkcvA/view?usp=sharing)
    * Inference - English (simplemma.load_data('en','pt','es','fr')): [ESM all: 0.735 - Best inferences in English](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-FIT-en-pt-es-fr-120Ksteps-train//105100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-120Ksteps-FIT-en-pt-es-fr_Div-en-eval-step105100.txt)
    * Inference - English (simplemma.load_data('en')): [ESM all: 0.736 - Best inferences in English](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-FIT-en-pt-es-fr-120Ksteps-train//105100/0.736/spider_eval_match_ratsqlgap-mT5-large-NoGAP-120Ksteps-FIT-en-pt-es-fr_Div-en-eval-step105100.txt)
  * Checkpoint: [102100](https://drive.google.com/file/d/1VCfLnQgZsrb8lJFkhxzPoyfqc7dEY_K-/view?usp=sharing)
    * Inference - Portuguese: [ESM all: 0.687](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-FIT-en-pt-es-fr-120Ksteps-train//102100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-120Ksteps-FIT-en-pt-es-fr_Div-pt-eval-step102100.txt)
  * Checkpoint: [114100](https://drive.google.com/file/d/13DIB5p97bUnquLpd-dO0-Q1bA9LWjIXD/view?usp=sharing)
    * Inference - Spanish: [ESM all: 0.689](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-FIT-en-pt-es-fr-120Ksteps-train//114100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-120Ksteps-FIT-en-pt-es-fr_Div-es-eval-step114100.txt)
    * Inference - French: [ESM all: 0.698](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-FIT-en-pt-es-fr-120Ksteps-train//114100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-120Ksteps-FIT-en-pt-es-fr_Div-fr-eval-step114100.txt)

* mT5-large trained in English, Portuguese, Spanish and French (together) 2048TKs - 480Ksteps - just inference in English
  * Checkpoint: [290100](https://drive.google.com/file/d/19Uvfw7QL-8i3yKvybRe9ADLDuSa-MeJv/view?usp=sharing)
    * Inference - English: [ESM all: 0.697](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-en-pt-es-fr-2048TKs-480Ksteps-train/290100/spider_eval_match_ratsqlgap-mT5-large-2048TKs-en-pt-es-fr_Div-en-eval-step290100.txt)


Other Best Results
* T5-v1_1-large trained in English FIT 150Ksteps
  * Checkpoint: [150300](https://drive.google.com/file/d/14iAERUfhNdU7Gdx9gD9HGuGrnumHaPtq/view?usp=sharing)
  * Inference - English: [ESM all: 0.736](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/T5-v1_1-large-FIT-en-150Ksteps-train/spider_eval_match_ratsqlgap-T5-v1_1-large-NoGAP-150Ksteps-FIT-en_One-en-eval-step150300.txt)

* mT5-large trained in English, Portuguese, Spanish and French (together) + Non-Linear Data Augmentation by rules for extra questions 3enr-3ptr-3esr-3frr FIT 150Ksteps
  * Checkpoint: [128100](https://drive.google.com/file/d/1OjBr9CR0B9feRuk5-Wjh5VpAosB15uNR/view?usp=sharing)
    * Inference - English: [ESM all: 0.726](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-FIT-extra-150Ksteps-train/128100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-150Ksteps-FIT-en-pt-es-fr-extra-3enr-3ptr-3esr-3frr_Div-en-eval-step128100.txt)
  * Checkpoint: [125100](https://drive.google.com/file/d/1bLkTK7qJmwQatK_r6tjSmo6fYYtR4oJ3/view?usp=sharing)
    * Inference - Portuguese: [ESM all: 0.698](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-FIT-extra-150Ksteps-train/125100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-150Ksteps-FIT-en-pt-es-fr-extra-3enr-3ptr-3esr-3frr_Div-pt-eval-step125100.txt)
    * Inference - French: [ESM all: 0.700](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-FIT-extra-150Ksteps-train/125100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-150Ksteps-FIT-en-pt-es-fr-extra-3enr-3ptr-3esr-3frr_Div-fr-eval-step125100.txt)
  * Checkpoint: [136100](https://drive.google.com/file/d/1zAcHQS0iNOXRm4iaiqybuAFETfitwBnR/view?usp=sharing)
    * Inference - Spanish: [ESM all: 0.691](https://github.com/C4AI/gap-text2sql/blob/main/mrat-sql-gap/inference-results/mT5-large-FIT-extra-150Ksteps-train/136100/spider_eval_match_ratsqlgap-mT5-large-NoGAP-150Ksteps-FIT-en-pt-es-fr-extra-3enr-3ptr-3esr-3frr_Div-es-eval-step136100.txt)

## Results

All intermediate files of the results are in the directory [inference-results](https://github.com/C4AI/gap-text2sql/tree/main/mrat-sql-gap/inference-results).

## Security

See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.

## License

This project is licensed under the Apache-2.0 License.
gap-text2sql-main/data/preprocessed_data/bart_parser_pretrain_label_mapping.json
ADDED
@@ -0,0 +1,11 @@
{
    "keyword": ["<s>", "<pad>", "</s>", "select", "where", "_value_", "_values_", "group", "by", "order", "limit", "from", "join", "on",
                "count", "distinct", "and", "desc", "avg", "having", "max", "in", "<", "sum", "intersect", "not", "min",
                "except", "all", "or", "asc", "like", "!=", "union", "between", "into",
                "when", "else", "case", "then", "true", "false", "end", "as", "left", "right", "natural", "full", "convert", "cast",
                "is", "null", "<=>", "/", "(", ".", "$", "=>", "_", "]", "[", ">", "#", "!", ",", "*", "&", "|", "?", "~", "-", "<=", "'",
                ")", "}", "+", "\"", "{", "=", "^", "@", "<", ">="],
    "label_padding_token": "<pad>",
    "label_eos_token": "</s>",
    "label_bos_token": "<s>"
}
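For orientation, a minimal sketch of how a label mapping like this can be consumed; the file path matches the diff, but the loading code itself is illustrative and not part of the repository:

```python
import json

# Load the BART parser pretraining label mapping added in this commit.
with open("gap-text2sql-main/data/preprocessed_data/"
          "bart_parser_pretrain_label_mapping.json") as f:
    mapping = json.load(f)

# Turn the keyword list into a token -> id vocabulary.
vocab = {tok: i for i, tok in enumerate(mapping["keyword"])}

# The three special tokens are named explicitly in the file.
pad_id = vocab[mapping["label_padding_token"]]  # "<pad>"
eos_id = vocab[mapping["label_eos_token"]]      # "</s>"
bos_id = vocab[mapping["label_bos_token"]]      # "<s>"

print(f"{len(vocab)} labels; pad={pad_id}, bos={bos_id}, eos={eos_id}")
```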
gap-text2sql-main/mrat-sql-gap/.gitignore
ADDED
@@ -0,0 +1,133 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

*.DS_Store


### Project data ###

.idea/
.vscode/
.vector_cache/
third_party/stanford-corenlp-full-2018-10-05/

### Logs & Experiment dirs ###

logdirs/
pt-exps/
philly-exps/
ie_dirs/
dumps/
att_dirs/
gap-text2sql-main/mrat-sql-gap/.ptignore
ADDED
@@ -0,0 +1,19 @@
.vector_cache/
.vscode/
.idea/
data/
venv/
build/

notebooks/
third_party/stanford-corenlp-full-2018-10-05/
third_party/syntaxSQL/

*.egg
*.egg-info

logdirs/
pt-exps/
philly-exps/
ie_dirs/
experiments/
gap-text2sql-main/mrat-sql-gap/BART_large.sh
ADDED
@@ -0,0 +1,33 @@
#!/bin/bash
echo "Folders structure preparation"
Name="BART-large"

#mkdir logdir/${Name}-en-train
#mkdir ie_dirs/${Name}-en-train
#--
mkdir models/${Name}
mkdir models/${Name}/pretrained_checkpoint

echo "Model Download - ATTENTION: REVIEW THE FILE SIZES"
cd models/${Name}/pretrained_checkpoint

curl https://gap-text2sql-public.s3.amazonaws.com/checkpoint-artifacts/pretrained-checkpoint -o pytorch_model.bin
cd ..
cd ..
cd ..

echo "Download Checkpoint"
cd logdir/${Name}-en-train
mkdir bs=12,lr=1.0e-04,bert_lr=1.0e-05,end_lr=0e0,att=1
cd bs=12,lr=1.0e-04,bert_lr=1.0e-05,end_lr=0e0,att=1
gdown --id 1F4R-WkJKtJ4lFni3q4lBug6tzSo0H5Qe
curl https://gap-text2sql-public.s3.amazonaws.com/checkpoint-artifacts/gap-finetuned-checkpoint -o model_checkpoint-00041000
cd ..
cd ..
cd ..
gap-text2sql-main/mrat-sql-gap/BERTimbau-base.sh
ADDED
@@ -0,0 +1,34 @@
#!/bin/bash
echo "Folders structure preparation"
Name="BERTimbau-base"

#mkdir logdir/${Name}-pt-train
#mkdir ie_dirs/${Name}-pt-train
#--
#mkdir models/${Name}
#mkdir models/${Name}/pretrained_checkpoint

#echo "Download Pretrained Model"
#cd models/${Name}/pretrained_checkpoint
#curl https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/added_tokens.json -o added_tokens.json
#curl https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/config.json -o config.json
#curl https://cdn-lfs.huggingface.co/neuralmind/bert-base-portuguese-cased/96d2144445b6ba3530c27e38e7e27139fd0b0a5e36d9ca66f4155da7c5f199b0 -o flax_model.msgpack
#curl https://cdn-lfs.huggingface.co/neuralmind/bert-base-portuguese-cased/cb1693767adef60abf23d9fde3996f0c1e6310afad103a2db94ad44854568955 -o pytorch_model.bin
#curl https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/special_tokens_map.json -o special_tokens_map.json
#curl https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/tokenizer_config.json -o tokenizer_config.json
#curl https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/vocab.txt -o vocab.txt
#cd ..
#cd ..
#cd ..

echo "Download Checkpoint"
cd logdir/${Name}-pt-train
mkdir bs=6,lr=7.4e-04,bert_lr=3.0e-06,end_lr=0e0,att=1
cd bs=6,lr=7.4e-04,bert_lr=3.0e-06,end_lr=0e0,att=1
gdown --id 1gIZS0RuIxdjmm7sNbA3R6p6--9iMJmW8
cd ..
cd ..
cd ..
gap-text2sql-main/mrat-sql-gap/BERTimbau-large.sh
ADDED
@@ -0,0 +1,33 @@
#!/bin/bash
echo "Folders structure preparation"
Name="BERTimbau-large"

#mkdir logdir/${Name}-pt-train
#mkdir ie_dirs/${Name}-pt-train
#--
#mkdir models/${Name}
#mkdir models/${Name}/pretrained_checkpoint

#echo "Download Pretrained Model"
#cd models/${Name}/pretrained_checkpoint
#curl https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/added_tokens.json -o added_tokens.json
#curl https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/config.json -o config.json
#curl https://cdn-lfs.huggingface.co/neuralmind/bert-large-portuguese-cased/9af4f60f0bdd71e483baf8a1dd3e3dc509ceeaa7dd2007ed63f110b5c990e6e6 -o flax_model.msgpack
#curl https://cdn-lfs.huggingface.co/neuralmind/bert-large-portuguese-cased/48f211712fdad2263e35c368b0ec79ad635c2df0acb275152e0f7cbd165bb7ca -o pytorch_model.bin
#curl https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/special_tokens_map.json -o special_tokens_map.json
#curl https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/tokenizer_config.json -o tokenizer_config.json
#curl https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/vocab.txt -o vocab.txt
#cd ..
#cd ..
#cd ..

echo "Download Checkpoint"
cd logdir/${Name}-pt-train
mkdir bs=6,lr=7.4e-04,bert_lr=3.0e-06,end_lr=0e0,att=1
cd bs=6,lr=7.4e-04,bert_lr=3.0e-06,end_lr=0e0,att=1
gdown --id 1q1NOxisOcIdkMftzGPVxBDn989LDDG3X
cd ..
cd ..
cd ..
gap-text2sql-main/mrat-sql-gap/crash_on_ipy.py
ADDED
@@ -0,0 +1,19 @@
import sys

class ExceptionHook:
    instance = None

    def __call__(self, type, value, tb):
        if self.instance is None:
            if hasattr(sys, 'ps1') or not sys.stderr.isatty():
                sys.__excepthook__(type, value, tb)
            else:
                import traceback
                # from IPython.core import ultratb
                # self.instance = ultratb.FormattedTB(mode='Plain',
                #     color_scheme='Linux', call_pdb=1)
                import pudb
                traceback.print_exception(type, value, tb)
                pudb.post_mortem(tb)

sys.excepthook = ExceptionHook()
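The module installs its hook as an import side effect: any uncaught exception in a non-interactive terminal run prints the traceback and then drops into a pudb post-mortem session. A minimal usage sketch (the driver script below is hypothetical, not part of the repository, and assumes pudb is installed):

```python
# debug_demo.py -- hypothetical driver; run it from mrat-sql-gap so the
# crash_on_ipy module is importable.
import crash_on_ipy  # noqa: F401  (importing it installs the excepthook)

def buggy() -> float:
    return 1 / 0  # ZeroDivisionError -> pudb post-mortem opens here

buggy()
```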
gap-text2sql-main/mrat-sql-gap/data/spider/generate.sh
ADDED
@@ -0,0 +1,24 @@
#!/bin/bash
if [ "$#" -ne 1 ]; then
    echo "Please specify directory containing Spider files."
    exit 1
fi

BASE=$(realpath $(dirname $0))

# Re-generate 'sql' to fix bad parsing
cp $1/tables.json ${BASE}
for input in train_others train_spider dev; do
    echo Processing $input
    cp $1/${input}.json ${BASE}
    if [[ -e ${BASE}/${input}.json.patch ]]; then
        pushd ${BASE} >& /dev/null
        patch < ${input}.json.patch
        popd >& /dev/null
    fi
    python -m seq2struct.datasets.spider_lib.preprocess.parse_raw_json \
        --tables ${BASE}/tables.json \
        --input ${BASE}/${input}.json \
        --output ${BASE}/${input}.json
    echo
done
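As the argument check at the top implies, the script takes exactly one argument: the directory holding the raw Spider release. A hedged invocation sketch (the download path is a placeholder, not a path from the repository):

```bash
# Run from mrat-sql-gap; ~/datasets/spider is wherever the raw Spider
# JSON files (tables.json, train_spider.json, ...) were unpacked.
./data/spider/generate.sh ~/datasets/spider
```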
gap-text2sql-main/mrat-sql-gap/data/spider/train_spider.json.patch
ADDED
@@ -0,0 +1,49 @@
--- train_spider.json	2018-09-23 20:27:49.000000000 -0700
+++ train_spider_fixed.json	2019-02-05 19:09:31.000000000 -0800
@@ -429190,10 +429190,10 @@
     },
     {
         "db_id": "assets_maintenance",
-        "query": "SELECT T1.company_name FROM Third_Party_Companies AS T1 JOIN Maintenance_Contracts AS T2 ON T1.company_id = T2.maintenance_contract_company_id JOIN Ref_Company_Types AS T3 ON T1.company_type_code = T3.company_type_code ORDER BY T2.contract_end_date DESC LIMIT 1",
+        "query": "SELECT T1.company_type FROM Third_Party_Companies AS T1 JOIN Maintenance_Contracts AS T2 ON T1.company_id = T2.maintenance_contract_company_id ORDER BY T2.contract_end_date DESC LIMIT 1",
         "query_toks": [
             "SELECT",
-            "T1.company_name",
+            "T1.company_type",
             "FROM",
             "Third_Party_Companies",
             "AS",
@@ -429206,14 +429206,6 @@
             "T1.company_id",
             "=",
             "T2.maintenance_contract_company_id",
-            "JOIN",
-            "Ref_Company_Types",
-            "AS",
-            "T3",
-            "ON",
-            "T1.company_type_code",
-            "=",
-            "T3.company_type_code",
             "ORDER",
             "BY",
             "T2.contract_end_date",
@@ -429242,18 +429234,6 @@
             "t2",
             ".",
             "maintenance_contract_company_id",
-            "join",
-            "ref_company_types",
-            "as",
-            "t3",
-            "on",
-            "t1",
-            ".",
-            "company_type_code",
-            "=",
-            "t3",
-            ".",
-            "company_type_code",
             "order",
             "by",
             "t2",
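generate.sh applies this patch automatically (it runs `patch < ${input}.json.patch` when the file exists); if you want to apply it by hand, a minimal sketch, assuming train_spider.json has already been copied next to the patch:

```bash
cd data/spider
patch < train_spider.json.patch   # fixes the assets_maintenance query in place
```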
gap-text2sql-main/mrat-sql-gap/data/sqlite_files/singer/singer.sqlite
ADDED
Binary file (20.5 kB)
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BART-large-en/gap-bart.jsonnet
ADDED
@@ -0,0 +1,106 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-en/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-bart',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            bart_version: args.bart_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            bart_version: args.bart_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'BART-large-nl2code-1115,output_from=%s,fs=%d,emb=bart,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'BART-large-nl2code-1115,output_from=%s,fs=%d,emb=bart,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            bart_version:: null,
            pretrained_checkpoint:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },

}
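Each of these experiment configs is a top-level Jsonnet function of `args`, so it only becomes a concrete config once a launcher supplies the hyperparameters. A minimal sketch of how such a file can be instantiated; all argument values below are illustrative placeholders, not values taken from this commit:

// Hypothetical instantiation of the config above (placeholder values).
local make_config = import 'gap-bart.jsonnet';
make_config({
  bs: 12, num_batch_accumulated: 4,
  lr: 7.4e-4, bert_lr: 3e-6, end_lr: 0, att: 0,
  num_layers: 8, sc_link: true, cv_link: true,
  summarize_header: 'avg', use_column_type: false,
  bart_version: 'facebook/bart-large',
  pretrained_checkpoint: 'pretrained_checkpoint/pytorch_model.bin',
  end_with_from: true, clause_order: null,
  decoder_hidden_size: 512,
  use_align_mat: true, use_align_loss: true,
})

Evaluating this with the jsonnet CLI from the config directory would manifest the merged training config as JSON.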
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BART-large-en/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-en/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 51000,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
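The derived configs build on this base file through three pieces of Jsonnet that are easy to misread: `+:` merges a field into the inherited object instead of replacing it, `field:: null` marks a field hidden so it is dropped from the manifested JSON, and `$` refers to the root of the final composed object (which is how `$.train.max_steps` picks up overrides). A toy sketch of these semantics, independent of this repo:

// Toy example of the override pattern used by the configs above.
local base = { train: { batch_size: 10, max_steps: 40000 }, debug: true };
base + {
  train+: { batch_size: 24 },               // '+:' merges: max_steps survives
  debug:: null,                             // '::' hides: dropped from output
  steps_per_epoch: $.train.max_steps / 10,  // '$' resolves against the final object
}
// manifests as { "steps_per_epoch": 4000, "train": { "batch_size": 24, "max_steps": 40000 } }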
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BERTimbau-base/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-pt/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'BERTimbau-base_nl2code,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 40000,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BERTimbau-base/nl2code-bertimbau-base.jsonnet
ADDED
@@ -0,0 +1,110 @@
local _base = import 'nl2code-base.libsonnet';
local _output_from = true;
local _fs = 2;

function(args) _base(output_from=_output_from, data_path=args.data_path) + {
    local data_path = args.data_path,

    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 768,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-bert',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            bert_version: args.bert_version,
            bert_token_type: args.bert_token_type,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            bert_version: args.bert_version,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: data_path + 'BERTimbau-base_nl2code,output_from=%s,fs=%d,emb=bert,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: data_path + 'BERTimbau-base_nl2code,output_from=%s,fs=%d,emb=bert,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            bert_version:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        }
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,

        max_steps: args.max_steps,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },

    log: {
        reopen_to_flush: true,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BERTimbau-large/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-pt/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'BERTimbau-large_nl2code,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 44100,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/BERTimbau-large/nl2code-bertimbau-large.jsonnet
ADDED
@@ -0,0 +1,110 @@
local _base = import 'nl2code-base.libsonnet';
local _output_from = true;
local _fs = 2;

function(args) _base(output_from=_output_from, data_path=args.data_path) + {
    local data_path = args.data_path,

    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-bert',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            bert_version: args.bert_version,
            bert_token_type: args.bert_token_type,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            bert_version: args.bert_version,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: data_path + 'BERTimbau-large_nl2code,output_from=%s,fs=%d,emb=bert,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: data_path + 'BERTimbau-large_nl2code,output_from=%s,fs=%d,emb=bert,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            bert_version:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        }
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,

        max_steps: args.max_steps,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },

    log: {
        reopen_to_flush: true,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en-enr-enb/T5-v1_1.jsonnet
ADDED
@@ -0,0 +1,106 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-FIT-en-enr-enb/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-t5',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'T5-v1_1-large-NoGAP-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'T5-v1_1-large-NoGAP-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            t5_version:: null,
            pretrained_checkpoint:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },

}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en-enr-enb/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-FIT-en-enr-enb/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 170300,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en-extra-3enr-1en/T5-v1_1.jsonnet
ADDED
@@ -0,0 +1,106 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-FIT-en-extra-3enr-1enb/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-t5',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'T5-v1_1-large-NoGAP-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'T5-v1_1-large-NoGAP-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            t5_version:: null,
            pretrained_checkpoint:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },

}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en-extra-3enr-1en/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-FIT-en-extra-3enr-1enb/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 170300,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en/T5-v1_1.jsonnet
ADDED
@@ -0,0 +1,106 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-FIT-en/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-t5',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'T5-v1_1-large-NoGAP-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'T5-v1_1-large-NoGAP-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            t5_version:: null,
            pretrained_checkpoint:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },

}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-FIT-en/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-FIT-en/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 170300,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-en/T5-v1_1.jsonnet
ADDED
@@ -0,0 +1,106 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-en/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-t5',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'T5-v1_1-large-NoGAP-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'T5-v1_1-large-NoGAP-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            t5_version:: null,
            pretrained_checkpoint:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },

}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/T5-v1_1-large-170Ksteps-en/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-en/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 170300,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-en-pt-es-fr/gap-bart.jsonnet
ADDED
@@ -0,0 +1,103 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-en-pt-es-fr/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-bart',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            bart_version: args.bart_version,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            bart_version: args.bart_version,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'mBART50MtoM-large-nl2code-1115,output_from=%s,fs=%d,emb=bart,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'mBART50MtoM-large-nl2code-1115,output_from=%s,fs=%d,emb=bart,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            bart_version:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },

}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-en-pt-es-fr/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-en-pt-es-fr/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 44100,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
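One Jsonnet mechanic in this file is easy to miss: `decoder_preproc: self.encoder_preproc { ... }` builds the decoder preprocessor by inheriting every encoder field and then re-declaring some of them with `::`, which marks a field hidden so it disappears from the manifested JSON. A minimal, self-contained sketch of that behaviour (field names are illustrative, not taken from the repo):

// hidden_fields.jsonnet -- illustrative sketch of '::' field hiding.
local encoder_preproc = {
    word_emb: { name: 'glove', kind: '42B' },
    min_freq: 50,
    save_path: 'data/enc/',
};

{
    // Inherit encoder_preproc, add a grammar, and hide word_emb.
    decoder_preproc: encoder_preproc {
        grammar: { name: 'spider' },
        word_emb:: null,        // '::' => hidden, so omitted from output
    },
}

Manifesting this yields a `decoder_preproc` containing `grammar`, `min_freq`, and `save_path` but no `word_emb`, which is exactly how these configs strip encoder-only preprocessing options from the decoder side.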
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-en/gap-bart.jsonnet
ADDED
@@ -0,0 +1,103 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-en/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-bart',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            bart_version: args.bart_version,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            bart_version: args.bart_version,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'mBART50MtoM-large-nl2code-1115,output_from=%s,fs=%d,emb=bart,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'mBART50MtoM-large-nl2code-1115,output_from=%s,fs=%d,emb=bart,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            bart_version:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-en/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-en/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 41000,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
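Taken together, `num_warmup_steps: $.train.max_steps / 20`, `start_lr: 1e-3`, `end_lr: 0`, and `power: 0.5` suggest a linear warmup followed by square-root polynomial decay. The sketch below encodes that presumed schedule as a plain Jsonnet function so its shape can be inspected; it is an illustration of the apparent semantics of 'warmup_polynomial', not code from this repository.

// schedule.jsonnet -- presumed 'warmup_polynomial' semantics; illustrative only.
local max_steps = 41000;
local warmup = max_steps / 20;
local start_lr = 1e-3;
local end_lr = 0;
local decay_steps = max_steps - warmup;
local power = 0.5;

local lr(step) =
    if step < warmup then
        start_lr * step / warmup                 // linear warmup from 0
    else
        end_lr + (start_lr - end_lr)
            * std.pow(1 - (step - warmup) / decay_steps, power);

{
    at_start: lr(0),              // 0
    at_warmup_end: lr(warmup),    // start_lr
    at_end: lr(max_steps),        // end_lr
}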
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-pt-en/gap-bart.jsonnet
ADDED
@@ -0,0 +1,103 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-en-pt/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-bart',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            bart_version: args.bart_version,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            bart_version: args.bart_version,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'mBART50MtoM-large-nl2code-1115,output_from=%s,fs=%d,emb=bart,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'mBART50MtoM-large-nl2code-1115,output_from=%s,fs=%d,emb=bart,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            bart_version:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-pt-en/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-en-pt/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 41000,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-pt/gap-bart.jsonnet
ADDED
@@ -0,0 +1,103 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-pt/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-bart',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            bart_version: args.bart_version,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            bart_version: args.bart_version,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'mBART50MtoM-large-nl2code-1115,output_from=%s,fs=%d,emb=bart,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'mBART50MtoM-large-nl2code-1115,output_from=%s,fs=%d,emb=bart,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            bart_version:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mBART50MtoM-large-pt/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-pt/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 41000,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
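Note that the scheduler reads `$.train.max_steps` rather than a literal: `$` resolves against the outermost object after all overlays are applied, so when a companion config such as `gap-bart.jsonnet` overrides `train`, the warmup length follows automatically. A small self-contained sketch of that late binding (values are illustrative):

// late_binding.jsonnet -- illustrative values.
local base = {
    train: { max_steps: 40000 },
    lr_scheduler: {
        // '$' is resolved against the final merged object, not 'base'.
        num_warmup_steps: $.train.max_steps / 20,
    },
};

{
    base_warmup: base.lr_scheduler.num_warmup_steps,   // 2000
    overridden_warmup:
        (base { train+: { max_steps: 80000 } }).lr_scheduler.num_warmup_steps,  // 4000
}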
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-FIT-en-pt-es-fr-extra-3enr-3ptr-3esr-3frr/mT5.jsonnet
ADDED
@@ -0,0 +1,106 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-FIT-en-pt-es-fr-extra-3enr-3ptr-3esr-3frr/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-t5',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'mT5-large-NoGAP-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'mT5-large-NoGAP-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            t5_version:: null,
            pretrained_checkpoint:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },
}
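The `model_name` line relies on Jsonnet's mapping form of `%` formatting: `%(bs)d` looks up the field `bs` in the object on the right-hand side, so the run name is derived directly from the hyperparameters, with the float fields pre-formatted as `%0.1e` strings first. A minimal sketch with placeholder values:

// run_name.jsonnet -- placeholder hyperparameter values.
local args = { bs: 6, lr: 1e-3, bert_lr: 1e-5, end_lr: 0, att: 1 };

local lr_s = '%0.1e' % args.lr;
local bert_lr_s = '%0.1e' % args.bert_lr;
local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr;

{
    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d'
        % (args + { lr: lr_s, bert_lr: bert_lr_s, end_lr: end_lr_s }),
}
// => a model_name of the form 'bs=6,lr=1.0e-03,bert_lr=1.0e-05,end_lr=0e0,att=1'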
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-FIT-en-pt-es-fr-extra-3enr-3ptr-3esr-3frr/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-FIT-en-pt-es-fr-extra-3enr-3ptr-3esr-3frr/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 150300,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-FIT-en-pt-es-fr/mT5.jsonnet
ADDED
@@ -0,0 +1,106 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-FIT-en-pt-es-fr/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-t5',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'mT5-large-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'mT5-large-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            t5_version:: null,
            pretrained_checkpoint:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-FIT-en-pt-es-fr/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-FIT-en-pt-es-fr/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 120300,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en-pt-es-fr-enr-enb/mT5.jsonnet
ADDED
@@ -0,0 +1,106 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-en-pt-es-fr-enr-enb/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-t5',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'mT5-large-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'mT5-large-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            t5_version:: null,
            pretrained_checkpoint:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en-pt-es-fr-enr-enb/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-en-pt-es-fr-enr-enb/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 51300,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en-pt-es-fr/gap-mT5.jsonnet
ADDED
@@ -0,0 +1,106 @@
local _0428_base = import 'nl2code-base.libsonnet';
local _data_path = 'data/spider-en-pt-es-fr/';
local _output_from = true;
local _fs = 2;

function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
    local lr_s = '%0.1e' % args.lr,
    local bert_lr_s = '%0.1e' % args.bert_lr,
    local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,

    local base_bert_enc_size = 1024,
    local enc_size = base_bert_enc_size,

    model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
        lr: lr_s,
        bert_lr: bert_lr_s,
        end_lr: end_lr_s,
    }),

    model+: {
        encoder+: {
            name: 'spider-t5',
            batch_encs_update:: null,
            question_encoder:: null,
            column_encoder:: null,
            table_encoder:: null,
            dropout:: null,
            update_config+: {
                name: 'relational_transformer',
                num_layers: args.num_layers,
                num_heads: 8,
                sc_link: args.sc_link,
                cv_link: args.cv_link,
            },
            summarize_header: args.summarize_header,
            use_column_type: args.use_column_type,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            top_k_learnable:: null,
            word_emb_size:: null,
        },
        encoder_preproc+: {
            word_emb:: null,
            min_freq:: null,
            max_count:: null,
            db_path: _data_path + "database",
            compute_sc_link: args.sc_link,
            compute_cv_link: args.cv_link,
            fix_issue_16_primary_keys: true,
            t5_version: args.t5_version,
            pretrained_checkpoint: args.pretrained_checkpoint,
            count_tokens_in_word_emb_for_vocab:: null,
            save_path: _data_path + 'mT5-large-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],
        },
        decoder_preproc+: {
            grammar+: {
                end_with_from: args.end_with_from,
                clause_order: args.clause_order,
                infer_from_conditions: true,
                factorize_sketch: _fs,
            },
            save_path: _data_path + 'mT5-large-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],

            compute_sc_link:: null,
            compute_cv_link:: null,
            db_path:: null,
            fix_issue_16_primary_keys:: null,
            t5_version:: null,
            pretrained_checkpoint:: null,
        },
        decoder+: {
            name: 'NL2Code',
            dropout: 0.20687225956012834,
            desc_attn: 'mha',
            enc_recurrent_size: enc_size,
            recurrent_size: args.decoder_hidden_size,
            loss_type: 'softmax',
            use_align_mat: args.use_align_mat,
            use_align_loss: args.use_align_loss,
        },
    },

    train+: {
        batch_size: args.bs,
        num_batch_accumulated: args.num_batch_accumulated,
        clip_grad: 1,

        model_seed: args.att,
        data_seed: args.att,
        init_seed: args.att,
    },

    optimizer: {
        name: 'bertAdamw',
        lr: 0.0,
        bert_lr: 0.0,
    },

    lr_scheduler+: {
        name: 'bert_warmup_polynomial_group',
        start_lrs: [args.lr, args.bert_lr],
        end_lr: args.end_lr,
        num_warmup_steps: $.train.max_steps / 8,
    },
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en-pt-es-fr/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
# Model details:
# - NL2Code
# - Pretrained, fixed word embeddings
#   - glove-42B
#   - min_freq 50
# - Spiderv2 encoder
#   - question_encoder ['emb', 'bilstm']
#   - column_encoder ['emb', 'bilstm-summarize']
#   - table_encoder ['emb', 'bilstm-summarize']
#   - upd_steps 4
# - Optimization
#   - max_steps 40k
#   - batch_size 10
#   - Adam with lr 1e-3

function(output_from, data_path='data/spider-en-pt-es-fr/') {
    local PREFIX = data_path,

    data: {
        train: {
            name: 'spider',
            paths: [
                PREFIX + 'train_%s.json' % [s]
                for s in ['spider', 'others']],
            tables_paths: [
                PREFIX + 'tables.json',
            ],
            db_path: PREFIX + 'database',
        },
        val: {
            name: 'spider',
            paths: [PREFIX + 'dev.json'],
            tables_paths: [PREFIX + 'tables.json'],
            db_path: PREFIX + 'database',
        },
    },

    model: {
        name: 'EncDec',
        encoder: {
            name: 'spiderv2',
            dropout: 0.2,
            word_emb_size: 300,
            question_encoder: ['emb', 'bilstm'],
            column_encoder: ['emb', 'bilstm-summarize'],
            table_encoder: ['emb', 'bilstm-summarize'],
            update_config: {
                name: 'relational_transformer',
                num_layers: 4,
                num_heads: 8,
            },
        },
        decoder: {
            name: 'NL2Code',
            dropout: 0.2,
            desc_attn: 'mha',
        },
        encoder_preproc: {
            word_emb: {
                name: 'glove',
                kind: '42B',
            },
            count_tokens_in_word_emb_for_vocab: false,
            min_freq: 50,
            max_count: 5000,
            include_table_name_in_column: false,

            save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
        },
        decoder_preproc: self.encoder_preproc {
            grammar: {
                name: 'spider',
                output_from: output_from,
                use_table_pointer: output_from,
                include_literals: false,
            },
            use_seq_elem_rules: true,

            word_emb:: null,
            include_table_name_in_column:: null,
            count_tokens_in_word_emb_for_vocab:: null,
        },
    },

    train: {
        batch_size: 10,
        eval_batch_size: 50,

        keep_every_n: 1000,
        eval_every_n: 100,
        save_every_n: 100,
        report_every_n: 10,

        max_steps: 51300,
        num_eval_items: 50,
    },
    optimizer: {
        name: 'adam',
        lr: 0.0,
    },
    lr_scheduler: {
        name: 'warmup_polynomial',
        num_warmup_steps: $.train.max_steps / 20,
        start_lr: 1e-3,
        end_lr: 0,
        decay_steps: $.train.max_steps - self.num_warmup_steps,
        power: 0.5,
    }
}
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en/gap-mT5.jsonnet
ADDED
@@ -0,0 +1,106 @@
+local _0428_base = import 'nl2code-base.libsonnet';
+local _data_path = 'data/spider-en/';
+local _output_from = true;
+local _fs = 2;
+
+function(args) _0428_base(output_from=_output_from, data_path=_data_path) + {
+  local lr_s = '%0.1e' % args.lr,
+  local bert_lr_s = '%0.1e' % args.bert_lr,
+  local end_lr_s = if args.end_lr == 0 then '0e0' else '%0.1e' % args.end_lr,
+
+  local base_bert_enc_size = 1024,
+  local enc_size = base_bert_enc_size,
+
+  model_name: 'bs=%(bs)d,lr=%(lr)s,bert_lr=%(bert_lr)s,end_lr=%(end_lr)s,att=%(att)d' % (args + {
+    lr: lr_s,
+    bert_lr: bert_lr_s,
+    end_lr: end_lr_s,
+  }),
+
+  model+: {
+    encoder+: {
+      name: 'spider-t5',
+      batch_encs_update:: null,
+      question_encoder:: null,
+      column_encoder:: null,
+      table_encoder:: null,
+      dropout:: null,
+      update_config+: {
+        name: 'relational_transformer',
+        num_layers: args.num_layers,
+        num_heads: 8,
+        sc_link: args.sc_link,
+        cv_link: args.cv_link,
+      },
+      summarize_header: args.summarize_header,
+      use_column_type: args.use_column_type,
+      t5_version: args.t5_version,
+      pretrained_checkpoint: args.pretrained_checkpoint,
+      top_k_learnable:: null,
+      word_emb_size:: null,
+    },
+    encoder_preproc+: {
+      word_emb:: null,
+      min_freq:: null,
+      max_count:: null,
+      db_path: _data_path + "database",
+      compute_sc_link: args.sc_link,
+      compute_cv_link: args.cv_link,
+      fix_issue_16_primary_keys: true,
+      t5_version: args.t5_version,
+      pretrained_checkpoint: args.pretrained_checkpoint,
+      count_tokens_in_word_emb_for_vocab:: null,
+      save_path: _data_path + 'mT5-large-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],
+    },
+    decoder_preproc+: {
+      grammar+: {
+        end_with_from: args.end_with_from,
+        clause_order: args.clause_order,
+        infer_from_conditions: true,
+        factorize_sketch: _fs,
+      },
+      save_path: _data_path + 'mT5-large-nl2code-1115,output_from=%s,fs=%d,emb=t5,cvlink' % [_output_from, _fs],
+
+      compute_sc_link:: null,
+      compute_cv_link:: null,
+      db_path:: null,
+      fix_issue_16_primary_keys:: null,
+      t5_version:: null,
+      pretrained_checkpoint:: null,
+    },
+    decoder+: {
+      name: 'NL2Code',
+      dropout: 0.20687225956012834,
+      desc_attn: 'mha',
+      enc_recurrent_size: enc_size,
+      recurrent_size: args.decoder_hidden_size,
+      loss_type: 'softmax',
+      use_align_mat: args.use_align_mat,
+      use_align_loss: args.use_align_loss,
+    },
+  },
+
+  train+: {
+    batch_size: args.bs,
+    num_batch_accumulated: args.num_batch_accumulated,
+    clip_grad: 1,
+
+    model_seed: args.att,
+    data_seed: args.att,
+    init_seed: args.att,
+  },
+
+  optimizer: {
+    name: 'bertAdamw',
+    lr: 0.0,
+    bert_lr: 0.0,
+  },
+
+  lr_scheduler+: {
+    name: 'bert_warmup_polynomial_group',
+    start_lrs: [args.lr, args.bert_lr],
+    end_lr: args.end_lr,
+    num_warmup_steps: $.train.max_steps / 8,
+  },
+
+}
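Jsonnet's % operator uses Python-style formatting, so with illustrative args of bs=12, lr=1e-4, bert_lr=1e-5, end_lr=0, att=1 (matching the BART experiment config further below), the model_name template above would evaluate as sketched here:

    local lr_s = '%0.1e' % 1e-4;        # -> '1.0e-04'
    local bert_lr_s = '%0.1e' % 1e-5;   # -> '1.0e-05'
    'bs=%d,lr=%s,bert_lr=%s,end_lr=0e0,att=1' % [12, lr_s, bert_lr_s]
    # -> 'bs=12,lr=1.0e-04,bert_lr=1.0e-05,end_lr=0e0,att=1'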
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/mT5-large-en/nl2code-base.libsonnet
ADDED
@@ -0,0 +1,109 @@
+# Model details:
+# - NL2Code
+# - Pretrained, fixed word embeddings
+#   - glove-42B
+#   - min_freq 50
+# - Spiderv2 encoder
+#   - question_encoder ['emb', 'bilstm']
+#   - column_encoder ['emb', 'bilstm-summarize']
+#   - table_encoder ['emb', 'bilstm-summarize']
+#   - upd_steps 4
+# - Optimization
+#   - max_steps 40k
+#   - batch_size 10
+#   - Adam with lr 1e-3
+
+function(output_from, data_path='data/spider-en/') {
+  local PREFIX = data_path,
+
+  data: {
+    train: {
+      name: 'spider',
+      paths: [
+        PREFIX + 'train_%s.json' % [s]
+        for s in ['spider', 'others']],
+      tables_paths: [
+        PREFIX + 'tables.json',
+      ],
+      db_path: PREFIX + 'database',
+    },
+    val: {
+      name: 'spider',
+      paths: [PREFIX + 'dev.json'],
+      tables_paths: [PREFIX + 'tables.json'],
+      db_path: PREFIX + 'database',
+    },
+  },
+
+  model: {
+    name: 'EncDec',
+    encoder: {
+      name: 'spiderv2',
+      dropout: 0.2,
+      word_emb_size: 300,
+      question_encoder: ['emb', 'bilstm'],
+      column_encoder: ['emb', 'bilstm-summarize'],
+      table_encoder: ['emb', 'bilstm-summarize'],
+      update_config: {
+        name: 'relational_transformer',
+        num_layers: 4,
+        num_heads: 8,
+      },
+    },
+    decoder: {
+      name: 'NL2Code',
+      dropout: 0.2,
+      desc_attn: 'mha',
+    },
+    encoder_preproc: {
+      word_emb: {
+        name: 'glove',
+        kind: '42B',
+      },
+      count_tokens_in_word_emb_for_vocab: false,
+      min_freq: 50,
+      max_count: 5000,
+      include_table_name_in_column: false,
+
+      save_path: PREFIX + 'nl2code-0401,output_from=%s,emb=glove-42B,min_freq=50/' % [output_from],
+    },
+    decoder_preproc: self.encoder_preproc {
+      grammar: {
+        name: 'spider',
+        output_from: output_from,
+        use_table_pointer: output_from,
+        include_literals: false,
+      },
+      use_seq_elem_rules: true,
+
+      word_emb:: null,
+      include_table_name_in_column:: null,
+      count_tokens_in_word_emb_for_vocab:: null,
+    },
+  },
+
+  train: {
+    batch_size: 10,
+    eval_batch_size: 50,
+
+    keep_every_n: 1000,
+    eval_every_n: 100,
+    save_every_n: 100,
+    report_every_n: 10,
+
+    max_steps: 51100,
+    num_eval_items: 50,
+  },
+  optimizer: {
+    name: 'adam',
+    lr: 0.0,
+  },
+  lr_scheduler: {
+    name: 'warmup_polynomial',
+    num_warmup_steps: $.train.max_steps / 20,
+    start_lr: 1e-3,
+    end_lr: 0,
+    decay_steps: $.train.max_steps - self.num_warmup_steps,
+    power: 0.5,
+  }
+}
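Assuming warmup_polynomial is the usual linear-warmup-plus-polynomial-decay schedule (the scheduler implementation itself is not part of this diff), the learning rate at step t would be:

    lr(t) = start_lr * t / num_warmup_steps                                            if t < num_warmup_steps
    lr(t) = end_lr + (start_lr - end_lr) * (1 - (t - num_warmup_steps)/decay_steps)^power   otherwise

With power 0.5 this is a square-root decay from 1e-3 toward 0 over the steps remaining after warmup.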
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/spider-BART-large-en-train_en-eval.jsonnet
ADDED
@@ -0,0 +1,33 @@
+{
+  local exp_id = 1,
+  logdir: "logdir/BART-large-en-train",
+  model_config: "experiments/spider-configs/BART-large-en/gap-bart.jsonnet",
+  model_config_args: {
+    bs: 12,
+    num_batch_accumulated: 2,
+    bart_version: "facebook/bart-large",
+    pretrained_checkpoint: "models/BART-large/pretrained_checkpoint/pytorch_model.bin",
+    summarize_header: "avg",
+    use_column_type: false,
+    num_layers: 8,
+    lr: 1e-4,
+    bert_lr: 1e-5,
+    att: 1,
+    end_lr: 0,
+    sc_link: true,
+    cv_link: true,
+    use_align_mat: true,
+    use_align_loss: true,
+    bart_token_type: true,
+    decoder_hidden_size: 512,
+    end_with_from: true,  # equivalent to "SWGOIF" if true
+    clause_order: null,  # a string like "SWGOIF"; takes priority over end_with_from
+  },
+
+  eval_name: "bart-large-en_run_%d_%s_%d" % [exp_id, self.eval_use_heuristic, self.eval_beam_size],
+  eval_output: "ie_dirs/BART-large-en-train_en-eval",
+  eval_beam_size: 1,
+  eval_use_heuristic: true,
+  eval_steps: [40300],
+  eval_section: "val",
+}
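Note the interplay of bs and num_batch_accumulated here: under the usual meaning of gradient accumulation (an assumption; the training loop is not part of this diff), the effective batch per optimizer step is their product:

    local effective_batch = 12 * 2;  # bs * num_batch_accumulated
    effective_batch                  # -> 24 examples per parameter update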
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/spider-BERTimbau-base-pt.jsonnet
ADDED
@@ -0,0 +1,33 @@
+{
+  logdir: "logdir/BERTimbau-base-pt-train",
+  model_config: "experiments/spider-configs/BERTimbau-base/nl2code-bertimbau-base.jsonnet",
+  model_config_args: {
+    data_path: 'data/spider-pt/',
+    bs: 6,
+    num_batch_accumulated: 4,
+    bert_version: "neuralmind/bert-base-portuguese-cased",
+    summarize_header: "avg",
+    use_column_type: false,
+    max_steps: 81000,
+    num_layers: 8,
+    lr: 7.44e-4,
+    bert_lr: 3e-6,
+    att: 1,
+    end_lr: 0,
+    sc_link: true,
+    cv_link: true,
+    use_align_mat: true,
+    use_align_loss: true,
+    bert_token_type: true,
+    decoder_hidden_size: 512,
+    end_with_from: true,  # equivalent to "SWGOIF" if true
+    clause_order: null,  # a string like "SWGOIF"; takes priority over end_with_from
+  },
+
+  eval_name: "bertimbau-base-pt-eval_%s_%d" % [self.eval_use_heuristic, self.eval_beam_size],
+  eval_output: "ie_dirs/BERTimbau-base-pt-train",
+  eval_beam_size: 1,
+  eval_use_heuristic: true,
+  eval_steps: [24100],
+  eval_section: "val",
+}
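As a sanity check on the eval_name templates in these configs: Jsonnet renders booleans through %s, so with eval_use_heuristic=true and eval_beam_size=1 the name above resolves as:

    "bertimbau-base-pt-eval_%s_%d" % [true, 1]   # -> "bertimbau-base-pt-eval_true_1"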
gap-text2sql-main/mrat-sql-gap/experiments/spider-configs/spider-BERTimbau-large-pt.jsonnet
ADDED
@@ -0,0 +1,33 @@
+{
+  logdir: "logdir/BERTimbau-large-pt-train",
+  model_config: "experiments/spider-configs/BERTimbau-large/nl2code-bertimbau-large.jsonnet",
+  model_config_args: {
+    data_path: 'data/spider-pt/',
+    bs: 6,
+    num_batch_accumulated: 4,
+    bert_version: "neuralmind/bert-large-portuguese-cased",
+    summarize_header: "avg",
+    use_column_type: false,
+    max_steps: 81000,
+    num_layers: 8,
+    lr: 7.44e-4,
+    bert_lr: 3e-6,
+    att: 1,
+    end_lr: 0,
+    sc_link: true,
+    cv_link: true,
+    use_align_mat: true,
+    use_align_loss: true,
+    bert_token_type: true,
+    decoder_hidden_size: 512,
+    end_with_from: true,  # equivalent to "SWGOIF" if true
+    clause_order: null,  # a string like "SWGOIF"; takes priority over end_with_from
+  },
+
+  eval_name: "bertimbau-large-pt-eval_%s_%d" % [self.eval_use_heuristic, self.eval_beam_size],
+  eval_output: "ie_dirs/BERTimbau-large-pt-train",
+  eval_beam_size: 1,
+  eval_use_heuristic: true,
+  eval_steps: [40100],
+  eval_section: "val",
+}