Spaces:
Runtime error
Runtime error
# Translate a text file with a trained fairseq model.
#
# Usage: <this script> infname outfname src_lang tgt_lang exp_dir [ref_fname]
#   infname    input file to translate
#   outfname   final output file; intermediate files are written next to it as
#              outfname.norm, outfname._bpe, outfname.bpe and outfname.log
#   src_lang   source language code
#   tgt_lang   target language code
#   exp_dir    experiment directory; model/, final_bin/ and vocab/ are read
#              from inside it
#   ref_fname  optional; only referenced by the disabled evaluation block at
#              the end of this script
#
# Helper scripts are referenced by relative path (scripts/, subword-nmt/,
# model_configs/), so the script has to be started from the directory that
# contains them.

# Stop at the first failing step: every stage consumes the previous stage's
# output file, so continuing after an error only produces garbage.
set -euo pipefail

if [ "$#" -lt 5 ]; then
  printf 'Usage: %s infname outfname src_lang tgt_lang exp_dir [ref_fname]\n' \
    "${0##*/}" >&2
  exit 2
fi

date

infname=$1
outfname=$2
src_lang=$3
tgt_lang=$4
exp_dir=$5
# Kept for the disabled evaluation block below; defaults to empty so that
# `set -u` does not reject a five-argument call.
ref_fname=${6:-}

SRC_PREFIX='SRC'
TGT_PREFIX='TGT'

#"$(dirname "$0")"/env.sh

SUBWORD_NMT_DIR='subword-nmt'
model_dir="${exp_dir}/model"
data_bin_dir="${exp_dir}/final_bin"

### normalization and script conversion

echo "Applying normalization and script conversion"
# The preprocessing script's stdout is captured and later handed to the
# postprocessing step as the number of input sentences.
# NOTE(review): the meaning of the trailing `true` flag is defined by
# preprocess_translate.py, not visible here.
input_size=$(python scripts/preprocess_translate.py \
  "$infname" "${outfname}.norm" "$src_lang" true)
echo "Number of sentences in input: $input_size"

### apply BPE to input file

echo "Applying BPE"
python "${SUBWORD_NMT_DIR}/subword_nmt/apply_bpe.py" \
  -c "${exp_dir}/vocab/bpe_codes.32k.${SRC_PREFIX}" \
  --vocabulary "${exp_dir}/vocab/vocab.${SRC_PREFIX}" \
  --vocabulary-threshold 5 \
  < "${outfname}.norm" \
  > "${outfname}._bpe"

# NOTE(review): the original comment here read "not needed for joint
# training", yet the tagging step below runs unconditionally; confirm intent.
# echo "Adding language tags"
python scripts/add_tags_translate.py \
  "${outfname}._bpe" "${outfname}.bpe" "$src_lang" "$tgt_lang"

### run decoder

echo "Decoding"

src_input_bpe_fname="${outfname}.bpe"
tgt_output_fname="$outfname"
# stdout and stderr both go to the log file, so a failure would otherwise be
# silent on the terminal; report where to look before exiting.
if ! fairseq-interactive "$data_bin_dir" \
  -s "$SRC_PREFIX" -t "$TGT_PREFIX" \
  --distributed-world-size 1 \
  --path "${model_dir}/checkpoint_best.pt" \
  --batch-size 64 --buffer-size 2500 --beam 5 --remove-bpe \
  --skip-invalid-size-inputs-valid-test \
  --user-dir model_configs \
  --input "$src_input_bpe_fname" > "${tgt_output_fname}.log" 2>&1; then
  echo "Decoding failed; see ${tgt_output_fname}.log" >&2
  exit 1
fi

echo "Extracting translations, script conversion and detokenization"
# this part reverses the transliteration from Devanagari script to the target
# language and then detokenizes it.
python scripts/postprocess_translate.py \
  "${tgt_output_fname}.log" "$tgt_output_fname" "$input_size" "$tgt_lang" true

# This block is now moved to compute_bleu.sh for release with more documentation.
# if [ $src_lang == 'en' ]; then
# # indicnlp tokenize the output files before evaluation
# input_size=`python scripts/preprocess_translate.py $ref_fname $ref_fname.tok $tgt_lang`
# input_size=`python scripts/preprocess_translate.py $tgt_output_fname $tgt_output_fname.tok $tgt_lang`
# sacrebleu --tokenize none $ref_fname.tok < $tgt_output_fname.tok
# else
# # indic to en models
# sacrebleu $ref_fname < $tgt_output_fname
# fi
# echo `date`
echo "Translation completed"