Spaces:

chrisjay
/

masakhane-benchmarks

Build error

File size: 5,023 Bytes

b605eca
 
e26a582
b605eca
 
ace9aa6
 
37d3de6
 
 
 
379a274
8e94ee2
37d3de6
e586aba
ace9aa6
b605eca
 
ace9aa6
b605eca
 
 
 
 
 
 
 
 
ace9aa6
37fe427
cd1f2cb
 
5e78dce
1a49d63
ace9aa6
b605eca
ace9aa6
 
 
 
 
 
25e3777
b605eca
 
ace9aa6
 
 
b605eca
ace9aa6
 
379a274
e586aba
cd1f2cb
 
e586aba
cd1f2cb
447b9ea
 
 
 
 
cd1f2cb
447b9ea
 
cd1f2cb
 
ace9aa6
b605eca
 
 
cd1f2cb
 
 
a054096
cd1f2cb
ace9aa6
e26a582
ace9aa6
e26a582
ace9aa6
1a49d63
cd1f2cb
e26a582
ace9aa6
fa5e9aa
ace9aa6
 
cd1f2cb
ace9aa6
b605eca
 
 
c0a0850
ace9aa6
b605eca
ace9aa6
 
4e441f5
c0a0850
7da89d6
b605eca
 
447b9ea
d76e7c4
2153e72
b605eca
2153e72
8804405

import gradio as gr
import yaml
from joeynmt.prediction import load_params_for_prediction,translate_for_hf_space
from huggingface_hub import hf_hub_download

# Display name of each supported language -> the language code used to build
# the "<source>-<target>" model identifiers.
# NOTE(review): the key mapped to 'ish' starts with a combining accent, which
# looks like a typo; it is kept unchanged here -- confirm the intended spelling.
language_map = {
    'English': 'en',
    'Swahili': 'sw',
    'Fon': 'fon',
    'Igbo': 'ig',
    'Arabic': 'ar',
    'Shona': 'sn',
    'Ẹ̀dó': 'bin',
    'Hausa': 'ha',
    'Efik': 'efi',
    'Twi': 'twi',
    'Afrikaans': 'af',
    'Yoruba': 'yo',
    'Urhobo': 'urh',
    'Dendi': 'ddn',
    '̀Ẹ̀sán': 'ish',
    'Isoko': 'iso',
    'Kamba': 'kam',
    'Luo': 'luo',
    'Southern Ndebele': 'nr',
    'Tshivenda': 've',
}

# Language pairs ("<source code>-<target code>") the author has worked on.
available_language_pairs = [
    'en-sw',
    'en-af',
    'en-ar',
    'en-ddn',
    'en-ish',
    'en-iso',
    'en-kam',
    'en-luo',
    'en-nr',
    'en-ve',
    'efi-en',
    'en-bin',
    'en-ha',
    'en-ig',
    'en-fon',
    'en-twi',
    'sn-en',
    'sw-en',
    'yo-en',
    'en-urh',
]

# Display names, in the insertion order of ``language_map``.
available_languages = [language_name for language_name in language_map]

def load_config(path="configs/default.yaml") -> dict:
    """
    Read a YAML configuration file and return its parsed content.

    CODE ADAPTED FROM: https://github.com/joeynmt/joeynmt

    :param path: location of the YAML configuration file
    :return: the configuration as parsed by ``yaml.safe_load``
    """
    # The file is decoded as UTF-8 regardless of the platform default.
    with open(path, encoding="utf-8") as config_stream:
        return yaml.safe_load(config_stream)
  
def load_model(source_language, target_language):
    """
    Download one translation model from the Hub and prepare it for prediction.

    The config, the source/target vocabularies and the best checkpoint are
    fetched from the ``chrisjay/masakhane_benchmarks`` repository, under
    ``<source_language>-<target_language>/main/``. The downloaded vocabulary
    paths are written into the config before it is handed to
    ``load_params_for_prediction``.

    :param source_language: source language code, e.g. ``'en'``
    :param target_language: target language code, e.g. ``'sw'``
    :return: the value returned by ``load_params_for_prediction`` for the
        patched config and the downloaded checkpoint
    :raises Exception: if any of the four files cannot be downloaded; the
        original error is attached as ``__cause__``
    """
    repo_id = "chrisjay/masakhane_benchmarks"
    translation_dir = 'main'
    model_dir = f"{source_language}-{target_language}/{translation_dir}"

    try:
        # ``force_filename='config.yaml'`` is intentionally not passed: it asked
        # for the same local file name for every language pair, and
        # NOTE(review): newer huggingface_hub releases are understood to reject
        # the argument altogether -- confirm against the pinned version.
        file_yaml = hf_hub_download(repo_id, filename=f"{model_dir}/config.yaml")
        src_vocab = hf_hub_download(repo_id, filename=f"{model_dir}/src_vocab.txt")
        trg_vocab = hf_hub_download(repo_id, filename=f"{model_dir}/trg_vocab.txt")
        best_ckpt = hf_hub_download(repo_id, filename=f"{model_dir}/best.ckpt")
    except Exception as err:
        # Keep the user-facing message, but chain the real cause so the
        # underlying download/argument error stays visible in tracebacks.
        raise Exception(f'It seems we do not have a working configuration repo yet for {source_language} -> {target_language}. \n You could help us by creating it here: https://huggingface.co/chrisjay/masakhane_benchmarks/tree/main') from err

    # Point the config at the locally downloaded vocabulary files.
    parsed_yaml_file = load_config(file_yaml)
    parsed_yaml_file['data']['src_vocab'] = src_vocab
    parsed_yaml_file['data']['trg_vocab'] = trg_vocab

    return load_params_for_prediction(parsed_yaml_file, best_ckpt)


# Load the models of all available language pairs.

# Reverse view of ``language_map`` (language code -> display name), built once
# instead of re-scanning the dictionary for every pair. ``setdefault`` keeps the
# first display name should two names ever share a code.
code_to_language = {}
for language_name, language_code in language_map.items():
    code_to_language.setdefault(language_code, language_name)

model_mapping = {}  # "<source>-<target>" -> prediction parameters from load_model
examples_available_models = []  # Only pairs that loaded successfully are shown in the Examples.
for available_pair in available_language_pairs:
    try:
        source_code, target_code = available_pair.split('-')[:2]
        model_mapping[available_pair] = load_model(source_code, target_code)
        examples_available_models.append(
            [code_to_language[source_code], code_to_language[target_code]]
        )
    except Exception as err:
        # Best effort: one broken pair must not prevent the others from
        # loading, but report why it was skipped instead of hiding it.
        print(f'Skipping language pair {available_pair!r}: {err!r}')
        continue

if not examples_available_models:
    raise Exception('Available models for Space cannot be empty!')


def get_translation(source_language, target_language, source_sentence=None, source_file=None):
    '''
    Translate a sentence, or an uploaded file, from one language to another.

    :param source_language: display name of the source language (a key of ``language_map``)
    :param target_language: display name of the target language (a key of ``language_map``)
    :param source_sentence: text to translate; not used when ``source_file`` is given
    :param source_file: optional uploaded file object; its ``name`` attribute is
        passed to the translator in place of the sentence
    :return: the first element of the prediction for a sentence, the whole
        prediction for a file, or an error message string if the model for the
        pair is missing or the translation fails
    :raises KeyError: if either language name is not in ``language_map``
    '''
    source_language_ = language_map[source_language]
    target_language_ = language_map[target_language]

    # An uploaded file takes precedence over the text box.
    if source_file is not None:
        translation_type = 'file'
        source = source_file.name
    else:
        translation_type = 'sentence'
        source = source_sentence

    pair = f'{source_language_}-{target_language_}'
    try:
        params = model_mapping[pair]
        pred = translate_for_hf_space(params, source, translation_type)
    except Exception as err:
        # The user gets a friendly message; the real cause goes to the logs so
        # the failure can still be diagnosed.
        print(f'Translation failed for {pair!r}: {err!r}')
        return f'There was an issue loading the translation model for {source_language} -> {target_language}. Try another pair please'

    return pred if source_file is not None else pred[0]
 



# Text shown at the top of the demo page.
title = "Interact with Masakhane Benchmark Models"
description = (
    "This enables you to interact with some of the Masakhane Benchmark Models and keep up with their improvement. "
    "Some of these models undergo finetuning on a regular basis. "
    "This way, you can easily use the best model with no hassles."
)

# Input components, in the positional order of ``get_translation``'s
# parameters: source language, target language, sentence, optional file.
interface_inputs = [
    gr.inputs.Dropdown(choices=available_languages, default='English'),
    gr.inputs.Dropdown(choices=available_languages, default='Swahili'),
    gr.inputs.Textbox(label="Input"),
    gr.inputs.File(
        file_count="single",
        type="file",
        label='Or upload txt file containing sentences',
        optional=True,
    ),
]
translation_output = gr.outputs.Textbox(type="auto", label='Translation')

iface = gr.Interface(
    fn=get_translation,
    inputs=interface_inputs,
    outputs=translation_output,
    title=title,
    description=description,
    examples=examples_available_models,
    enable_queue=True,
    theme='huggingface',
)
iface.launch()