"""Gradio demo that annotates Tupi text with a fine-tuned T5 model.

Importing this module loads the tokenizer and model named by ``model_name``
and builds the ``iface`` Gradio interface; running it as a script launches
the web UI.
"""
import re

import gradio as gr
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

print(f"Is CUDA available: {torch.cuda.is_available()}")

# Load the model and tokenizer
model_name = "kiansheik/tupi-verb-anotation"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# All non-ASCII special chars of the Navarro alphabet (plus the apostrophe).
# The position of a character in this list is the number used in its
# "[w<index>q]" placeholder, so the order must not change.
special_chars = "û î ŷ á é í ý ó ú ã ẽ ĩ ỹ õ ũ '".split(" ")

_WHITESPACE_RE = re.compile(r"\s+")
# Matches either a whole "[...]" group or a run of text without brackets.
_BRACKET_SPLIT_RE = re.compile(r"\[.*?\]|[^[\]]+")
# Punctuation dropped from the input before it is handed to the model.
_PUNCTUATION_TABLE = str.maketrans("", "", ",.?!-")
# Ordered clean-up applied to the decoded model output; later rules rely on
# earlier ones having run (e.g. ' ##' must be handled before '##').
_DECODE_CLEANUP = (
    (" ##", " "),
    ("##", ""),
    (" ' ", "'"),
    ("Ġ", " "),
    ("[PAD]", ""),
    ("[SPACE]", " "),
)
# Number of characters cut from each end of the cleaned-up output.
# NOTE(review): presumably removes the start/end marker text that
# tokenizer.decode() emits around the prediction -- confirm against real output.
_DECODE_TRIM = 5


def normalize_tupi(x: str) -> str:
    """Encode *x* into the placeholder form expected by the tokenizer.

    Each character in ``special_chars`` becomes ``[w<index>q]``, every run of
    whitespace is collapsed to one space, the ends are stripped, and the
    remaining spaces are written as ``[SPACE]``.
    """
    for i, char in enumerate(special_chars):
        x = x.replace(char, f"[w{i}q]")
    return _WHITESPACE_RE.sub(" ", x).strip().replace(" ", "[SPACE]")


def replace_outside_brackets(match: re.Match) -> str:
    """Return the matched text unchanged.

    Used as the ``re.sub`` callback in ``navarroize_tupi``. Both bracketed
    groups and plain text are currently passed through as-is.
    """
    return match.group()


def navarroize_tupi(x: str) -> str:
    """Run ``replace_outside_brackets`` over every segment of *x*.

    The text is split into ``[...]`` groups and bracket-free runs; because the
    callback is a pass-through, the result currently equals *x*.
    """
    return _BRACKET_SPLIT_RE.sub(replace_outside_brackets, x)


def anotate(st: str, debug: bool = False) -> str:
    """Annotate the sentence *st* with the loaded T5 model.

    Args:
        st: Raw input sentence.
        debug: When true, print the encoded phrase that is sent to the model.

    Returns:
        The decoded model output with placeholders turned back into their
        characters, or ``""`` when nothing is left of *st* after
        normalisation (empty, whitespace-only or punctuation-only input).
    """
    # Punctuation is removed after normalisation, exactly as before, so the
    # model keeps receiving the same encoding for the same input.
    sentence = normalize_tupi(st.lower()).translate(_PUNCTUATION_TABLE).strip()
    inp_sent = navarroize_tupi(sentence).replace(" ", "[SPACE]")
    if not inp_sent:
        # Nothing to annotate; skip the model call instead of decoding
        # whatever it would generate for an empty prompt.
        return ""
    if debug:
        print("Input Phrase:\t\t", inp_sent)

    input_ids = tokenizer.encode(inp_sent, return_tensors="pt")
    # Generate the annotated output using the model
    output_ids = model.generate(input_ids, max_length=100)
    # Decode the output to get the annotated sentence
    tl = tokenizer.decode(output_ids[0])

    for old, new in _DECODE_CLEANUP:
        tl = tl.replace(old, new)
    # Turn "[w<index>q]" placeholders back into the original characters.
    for i, char in enumerate(special_chars):
        tl = tl.replace(f"[w{i}q]", char)

    return navarroize_tupi(tl)[_DECODE_TRIM:-_DECODE_TRIM]


def generate_text(input_text: str) -> str:
    """Gradio callback: return the annotation of *input_text*."""
    return anotate(input_text)


# Create Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Generated Text"),
    title="Tupi Verb Annotation",
    description="Enter text to generate output using the T5 Small model.",
)

if __name__ == "__main__":
    iface.launch()