# Spaces: Sleeping, Sleeping -- appears to be a status banner captured from the hosting page; not part of the program.
import gradio as gr | |
import re | |
from transformers import T5Tokenizer, T5ForConditionalGeneration | |
import torch | |
# Start-up diagnostic: report whether PyTorch can see a CUDA device.
print(f"Is CUDA available: {torch.cuda.is_available()}")

# Load the tokenizer and the seq2seq model for the checkpoint named below.
model_name = "kiansheik/tupi-verb-anotation"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Non-ASCII characters of the Navarro alphabet, plus the apostrophe.
# The position of each entry is the number used inside its "[w<i>q]"
# placeholder (see normalize_tupi / anotate), so the order must stay stable.
special_chars = "û î ŷ á é í ý ó ú ã ẽ ĩ ỹ õ ũ '".split(" ")
def normalize_tupi(x):
    """Encode a Tupi string into the placeholder form handed to the tokenizer.

    Every entry of the module-level ``special_chars`` list is replaced by the
    marker ``[w<i>q]``, where ``i`` is the entry's index in that list.  Runs of
    whitespace are then collapsed to a single space, leading and trailing
    whitespace is removed, and each remaining space becomes ``[SPACE]``.

    Args:
        x: Text to encode.

    Returns:
        The encoded string.
    """
    for i, char in enumerate(special_chars):
        x = x.replace(char, f"[w{i}q]")
    # Raw string: "\s" inside a plain literal is an invalid escape sequence
    # (a SyntaxWarning on recent Python versions).
    return re.sub(r"\s+", " ", x).strip().replace(" ", "[SPACE]")
def replace_outside_brackets(match):
    """Return the text of *match* unchanged (``re.sub`` replacement callback).

    The previous version tested whether the matched text was wrapped in
    square brackets, but both branches returned the same value, so the test
    had no effect.  The function is an identity hook: bracketed and
    unbracketed segments alike are passed through as-is.

    Args:
        match: A regular-expression match object.

    Returns:
        The full matched substring.
    """
    # NOTE(review): the name suggests text outside brackets was meant to be
    # rewritten here; no such rewrite exists, so none is applied.
    return match.group()
def navarroize_tupi(x):
    """Run ``replace_outside_brackets`` over every segment of *x*.

    The string is split by a regular expression into bracketed spans
    (``[...]``) and runs of text containing no square brackets; each segment
    is replaced by whatever the callback returns for it.  With the current
    callback, which returns each match as-is, the result equals the input.

    Args:
        x: Text to process.

    Returns:
        The string produced by the substitution.
    """
    # First alternative: a bracketed span; second: a run free of brackets.
    segment_re = re.compile(r'\[.*?\]|[^[\]]+')
    return segment_re.sub(replace_outside_brackets, x)
def anotate(st, debug=False, *, max_length=100):
    """Annotate a Tupi sentence with the loaded seq2seq model.

    The text is lower-cased, stripped of the characters ``, . ? ! -``,
    encoded with ``normalize_tupi``, tokenized, and passed to
    ``model.generate``.  The decoded output is then mapped back to readable
    text: word-piece and padding markers are removed, ``[SPACE]`` becomes a
    space, and each ``[w<i>q]`` placeholder becomes ``special_chars[i]``.

    Args:
        st: Sentence to annotate.
        debug: When true, print the encoded string handed to the tokenizer.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
            Defaults to 100, the value that used to be hard-coded.

    Returns:
        The decoded annotation string, or ``""`` when *st* has nothing left
        once punctuation and whitespace are removed.
    """
    # Remove punctuation BEFORE normalising.  Doing it afterwards (as before)
    # left stray markers behind: "abc ." became "abc[SPACE]" and "a , b"
    # became "a[SPACE][SPACE]b", because the spaces around the punctuation had
    # already been turned into [SPACE].
    cleaned = st.lower().translate(str.maketrans("", "", ",.?!-"))

    # Nothing to annotate: skip the model call instead of generating from an
    # empty prompt.
    if not cleaned.strip():
        return ""

    sentence = normalize_tupi(cleaned)
    inp_sent = navarroize_tupi(sentence).replace(' ', '[SPACE]')
    if debug:
        print("Input Phrase:\t\t", inp_sent)

    input_ids = tokenizer.encode(inp_sent, return_tensors="pt")
    # Generate the annotated output using the model.
    output_ids = model.generate(input_ids, max_length=max_length)
    # Decode the first (only) generated sequence back to text.
    annotated_sentence = tokenizer.decode(output_ids[0])

    # Undo tokenizer artefacts; the order of these replacements matters
    # (' ##' must be handled before the bare '##').
    tl = (
        annotated_sentence
        .replace(' ##', ' ')
        .replace('##', '')
        .replace(' \' ', "'")
        .replace("Ġ", " ")
        .replace('[PAD]', '')
        .replace('[SPACE]', ' ')
    )
    # Restore the special characters from their numbered placeholders.
    for i, char in enumerate(special_chars):
        tl = tl.replace(f"[w{i}q]", char)

    # NOTE(review): drops the first and last five characters of the decoded
    # text -- presumably the tokenizer's start/end marker tokens; confirm
    # against the tokenizer's vocabulary.
    out_pred = navarroize_tupi(tl)[5:-5]
    return out_pred
def generate_text(input_text):
    """Gradio callback: annotate *input_text* and return the result.

    Delegates to ``anotate`` using its default arguments.

    Args:
        input_text: Text entered in the input box.

    Returns:
        Whatever ``anotate`` returns for that text.
    """
    annotated = anotate(input_text)
    return annotated
# Gradio UI: one text box in, one text box out, wired to generate_text.
iface = gr.Interface(
    title="Tupi Verb Annotation",
    description="Enter text to generate output using the T5 Small model.",
    fn=generate_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Generated Text"),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()