# kiansheik's picture
# Update app.py
# 02d00af verified
import gradio as gr
import re
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
# Report at startup whether torch can see a CUDA device. This is informational
# only: nothing in this file as shown moves the model or its inputs to a GPU.
print(f"Is CUDA available: {torch.cuda.is_available()}")
# Load the tokenizer and the model once at import time via from_pretrained();
# both module-level objects are used by anotate() for every request.
model_name = "kiansheik/tupi-verb-anotation"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
# Non-ASCII letters of the Navarro alphabet, plus the apostrophe. A character's
# index in this list is its placeholder id: index i is written as "[w{i}q]" by
# normalize_tupi(), and anotate() uses the same indices to map placeholders
# back to characters, so both directions depend on this ordering.
special_chars = "û î ŷ á é í ý ó ú ã ẽ ĩ ỹ õ ũ '".split(" ")


def normalize_tupi(x):
    """Encode Tupi text into the placeholder form used as model input.

    Each character listed in ``special_chars`` is replaced by ``[w{i}q]``
    (``i`` being its index in that list). Runs of whitespace are then
    collapsed, leading/trailing whitespace is removed, and every remaining
    space becomes the literal token ``[SPACE]``.

    Args:
        x: The text to encode.

    Returns:
        The encoded string; it contains no plain space characters.
    """
    for i, char in enumerate(special_chars):
        x = x.replace(char, f"[w{i}q]")
    # Raw string: '\s' in a normal literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    return re.sub(r'\s+', ' ', x).strip().replace(' ', '[SPACE]')
def replace_outside_brackets(match):
    """Replacement callback for ``re.sub`` that returns the match unchanged.

    The previous body tested whether the matched text was wrapped in square
    brackets, but returned the same text in both branches. That dead
    conditional is collapsed here; the result is identical for every match.

    Args:
        match: The ``re.Match`` object supplied by ``re.sub``.

    Returns:
        The full matched text, unmodified.
    """
    # NOTE(review): the name suggests text outside "[...]" was meant to be
    # transformed at some point; currently no transformation is applied.
    return match.group()
def navarroize_tupi(x):
    """Pass each segment of *x* through ``replace_outside_brackets``.

    The string is scanned for segments that are either one ``[...]`` group
    (shortest possible) or a run of characters containing no square bracket.
    Every such segment is replaced by whatever the callback returns for it;
    text that matches neither alternative is left in place by ``re.sub``.

    Args:
        x: The string to process.

    Returns:
        The string rebuilt from the callback's results.
    """
    # Pattern to match a bracketed token or a stretch of bracket-free text
    segment_re = re.compile(r'\[.*?\]|[^[\]]+')
    return segment_re.sub(replace_outside_brackets, x)
def anotate(st, debug=False):
    """Run the annotation model on one sentence and return its decoded output.

    Args:
        st: The raw input sentence.
        debug: When true, print the encoded phrase that is handed to the
            tokenizer.

    Returns:
        The decoded model output with ``[w{i}q]`` placeholders mapped back to
        the characters in ``special_chars`` and with its first and last five
        characters removed.
    """
    # Lower-case and encode first, then delete punctuation. The apostrophe is
    # already a "[w..q]" placeholder by then, so it is not touched here.
    encoded = normalize_tupi(st.lower())
    encoded = encoded.translate(str.maketrans('', '', ',.?!-')).strip()
    inp_sent = navarroize_tupi(encoded).replace(' ', '[SPACE]')
    if debug:
        print("Input Phrase:\t\t", inp_sent)

    # Tokenize, generate (at most 100 tokens), and decode the first sequence.
    input_ids = tokenizer.encode(inp_sent, return_tensors="pt")
    output_ids = model.generate(input_ids, max_length=100)
    decoded = tokenizer.decode(output_ids[0])

    # Tokenizer artefact cleanup. Applied strictly in this order: ' ##' has to
    # be rewritten before the bare '##' is stripped.
    cleanup_steps = (
        (' ##', ' '),
        ('##', ''),
        (' \' ', "'"),
        ("Ġ", " "),
        ('[PAD]', ''),
        ('[SPACE]', ' '),
    )
    tl = decoded
    for old, new in cleanup_steps:
        tl = tl.replace(old, new)

    # Undo the placeholder encoding performed by normalize_tupi().
    for idx, original_char in enumerate(special_chars):
        tl = tl.replace(f"[w{idx}q]", original_char)

    # NOTE(review): the slice presumably drops a 5-character start marker and
    # a 5-character end marker emitted by the tokenizer -- confirm; output
    # without those markers would lose real text here.
    return navarroize_tupi(tl)[5:-5]
def generate_text(input_text):
    """Return the annotation for *input_text*; used as the Gradio callback.

    Delegates to ``anotate`` with its default (non-debug) settings.
    """
    annotation = anotate(input_text)
    return annotation
# Build the Gradio UI: one input textbox whose content is passed to
# generate_text(), and one output textbox that shows the returned string.
iface = gr.Interface(
    title="Tupi Verb Annotation",
    description="Enter text to generate output using the T5 Small model.",
    fn=generate_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Generated Text"),
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()