import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer
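
# The generation backend is a self-hosted model reached through an ngrok
# tunnel; the LLaMA tokenizer is loaded only to count tokens during input
# validation.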
client = InferenceClient(model="https://083b-141-3-25-29.ngrok-free.app")
tokenizer = AutoTokenizer.from_pretrained(
    "enoch/llama-65b-hf", padding_side='left', return_token_type_ids=False
)
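
# Query the remote endpoint and map the model's sentence-separator markers
# back to newlines for display.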
def inference(message):
    output = client.text_generation(message, max_new_tokens=512)
    return output.replace("<SS> ", "\n")
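
# Token count of a list of sentences. Note that sentences are joined with
# "\n" here while the prompt joins them with " <SS> ", so this is an
# approximation of the true prompt length.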
def tokenize_len(sents):
    return len(tokenizer.encode("\n".join(sents)))
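
# Input validation: non-empty source, one hypothesis line per source line,
# no more post-edit lines than source lines, and at most max_len tokens per box.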
def validate_inputs(source, sent_hyp, pe_hyp):
    src_sents = source.split("\n")
    sent_hyp_sents = sent_hyp.split("\n")
    pe_hyp_sents = pe_hyp.split("\n")
    max_len = 512
    # Reject an empty source box; split("\n") never returns an empty list,
    # so the string itself has to be checked.
    if not source.strip():
        return False
    if len(sent_hyp_sents) != len(src_sents) or len(pe_hyp_sents) > len(src_sents):
        return False
    if tokenize_len(src_sents) > max_len or tokenize_len(sent_hyp_sents) > max_len or tokenize_len(pe_hyp_sents) > max_len:
        return False
    return True
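
# Prompt layout sent to the endpoint: the English source, the sentence-level
# German translation, and the (possibly partial) manual post-edit, each as a
# single line with " <SS> " marking sentence boundaries. The model continues
# the post-edited translation from wherever the user stopped.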
def translate(source, sent_hyp, pe_hyp):
    if not validate_inputs(source, sent_hyp, pe_hyp):
        raise gr.Error(
            "Please make sure that the source and the sentence-level hypothesis "
            "have the same number of lines, that the post-edited translation has "
            "no more lines than the source, and that each box contains at most "
            "512 tokens."
        )
    prefix = "English:\n"
    suffix = "\nGerman Translation:\n"
    pe_suffix = "\nPost-Edited Translation:\n"
    source = " <SS> ".join(source.split("\n"))
    sent_hyp = " <SS> ".join(sent_hyp.split("\n"))
    pe_hyp = " <SS> ".join(pe_hyp.split("\n"))
    prompt = prefix + source + "\n" + suffix + sent_hyp + "\n" + pe_suffix + "\n" + pe_hyp
    # Prepend the user's manual post-edit so the output box shows the full
    # post-edited translation, not just the model's continuation.
    pe_hyp = "\n".join(pe_hyp.split(" <SS> "))
    return pe_hyp + inference(prompt)
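
# Demo examples: each triple is (English source, sentence-level German MT
# hypothesis, optional manual post-edit prefix). The hypotheses deliberately
# contain errors (document-level pronoun agreement, terminology, formality
# consistency); the *_correct variants seed the post-edit box with a partial
# fix for the model to continue.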
example_pronoun_false = ["- Yeah, but Rico's garland beat them all.\nIt was big.", "- Ja, aber Ricos Kränz war der schönste.\nEs war groß.",""]
example_pronoun_correct = ["- Yeah, but Rico's garland beat them all.\nIt was big.", "- Ja, aber Ricos Kränz war der schönste.\nEs war groß.","- Ja, aber Ricos Kranz"]
example_term_false = ["Let's talk about large language models.\nThese days, large language models can be used everywhere.", "Lassen Sie uns über große Sprachmodelle sprechen.\nHeutzutage können große Sprachmodelle überall eingesetzt werden.",""]
example_term_correct = ["Let's talk about large language models.\nThese days, large language models can be used everywhere.", "Lassen Sie uns über große Sprachmodelle sprechen.\nHeutzutage können große Sprachmodelle überall eingesetzt werden.","Lassen Sie uns über Large Language Models sprechen."]
example_formal_false = ["You should be excited\nbut, calm down!\nyou must be careful","Sie sollten aufgeregt sein\naber beruhigen Sie sich!\ndu musst vorsichtig sein",""]
example_formal_correct = ["You should be excited\nbut, calm down!\nyou must be careful","Sie sollten aufgeregt sein\naber beruhigen Sie sich!\ndu musst vorsichtig sein","Du solltest aufgeregt sein"]
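
# The long-form description shown above the demo lives in description.md.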
with open('description.md', mode='r', encoding='utf-8') as f:
    description = f.read()
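
# Custom CSS setting the Space's background image, passed to the Interface below.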
css_code = ".gradio-container {background: url('file=background.png');}"
iface = gr.Interface(
    fn=translate,
    css=css_code,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter the English sentences that you want to translate", label="English Sentences"),
        gr.Textbox(lines=2, placeholder="Enter the sentence-level German translations that you want to post-edit using Llama2", label="Sentence-Level German Translations"),
        gr.Textbox(lines=2, placeholder="Enter your partially corrected translation and the model will continue from there - can be left empty, or generate the output once and correct it later :)", label="Manual Post-Edited German Translation"),
    ],
    outputs=gr.Textbox(lines=2, placeholder="Enter your inputs and click submit!", label="Automatic Post-Edited German Translation"),
    examples=[
        example_term_false,
        example_term_correct,
        example_formal_false,
        example_formal_correct,
        example_pronoun_false,
        example_pronoun_correct,
    ],
    title="Contextual Refinement of Translations: Integrating Manual Feedback",
    description=description,
)
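
# share=True additionally serves the demo through a temporary public gradio.live URL.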
iface.launch(share=True)