File size: 4,412 Bytes
f506c0e
 
 
 
 
 
724dd06
69cdb9a
f506c0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bbe402
 
f506c0e
 
 
 
 
 
 
 
2dff11f
f506c0e
 
375ed8c
f506c0e
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import sys
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

# Remote text-generation endpoint — presumably an ngrok tunnel to a
# self-hosted inference server; the URL is ephemeral (TODO: confirm/configure).
client = InferenceClient(model="https://083b-141-3-25-29.ngrok-free.app")
# Llama-65B tokenizer, used only to count tokens for input validation.
# Left padding and no token_type_ids match decoder-only model conventions.
tokenizer = AutoTokenizer.from_pretrained("enoch/llama-65b-hf", padding_side='left',return_token_type_ids=False)


def inference(message):
    """Query the remote generation endpoint and restore newline breaks.

    The model emits " <SS> " as a sentence separator; map each occurrence
    back to a newline before returning the text.
    """
    raw = client.text_generation(message, max_new_tokens=512)
    return "\n".join(raw.split("<SS> "))

def tokenize_len(sents):
    """Return the token count of the given sentences joined by newlines."""
    joined = "\n".join(sents)
    return len(tokenizer.encode(joined))

def validate_inputs(source, sent_hyp, pe_hyp):
    """Check that the three text-box inputs are mutually consistent.

    Returns True only when:
      * source and sentence-level hypothesis have the same number of lines,
      * the partial post-edit has no more lines than the source,
      * each box tokenizes to at most ``max_len`` (512) tokens.
    """
    src_sents = source.split("\n")
    sent_hyp_sents = sent_hyp.split("\n")
    pe_hyp_sents = pe_hyp.split("\n")
    max_len = 512

    # str.split always yields at least one element, so this can only fire
    # for a pathological caller; kept as a cheap safety guard.
    if len(src_sents) < 1:
        return False

    if len(sent_hyp_sents) != len(src_sents) or len(pe_hyp_sents) > len(src_sents):
        return False

    # Bug fix: was `return Flase` — a NameError that crashed the app
    # whenever any box exceeded the token limit.
    if tokenize_len(src_sents) > max_len or tokenize_len(sent_hyp_sents) > max_len or tokenize_len(pe_hyp_sents) > max_len:
        return False

    return True


def translate(source, sent_hyp, pe_hyp):
    """Build the post-editing prompt, call the model, and return the result.

    The returned text is the user's partial post-edit (restored to one
    sentence per line) followed by the model's continuation.

    Raises:
        gr.Error: when the inputs fail :func:`validate_inputs`.
    """
    if not validate_inputs(source, sent_hyp, pe_hyp):
        # Fix: the message previously said "less than 256" while
        # validate_inputs actually enforces a 512-token limit.
        raise gr.Error("Please make sure that you meet the following conditions: Source and sentence level hypothesis lines are equal and the initial post-edited translation lines are less than source, The number of tokens in each box is less than 512.")

    prefix = "English:\n"
    suffix = "\nGerman Translation:\n"
    pe_suffix = "\nPost-Edited Translation:\n"

    # The model expects " <SS> " as the in-prompt sentence separator.
    source = " <SS> ".join(source.split("\n"))
    sent_hyp = " <SS> ".join(sent_hyp.split("\n"))
    pe_hyp = " <SS> ".join(pe_hyp.split("\n"))

    prompt = prefix + source + "\n" + suffix + sent_hyp + "\n" +  pe_suffix + "\n" + pe_hyp

    # Prepend the user's partial post-edit (newlines restored) so the output
    # box shows the full translation, not just the model's continuation.
    pe_hyp = "\n".join(pe_hyp.split(" <SS> "))
    return pe_hyp + inference(prompt)


# Demo examples: each is [source, sentence-level hypothesis, partial post-edit].
# "_false" variants leave the post-edit box empty; "_correct" variants seed it
# with a manual correction for the model to continue from.

# Pronoun agreement: "Es" vs the corrected "Kranz" (masculine noun).
example_pronoun_false = ["- Yeah, but Rico's garland beat them all.\nIt was big.", "- Ja, aber Ricos Kränz war der schönste.\nEs war groß.",""]
example_pronoun_correct = ["- Yeah, but Rico's garland beat them all.\nIt was big.", "- Ja, aber Ricos Kränz war der schönste.\nEs war groß.","- Ja, aber Ricos Kranz"]


# Terminology consistency: keeping "Large Language Models" as a term.
example_term_false = ["Lets talk about large language models.\nThese days, large language models can be used everywhere.", "Lassen Sie uns über große Sprachmodelle sprechen.\nHeutzutage können große Sprachmodelle überall eingesetzt werden.",""]
example_term_correct = ["Lets talk about large language models.\nThese days, large language models can be used everywhere.", "Lassen Sie uns über große Sprachmodelle sprechen.\nHeutzutage können große Sprachmodelle überall eingesetzt werden.","Lassen Sie uns über Large Language Models sprechen."]

# Formality consistency: formal "Sie" vs informal "du" across sentences.
example_formal_false = ["You should be excited\nbut, calm down!\nyou must be careful","Sie sollten aufgeregt sein\naber beruhigen Sie sich!\ndu musst vorsichtig sein",""]
example_formal_correct = ["You should be excited\nbut, calm down!\nyou must be careful","Sie sollten aufgeregt sein\naber beruhigen Sie sich!\ndu musst vorsichtig sein","Du solltest aufgeregt sein"]

# Page description is loaded from a sibling markdown file at startup.
with open('description.md',mode='r',encoding='utf-8') as f:
    description = f.readlines()
    description = "\n".join(description)

# Background styling for the Gradio page.
css_code = ".gradio-container {background: url('file=background.png');}"
iface = gr.Interface(
        fn=translate,
        # Fix: placeholder typo "Tranlations" -> "Translations".
        inputs=[gr.Textbox(lines=2, placeholder="Enter your English Sentences that you want to translate", label="English Sentences"), gr.Textbox(lines=2, placeholder="Enter your sentence-level German Translations that you want to post-edit using Llama2",label="Sentence-Level German Translations"),gr.Textbox(lines=2, placeholder="Enter your partially corrected translation and the model will continue from there - Can be left empty or generate the output once and correct it later :)", label="Manual Post-Edited German Translation")],
        outputs=gr.Textbox(lines=2,placeholder="Enter your inputs and click submit!",label="Automatic Post-Edited German Translation"),
        examples=[
            example_term_false,
            example_term_correct,
            example_formal_false,
            example_formal_correct,
            example_pronoun_false,
            example_pronoun_correct,
        ],
        title="Contextual Refinement of Translations: Integrating Manual Feedback",
        description=description,
        # Fix: css_code was defined but never wired in, so the background
        # image was never applied.
        css=css_code,
        )

iface.launch(share=True)