anzorq committed
Commit 577fc77 · 1 Parent(s): 34e3213

Update app.py

Files changed (1):
  1. app.py +76 -49
app.py CHANGED
@@ -1,5 +1,6 @@
 import gradio as gr
 
+############### VANILLA INFERENCE ###############
 # from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
 # model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
@@ -27,54 +28,80 @@ import gradio as gr
 # # result = {"input":text, "translations":translations}
 # return text, translations
 
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-from optimum.bettertransformer import BetterTransformer
-import intel_extension_for_pytorch as ipex
-from transformers.modeling_outputs import BaseModelOutput
-import torch
-
-model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
-src_lang = "ru"
-tgt_lang = "zu"
-
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
-
-# flash attention optimization
-model = BetterTransformer.transform(model, keep_original_model=False)
-
-# ipex optimization
-model.eval()
-model = ipex.optimize(model, dtype=torch.float, level="O1", conv_bn_folding=False, inplace=True)
-
-# Get the encoder
-encoder = model.get_encoder()
-
-# Prepare an example input for the encoder
-example_input_text = "Example text in Russian"
-inputs_example = tokenizer(example_input_text, return_tensors="pt")
-
-# Trace just the encoder with strict=False
-scripted_encoder = torch.jit.trace(encoder, inputs_example['input_ids'], strict=False)
+############### IPEX OPTIMIZED INFERENCE ###############
+# from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+# from optimum.bettertransformer import BetterTransformer
+# import intel_extension_for_pytorch as ipex
+# from transformers.modeling_outputs import BaseModelOutput
+# import torch
+
+# model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
+# src_lang = "ru"
+# tgt_lang = "zu"
+
+# tokenizer = AutoTokenizer.from_pretrained(model_path)
+# model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
+
+# # flash attention optimization
+# model = BetterTransformer.transform(model, keep_original_model=False)
+
+# # ipex optimization
+# model.eval()
+# model = ipex.optimize(model, dtype=torch.float, level="O1", conv_bn_folding=False, inplace=True)
+
+# # Get the encoder
+# encoder = model.get_encoder()
+
+# # Prepare an example input for the encoder
+# example_input_text = "Example text in Russian"
+# inputs_example = tokenizer(example_input_text, return_tensors="pt")
+
+# # Trace just the encoder with strict=False
+# scripted_encoder = torch.jit.trace(encoder, inputs_example['input_ids'], strict=False)
+
+# def translate(text, num_beams=4, num_return_sequences=4):
+#     inputs = tokenizer(text, return_tensors="pt")
+#     num_return_sequences = min(num_return_sequences, num_beams)
+
+#     # Use the scripted encoder for the first step of inference
+#     encoder_output_dict = scripted_encoder(inputs['input_ids'])
+#     encoder_outputs = BaseModelOutput(last_hidden_state=encoder_output_dict['last_hidden_state'])
+
+#     # Use the original, untraced model for the second step, passing the encoder's outputs as inputs
+#     translated_tokens = model.generate(
+#         encoder_outputs=encoder_outputs,
+#         forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
+#         num_beams=num_beams,
+#         num_return_sequences=num_return_sequences
+#     )
+
+#     translations = [tokenizer.decode(translation, skip_special_tokens=True) for translation in translated_tokens]
+#     return text, translations
+
+############### ONNX MODEL INFERENCE ###############
+from transformers import AutoTokenizer, pipeline
+from optimum.onnxruntime import ORTModelForSeq2SeqLM
+
+model_id = "anzorq/m2m100_418M_ft_ru-kbd_44K"
+
+model = ORTModelForSeq2SeqLM.from_pretrained(model_id, subfolder="onnx", file_name="encoder_model_optimized.onnx")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 def translate(text, num_beams=4, num_return_sequences=4):
-    inputs = tokenizer(text, return_tensors="pt")
-    num_return_sequences = min(num_return_sequences, num_beams)
-
-    # Use the scripted encoder for the first step of inference
-    encoder_output_dict = scripted_encoder(inputs['input_ids'])
-    encoder_outputs = BaseModelOutput(last_hidden_state=encoder_output_dict['last_hidden_state'])
-
-    # Use the original, untraced model for the second step, passing the encoder's outputs as inputs
-    translated_tokens = model.generate(
-        encoder_outputs=encoder_outputs,
-        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
-        num_beams=num_beams,
-        num_return_sequences=num_return_sequences
-    )
-
-    translations = [tokenizer.decode(translation, skip_special_tokens=True) for translation in translated_tokens]
-    return text, translations
+    inputs = tokenizer(text, return_tensors="pt")
+    num_return_sequences = min(num_return_sequences, num_beams)
+
+    translated_tokens = model.generate(
+        **inputs, forced_bos_token_id=tokenizer.lang_code_to_id["zu"], num_beams=num_beams, num_return_sequences=num_return_sequences
+    )
+
+    translations = []
+    for translation in tokenizer.batch_decode(translated_tokens, skip_special_tokens=True):
+        translations.append(translation)
+
+    return text, translations
 
 output = gr.Textbox()
 # with gr.Accordion("Advanced Options"):
@@ -85,19 +112,19 @@ num_return_sequences = gr.inputs.Slider(2, 10, step=1, label="Number of returned
 title = "Russian-Circassian translator demo"
 article = "<p style='text-align: center'>Want to help? Join the <a href='https://discord.gg/cXwv495r' target='_blank'>Discord server</a></p>"
 
-examples = [
-    ["Мы идем домой"],
-    ["Сегодня хорошая погода"],
-    ["Дети играют во дворе"],
-    ["We live in a big house"],
-    ["Tu es une bonne personne."],
-    ["أين تعيش؟"],
-    ["Bir şeyler yapmak istiyorum."],
-    ["– Если я его отпущу, то ты вовек не сможешь его поймать, – заявил Сосруко."],
-    ["Как только старик ушел, Сатаней пошла к Саусырыко."],
-    ["我永远不会放弃你。"],
-    ["우리는 소치에 살고 있습니다."],
-]
+# examples = [
+#     ["Мы идем домой"],
+#     ["Сегодня хорошая погода"],
+#     ["Дети играют во дворе"],
+#     ["We live in a big house"],
+#     ["Tu es une bonne personne."],
+#     ["أين تعيش؟"],
+#     ["Bir şeyler yapmak istiyorum."],
+#     ["– Если я его отпущу, то ты вовек не сможешь его поймать, – заявил Сосруко."],
+#     ["Как только старик ушел, Сатаней пошла к Саусырыко."],
+#     ["我永远不会放弃你。"],
+#     ["우리는 소치에 살고 있습니다."],
+# ]
 
 gr.Interface(
     fn=translate,
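
The new inference path loads pre-exported ONNX weights from an "onnx" subfolder of the model repo, with the encoder graph already optimized (encoder_model_optimized.onnx). The export step itself is not part of this commit; below is a minimal sketch of one plausible way such files could have been produced with optimum's standard export/optimization workflow (the save directory name is illustrative, not taken from the commit):

from optimum.onnxruntime import ORTModelForSeq2SeqLM, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig

model_id = "anzorq/m2m100_418M_ft_ru-kbd_44K"

# Export the PyTorch checkpoint to ONNX; a seq2seq export produces separate
# encoder, decoder, and decoder-with-past graphs.
model = ORTModelForSeq2SeqLM.from_pretrained(model_id, export=True)

# Apply onnxruntime graph optimizations. ORTOptimizer writes each graph back
# with an "_optimized" suffix, e.g. encoder_model_optimized.onnx.
optimizer = ORTOptimizer.from_pretrained(model)
optimizer.optimize(save_dir="onnx", optimization_config=OptimizationConfig(optimization_level=1))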
 
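A note on the target token: M2M100's tokenizer has no Kabardian (kbd) language code, so this fine-tuned checkpoint appears to repurpose the "zu" (Zulu) code as the target-language tag; forced_bos_token_id=tokenizer.lang_code_to_id["zu"] makes the decoder emit that tag first, steering generation toward Kabardian. The new translate function can also be exercised outside Gradio; a hypothetical smoke test:

# Hypothetical smoke test for the ONNX inference path (not part of the commit).
text, translations = translate("Мы идем домой", num_beams=4, num_return_sequences=2)
print(text)            # the original input, echoed back
for t in translations:
    print(t)           # candidate Kabardian translations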