anzorq committed
Commit 78b7b89 · 1 Parent(s): e18d854

Update app.py

Files changed (1)
  1. app.py +66 -17
app.py CHANGED
@@ -1,31 +1,80 @@
 import gradio as gr
 
+# from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+
+# model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
+# src_lang="ru"
+# tgt_lang="zu"
+
+# # tokenizer = AutoTokenizer.from_pretrained(model_path, src_lang=src_lang)
+# tokenizer = AutoTokenizer.from_pretrained(model_path)
+# model = AutoModelForSeq2SeqLM.from_pretrained(model_path, use_safetensors=True)#, load_in_4bit=True, device_map="auto")
+# model.to_bettertransformer()
+
+# def translate(text, num_beams=4, num_return_sequences=4):
+#     inputs = tokenizer(text, return_tensors="pt")
+
+#     num_return_sequences = min(num_return_sequences, num_beams)
+
+#     translated_tokens = model.generate(
+#         **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=num_beams, num_return_sequences=num_return_sequences
+#     )
+
+#     translations = []
+#     for translation in tokenizer.batch_decode(translated_tokens, skip_special_tokens=True):
+#         translations.append(translation)
+
+#     # result = {"input":text, "translations":translations}
+#     return text, translations
+
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+from optimum.bettertransformer import BetterTransformer
+import intel_extension_for_pytorch as ipex
+from transformers.modeling_outputs import BaseModelOutput
+import torch
 
-model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
-src_lang="ru"
-tgt_lang="zu"
+model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
+src_lang = "ru"
+tgt_lang = "zu"
 
-# tokenizer = AutoTokenizer.from_pretrained(model_path, src_lang=src_lang)
 tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_path, use_safetensors=True)#, load_in_4bit=True, device_map="auto")
-model.to_bettertransformer()
+model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
 
-def translate(text, num_beams=4, num_return_sequences=4):
-    inputs = tokenizer(text, return_tensors="pt")
+# flash attention optimization
+model = BetterTransformer.transform(model, keep_original_model=False)
 
-    num_return_sequences = min(num_return_sequences, num_beams)
+# ipex optimization
+model.eval()
+model = ipex.optimize(model, dtype=torch.float, level="O1", conv_bn_folding=False, inplace=True)
 
-    translated_tokens = model.generate(
-        **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=num_beams, num_return_sequences=num_return_sequences
-    )
+# Get the encoder
+encoder = model.get_encoder()
 
-    translations = []
-    for translation in tokenizer.batch_decode(translated_tokens, skip_special_tokens=True):
-        translations.append(translation)
+# Prepare an example input for the encoder
+example_input_text = "Example text in Russian"
+inputs_example = tokenizer(example_input_text, return_tensors="pt")
 
-    # result = {"input":text, "translations":translations}
-    return text, translations
+# Trace just the encoder with strict=False
+scripted_encoder = torch.jit.trace(encoder, inputs_example['input_ids'], strict=False)
+
+def translate(text, num_beams=4, num_return_sequences=4):
+    inputs = tokenizer(text, return_tensors="pt")
+    num_return_sequences = min(num_return_sequences, num_beams)
+
+    # Use the scripted encoder for the first step of inference
+    encoder_output_dict = scripted_encoder(inputs['input_ids'])
+    encoder_outputs = BaseModelOutput(last_hidden_state=encoder_output_dict['last_hidden_state'])
+
+    # Use the original, untraced model for the second step, passing the encoder's outputs as inputs
+    translated_tokens = model.generate(
+        encoder_outputs=encoder_outputs,
+        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
+        num_beams=num_beams,
+        num_return_sequences=num_return_sequences
+    )
+
+    translations = [tokenizer.decode(translation, skip_special_tokens=True) for translation in translated_tokens]
+    return text, translations
 
 output = gr.Textbox()
 # with gr.Accordion("Advanced Options"):
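
A note on what the commit changes: the same M2M100 ru-kbd checkpoint is kept (tgt_lang stays "zu", presumably reusing one of M2M100's existing language codes for the Kabardian fine-tune, since kbd is not in the model's language inventory), but inference now goes through three optimizations: a BetterTransformer transform, an ipex graph optimization, and a TorchScript trace of the encoder alone, whose output is handed back to generate() through encoder_outputs. Below is a minimal, self-contained sketch of that traced-encoder pattern; the BetterTransformer and ipex steps are omitted to keep dependencies light, the checkpoint name and the generate() arguments come from the diff, and the explicit attention_mask is an addition the committed code does not make.

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers.modeling_outputs import BaseModelOutput

model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
tgt_lang = "zu"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
model.eval()

# Trace only the encoder: it runs once per input, while the decoder's
# autoregressive loop is data-dependent control flow that tracing cannot capture.
# strict=False lets the traced module return a dict instead of a tuple.
encoder = model.get_encoder()
example = tokenizer("Example text in Russian", return_tensors="pt")
traced_encoder = torch.jit.trace(encoder, example["input_ids"], strict=False)

def translate(text, num_beams=4, num_return_sequences=4):
    inputs = tokenizer(text, return_tensors="pt")
    num_return_sequences = min(num_return_sequences, num_beams)

    # Step 1: precompute encoder states with the traced encoder.
    with torch.no_grad():
        encoder_output_dict = traced_encoder(inputs["input_ids"])
    encoder_outputs = BaseModelOutput(last_hidden_state=encoder_output_dict["last_hidden_state"])

    # Step 2: run beam search on the eager model, reusing the precomputed states.
    translated_tokens = model.generate(
        encoder_outputs=encoder_outputs,
        attention_mask=inputs["attention_mask"],  # not in the commit; equivalent for unpadded single inputs
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )
    return [tokenizer.decode(t, skip_special_tokens=True) for t in translated_tokens]

print(translate("Добрый день"))  # should print num_return_sequences candidate translations

One caveat worth keeping in mind: the encoder is traced at one sequence length and then re-run on inputs of other lengths. torch.jit.trace does not guarantee that a trace generalizes across shapes, so it is worth validating the traced encoder against the eager one on a few inputs of different lengths before trusting the output.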