Spaces:
Runtime error
Runtime error
Commit
·
32d94a8
1
Parent(s):
112f63d
Add application1 file3
Browse files
app.py
CHANGED
@@ -9,103 +9,103 @@ from threading import Thread
|
|
9 |
import time
|
10 |
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
#
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
#
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
#
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
#
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
#
|
44 |
-
#
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
#
|
53 |
-
|
54 |
-
|
55 |
-
#
|
56 |
-
|
57 |
-
|
58 |
-
#
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
#
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
#
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
#
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
#
|
85 |
-
|
86 |
-
|
87 |
-
#
|
88 |
-
|
89 |
-
|
90 |
-
#
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
|
95 |
|
96 |
# Interface Code
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
|
|
|
9 |
import time
|
10 |
|
11 |
|
12 |
+
# --- Model setup (module-level, runs once at startup) ---
# Base model: LLaVA-1.5-7B (vision SFT variant), quantized to 4-bit via
# bitsandbytes to reduce GPU memory footprint; weights held in float16.
model_id = "HuggingFaceH4/vsft-llava-1.5-7b-hf-trl"
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
base_model = LlavaForConditionalGeneration.from_pretrained(model_id, quantization_config=quantization_config, torch_dtype=torch.float16)

# Load the PEFT Lora adapter
peft_lora_adapter_path = "Praveen0309/llava-1.5-7b-hf-ft-mix-vsft-3"
peft_lora_adapter = PeftModel.from_pretrained(base_model, peft_lora_adapter_path, adapter_name="lora_adapter")
# NOTE(review): this load_adapter call appears redundant with the
# PeftModel.from_pretrained above (same path, same adapter name) —
# confirm whether both are actually required.
base_model.load_adapter(peft_lora_adapter_path, adapter_name="lora_adapter")

# Processor handles both tokenization and image preprocessing for LLaVA.
processor = AutoProcessor.from_pretrained("HuggingFaceH4/vsft-llava-1.5-7b-hf-trl")
|
22 |
+
|
23 |
+
# Function to translate text from Bengali to English
def deep_translator_bn_en(input_sentence):
    """Translate *input_sentence* from Bengali to English.

    Args:
        input_sentence: Bengali text; may be None or empty.

    Returns:
        The English translation, or "" for empty/None input
        (GoogleTranslator raises on empty text, so guard first).
    """
    if not input_sentence:
        return ""
    english_translation = GoogleTranslator(source="bn", target="en").translate(input_sentence)
    return english_translation
|
27 |
+
|
28 |
+
# Function to translate text from English to Bengali
def deep_translator_en_bn(input_sentence):
    """Translate *input_sentence* from English to Bengali.

    Args:
        input_sentence: English text; may be None or empty.

    Returns:
        The Bengali translation, or "" for empty/None input
        (GoogleTranslator raises on empty text, so guard first).
    """
    if not input_sentence:
        return ""
    bengali_translation = GoogleTranslator(source="en", target="bn").translate(input_sentence)
    return bengali_translation
|
32 |
+
|
33 |
+
def bot_streaming(message, history):
    """Gradio streaming handler: answer a Bengali question about an image.

    Translates the user's Bengali text to English, runs LLaVA generation on
    a background thread, and yields the growing response translated back to
    Bengali after each streamed token chunk.

    Args:
        message: Gradio multimodal dict with "text" and "files" keys.
        history: prior chat turns; image-bearing turns store a tuple in hist[0].

    Yields:
        Progressively longer Bengali response strings.

    Raises:
        Exception: if no image is present in this turn or any earlier turn.
    """
    print(message)

    # Resolve the image path: prefer this turn's upload, else scan history.
    # Initialize to None so "no image anywhere" is an explicit check rather
    # than relying on the NameError from an unassigned local.
    image = None
    if message["files"]:
        # message["files"][-1] is a dict or just a string path
        last_file = message["files"][-1]
        if isinstance(last_file, dict):
            image = last_file["path"]
        else:
            image = last_file
    else:
        # No image uploaded this turn: take the first image found in past
        # turns (kept inside tuples in hist[0]).
        for hist in history:
            if isinstance(hist[0], tuple):
                image = hist[0][0]
                break  # stop after the first image found

    if image is None:
        raise Exception("You need to upload an image for LLaVA to work.")

    # Translate Bengali input to English before processing
    english_prompt = deep_translator_bn_en(message['text'])

    prompt = f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{english_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"

    image = Image.open(image)
    inputs = processor(prompt, image, return_tensors='pt').to(0, torch.float16)

    # Stream tokens from generate() running on a worker thread so we can
    # yield partial results while generation is still in progress.
    streamer = TextIteratorStreamer(processor, skip_special_tokens=False, skip_prompt=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=512, do_sample=False)

    thread = Thread(target=base_model.generate, kwargs=generation_kwargs)
    thread.start()

    buffer = ""
    time.sleep(0.5)  # small head start so the streamer has tokens to emit
    for new_text in streamer:
        # Strip the end-of-turn marker if it appears in this chunk.
        if "<|eot_id|>" in new_text:
            new_text = new_text.split("<|eot_id|>")[0]
        buffer += new_text

        # Translate English response from LLaVA back to Bengali
        bengali_response = deep_translator_en_bn(buffer)
        yield bengali_response

    thread.join()
|
94 |
|
95 |
|
96 |
# Interface Code
# Gradio UI: a multimodal chat (text + image upload) wired to bot_streaming.
chatbot=gr.Chatbot(scale=1)
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
with gr.Blocks(fill_height=True, ) as app:
    gr.ChatInterface(
    fn=bot_streaming,
    description="Try Cleaveland Chatbot. Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
    stop_btn="Stop Generation",
    multimodal=True,
    textbox=chat_input,
    chatbot=chatbot,
    )

# Queue requests (API access disabled) and launch with a public share link.
app.queue(api_open=False)
app.launch(show_api=False, share=True)
|
111 |
|