Praveen0309 committed
Commit 112f63d · 1 Parent(s): 245d0d6

Add application1 file2

Files changed (2):
  1. app.py +105 -84
  2. requirements.txt +0 -0
app.py CHANGED
@@ -1,90 +1,111 @@
  import torch
- from transformers import AutoProcessor, LlavaForConditionalGeneration, BitsAndBytesConfig
- from peft import PeftModel
  from deep_translator import GoogleTranslator
  import gradio as gr
- import base64
-
-
-
- model_id = "HuggingFaceH4/vsft-llava-1.5-7b-hf-trl"
- quantization_config = BitsAndBytesConfig(load_in_4bit=True)
- base_model = LlavaForConditionalGeneration.from_pretrained(model_id, quantization_config=quantization_config, torch_dtype=torch.float16)
-
- # Load the PEFT LoRA adapter
- peft_lora_adapter_path = "Praveen0309/llava-1.5-7b-hf-ft-mix-vsft-3"
- peft_lora_adapter = PeftModel.from_pretrained(base_model, peft_lora_adapter_path, adapter_name="lora_adapter")
- base_model.load_adapter(peft_lora_adapter_path, adapter_name="lora_adapter")
-
- processor = AutoProcessor.from_pretrained("HuggingFaceH4/vsft-llava-1.5-7b-hf-trl")
-
- # Function to translate text from Bengali to English
- def deep_translator_bn_en(input_sentence):
-     english_translation = GoogleTranslator(source="bn", target="en").translate(input_sentence)
-     return english_translation
-
- # Function to translate text from English to Bengali
- def deep_translator_en_bn(input_sentence):
-     bengali_translation = GoogleTranslator(source="en", target="bn").translate(input_sentence)
-     return bengali_translation
-
- def inference(image, image_prompt):
-     prompt = f"USER: <image>\n{image_prompt} ASSISTANT:"
-
-     # Assuming the model can handle PIL images
-     image = image.convert("RGB")  # Ensure the image is in RGB mode
-
-     inputs = processor(text=prompt, images=image, return_tensors="pt")
-     generate_ids = base_model.generate(**inputs, max_new_tokens=15)
-     decoded_response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
-     return decoded_response
-
- def image_to_base64(image_path):
-     with open(image_path, 'rb') as img:
-         encoded_string = base64.b64encode(img.read())
-     return encoded_string.decode('utf-8')
-
- # Function that takes user inputs and displays them on the chat UI
- def query_message(history, txt, img):
-     image_prompt = deep_translator_bn_en(txt)
-     history += [(image_prompt, None)]
-     base64 = image_to_base64(img)
-     data_url = f"data:image/jpeg;base64,{base64}"
-     history += [(f"{image_prompt} ![]({data_url})", None)]
-     return history
-
- # Function that takes user inputs, generates a response, and displays it on the chat UI
- def llm_response(history, text, img):
-     image_prompt = deep_translator_bn_en(text)
-     response = inference(img, image_prompt)
-     assistant_index = response.find("ASSISTANT:")
-     extracted_string = response[assistant_index + len("ASSISTANT:"):].strip()
-     output = deep_translator_en_bn(extracted_string)
-     history += [(text, output)]
-     return history

  # Interface Code
- with gr.Blocks() as app:
-     with gr.Row():
-         image_box = gr.Image(type="pil")
-
-         chatbot = gr.Chatbot(
-             scale=2,
-             height=500
-         )
-     text_box = gr.Textbox(
-         placeholder="Enter text and press enter, or upload an image",
-         container=False,
-     )
-
-     btn = gr.Button("Submit")
-     clicked = btn.click(query_message,
-                         [chatbot, text_box, image_box],
-                         chatbot
-                         ).then(llm_response,
-                                [chatbot, text_box, image_box],
-                                chatbot
-                                )
- app.queue()
- app.launch(debug=True)
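For reference, the removed version above ran one blocking round-trip per message: translate the Bengali prompt to English, wrap it in the "USER: <image>\n... ASSISTANT:" template, generate at most 15 new tokens, cut the decoded output after the "ASSISTANT:" marker, and translate the reply back to Bengali. (Note that query_message rebinds the name base64, shadowing the imported module; the sketch below uses distinct names.) A minimal sketch of that text-side plumbing, with a hypothetical fake_model_output standing in for the LLaVA generation and network access assumed for Google Translate:

    from deep_translator import GoogleTranslator

    def bn_to_en(text):
        # Bengali -> English, the same call deep_translator_bn_en makes
        return GoogleTranslator(source="bn", target="en").translate(text)

    def en_to_bn(text):
        # English -> Bengali, mirroring deep_translator_en_bn
        return GoogleTranslator(source="en", target="bn").translate(text)

    def extract_reply(decoded):
        # llm_response keeps only what follows the "ASSISTANT:" marker
        marker = "ASSISTANT:"
        return decoded[decoded.find(marker) + len(marker):].strip()

    # Hypothetical stand-in for the decoded model output
    fake_model_output = "USER: <image>\nWhat is in the picture? ASSISTANT: A brown dog lying on a sofa."

    english_prompt = bn_to_en("ছবিতে কী আছে?")                # "What is in the picture?"
    prompt = f"USER: <image>\n{english_prompt} ASSISTANT:"    # template used by inference()
    print(en_to_bn(extract_reply(fake_model_output)))         # Bengali reply shown to the user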
 
  import torch
+ from transformers import AutoProcessor, LlavaForConditionalGeneration, BitsAndBytesConfig
+ from peft import PeftModel
+ from PIL import Image
  from deep_translator import GoogleTranslator
  import gradio as gr
+ from transformers import TextIteratorStreamer
+ from threading import Thread
+ import time
+
+
+ # model_id = "HuggingFaceH4/vsft-llava-1.5-7b-hf-trl"
+ # quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+ # base_model = LlavaForConditionalGeneration.from_pretrained(model_id, quantization_config=quantization_config, torch_dtype=torch.float16)
+
+ # # Load the PEFT LoRA adapter
+ # peft_lora_adapter_path = "Praveen0309/llava-1.5-7b-hf-ft-mix-vsft-3"
+ # peft_lora_adapter = PeftModel.from_pretrained(base_model, peft_lora_adapter_path, adapter_name="lora_adapter")
+ # base_model.load_adapter(peft_lora_adapter_path, adapter_name="lora_adapter")
+
+ # processor = AutoProcessor.from_pretrained("HuggingFaceH4/vsft-llava-1.5-7b-hf-trl")
+
+ # # Function to translate text from Bengali to English
+ # def deep_translator_bn_en(input_sentence):
+ #     english_translation = GoogleTranslator(source="bn", target="en").translate(input_sentence)
+ #     return english_translation
+
+ # # Function to translate text from English to Bengali
+ # def deep_translator_en_bn(input_sentence):
+ #     bengali_translation = GoogleTranslator(source="en", target="bn").translate(input_sentence)
+ #     return bengali_translation
+
+ # def bot_streaming(message, history):
+ #     print(message)
+
+ #     if message["files"]:
+ #         # message["files"][-1] is a dict or just a string
+ #         if type(message["files"][-1]) == dict:
+ #             image = message["files"][-1]["path"]
+ #         else:
+ #             image = message["files"][-1]
+ #     else:
+ #         # If there is no image uploaded for this turn, look for images in the past turns,
+ #         # kept inside tuples; take the last one
+ #         for hist in history:
+ #             if type(hist[0]) == tuple:
+ #                 image = hist[0][0]
+ #                 break  # Exit the loop after finding the first image
+
+ #     try:
+ #         if image is None:
+ #             # Handle the case where image is None
+ #             raise Exception("You need to upload an image for LLaVA to work.")
+ #     except NameError:
+ #         # Handle the case where 'image' is not defined at all
+ #         raise Exception("You need to upload an image for LLaVA to work.")
+
+ #     # Translate Bengali input to English before processing
+ #     english_prompt = deep_translator_bn_en(message['text'])
+
+ #     prompt = f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{english_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+ #     # print(f"prompt: {prompt}")
+
+ #     image = Image.open(image)
+ #     inputs = processor(prompt, image, return_tensors='pt').to(0, torch.float16)
+
+ #     streamer = TextIteratorStreamer(processor, **{"skip_special_tokens": False, "skip_prompt": True})
+ #     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=512, do_sample=False)
+
+ #     thread = Thread(target=base_model.generate, kwargs=generation_kwargs)
+ #     thread.start()
+
+ #     text_prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{english_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+ #     # print(f"text_prompt: {text_prompt}")
+
+ #     buffer = ""
+ #     time.sleep(0.5)
+ #     for new_text in streamer:
+ #         # Find <|eot_id|> and remove it from new_text
+ #         if "<|eot_id|>" in new_text:
+ #             new_text = new_text.split("<|eot_id|>")[0]
+ #         buffer += new_text
+
+ #         # generated_text_without_prompt = buffer[len(text_prompt):]
+ #         generated_text_without_prompt = buffer
+
+ #         # Translate the English response from LLaVA back to Bengali
+ #         bengali_response = deep_translator_en_bn(generated_text_without_prompt)
+
+ #         # print(f"new_text: {bengali_response}")
+ #         yield bengali_response
+
+ #     thread.join()
+

  # Interface Code
+ # chatbot = gr.Chatbot(scale=1)
+ # chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
+ # with gr.Blocks(fill_height=True) as app:
+ #     gr.ChatInterface(
+ #         fn=bot_streaming,
+ #         description="Try the Cleaveland Chatbot. Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
+ #         stop_btn="Stop Generation",
+ #         multimodal=True,
+ #         textbox=chat_input,
+ #         chatbot=chatbot,
+ #     )
+
+ # app.queue(api_open=False)
+ # app.launch(show_api=False, share=True)
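The replacement (left fully commented out in this commit) swaps the blocking generate call for token-by-token streaming: generation runs on a background thread, a TextIteratorStreamer yields partial English text, and each partial buffer is re-translated to Bengali before being yielded to the Gradio ChatInterface. A condensed sketch of that pattern, assuming base_model and processor have been loaded as in the commented-out lines above (the function name stream_bengali_reply is ours, not the commit's):

    from threading import Thread

    import torch
    from PIL import Image
    from deep_translator import GoogleTranslator
    from transformers import TextIteratorStreamer

    def stream_bengali_reply(image_path, bengali_text, base_model, processor):
        # Bengali -> English before prompting LLaVA
        english_prompt = GoogleTranslator(source="bn", target="en").translate(bengali_text)
        prompt = (f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{english_prompt}"
                  f"<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n")

        image = Image.open(image_path)
        inputs = processor(prompt, image, return_tensors="pt").to(0, torch.float16)

        # skip_prompt=True drops the echoed prompt; the end-of-turn token is trimmed below
        streamer = TextIteratorStreamer(processor, skip_special_tokens=False, skip_prompt=True)
        thread = Thread(target=base_model.generate,
                        kwargs=dict(inputs, streamer=streamer, max_new_tokens=512, do_sample=False))
        thread.start()

        buffer = ""
        for new_text in streamer:
            buffer += new_text.split("<|eot_id|>")[0]  # stop accumulating at <|eot_id|>
            # English -> Bengali on every chunk, so the UI streams Bengali text
            yield GoogleTranslator(source="en", target="bn").translate(buffer)
        thread.join()

Re-translating the whole buffer on each chunk keeps the streamed Bengali grammatical at the cost of one translation call per chunk, the same trade-off the commented-out bot_streaming makes.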
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ