Spaces: Update app.py
Space status: Build error

app.py CHANGED
@@ -4,28 +4,31 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from PIL import Image
 import warnings
 
-#
+# Suppress warnings
 warnings.filterwarnings('ignore')
 
-#
+# Ensure CUDA device is used
 torch.set_default_device('cuda')
 
 # Load the model and tokenizer
 model_name = 'qnguyen3/nanoLLaVA-1.5'
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16,
-    device_map='auto',
-    trust_remote_code=True
-)
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    trust_remote_code=True
-)
-
-
+try:
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,
+        device_map='auto',
+        trust_remote_code=True
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_name,
+        trust_remote_code=True
+    )
+except ImportError as e:
+    print("Error: Missing required dependencies. Make sure flash_attn is installed.")
+    raise e
+
+# Function to describe the uploaded image
 def describe_image(image, prompt="Describe this image in detail"):
-    # Prepare input prompt
     messages = [{"role": "user", "content": f'<image>\n{prompt}'}]
     text = tokenizer.apply_chat_template(
         messages,
@@ -33,14 +36,14 @@ def describe_image(image, prompt="Describe this image in detail"):
         add_generation_prompt=True
     )
 
-    # Tokenize
+    # Tokenize the text
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
 
     # Process the image
     image_tensor = model.process_images([image], model.config).to(dtype=model.dtype)
 
-    # Generate response
+    # Generate a response
     output_ids = model.generate(
         input_ids,
         images=image_tensor,
@@ -48,15 +51,15 @@ def describe_image(image, prompt="Describe this image in detail"):
         use_cache=True
     )[0]
 
-    # Decode the
+    # Decode and return the response
     description = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
     return description
 
-#
+# Set up the Gradio interface
 gr.Interface(
     fn=describe_image,
     inputs=[gr.inputs.Image(type="pil"), gr.inputs.Textbox(default="Describe this image in detail")],
     outputs="text",
     title="Image Description Model",
-    description="Upload an image and
+    description="Upload an image and receive a detailed description."
 ).launch()
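Note on the change itself: the new try/except implies that loading 'qnguyen3/nanoLLaVA-1.5' with trust_remote_code=True pulls in remote code that imports flash_attn. Catching the ImportError only reports the problem; the dependency still has to be installed. A minimal sketch, assuming the remote code imports flash_attn unconditionally, that fails fast before the model download even starts:

import importlib.util

# Fail fast if flash_attn is missing (assumption: the model's remote code
# imports it unconditionally, as the except block in the commit implies).
if importlib.util.find_spec("flash_attn") is None:
    raise ImportError(
        "flash_attn is not installed. Add 'flash-attn' to requirements.txt; "
        "it needs torch and a CUDA toolchain available when pip builds it."
    )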
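The -200 spliced into input_ids is the image-token placeholder index used by LLaVA-style models (IMAGE_TOKEN_INDEX in the LLaVA codebase); at generation time the model substitutes the projected image features at that position. A toy illustration of what the splice in describe_image produces, with invented token ids:

# Toy example: the prompt is split on '<image>' and -200 marks where the
# image features go. The token ids below are made up for illustration.
chunks = [[1, 319, 13], [20355, 445]]    # ids for the text before/after <image>
input_ids = chunks[0] + [-200] + chunks[1]
print(input_ids)                          # [1, 319, 13, -200, 20355, 445]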
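A second likely source of the Space's build error is left untouched by this commit: gr.inputs.Image and the default= keyword belong to the Gradio 2/3 API, and the gr.inputs namespace was removed in Gradio 4, so with a current gradio pin this line fails at import time. A sketch of the same interface against the current API (demo is just a local name):

import gradio as gr

# Same interface with current components: top-level gr.Image / gr.Textbox,
# and value= instead of the removed default= keyword.
demo = gr.Interface(
    fn=describe_image,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(value="Describe this image in detail"),
    ],
    outputs="text",
    title="Image Description Model",
    description="Upload an image and receive a detailed description.",
)
demo.launch()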
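For completeness, calling the handler directly (outside Gradio) would look like this; example.jpg is a placeholder path:

from PIL import Image

# Hypothetical local test of describe_image; 'example.jpg' is a placeholder.
img = Image.open("example.jpg")
print(describe_image(img, "What objects are in this picture?"))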