Hammedalmodel committed on
Commit
0e323a0
·
verified ·
1 Parent(s): 8d587d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -44
app.py CHANGED
@@ -2,8 +2,7 @@ from transformers import MllamaForConditionalGeneration, AutoProcessor
2
  from PIL import Image
3
  import torch
4
  import gradio as gr
5
- import requests
6
- from io import BytesIO
7
 
8
  # Initialize model and processor
9
  ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
@@ -13,54 +12,52 @@ model = MllamaForConditionalGeneration.from_pretrained(
13
  ).to("cuda")
14
  processor = AutoProcessor.from_pretrained(ckpt)
15
 
16
def extract_text(image_input):
    """Extract handwritten text from an image given as a URL or an uploaded file.

    Args:
        image_input: Either an image URL (str) or a file path / file-like
            object accepted by ``PIL.Image.open``.

    Returns:
        The extracted text wrapped in newlines, or an ``"Error: ..."`` string
        if anything fails (best-effort for UI display).
    """
    try:
        # Handle URL input vs. direct file upload.
        if isinstance(image_input, str):
            # timeout= prevents the request from hanging indefinitely;
            # raise_for_status() surfaces HTTP errors instead of handing an
            # error page's bytes to PIL.
            response = requests.get(image_input, timeout=30)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content)).convert("RGB")
        else:
            image = Image.open(image_input).convert("RGB")

        # Chat-style message; the image placeholder is filled by the processor.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
                    {"type": "image"}
                ]
            }
        ]

        # Process input
        texts = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(text=texts, images=[image], return_tensors="pt").to("cuda")

        # Generate output
        outputs = model.generate(**inputs, max_new_tokens=250)

        # Decode only the newly generated tokens (everything after the prompt).
        # This removes the prompt/role text at the source and replaces the
        # fragile string surgery (searching for "assistant", replacing "user")
        # that corrupted any result actually containing those substrings.
        generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        result = processor.decode(generated_tokens, skip_special_tokens=True).strip()

        return f"\n{result}\n"

    except Exception as e:
        # Best-effort: show the failure (bad URL, unreadable image, generation
        # error) as a message in the UI instead of crashing the app.
        return f"Error: {str(e)}"
55
 
56
# Assemble the web UI configuration, then build the interface from it.
interface_config = {
    "fn": extract_text,
    "inputs": gr.Text(label="Image URL or Upload"),
    "outputs": gr.Textbox(label="Extracted Text"),
    "title": "Handwritten Text Extractor",
    "description": "Enter an image URL or upload an image to extract handwritten text.",
}
demo = gr.Interface(**interface_config)

# Start serving the app.
demo.launch()
 
2
  from PIL import Image
3
  import torch
4
  import gradio as gr
5
+ import spaces
 
6
 
7
  # Initialize model and processor
8
  ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
 
12
  ).to("cuda")
13
  processor = AutoProcessor.from_pretrained(ckpt)
14
 
15
@spaces.GPU
def extract_text(image):
    """Extract handwritten text from an uploaded image.

    Args:
        image: Path to the uploaded image file (Gradio ``type="filepath"``).

    Returns:
        The extracted text, or an ``"Error: ..."`` string if processing fails
        (best-effort for UI display, matching the app's earlier behavior).
    """
    try:
        # Normalize to RGB so the processor receives a consistent 3-channel image.
        image = Image.open(image).convert("RGB")

        # Chat-style message; the image placeholder is filled by the processor.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
                    {"type": "image"}
                ]
            }
        ]

        # Process input
        texts = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(text=texts, images=[image], return_tensors="pt").to("cuda")

        # Generate output
        outputs = model.generate(**inputs, max_new_tokens=250)

        # Decode only the newly generated tokens (everything after the prompt).
        # This removes the prompt/role text at the source, so the fragile
        # cleanup (find "assistant", replace "user") that corrupted results
        # containing those substrings is no longer needed. Leftover debug
        # print() calls are dropped as well.
        generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        result = processor.decode(generated_tokens, skip_special_tokens=True).strip()

        return result
    except Exception as e:
        # Surface failures in the UI instead of crashing the Space.
        return f"Error: {str(e)}"
 
 
52
 
53
# Build the web UI around the extraction function.
image_input = gr.Image(type="filepath", label="Upload Image")
text_output = gr.Textbox(label="Extracted Text")

demo = gr.Interface(
    fn=extract_text,
    inputs=image_input,
    outputs=text_output,
    title="Handwritten Text Extractor",
    description="Upload an image containing handwritten text to extract its content.",
)

# Launch the app (debug=True keeps error tracebacks visible in the logs).
demo.launch(debug=True)