krishna195 committed
Commit fc0bd58 · verified · 1 Parent(s): a22ba62

Update app.py

Files changed (1): app.py (+110 -110)
app.py CHANGED
import gradio as gr
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from PIL import Image
import torch
import re

# Load the pre-trained model and processor
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    torch_dtype="auto",
    device_map="auto",
)

processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

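# Note: device_map="auto" lets Accelerate place the model on a GPU when one is
# available and fall back to CPU otherwise; the inputs are moved to
# model.device inside extract_text so they always match the model's placement.
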
# Extract text from the image with the vision-language model
def extract_text(image):
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "Extract the text in Hindi from this image."}
            ]
        }
    ]

    # Build the chat-formatted prompt (inserts the image placeholder tokens)
    text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)

    inputs = processor(
        text=[text_prompt],
        images=[image],
        padding=True,
        return_tensors="pt"
    )

    # Move inputs to wherever device_map="auto" placed the model
    inputs = inputs.to(model.device)

    # Generate output text from the model
    output_ids = model.generate(**inputs, max_new_tokens=1024)

    # Drop the prompt tokens, keeping only the newly generated ones
    generated_ids = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, output_ids)
    ]

    # Decode the generated tokens into text
    extracted_text = processor.batch_decode(
        generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]

    return extracted_text

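# A quick local smoke test (hypothetical file name -- substitute a real image):
#
#     from PIL import Image
#     print(extract_text(Image.open("sample_hindi.png")))
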
# Highlight keywords in the text; this works for non-Latin scripts such as
# Devanagari (Hindi) because the regexes operate on Unicode strings
def highlight_keywords(extracted_text, keywords):
    highlighted_text = extracted_text
    if keywords:
        for keyword in keywords.split(","):
            keyword = keyword.strip()
            if keyword:
                highlighted_text = re.sub(
                    re.escape(keyword),      # escape any regex metacharacters in the keyword
                    r'<mark>\g<0></mark>',   # wrap the matched text in <mark> tags
                    highlighted_text,
                    flags=re.IGNORECASE | re.UNICODE  # case-insensitive; re.UNICODE is the Python 3 default
                )

    return highlighted_text

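# For example, highlight_keywords("नमस्ते दुनिया", "दुनिया") returns
# 'नमस्ते <mark>दुनिया</mark>', which gr.HTML renders with the match highlighted.
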
# First step: extract text from the uploaded image
def extract_text_step(image):
    return extract_text(image)

# Second step: search for and highlight keywords in the extracted text
def highlight_keywords_step(extracted_text, keywords):
    return highlight_keywords(extracted_text, keywords)

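# The extracted-text Textbox doubles as the intermediate store: its current
# value is passed back in as an input of the highlight step, so no separate
# gr.State component is needed.
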
# Gradio UI
with gr.Blocks() as demo:
    # Step 1: image upload and text extraction
    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Image")
        extract_button = gr.Button("Extract Text")
        extracted_text_output = gr.Textbox(label="Extracted Text")

    # Step 2: keyword input and highlighting
    with gr.Row():
        keyword_input = gr.Textbox(label="Enter keywords (comma-separated)", placeholder="Enter keywords after text extraction")
        search_button = gr.Button("Highlight Keywords")
        highlighted_text_output = gr.HTML(label="Highlighted Text with Matches")

    # Wire up the interactions
    extract_button.click(
        fn=extract_text_step,
        inputs=image_input,
        outputs=extracted_text_output,   # display the extracted text
    )

    search_button.click(
        fn=highlight_keywords_step,
        inputs=[extracted_text_output, keyword_input],  # use extracted text and keywords
        outputs=highlighted_text_output,                # display the highlighted text
    )

# Launch the app
demo.launch()
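# Note: on a Hugging Face Space the bare launch() above is all that is needed;
# when running locally you could instead use, for example,
# demo.launch(server_name="0.0.0.0", server_port=7860) to expose the app on
# your network.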