krishna195 committed on
Commit
a22ba62
·
verified ·
1 Parent(s): 6b6211c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +110 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
3
+ from PIL import Image
4
+ import torch
5
+ import re
6
+
7
+ # Load the pre-trained model and processor
8
# Single source of truth for the checkpoint id, so the model and its processor
# are always loaded from the same repository.
MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"

# Loaded once at import time and shared by every request.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)

# Processor for the same checkpoint; used to build the chat prompt, prepare
# the image/text inputs and decode the generated ids.
processor = AutoProcessor.from_pretrained(MODEL_ID)
15
+
16
+ # Function to extract text from the image
17
def extract_text(image):
    """Run the vision-language model on an image and return its text answer.

    Args:
        image: The uploaded picture (the UI supplies a PIL image), or ``None``
            when nothing has been uploaded yet.

    Returns:
        The decoded model response with the prompt tokens stripped, or an
        empty string when ``image`` is ``None``.
    """
    # Clicking the button without an upload hands us None; return early
    # instead of letting the processor fail on a missing image.
    if image is None:
        return ""

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "can u extract the text in hindi"},
            ],
        }
    ]

    # Render the chat messages into the prompt string the model expects.
    text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)

    inputs = processor(
        text=[text_prompt],
        images=[image],
        padding=True,
        return_tensors="pt",
    )

    # The model is placed with device_map="auto", so send the inputs to the
    # device the model actually reports rather than guessing "cuda"/"cpu".
    inputs = inputs.to(model.device)

    output_ids = model.generate(**inputs, max_new_tokens=1024)

    # Each output row starts with the prompt ids; slice them off so only the
    # newly generated tokens are decoded.
    generated_ids = [
        sequence[len(prompt_ids):]
        for prompt_ids, sequence in zip(inputs.input_ids, output_ids)
    ]

    extracted_text = processor.batch_decode(
        generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]

    return extracted_text
54
+
55
+ # Function to highlight keywords in the text, including non-Latin scripts such as Hindi (Devanagari)
56
def highlight_keywords(extracted_text, keywords):
    """Return ``extracted_text`` as HTML with every keyword wrapped in ``<mark>``.

    Args:
        extracted_text: Plain text to search. ``None`` is treated as empty.
        keywords: Comma-separated keywords. Surrounding whitespace and blank
            entries are ignored; matching is case-insensitive and literal
            (regex metacharacters in a keyword have no special meaning).

    Returns:
        An HTML string: the text is HTML-escaped (``&``, ``<``, ``>``) and
        each keyword occurrence is wrapped in ``<mark>...</mark>``. With no
        usable keywords the escaped text is returned unchanged otherwise.
    """
    import html  # local import: only this function needs it

    text = extracted_text or ""

    stripped = (term.strip() for term in (keywords or "").split(","))
    # De-duplicate, then try longer keywords first so "ab" wins over "a"
    # when both could match at the same position.
    terms = sorted(dict.fromkeys(t for t in stripped if t), key=len, reverse=True)
    if not terms:
        return html.escape(text, quote=False)

    # One combined pattern and a single pass over the ORIGINAL text: a later
    # keyword can never match inside a <mark> tag inserted for an earlier one.
    # str patterns are Unicode-aware by default, so no extra flag is needed.
    pattern = re.compile("|".join(re.escape(t) for t in terms), re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        # Escape everything that is not our own markup so the extracted text
        # cannot inject HTML into the page.
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        pieces.append(f"<mark>{html.escape(match.group(0), quote=False)}</mark>")
        cursor = match.end()
    pieces.append(html.escape(text[cursor:], quote=False))

    return "".join(pieces)
71
+
72
+ # First step: Extract text from the uploaded image
73
def extract_text_step(image):
    """Extract text from the uploaded image for the first UI step.

    Returns the extracted text twice, as a 2-tuple, one value for each
    output slot of the handler that calls this function.
    """
    return (extract_text(image),) * 2
76
+
77
+ # Second step: Search and highlight keywords in the extracted text
78
def highlight_keywords_step(extracted_text, keywords):
    """Second UI step: return the extracted text with the keywords highlighted."""
    return highlight_keywords(extracted_text, keywords)
81
+
82
+ # Gradio UI
83
# NOTE(review): the nesting of components inside each gr.Row() was
# reconstructed from statement grouping — confirm against the intended layout.
with gr.Blocks() as demo:
    # Step 1: image upload and text extraction
    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Image")
        extract_button = gr.Button("Extract Text")
        extracted_text_output = gr.Textbox(label="Extracted Text")

    # Step 2: keyword input and highlighting
    with gr.Row():
        keyword_input = gr.Textbox(label="Enter keywords (comma-separated)", placeholder="Enter keywords after text extraction")
        search_button = gr.Button("Highlight Keywords")
        highlighted_text_output = gr.HTML(label="Highlighted Text with Matches")

    # extract_text_step returns two values, so two output slots are listed;
    # both point at the same textbox, which doubles as the stored text that
    # the highlighting step reads back.
    extract_button.click(
        fn=extract_text_step,
        inputs=image_input,
        outputs=[extracted_text_output, extracted_text_output],
    )

    # Highlighting reads whatever is currently in the extracted-text box
    # together with the keyword list and renders the result as HTML.
    search_button.click(
        fn=highlight_keywords_step,
        inputs=[extracted_text_output, keyword_input],
        outputs=highlighted_text_output,
    )

# Only start the server when run as a script, so importing this module
# (e.g. for testing the helper functions) does not launch the app.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ git+https://github.com/huggingface/transformers
2
+ gradio
3
+ torch
4
+ pillow
5
+ accelerate>=0.26.0