Shabdobhedi committed on
Commit
fe7691a
·
verified ·
1 Parent(s): f499517

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +53 -0
  2. ocr_utils.py +34 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import google.generativeai as genai
2
+ from PIL import Image
3
+ import streamlit as st
4
+ import io
5
+
6
+ from ocr_utils import extract_text
7
+ from ocr_utils import highlight_content
8
+
9
# CSS injected once per render so that <span class="highlight"> elements
# produced by highlight_content() are visibly marked. The block must be
# terminated with a closing </style> tag to be well-formed HTML.
HIGHLIGHT_CSS = """<style>
.highlight {
    background-color: yellow;
    color: black;
    padding: 0.2em;
    border-radius: 4px;
}
</style>"""

# Streamlit app layout
st.title("OCR Text Extraction from Images")

# File uploader widget
uploaded_file = st.file_uploader(
    "Upload an Image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Open and display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # Extract text from the image
    full_text = extract_text(image)

    # Display the extracted text
    st.subheader("Extracted Text")
    st.write(full_text)

    # Text input for keyword search
    keyword = st.text_input("Enter Keyword to Search")

    # The results section is always shown; its content depends on whether
    # a keyword has been entered.
    st.subheader("Highlighted Search Results")
    if keyword:
        highlighted_content = highlight_content(full_text, keyword)

        # CSS styles for highlighting
        st.markdown(HIGHLIGHT_CSS, unsafe_allow_html=True)

        # Render the highlighted content with HTML
        st.markdown(highlighted_content, unsafe_allow_html=True)
    else:
        st.write("No keyword entered for highlighting.")
ocr_utils.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import io
import os

import google.generativeai as genai
from PIL import Image
import streamlit as st
5
+
6
+
7
# The API key is read from the environment so that the secret never lands
# in version control. Set GOOGLE_API_KEY before starting the app (for
# example as a deployment secret).
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY environment variable is not set; "
        "it is required to call the Gemini API."
    )

genai.configure(api_key=GOOGLE_API_KEY)

# Shared model handle used by extract_text().
model = genai.GenerativeModel("gemini-1.5-pro")
10
+
11
+
12
def extract_text(image):
    """Send an image to the module-level Gemini model and return its text.

    The image is converted to RGB when it is in any other mode, then passed
    together with a fixed prompt to ``model.generate_content``.

    Args:
        image: A PIL image (anything exposing ``mode`` and ``convert``).

    Returns:
        The ``text`` attribute of the model's response.
    """
    prompt = "successfully extracts hindi and english text from the image and returns the extracted text in a structured format (plain text)"

    # Normalise the colour mode before handing the image to the model.
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

    return model.generate_content([prompt, rgb_image]).text
23
+
24
+
25
def highlight_content(full_text, keyword):
    """Wrap every occurrence of *keyword* in a highlight ``<span>``.

    The result is intended to be rendered as raw HTML, so both the
    surrounding text and the keyword are HTML-escaped; only the
    ``<span class='highlight'>`` wrappers added here are live markup.
    Matching is case-sensitive and non-overlapping, scanning left to right.

    Args:
        full_text: The text to search.
        keyword: The substring to highlight.

    Returns:
        The escaped text with each match wrapped in
        ``<span class='highlight'>...</span>``, or the message
        ``"No keyword entered for highlighting."`` when *keyword* is empty.
    """
    if not keyword:
        return "No keyword entered for highlighting."

    # Split on the raw keyword *before* escaping, so a keyword can never
    # match inside an entity produced by escaping (e.g. "amp" in "&amp;").
    segments = full_text.split(keyword)
    marked = f"<span class='highlight'>{html.escape(keyword)}</span>"
    return marked.join(html.escape(segment) for segment in segments)