Spaces:

Shabdobhedi
/

OCR-Document-Search-App-Using_gemini-1

Sleeping

App Files Files Community

Shabdobhedi committed on Sep 30, 2024

Commit

fe7691a

verified ·

1 Parent(s): f499517

Upload 2 files

Browse files

Files changed (2) hide show

app.py +53 -0
ocr_utils.py +34 -0

app.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import google.generativeai as genai
+from PIL import Image
+import streamlit as st
+import io
+from ocr_utils import extract_text
+from ocr_utils import highlight_content
+# Streamlit app layout
+st.title("OCR Text Extraction from Images")
+# File uploader widget
+uploaded_file = st.file_uploader(
+    "Upload an Image", type=["jpg", "jpeg", "png"])
+if uploaded_file is not None:
+    # Open and display the uploaded image
+    image = Image.open(uploaded_file)
+    st.image(image, caption='Uploaded Image', use_column_width=True)
+    # Extract text from the image
+    full_text = extract_text(image)
+    # Display the extracted text
+    st.subheader("Extracted Text")
+    st.write(full_text)
+    # Text input for keyword search
+    keyword = st.text_input("Enter Keyword to Search")
+    # Display highlighted content if a keyword is entered
+    if keyword:
+        highlighted_content = highlight_content(full_text, keyword)
+        st.subheader("Highlighted Search Results")
+      # CSS styles for highlighting
+        st.markdown('''
+        <style>
+        .highlight {
+            background-color: yellow;
+            color: black;
+            padding: 0.2em;
+            border-radius: 4px;
+        }
+        <style>
+        ''', unsafe_allow_html=True)
+        # Render the highlighted content with HTML
+        st.markdown(highlighted_content, unsafe_allow_html=True)
+    else:
+        st.subheader("Highlighted Search Results")
+        st.write("No keyword entered for highlighting.")

ocr_utils.py ADDED Viewed

	@@ -0,0 +1,34 @@

+import google.generativeai as genai
+from PIL import Image
+import streamlit as st
+import io
+GOOGLE_API_KEY = "AIzaSyAesEWBYNPVIw5dde2LiZDXWDDJ8by90XI"
+genai.configure(api_key=GOOGLE_API_KEY)
+model = genai.GenerativeModel("gemini-1.5-pro")
+def extract_text(image):
+    # Ensure the image is in RGB mode
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    # Call the model's generate_content method with the PIL image
+    response = model.generate_content(
+        ["successfully extracts hindi and english text from the image and returns the extracted text in a structured format (plain text)", image]
+    )
+    return response.text
+def highlight_content(full_text, keyword):
+    if keyword:
+        # Highlight the keyword in the text
+        highlighted_text = full_text.replace(
+            keyword, f"<span class='highlight'>{keyword}</span>"
+        )
+        return highlighted_text
+    else:
+        return "No keyword entered for highlighting."