ocr_api2

Sleeping

App Files Files

xet

Community

Nasma committed on Dec 20, 2024

Commit

e5857ea

verified ·

1 Parent(s): 4c07ccf

Update main.py

Browse files

Files changed (1) hide show

main.py +174 -79

main.py CHANGED Viewed

@@ -1,39 +1,152 @@
-try: from pip._internal.operations import freeze
-except ImportError: # pip < 10.0
-    from pip.operations import freeze
-pkgs = freeze.freeze()
-for pkg in pkgs: print(pkg)
-import os
-from fastapi import FastAPI, HTTPException, File, UploadFile,Query
 from fastapi.middleware.cors import CORSMiddleware
 from PyPDF2 import PdfReader
 import google.generativeai as genai
 import json
-import base64
-from io import BytesIO
 from PIL import Image
 import io
-import requests
 import fitz  # PyMuPDF
 import os
 from dotenv import load_dotenv
-# Load the environment variables from the .env file
-load_dotenv()
-# Configure Gemini API
 secret = os.environ["GEMINI"]
 genai.configure(api_key=secret)
 model_vision = genai.GenerativeModel('gemini-1.5-flash')
 model_text = genai.GenerativeModel('gemini-pro')
 app = FastAPI()
 app.add_middleware(
@@ -44,87 +157,69 @@ app.add_middleware(
     allow_headers=["*"],
 )
-def vision(file_content):
-    # Open the PDF
-    pdf_document = fitz.open("pdf",file_content)
-    gemini_input = ["extract the whole text"]
-    # Iterate through the pages
     for page_num in range(len(pdf_document)):
-        # Select the page
         page = pdf_document.load_page(page_num)
-        # Render the page to a pixmap (image)
         pix = page.get_pixmap()
-        print(type(pix))
-        # Convert the pixmap to bytes
         img_bytes = pix.tobytes("png")
-        # Convert bytes to a PIL Image
         img = Image.open(io.BytesIO(img_bytes))
         gemini_input.append(img)
-        # # Save the image if needed
-        # img.save(f'page_{page_num + 1}.png')
-    print("PDF pages converted to images successfully!")
-    # Now you can pass the PIL image to the model_vision
-    response = model_vision.generate_content(gemini_input).text
-    return response
 @app.post("/get_ocr_data/")
-async def get_data(input_file: UploadFile = File(...)):
-    #try:
-        # Determine the file type by reading the first few bytes
         file_content = await input_file.read()
         file_type = input_file.content_type
-        text = ""
-        if file_type == "application/pdf":
-                # Read PDF file using PyPDF2
-                pdf_reader = PdfReader(io.BytesIO(file_content))
-                for page in pdf_reader.pages:
-                    text += page.extract_text()
-                if len(text)<10:
-                   print("vision called")
-                   text = vision(file_content)
-        else:
             raise HTTPException(status_code=400, detail="Unsupported file type")
-        # Call Gemini (or another model) to extract required data
-        prompt = f"""This is CV data: {text.strip()}
-                IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.
-                Example Output:
-                [
                     "firstname" : "firstname",
                     "lastname" : "lastname",
-                    "gender" : "gender",
                     "email" : "email",
                     "contact_number" : "contact number",
-                    "age" : "age",
                     "home_address" : "full home address",
                     "home_town" : "home town or city",
                     "total_years_of_experience" : "total years of experience",
                     "LinkedIn_link" : "LinkedIn link",
-                    "positions": [ "Job title 1", "Job title 2", "Job title 3" ],
-                    "industry": "industry of work",
                     "experience" : "experience",
-                    "skills" : Skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section)
-                ]
-                """
         response = model_text.generate_content(prompt)
-        print(response.text)
-        data = json.loads(response.text.replace("JSON", "").replace("json", "").replace("```", ""))
         return {"data": data}
-    #except Exception as e:
-        #raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")

+# try: from pip._internal.operations import freeze
+# except ImportError: # pip < 10.0
+#     from pip.operations import freeze
+# pkgs = freeze.freeze()
+# for pkg in pkgs: print(pkg)
+# import os
+# from fastapi import FastAPI, HTTPException, File, UploadFile,Query
+# from fastapi.middleware.cors import CORSMiddleware
+# from PyPDF2 import PdfReader
+# import google.generativeai as genai
+# import json
+# import base64
+# from io import BytesIO
+# from PIL import Image
+# import io
+# import requests
+# import fitz  # PyMuPDF
+# import os
+# from dotenv import load_dotenv
+# # Load the environment variables from the .env file
+# load_dotenv()
+# # Configure Gemini API
+# secret = os.environ["GEMINI"]
+# genai.configure(api_key=secret)
+# model_vision = genai.GenerativeModel('gemini-1.5-flash')
+# model_text = genai.GenerativeModel('gemini-pro')
+# app = FastAPI()
+# app.add_middleware(
+#     CORSMiddleware,
+#     allow_origins=["*"],
+#     allow_credentials=True,
+#     allow_methods=["*"],
+#     allow_headers=["*"],
+# )
+# def vision(file_content):
+#     # Open the PDF
+#     pdf_document = fitz.open("pdf",file_content)
+#     gemini_input = ["extract the whole text"]
+#     # Iterate through the pages
+#     for page_num in range(len(pdf_document)):
+#         # Select the page
+#         page = pdf_document.load_page(page_num)
+#         # Render the page to a pixmap (image)
+#         pix = page.get_pixmap()
+#         print(type(pix))
+#         # Convert the pixmap to bytes
+#         img_bytes = pix.tobytes("png")
+#         # Convert bytes to a PIL Image
+#         img = Image.open(io.BytesIO(img_bytes))
+#         gemini_input.append(img)
+#         # # Save the image if needed
+#         # img.save(f'page_{page_num + 1}.png')
+#     print("PDF pages converted to images successfully!")
+#     # Now you can pass the PIL image to the model_vision
+#     response = model_vision.generate_content(gemini_input).text
+#     return response
+# @app.post("/get_ocr_data/")
+# async def get_data(input_file: UploadFile = File(...)):
+#     #try:
+#         # Determine the file type by reading the first few bytes
+#         file_content = await input_file.read()
+#         file_type = input_file.content_type
+#         text = ""
+#         if file_type == "application/pdf":
+#                 # Read PDF file using PyPDF2
+#                 pdf_reader = PdfReader(io.BytesIO(file_content))
+#                 for page in pdf_reader.pages:
+#                     text += page.extract_text()
+#                 if len(text)<10:
+#                    print("vision called")
+#                    text = vision(file_content)
+#         else:
+#             raise HTTPException(status_code=400, detail="Unsupported file type")
+#         # Call Gemini (or another model) to extract required data
+#         prompt = f"""This is CV data: {text.strip()}
+#                 IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.
+#                 Example Output:
+#                 [
+#                     "firstname" : "firstname",
+#                     "lastname" : "lastname",
+#                     "gender" : "gender",
+#                     "email" : "email",
+#                     "contact_number" : "contact number",
+#                     "age" : "age",
+#                     "home_address" : "full home address",
+#                     "home_town" : "home town or city",
+#                     "total_years_of_experience" : "total years of experience",
+#                     "LinkedIn_link" : "LinkedIn link",
+#                     "positions": [ "Job title 1", "Job title 2", "Job title 3" ],
+#                     "industry": "industry of work",
+#                     "experience" : "experience",
+#                     "skills" : Skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section)
+#                 ]
+#                 """
+#         response = model_text.generate_content(prompt)
+#         print(response.text)
+#         data = json.loads(response.text.replace("JSON", "").replace("json", "").replace("```", ""))
+#         return {"data": data}
+#     #except Exception as e:
+#         #raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
+from fastapi import FastAPI, HTTPException, File, UploadFile, Query
 from fastapi.middleware.cors import CORSMiddleware
 from PyPDF2 import PdfReader
 import google.generativeai as genai
 import json
 from PIL import Image
 import io
 import fitz  # PyMuPDF
 import os
 from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
 secret = os.environ["GEMINI"]
 genai.configure(api_key=secret)
 model_vision = genai.GenerativeModel('gemini-1.5-flash')
 model_text = genai.GenerativeModel('gemini-pro')
 app = FastAPI()
 app.add_middleware(
     allow_headers=["*"],
 )
+def process_pdf_text(file_content):
+    """Return the text of every page of a PDF, concatenated in page order.
+
+    `file_content` is the raw PDF bytes; it is wrapped in an in-memory
+    buffer and parsed with PyPDF2's `PdfReader`. No separator is inserted
+    between pages.
+    """
+    reader = PdfReader(io.BytesIO(file_content))
+    return "".join(page.extract_text() for page in reader.pages)
+def process_pdf_images(file_content):
+    """Render each PDF page to an image and ask Gemini Vision for its text.
+
+    Args:
+        file_content: Raw PDF bytes, opened with PyMuPDF (`fitz`).
+
+    Returns:
+        The `.text` of the `model_vision` response to the prompt
+        "extract the whole text" followed by one PIL image per page.
+    """
+    gemini_input = []
+    # Open the document in a `with` block so it is closed even when rendering
+    # a page raises; previously the document was never closed.
+    with fitz.open("pdf", file_content) as pdf_document:
+        for page_num in range(len(pdf_document)):
+            page = pdf_document.load_page(page_num)
+            pix = page.get_pixmap()
+            # The PNG bytes are copied out of the pixmap, so the PIL image
+            # built from them does not depend on the document staying open.
+            img_bytes = pix.tobytes("png")
+            img = Image.open(io.BytesIO(img_bytes))
+            gemini_input.append(img)
+    # Call Gemini Vision with extracted images
+    response = model_vision.generate_content(["extract the whole text", *gemini_input])
+    return response.text
 @app.post("/get_ocr_data/")
+async def get_data(user_id: str = Query(...), input_file: UploadFile = File(...)):
+    """Extract structured CV data from an uploaded PDF.
+
+    The PDF text is read with `process_pdf_text`; when the stripped text is
+    shorter than 10 characters, `process_pdf_images` is used instead. The
+    resulting text is embedded in a prompt for `model_text`, and the reply
+    is parsed as JSON.
+
+    Args:
+        user_id: Required query parameter. It is not read inside this handler.
+        input_file: The upload; its content type must be "application/pdf".
+
+    Returns:
+        A dict of the form {"data": <parsed JSON from the model reply>}.
+
+    Raises:
+        HTTPException: 400 when the upload is not a PDF; 500 for any other
+            failure (PDF parsing, model call, or invalid JSON in the reply).
+    """
+    try:
         file_content = await input_file.read()
         file_type = input_file.content_type
+        if file_type != "application/pdf":
             raise HTTPException(status_code=400, detail="Unsupported file type")
+        # Process PDF
+        text = process_pdf_text(file_content)
+        if len(text.strip()) < 10:  # Fallback to image-based OCR if text is minimal
+            text = process_pdf_images(file_content)
+        # Call Gemini Text model
+        # NOTE(review): the example below wraps key/value pairs in [ ] and so is
+        # not valid JSON itself; left unchanged because altering it could change
+        # the shape of the model's reply that callers receive.
+        prompt = f"""
+            This is CV data: {text.strip()}
+            IMPORTANT: The output should be a JSON array! Make sure the JSON is valid.
+            Example Output:
+            [
                     "firstname" : "firstname",
                     "lastname" : "lastname",
                     "email" : "email",
                     "contact_number" : "contact number",
                     "home_address" : "full home address",
                     "home_town" : "home town or city",
                     "total_years_of_experience" : "total years of experience",
+                    "education": "Institution Name, Degree Name",
                     "LinkedIn_link" : "LinkedIn link",
                     "experience" : "experience",
+                    "industry": "industry of work",
+                    "skills" : skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section),
+                    "positions": [ "Job title 1", "Job title 2", "Job title 3" ],
+                    "summary": "Generate a summary of the CV, including key qualifications, notable experiences, and relevant skills."
+            ]
+        """
         response = model_text.generate_content(prompt)
+        # Sanitize response: drop Markdown code fences, then the "json"/"JSON"
+        # language tag a fenced reply leaves behind. No valid JSON document
+        # starts with the letters "json", so removing that prefix is safe.
+        raw = response.text.replace("```", "").strip()
+        if raw[:4].lower() == "json":
+            raw = raw[4:]
+        data = json.loads(raw)
         return {"data": data}
+    except HTTPException:
+        # Deliberate HTTP errors (the 400 above) must reach the client as-is
+        # instead of being rewrapped as a 500 by the handler below.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}") from e