ocr_api2

Sleeping

App Files Files Community

Nasma committed on Dec 20, 2024

Commit

3e60e91

verified ·

1 Parent(s): acf9f09

Update main.py

Browse files

Files changed (1) hide show

main.py +82 -170

main.py CHANGED Viewed

@@ -1,152 +1,38 @@
-# try: from pip._internal.operations import freeze
-# except ImportError: # pip < 10.0
-#     from pip.operations import freeze
-# pkgs = freeze.freeze()
-# for pkg in pkgs: print(pkg)
-# import os
-# from fastapi import FastAPI, HTTPException, File, UploadFile,Query
-# from fastapi.middleware.cors import CORSMiddleware
-# from PyPDF2 import PdfReader
-# import google.generativeai as genai
-# import json
-# import base64
-# from io import BytesIO
-# from PIL import Image
-# import io
-# import requests
-# import fitz  # PyMuPDF
-# import os
-# from dotenv import load_dotenv
-# # Load the environment variables from the .env file
-# load_dotenv()
-# # Configure Gemini API
-# secret = os.environ["GEMINI"]
-# genai.configure(api_key=secret)
-# model_vision = genai.GenerativeModel('gemini-1.5-flash')
-# model_text = genai.GenerativeModel('gemini-pro')
-# app = FastAPI()
-# app.add_middleware(
-#     CORSMiddleware,
-#     allow_origins=["*"],
-#     allow_credentials=True,
-#     allow_methods=["*"],
-#     allow_headers=["*"],
-# )
-# def vision(file_content):
-#     # Open the PDF
-#     pdf_document = fitz.open("pdf",file_content)
-#     gemini_input = ["extract the whole text"]
-#     # Iterate through the pages
-#     for page_num in range(len(pdf_document)):
-#         # Select the page
-#         page = pdf_document.load_page(page_num)
-#         # Render the page to a pixmap (image)
-#         pix = page.get_pixmap()
-#         print(type(pix))
-#         # Convert the pixmap to bytes
-#         img_bytes = pix.tobytes("png")
-#         # Convert bytes to a PIL Image
-#         img = Image.open(io.BytesIO(img_bytes))
-#         gemini_input.append(img)
-#         # # Save the image if needed
-#         # img.save(f'page_{page_num + 1}.png')
-#     print("PDF pages converted to images successfully!")
-#     # Now you can pass the PIL image to the model_vision
-#     response = model_vision.generate_content(gemini_input).text
-#     return response
-# @app.post("/get_ocr_data/")
-# async def get_data(input_file: UploadFile = File(...)):
-#     #try:
-#         # Determine the file type by reading the first few bytes
-#         file_content = await input_file.read()
-#         file_type = input_file.content_type
-#         text = ""
-#         if file_type == "application/pdf":
-#                 # Read PDF file using PyPDF2
-#                 pdf_reader = PdfReader(io.BytesIO(file_content))
-#                 for page in pdf_reader.pages:
-#                     text += page.extract_text()
-#                 if len(text)<10:
-#                    print("vision called")
-#                    text = vision(file_content)
-#         else:
-#             raise HTTPException(status_code=400, detail="Unsupported file type")
-#         # Call Gemini (or another model) to extract required data
-#         prompt = f"""This is CV data: {text.strip()}
-#                 IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.
-#                 Example Output:
-#                 [
-#                     "firstname" : "firstname",
-#                     "lastname" : "lastname",
-#                     "gender" : "gender",
-#                     "email" : "email",
-#                     "contact_number" : "contact number",
-#                     "age" : "age",
-#                     "home_address" : "full home address",
-#                     "home_town" : "home town or city",
-#                     "total_years_of_experience" : "total years of experience",
-#                     "LinkedIn_link" : "LinkedIn link",
-#                     "positions": [ "Job title 1", "Job title 2", "Job title 3" ],
-#                     "industry": "industry of work",
-#                     "experience" : "experience",
-#                     "skills" : Skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section)
-#                 ]
-#                 """
-#         response = model_text.generate_content(prompt)
-#         print(response.text)
-#         data = json.loads(response.text.replace("JSON", "").replace("json", "").replace("```", ""))
-#         return {"data": data}
-#     #except Exception as e:
-#         #raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
-from fastapi import FastAPI, HTTPException, File, UploadFile, Query
 from fastapi.middleware.cors import CORSMiddleware
 from PyPDF2 import PdfReader
 import google.generativeai as genai
 import json
 from PIL import Image
 import io
 import fitz  # PyMuPDF
 import os
-from dotenv import load_dotenv
-# Load environment variables
 load_dotenv()
 secret = os.environ["GEMINI"]
 genai.configure(api_key=secret)
 model_vision = genai.GenerativeModel('gemini-1.5-flash')
 model_text = genai.GenerativeModel('gemini-pro')
 app = FastAPI()
 app.add_middleware(
@@ -157,50 +43,68 @@ app.add_middleware(
     allow_headers=["*"],
 )
-def process_pdf_text(file_content):
-    """Extract text from PDF using PyPDF2."""
-    pdf_reader = PdfReader(io.BytesIO(file_content))
-    text = ""
-    for page in pdf_reader.pages:
-        text += page.extract_text()
-    return text
-def process_pdf_images(file_content):
-    """Extract images from PDF and pass to Gemini Vision."""
-    pdf_document = fitz.open("pdf", file_content)
-    gemini_input = []
     for page_num in range(len(pdf_document)):
         page = pdf_document.load_page(page_num)
         pix = page.get_pixmap()
         img_bytes = pix.tobytes("png")
         img = Image.open(io.BytesIO(img_bytes))
         gemini_input.append(img)
-    # Call Gemini Vision with extracted images
-    response = model_vision.generate_content(["extract the whole text", *gemini_input])
-    return response.text
 @app.post("/get_ocr_data/")
-async def get_data(user_id: str = Query(...), input_file: UploadFile = File(...)):
-    try:
-        file_content = await input_file.read()
         file_type = input_file.content_type
-        if file_type != "application/pdf":
             raise HTTPException(status_code=400, detail="Unsupported file type")
-        # Process PDF
-        text = process_pdf_text(file_content)
-        if len(text.strip()) < 10:  # Fallback to image-based OCR if text is minimal
-            text = process_pdf_images(file_content)
-        # Call Gemini Text model
-        prompt = f"""
-            This is CV data: {text.strip()}
-            IMPORTANT: The output should be a JSON array! Make sure the JSON is valid.
-            Example Output:
-            [
                     "firstname" : "firstname",
                     "lastname" : "lastname",
                     "email" : "email",
@@ -215,11 +119,19 @@ async def get_data(user_id: str = Query(...), input_file: UploadFile = File(...)
                     "skills" : skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section),
                     "positions": [ "Job title 1", "Job title 2", "Job title 3" ],
                     "summary": "Generate a summary of the CV, including key qualifications, notable experiences, and relevant skills."
-            ]
-        """
         response = model_text.generate_content(prompt)
-        data = json.loads(response.text.replace("```", ""))  # Sanitize response
         return {"data": data}
-    # except Exception as e:
-    #     raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")

+try: from pip._internal.operations import freeze
+except ImportError: # pip < 10.0
+    from pip.operations import freeze
+pkgs = freeze.freeze()
+for pkg in pkgs: print(pkg)
+import os
+import uvicorn
+from fastapi import FastAPI, HTTPException, File, UploadFile,Query
 from fastapi.middleware.cors import CORSMiddleware
 from PyPDF2 import PdfReader
 import google.generativeai as genai
 import json
 from PIL import Image
 import io
+import requests
 import fitz  # PyMuPDF
 import os
+from dotenv import load_dotenv
+# Load the environment variables from the .env file
 load_dotenv()
+# Configure Gemini API
 secret = os.environ["GEMINI"]
 genai.configure(api_key=secret)
 model_vision = genai.GenerativeModel('gemini-1.5-flash')
 model_text = genai.GenerativeModel('gemini-pro')
 app = FastAPI()
 app.add_middleware(
     allow_headers=["*"],
 )
+def vision(file_content):
+    # Open the PDF
+    pdf_document = fitz.open("pdf",file_content)
+    gemini_input = ["extract the whole text"]
+    # Iterate through the pages
     for page_num in range(len(pdf_document)):
+        # Select the page
         page = pdf_document.load_page(page_num)
+        # Render the page to a pixmap (image)
         pix = page.get_pixmap()
+        print(type(pix))
+        # Convert the pixmap to bytes
         img_bytes = pix.tobytes("png")
+        # Convert bytes to a PIL Image
         img = Image.open(io.BytesIO(img_bytes))
         gemini_input.append(img)
+        # # Save the image if needed
+        # img.save(f'page_{page_num + 1}.png')
+    print("PDF pages converted to images successfully!")
+    # Now you can pass the PIL image to the model_vision
+    response = model_vision.generate_content(gemini_input).text
+    return response
 @app.post("/get_ocr_data/")
+def get_data(input_file: UploadFile = File(...)):
+    #try:
+        # Determine the file type by reading the first few bytes
+        file_content = input_file.file.read()
         file_type = input_file.content_type
+        text = ""
+        if file_type == "application/pdf":
+                # Read PDF file using PyPDF2
+                pdf_reader = PdfReader(io.BytesIO(file_content))
+                for page in pdf_reader.pages:
+                    text += page.extract_text()
+                if len(text)<10:
+                   print("vision called")
+                   text = vision(file_content)
+        else:
             raise HTTPException(status_code=400, detail="Unsupported file type")
+        # Call Gemini (or another model) to extract required data
+        prompt = f"""This is CV data: {text.strip()}
+                IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.
+                Example Output:
+                [
                     "firstname" : "firstname",
                     "lastname" : "lastname",
                     "email" : "email",
                     "skills" : skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section),
                     "positions": [ "Job title 1", "Job title 2", "Job title 3" ],
                     "summary": "Generate a summary of the CV, including key qualifications, notable experiences, and relevant skills."
+                ]
+                """
         response = model_text.generate_content(prompt)
+        print(response.text)
+        data = json.loads(response.text.replace("JSON", "").replace("json", "").replace("```", ""))
         return {"data": data}
+    #except Exception as e:
+        #raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")