ocr_api

Paused

File size: 4,194 Bytes

9ba3ade
 
 
 
 
 
d16f678
9c62372
 
 
a81ff23
 
44ef745
 
7a1124b
ebca3e9
44ef745
3b59cf8
 
a556cdd
3b59cf8
a81ff23
b71edf1
 
 
 
3b59cf8
9c2cf20
e1e9c8f
9c2cf20
3b59cf8
e1e9c8f
d16f678
3b59cf8
 
 
 
 
d92c861
9c62372
d92c861
 
 
 
 
 
 
9c62372
44ef745
 
 
 
3b59cf8
 
e34e74c
3b59cf8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44ef745
3b59cf8
44ef745
3b59cf8
 
 
44ef745
 
9c62372
a52e580
 
 
 
e1e9c8f
 
 
 
 
184ce21
55dc24c
44ef745
 
 
 
9c62372
44ef745
 
c6a4a5b
 
 
 
 
5354b70
07846cc
c6a4a5b
44ef745
 
e7eb65e
44ef745
 
e7eb65e
 
 
 
7cbd08f
 
 
 
 
 
e7eb65e
 
2a08189
e7eb65e
a81ff23
 
e7eb65e
0e39dca
e1e9c8f
9c62372
55dc24c

# Diagnostic run when this module is imported: print every entry yielded by
# pip's freeze operation, one per line.
try:
    from pip._internal.operations import freeze
except ImportError:
    # pip releases older than 10.0 expose the module at the package top level.
    from pip.operations import freeze

pkgs = freeze.freeze()
for pkg in pkgs:
    print(pkg)
import os 
from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
import google.generativeai as genai
import json
import base64
from io import BytesIO
from PIL import Image
import io
import requests
import fitz  # PyMuPDF
import os
import jwt


from dotenv import load_dotenv
# Load the environment variables from the .env file
load_dotenv()

# Configure Gemini API
# Both values are read with os.environ[...], so a missing variable raises
# KeyError while this module is being imported.
# GEMINI is passed to genai.configure() as the API key.
secret = os.environ["GEMINI"]
# SECRET_KEY is the key get_data() passes to jwt.decode() (HS256) to verify tokens.
SECRET_KEY = os.environ["SECRET_KEY"]
genai.configure(api_key=secret)
# Two separate model handles, currently bound to the same model name:
# model_vision is used by vision() for page images, model_text by get_data()
# for the text prompt.
model_vision = genai.GenerativeModel('gemini-1.5-flash')
model_text = genai.GenerativeModel('gemini-1.5-flash')






# Application object; the @app.post route below registers on it.
app = FastAPI()

# CORS is fully open: every origin, method and header is allowed, and
# credentialed requests are enabled.
# NOTE(review): FastAPI's CORS documentation advises listing explicit origins
# when allow_credentials=True instead of using "*" -- confirm this combination
# is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)





def vision(file_content):
    """Extract text from a PDF by rendering its pages and sending them to Gemini.

    Each page is rendered to a PNG image with PyMuPDF, wrapped in a PIL image,
    and appended to a single multi-part request that starts with the
    instruction "extract the whole text". get_data() calls this when direct
    text extraction from the PDF yields too little text.

    Args:
        file_content: Raw bytes of the PDF document.

    Returns:
        The ``.text`` of the response produced by ``model_vision``.

    Raises:
        Any error raised by PyMuPDF while opening/rendering the document or by
        the Gemini client while generating content is propagated unchanged.
    """
    gemini_input = ["extract the whole text"]

    # Open the PDF from memory; "pdf" tells PyMuPDF the type of the stream.
    pdf_document = fitz.open("pdf", file_content)
    try:
        for page in pdf_document:
            # Render the page to a pixmap, then encode it as PNG bytes.
            pix = page.get_pixmap()
            img_bytes = pix.tobytes("png")

            # The PNG bytes are independent of the document, so the resulting
            # PIL image stays usable after the document is closed below.
            gemini_input.append(Image.open(io.BytesIO(img_bytes)))
    finally:
        # Release the document even if rendering a page fails.
        pdf_document.close()

    print("PDF pages converted to images successfully!")

    # One request carrying the instruction followed by every page image.
    return model_vision.generate_content(gemini_input).text


def _strip_code_fence(raw):
    """Return *raw* without a wrapping Markdown code fence or leading ``json`` tag.

    Only the outer fence and the language tag are removed, so occurrences of
    "json" or backticks inside the payload itself are left intact.
    """
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    cleaned = cleaned.lstrip()
    # Valid JSON never starts with the letters "json", so a leading tag
    # (from "```json" or a bare "JSON" prefix) can be dropped safely.
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    return cleaned.strip()


@app.post("/get_ocr_data/")
async def get_data(user_id: int,
                   token: str,
                   input_file: UploadFile = File(...)
                   ):
    """Extract structured CV fields from an uploaded PDF.

    The token is verified first; the PDF text is then extracted with PyPDF2,
    falling back to image-based extraction via vision() when (almost) no text
    is found, and finally sent to ``model_text`` with a prompt asking for JSON.

    Args:
        user_id: Caller-supplied identifier; echoed back in the response.
        token: JWT verified with ``SECRET_KEY`` using the HS256 algorithm.
        input_file: Uploaded file; its content type must be "application/pdf".

    Returns:
        ``{"data": <parsed model JSON>, "user_id": user_id}``.

    Raises:
        HTTPException: 401 if the token fails verification, 400 if the upload
            is not a PDF, 500 if the model's reply cannot be parsed as JSON.
    """
    # Reject the request when the token is invalid instead of only logging it;
    # otherwise the endpoint would serve unauthenticated callers.
    try:
        jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
    except jwt.InvalidTokenError as exc:
        print("Invalid token")
        raise HTTPException(status_code=401, detail="Invalid token") from exc

    # The file type is taken from the content type declared by the client.
    if input_file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="Unsupported file type")

    file_content = await input_file.read()

    # Read PDF file using PyPDF2; treat a page with no extractable text as "".
    pdf_reader = PdfReader(io.BytesIO(file_content))
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Whitespace-only output (e.g. a scanned document) counts as "no text",
    # so the length check is done on the stripped text.
    if len(text.strip()) < 10:
        print("vision called")
        text = vision(file_content)

    # Call Gemini (or another model) to extract required data
    # NOTE(review): the example in this prompt is not valid JSON (key/value
    # pairs inside [...], a missing comma after contact_number, unquoted
    # skills). The text is kept unchanged because altering it could change the
    # shape of the model output that clients rely on -- confirm and fix
    # deliberately.
    prompt = f"""This is CV data: {text.strip()} 
                IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.
                                                                  
                Example Output:
                [
                    "firstname" : "firstname",
                    "lastname" : "lastname",
                    "contact_number" : "contact number"
                    "total_years_of_experience" : "total years of experience",
                    "LinkedIn_link" : "LinkedIn link",
                    "experience" : "experience",
                    "skills" : skills
                ]
                always keep same keys don't change , keep all key in simple letters only
                """

    # NOTE(review): generate_content() is a synchronous call inside an async
    # handler; consider off-loading it if request concurrency matters.
    response = model_text.generate_content(prompt)
    raw_reply = response.text
    print(raw_reply)

    try:
        data = json.loads(_strip_code_fence(raw_reply))
    except json.JSONDecodeError as exc:
        raise HTTPException(
            status_code=500,
            detail="Model returned invalid JSON",
        ) from exc

    return {"data": data, "user_id": user_id}