File size: 4,194 Bytes
9ba3ade d16f678 9c62372 a81ff23 44ef745 7a1124b ebca3e9 44ef745 3b59cf8 a556cdd 3b59cf8 a81ff23 b71edf1 3b59cf8 9c2cf20 e1e9c8f 9c2cf20 3b59cf8 e1e9c8f d16f678 3b59cf8 d92c861 9c62372 d92c861 9c62372 44ef745 3b59cf8 e34e74c 3b59cf8 44ef745 3b59cf8 44ef745 3b59cf8 44ef745 9c62372 a52e580 e1e9c8f 184ce21 55dc24c 44ef745 9c62372 44ef745 c6a4a5b 5354b70 07846cc c6a4a5b 44ef745 e7eb65e 44ef745 e7eb65e 7cbd08f e7eb65e 2a08189 e7eb65e a81ff23 e7eb65e 0e39dca e1e9c8f 9c62372 55dc24c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
# Startup diagnostic: list every installed distribution on stdout.
try:
    from pip._internal.operations import freeze
except ImportError:
    # pip < 10.0 exposed the same module at this older path.
    from pip.operations import freeze

pkgs = freeze.freeze()
for pkg in pkgs:
    print(pkg)
import os
from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
import google.generativeai as genai
import json
import base64
from io import BytesIO
from PIL import Image
import io
import requests
import fitz # PyMuPDF
import os
import jwt
from dotenv import load_dotenv
# Read the .env file so the lookups below can find their variables.
load_dotenv()

# Both keys are mandatory: indexing os.environ raises KeyError at import
# time when either variable is missing.
secret = os.environ["GEMINI"]
SECRET_KEY = os.environ["SECRET_KEY"]

# Gemini client setup; the vision and text handles use the same model name.
_GEMINI_MODEL_NAME = 'gemini-1.5-flash'
genai.configure(api_key=secret)
model_vision = genai.GenerativeModel(_GEMINI_MODEL_NAME)
model_text = genai.GenerativeModel(_GEMINI_MODEL_NAME)

# CORS settings passed to the middleware: every origin, method and header
# is allowed, with credentials enabled.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}

app = FastAPI()
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
def vision(file_content):
    """Extract text from a PDF by sending rendered page images to Gemini.

    Every page of the PDF is rendered to a PNG, wrapped in a PIL image and
    appended to a single request that starts with the instruction
    "extract the whole text".

    Args:
        file_content: The PDF data held in memory (passed to ``fitz.open``
            as the stream argument).

    Returns:
        The ``.text`` of the response from ``model_vision``.
    """
    gemini_input = ["extract the whole text"]

    # "pdf" is the file-type hint and file_content the in-memory stream.
    # The context manager closes the document even if rendering fails,
    # which the original code never did.
    with fitz.open("pdf", file_content) as pdf_document:
        for page in pdf_document:
            # Render the page to a pixmap and serialise it as PNG bytes.
            img_bytes = page.get_pixmap().tobytes("png")
            # The PIL image is backed by its own BytesIO buffer, so it stays
            # usable after the document has been closed.
            gemini_input.append(Image.open(io.BytesIO(img_bytes)))

    print("PDF pages converted to images successfully!")

    # One request carries the instruction followed by all page images.
    return model_vision.generate_content(gemini_input).text
def _extract_json_payload(raw):
    """Return *raw* without a surrounding Markdown code fence or "json" label."""
    # Drop whitespace and the backticks of a ```...``` fence at either end.
    payload = raw.strip().strip("`").strip()
    # Remove only a leading language label. A blanket replace of "json" would
    # also delete the word from the data itself (e.g. a skill named "JSON").
    if payload[:4].lower() == "json":
        payload = payload[4:]
    return payload.strip()


@app.post("/get_ocr_data/")
async def get_data(user_id:int,
                   token:str,
                   input_file: UploadFile = File(...)
                   ):
    """Extract structured CV fields from an uploaded PDF.

    The token is verified first. Text is then read from the PDF with PyPDF2;
    if fewer than 10 characters come back, ``vision`` is used instead. The
    text is sent to ``model_text`` with a prompt asking for JSON, and the
    parsed reply is returned.

    Args:
        user_id: Echoed back unchanged in the response.
        token: JWT verified against ``SECRET_KEY`` using HS256.
        input_file: The uploaded file; only ``application/pdf`` is accepted.

    Returns:
        ``{"data": <parsed model output>, "user_id": user_id}``.

    Raises:
        HTTPException: 401 when the token fails verification, 400 when the
            upload is not a PDF, 500 when the model reply is not valid JSON.
    """
    # Reject the request when the token cannot be verified. The original
    # code only printed a message here and carried on processing.
    try:
        jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
    except jwt.InvalidTokenError as exc:
        raise HTTPException(status_code=401, detail="Invalid token") from exc
    # NOTE(review): user_id is not compared with any claim in the token;
    # confirm whether that is intended.

    # The type check relies on the content type declared by the client.
    if input_file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="Unsupported file type")

    file_content = await input_file.read()

    # First attempt: the PDF's embedded text layer.
    pdf_reader = PdfReader(io.BytesIO(file_content))
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Almost no text extracted: fall back to image-based extraction.
    if len(text) < 10:
        print("vision called")
        text = vision(file_content)

    # Call Gemini (or another model) to extract required data
    prompt = f"""This is CV data: {text.strip()}
IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.
Example Output:
[
"firstname" : "firstname",
"lastname" : "lastname",
"contact_number" : "contact number"
"total_years_of_experience" : "total years of experience",
"LinkedIn_link" : "LinkedIn link",
"experience" : "experience",
"skills" : skills
]
always keep same keys don't change , keep all key in simple letters only
"""
    response = model_text.generate_content(prompt)
    print(response.text)

    try:
        data = json.loads(_extract_json_payload(response.text))
    except json.JSONDecodeError as exc:
        # Return a described 500 instead of an unhandled server error.
        raise HTTPException(
            status_code=500,
            detail=f"Error processing file: {str(exc)}",
        ) from exc
    return {"data": data,"user_id":user_id}
|