# ocr_api / main.py
# Debug: print the installed packages so they appear in the startup logs.
try:
    from pip._internal.operations import freeze
except ImportError:  # pip < 10.0
    from pip.operations import freeze

pkgs = freeze.freeze()
for pkg in pkgs:
    print(pkg)
import os
from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
import google.generativeai as genai
import json
import base64
from io import BytesIO
from PIL import Image
import requests
# Configure Gemini with the API key stored in the "key" environment variable.
secret = os.environ["key"]
genai.configure(api_key=secret)
model_vision = genai.GenerativeModel('gemini-pro-vision')
model_text = genai.GenerativeModel('gemini-pro')
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
def encode_image(image):
    # Convert the image to an in-memory BytesIO buffer.
    buffered = BytesIO()
    image.save(buffered, format=image.format)  # Keep the original format (e.g. PNG, JPEG)
    img_bytes = buffered.getvalue()
    # Encode the raw bytes as a base64 string.
    base64_image = base64.b64encode(img_bytes).decode('utf-8')
    return base64_image
def vision(image):
    # OpenAI API key; assumed here to come from an OPENAI_API_KEY environment
    # variable rather than being hard-coded in the source.
    api_key = os.environ["OPENAI_API_KEY"]
    # Get the base64-encoded image.
    base64_image = encode_image(image)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "extract all data from this image"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    content = response.json()['choices'][0]['message']['content']
    print(content)
    # Return the extracted text so the caller can use it.
    return content
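# Quick local check of vision() (a sketch, not part of the API): assumes a sample
# image file "sample_cv.png" exists next to this script and OPENAI_API_KEY is set.
#
#   from PIL import Image
#   print(vision(Image.open("sample_cv.png")))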
@app.post("/get_ocr_data/")
async def get_data(input_file: UploadFile = File(...)):
    try:
        # Determine the file type from the upload's content type.
        file_content = await input_file.read()
        file_type = input_file.content_type
        text = ""
        if file_type == "application/pdf":
            # Extract text from the PDF with PyPDF2.
            pdf_reader = PdfReader(BytesIO(file_content))
            for page in pdf_reader.pages:
                text += page.extract_text()
        elif file_type in ["image/jpeg", "image/png", "image/jpg"]:
            # Open the image with PIL and send it to the vision model for OCR.
            image = Image.open(BytesIO(file_content))
            text = vision(image)
        else:
            raise HTTPException(status_code=400, detail="Unsupported file type")

        # Ask the Gemini text model to pull the required fields out of the raw text.
        prompt = f"""This is CV data: {text.strip()}
I want only:
firstname, lastname, contact number, total years of experience, LinkedIn link, experience, skills
in JSON format only"""
        response = model_text.generate_content(prompt)
        data = json.loads(response.text.replace("```json", "").replace("```", ""))
        return {"data": data}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
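# Example client call (a sketch; assumes the app is served locally, e.g. with
# `uvicorn main:app --port 8000`, and that a file "cv.pdf" exists). The endpoint
# expects a multipart upload under the field name "input_file".
#
#   import requests
#   with open("cv.pdf", "rb") as f:
#       resp = requests.post(
#           "http://localhost:8000/get_ocr_data/",
#           files={"input_file": ("cv.pdf", f, "application/pdf")},
#       )
#   print(resp.json()["data"])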