File size: 3,522 Bytes
de76468
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
import tempfile

import cv2
import google.generativeai as genai
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import LLMChain
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from paddleocr import PaddleOCR
from sqlalchemy import create_engine, Column, Integer, String, JSON
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Pull settings from a local .env file into the process environment
load_dotenv()

# Runtime configuration: SQLite database location and the API key
DATABASE_URL = "sqlite:///mydatabase.db"
api_key = os.environ.get('API_KEY')

# Declarative base class that the ORM model(s) inherit from
Base = declarative_base()

class MyDataModel(Base):
    """ORM model mapped to the ``my_data_table`` table.

    Each row carries an integer primary key, a string ``name`` and a JSON
    ``data`` payload.
    """
    __tablename__ = 'my_data_table'
    
    # Integer primary key of the row
    id = Column(Integer, primary_key=True)
    # String label for the row
    name = Column(String)
    # Payload stored in a JSON-typed column
    data = Column(JSON)

# Connect to the configured database and create any missing mapped tables
engine = create_engine(DATABASE_URL)
Base.metadata.create_all(bind=engine)

# Session factory plus the module-level session used by the script
Session = sessionmaker(bind=engine)
session = Session()

# Hand the API key to the Google Generative AI client library
genai.configure(api_key=api_key)

# Define OCR function using PaddleOCR
def ocr_with_paddle(img_path):
    """Run PaddleOCR on an image file and return the recognized text.

    Args:
        img_path: Path of the image file; it is loaded with ``cv2.imread``.

    Returns:
        The recognized text fragments joined by single spaces, or an empty
        string when the OCR engine reports no text.

    Raises:
        ValueError: If OpenCV cannot read or decode the file at ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising,
        # so fail here with a clear message before loading the OCR models.
        raise ValueError(f"Could not read image file: {img_path}")

    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # The result holds one entry per input image; that entry is empty/None
    # when no text was detected.
    if not result or not result[0]:
        return ''

    fragments = []
    for detection in result[0]:
        # Each detection is expected to be [bounding_box, (text, confidence)].
        # Only the text is wanted; box coordinates and scores are skipped.
        if not isinstance(detection, (list, tuple)) or len(detection) < 2:
            continue
        recognition = detection[1]
        if isinstance(recognition, (list, tuple)) and recognition:
            fragments.append(str(recognition[0]))

    return ' '.join(fragments).strip()

# Chat prompt: a fixed system instruction followed by the invoice text,
# which is substituted into the "{input}" placeholder of the human turn
prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a helpful assistant that extracts invoice details such as "
        "invoice number, customer name, date, amount, and other relevant "
        "information from a provided invoice text.",
    ),
    ("human", "{input}"),
])

# Gemini chat model used to answer the prompt
llm = ChatGoogleGenerativeAI(
    api_key=api_key,
    model="gemini-1.5-pro",
    temperature=0.5,
    max_retries=2,
    max_tokens=None,
    timeout=None,
)

# Chain that formats the prompt and sends it to the model
invoice_chain = LLMChain(llm=llm, prompt=prompt)

def extract_invoice_details(input_text):
    """Ask the invoice chain to extract invoice details from OCR text.

    Args:
        input_text: Raw text of the invoice (for example the OCR output).

    Returns:
        The chain's ``"text"`` output with surrounding whitespace removed,
        or an empty string when ``input_text`` is ``None`` or blank.
    """
    # Nothing to extract from blank input; skip the model call entirely.
    if not input_text or not input_text.strip():
        return ""

    # ``invoke`` replaces the deprecated practice of calling the chain object.
    response = invoice_chain.invoke({"input": input_text})
    return response["text"].strip()

# Streamlit UI
st.title("Invoice OCR and Details Extraction")

st.write(
    "Upload an image file to extract the text and invoice details such as invoice number, customer name, date, and amount."
)

# Image Upload
uploaded_image = st.file_uploader("Choose an Image", type=["jpg", "jpeg", "png"])

if uploaded_image is not None:
    # Save the upload to a uniquely named temporary file. A fixed filename
    # would be shared (and overwritten) by concurrent sessions.
    suffix = os.path.splitext(uploaded_image.name)[1] or ".png"
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_image.getbuffer())
        img_path = tmp.name

    # Perform OCR on the uploaded image, then always remove the temp file
    try:
        text = ocr_with_paddle(img_path)
    finally:
        os.remove(img_path)

    st.write("Extracted Text:")
    st.text_area("OCR Output", text, height=300)

    # Extract invoice details from the text
    invoice_details = extract_invoice_details(text)

    st.write("Extracted Invoice Details:")
    st.text_area("Invoice Details", invoice_details, height=300)

    # Save details to the database; undo the pending insert on failure and
    # release the session in every case.
    new_entry = MyDataModel(name="invoice_details", data=invoice_details)
    try:
        session.add(new_entry)
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()

    st.success("Invoice details saved to the database!")