yuvaranianandhan24 committed on
Commit
7ddeba9
·
verified ·
1 Parent(s): cef89c4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import streamlit as st
4
+ import pytesseract
5
+ from tempfile import NamedTemporaryFile
6
+ from langchain.document_loaders import PyPDFLoader
7
+ from langchain.llms import CTransformers
8
+ from langchain.chains import LLMChain
9
+ from langchain.prompts import PromptTemplate
10
+
11
def main():
    """Render the page and hand the uploaded file to the matching handler.

    Two sidebar uploaders are shown: one for PDF files and one for images
    (png/jpg/jpeg). An uploaded PDF takes precedence; the image handler runs
    only when no PDF has been uploaded.
    """
    st.title("Invoice Entity Extractor 📚")

    sidebar = st.sidebar
    pdf_upload = sidebar.file_uploader("Upload a PDF file", type="pdf")
    image_upload = sidebar.file_uploader(
        "Upload an image", type=["png", "jpg", "jpeg"]
    )

    # Guard-clause dispatch: the PDF wins when both uploads are present.
    if pdf_upload is not None:
        process_pdf(pdf_upload)
        return
    if image_upload is not None:
        process_image(image_upload)
21
+
22
def _parse_entity_lines(result):
    """Turn the model's line-oriented output into two-column table rows.

    Each non-blank line is split at its FIRST colon only, so values that
    themselves contain colons (times, URLs, ...) stay intact. Lines without
    a colon yield an empty second column.
    """
    rows = []
    for line in result.strip().splitlines():
        if not line.strip():
            continue  # skip blank lines instead of emitting empty rows
        left, _, right = line.partition(":")
        rows.append([left.strip(), right.strip()])
    return rows


def process_pdf(uploaded_file):
    """Extract invoice entities from an uploaded PDF and show them on the page.

    The upload is written to a temporary file (PyPDFLoader is given a path),
    every page's text is written to the page, and the text of the first page
    is sent to a local Llama 2 model with an extraction prompt. The model's
    answer is rendered as a table.

    Args:
        uploaded_file: File-like object exposing ``read()`` that returns the
            PDF bytes (here, the object returned by the sidebar uploader).
    """
    import os  # local import: only needed to clean up the temporary file

    # delete=False so the loader can reopen the file by name after this
    # handle is closed; the file is removed explicitly below.
    with NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file_path = temp_file.name

    try:
        pages = PyPDFLoader(temp_file_path).load()
    finally:
        # Always remove the temp file, even if parsing the PDF fails.
        os.remove(temp_file_path)

    st.write(f"Number of pages: {len(pages)}")

    if not pages:
        # Nothing to feed the model; avoid IndexError on pages[0].
        st.warning("No pages could be read from the uploaded PDF.")
        return

    for page in pages:
        st.write(page.page_content)

    llm = CTransformers(
        model="llama-2-7b-chat.ggmlv3.q4_0.bin",
        model_type="llama",
        config={"max_new_tokens": 128, "temperature": 0.01},
    )

    template = (
        "Extract invoice number, name of organization, address, date,\n"
        "Qty, Rate ,Tax ,Amount {pages}\n"
        "Output : entity : type\n"
    )
    prompt_template = PromptTemplate(input_variables=["pages"], template=template)
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Only the first page is sent to the model.
    result = chain.run(pages=pages[0].page_content)

    st.write("Extracted entities:")
    st.table(_parse_entity_lines(result))
52
+
53
def process_image(uploaded_image):
    """Placeholder handler for uploaded images.

    Image processing is not implemented: ``uploaded_image`` is not read and
    a notice is written to the page instead.
    """
    notice = "Image processing is not implemented yet."
    st.write(notice)
56
+
57
# Script entry point: build the UI only when this file is run directly.
if __name__ == "__main__":
    main()