File size: 2,228 Bytes
1108a3a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# import pandas as pd
# import json
#
#
# # Function to parse the LLM output and create a DataFrame
# def process_llm_data(llm_data):
# # Assuming llm_data is a string that exactly matches the dictionary format
# # Convert the string representation of a dictionary to a dictionary (NOTE: eval is NOT safe on untrusted text; ast.literal_eval is the safe alternative)
# data_dict = eval(llm_data.strip())
#
# # Convert the dictionary to a DataFrame
# # Since our example has comma-separated values in strings for some fields, let's handle that
# for key, value in data_dict.items():
# if ',' in value:
# data_dict[key] = [v.strip() for v in value.split(',')]
# else:
# data_dict[key] = [value]
#
# df = pd.DataFrame(data_dict)
#
# # Saving the DataFrame to a CSV file
# csv_file_path = "Invoice_Data.csv"
# df.to_csv(csv_file_path, index=False)
#
# return csv_file_path
#
#
# # Example usage
# llm_extracted_data = """
# {
# 'Invoice no.': '001',
# 'Description': 'Widget A, Widget B',
# 'Quantity': '2, 5',
# 'Date': '2024-04-13',
# 'Unit price': '$30.00, $20.00',
# 'Amount': '$60.00, $100.00',
# 'Total': '$160.00',
# 'Email': '[email protected]',
# 'Phone number': '(123) 456-7890',
# 'Address': '123 AI Lane, Model Town, OpenAI'
# }
# """
#
# # Call the function to process the data and create a CSV
# csv_path = process_llm_data(llm_extracted_data)
# print(f"Data saved to CSV at: {csv_path}")
import pandas as pd
from utils import get_pdf_text, extracted_data
from dotenv import load_dotenv
load_dotenv()
def save_data_to_csv(data, filename):
    """Write one record to a CSV file and return the path that was written.

    Args:
        data: The record to save. It is wrapped in a one-element list and
            handed to ``pd.DataFrame``, so a dictionary becomes a single
            row whose columns are the dictionary keys.
        filename: Output path. ``.csv`` is appended unless the name already
            ends with it (case-insensitive), so ``"out"`` and ``"out.csv"``
            both write ``out.csv``.

    Returns:
        The path of the CSV file that was written, as a string.
    """
    # Wrapping in a list makes pandas treat the dictionary as a single row
    df = pd.DataFrame([data])
    print(df)

    # Only add the extension when it is missing, to avoid "name.csv.csv"
    csv_path = str(filename)
    if not csv_path.lower().endswith(".csv"):
        csv_path = f"{csv_path}.csv"

    df.to_csv(csv_path, index=False)
    print(f"Data saved to {csv_path}")
    return csv_path
# Example usage: pass one PDF through get_pdf_text and extracted_data,
# printing each intermediate value, then save the result as a CSV
invoice_pdf = "Invoice_001.pdf"
print('filename is {}'.format(invoice_pdf))

raw_data = get_pdf_text(invoice_pdf)
print('raw_data is {}'.format(raw_data))

llm_extracted_data = extracted_data(raw_data)
print('llm_extracted_data is {}'.format(llm_extracted_data))

# NOTE(review): llm_extracted_data is assumed to be a dictionary of the
# extracted fields -- confirm against utils.extracted_data
save_data_to_csv(llm_extracted_data, "Extracted_Invoice1")
|