PercivalFletcher committed on
Commit
33019b5
·
verified ·
1 Parent(s): f69ffb2

Update processing_utility.py

Browse files
Files changed (1) hide show
  1. processing_utility.py +47 -0
processing_utility.py CHANGED
@@ -45,6 +45,10 @@ from llama_index.readers.file import PyMuPDFReader
45
 
46
  import PyPDF2
47
 
 
 
 
 
48
  class Insurance(BaseModel):
49
  """
50
  A Pydantic model to define the data schema for extraction.
@@ -56,6 +60,49 @@ class Insurance(BaseModel):
56
  class Insurance(BaseModel):
57
  headings: str = Field(description="An array of headings")
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def process_pdf_chunk(chunk_path: str) -> str:
60
  """
61
  Worker function for the ProcessPoolExecutor.
 
45
 
46
  import PyPDF2
47
 
48
+ # Global variable for the extractor agent
49
+ llama_extract_agent = None
50
+
51
+
52
  class Insurance(BaseModel):
53
  """
54
  A Pydantic model to define the data schema for extraction.
 
60
  class Insurance(BaseModel):
61
  headings: str = Field(description="An array of headings")
62
 
63
+ def initialize_llama_extract_agent():
64
+ global llama_extract_agent
65
+ if llama_extract_agent is None:
66
+ print("Initializing LlamaExtract client and getting agent...")
67
+ try:
68
+ extractor = LlamaExtract()
69
+ llama_extract_agent = extractor.get_agent(name="insurance-parser")
70
+ print("LlamaExtract agent initialized.")
71
+ except Exception as e:
72
+ print(f"Error initializing LlamaExtract agent: {e}")
73
+ llama_extract_agent = None # Ensure it's None if there was an error
74
+
75
+
76
+ def extract_schema_from_file(file_path: str) -> Optional[Insurance]:
77
+ if not os.path.exists(file_path):
78
+ print(f"❌ Error: The file '{file_path}' was not found.")
79
+ return None
80
+
81
+ if llama_extract_agent is None:
82
+ print("LlamaExtract agent not initialized. Attempting to initialize now.")
83
+ initialize_llama_extract_agent()
84
+ if llama_extract_agent is None:
85
+ print("LlamaExtract agent failed to initialize. Cannot proceed with extraction.")
86
+ return None
87
+
88
+ print(f"🚀 Sending '{file_path}' to LlamaCloud for schema extraction...")
89
+
90
+ try:
91
+ result = llama_extract_agent.extract(file_path)
92
+
93
+ if result and result.data:
94
+ print("✅ Extraction successful!")
95
+ return result.data
96
+ else:
97
+ print("⚠️ Extraction did not return any data.")
98
+ return None
99
+
100
+ except Exception as e:
101
+ print(f"\n❌ An error occurred during the API call: {e}")
102
+ print("Please check your API key, network connection, and file format.")
103
+ return None
104
+
105
+
106
  def process_pdf_chunk(chunk_path: str) -> str:
107
  """
108
  Worker function for the ProcessPoolExecutor.