Spaces:

PercivalFletcher
/

Shreyansh-HackRx

Sleeping

App Files Files Community

PercivalFletcher committed on Aug 7

Commit

33019b5

verified ·

1 Parent(s): f69ffb2

Update processing_utility.py

Browse files

Files changed (1) hide show

processing_utility.py +47 -0

processing_utility.py CHANGED Viewed

@@ -45,6 +45,10 @@ from llama_index.readers.file import PyMuPDFReader
 import PyPDF2
 class Insurance(BaseModel):
     """
     A Pydantic model to define the data schema for extraction.
@@ -56,6 +60,49 @@ class Insurance(BaseModel):
 class Insurance(BaseModel):
     headings: str = Field(description="An array of headings")
 def process_pdf_chunk(chunk_path: str) -> str:
     """
     Worker function for the ProcessPoolExecutor.

 import PyPDF2
+# Global variable for the extractor agent
+llama_extract_agent = None
 class Insurance(BaseModel):
     """
     A Pydantic model to define the data schema for extraction.
 class Insurance(BaseModel):
     headings: str = Field(description="An array of headings")
+def initialize_llama_extract_agent():
+    """Populate the module-level ``llama_extract_agent`` on first use.
+
+    Does nothing when the global is already set. Otherwise a
+    ``LlamaExtract`` client is created and the agent named
+    ``"insurance-parser"`` is fetched from it. Any exception raised along
+    the way is printed and swallowed, leaving the global as ``None`` so
+    callers can detect the failure.
+    """
+    global llama_extract_agent
+    if llama_extract_agent is not None:
+        # Already initialized by an earlier call; nothing to do.
+        return
+    print("Initializing LlamaExtract client and getting agent...")
+    try:
+        client = LlamaExtract()
+        llama_extract_agent = client.get_agent(name="insurance-parser")
+        print("LlamaExtract agent initialized.")
+    except Exception as err:
+        print(f"Error initializing LlamaExtract agent: {err}")
+        # Reset explicitly so a failure never leaves a stale value behind.
+        llama_extract_agent = None
+def extract_schema_from_file(file_path: str) -> Optional[Insurance]:
+    """Run the LlamaExtract agent on *file_path* and return the extracted data.
+
+    The agent is initialized lazily through
+    ``initialize_llama_extract_agent()`` when the module-level
+    ``llama_extract_agent`` is still ``None``.
+
+    Args:
+        file_path: Path of the document to hand to the agent.
+
+    Returns:
+        ``result.data`` from ``llama_extract_agent.extract(file_path)`` when
+        it is truthy. ``None`` when the path does not exist, the agent could
+        not be initialized, the extraction returned nothing, or the call
+        raised an exception (which is printed, not re-raised).
+    """
+    if not os.path.exists(file_path):
+        print(f"❌ Error: The file '{file_path}' was not found.")
+        return None
+
+    if llama_extract_agent is None:
+        print("LlamaExtract agent not initialized. Attempting to initialize now.")
+        initialize_llama_extract_agent()
+        # Re-check the global: initialization reports failure by leaving it None.
+        if llama_extract_agent is None:
+            print("LlamaExtract agent failed to initialize. Cannot proceed with extraction.")
+            return None
+
+    print(f"🚀 Sending '{file_path}' to LlamaCloud for schema extraction...")
+    try:
+        result = llama_extract_agent.extract(file_path)
+        if not result or not result.data:
+            print("⚠️ Extraction did not return any data.")
+            return None
+        print("✅ Extraction successful!")
+        return result.data
+    except Exception as err:
+        print(f"\n❌ An error occurred during the API call: {err}")
+        print("Please check your API key, network connection, and file format.")
+        return None
 def process_pdf_chunk(chunk_path: str) -> str:
     """
     Worker function for the ProcessPoolExecutor.