Commit · 6622361
1 Parent(s): a3ba5e2

Switched the start .py file run by the Dockerfile to lambda_entrypoint.py. Added the Gradio app launch from this .py.

Files changed:
- Dockerfile +1 -1
- lambda_entrypoint.py +83 -67
Dockerfile CHANGED
@@ -69,6 +69,6 @@ WORKDIR $HOME/app
 COPY --chown=user . $HOME/app
 
 # Keep the default entrypoint as flexible
-ENTRYPOINT ["python", "-u", "
+ENTRYPOINT ["python", "-u", "lambda_entrypoint.py"]
 
 #CMD ["python", "app.py"]
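With this change the container now starts lambda_entrypoint.py instead of app.py. The hunks below show only the handler body of that file, not the code that actually runs when python -u lambda_entrypoint.py is executed as the container entrypoint. As a minimal sketch, assuming the file simply ends with an ordinary run-as-script guard that calls the handler defined above (the test_event contents are placeholders and are not taken from this commit):

if __name__ == "__main__":
    # Illustrative only: a fake S3-style event so lambda_handler can be exercised
    # outside AWS Lambda; the bucket name and object key are placeholders.
    test_event = {
        "Records": [
            {"s3": {"bucket": {"name": "example-bucket"},
                    "object": {"key": "input/example.pdf"}}}
        ],
        "arguments": {"page_min": 0, "page_max": 0},
    }
    # Which branch runs depends on RUN_DIRECT_MODE, read at module import:
    # "0" launches the Gradio app, any other value processes the event records.
    lambda_handler(test_event, None)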
lambda_entrypoint.py CHANGED
@@ -13,6 +13,8 @@ except Exception as e:
 
 TMP_DIR = "/tmp/"
 
+run_direct_mode = os.getenv("RUN_DIRECT_MODE", "0")
+
 def download_file_from_s3(bucket_name, key, download_path):
     """Download a file from S3 to the local filesystem."""
     s3_client.download_file(bucket_name, key, download_path)
@@ -24,73 +26,87 @@ def upload_file_to_s3(file_path, bucket_name, key):
     print(f"Uploaded {file_path} to {key}")
 
 def lambda_handler(event, context):
+
     print("In lambda_handler function")
-    [67 lines removed here: the previous lambda_handler body, whose content is not shown in this view]
+
+    if run_direct_mode == "0":
+        # Gradio App execution
+        from app import app, max_queue_size, max_file_size # Replace with actual import if needed
+        from tools.auth import authenticate_user
+
+        if os.getenv("COGNITO_AUTH", "0") == "1":
+            app.queue(max_size=max_queue_size).launch(show_error=True, auth=authenticate_user, max_file_size=max_file_size)
+        else:
+            app.queue(max_size=max_queue_size).launch(show_error=True, inbrowser=True, max_file_size=max_file_size)
+
+    else:
+
+        # Create necessary directories
+        os.makedirs(os.path.join(TMP_DIR, "input"), exist_ok=True)
+        os.makedirs(os.path.join(TMP_DIR, "output"), exist_ok=True)
+
+        print("Got to record loop")
+        print("Event records is:", event["Records"])
+
+        # Extract S3 bucket and object key from the Records
+        for record in event.get("Records", [{}]):
+            bucket_name = record.get("s3", {}).get("bucket", {}).get("name")
+            input_key = record.get("s3", {}).get("object", {}).get("key")
+            print(f"Processing file {input_key} from bucket {bucket_name}")
+
+            # Extract additional arguments
+            arguments = event.get("arguments", {})
+
+            if not input_key:
+                input_key = arguments.get("input_file", "")
+
+            ocr_method = arguments.get("ocr_method", "Complex image analysis - docs with handwriting/signatures (AWS Textract)")
+            pii_detector = arguments.get("pii_detector", "AWS Comprehend")
+            page_min = str(arguments.get("page_min", 0))
+            page_max = str(arguments.get("page_max", 0))
+            allow_list = arguments.get("allow_list", None)
+            output_dir = arguments.get("output_dir", os.path.join(TMP_DIR, "output"))
+
+            print(f"OCR Method: {ocr_method}")
+            print(f"PII Detector: {pii_detector}")
+            print(f"Page Range: {page_min} - {page_max}")
+            print(f"Allow List: {allow_list}")
+            print(f"Output Directory: {output_dir}")
+
+            # Download input file
+            input_file_path = os.path.join(TMP_DIR, "input", os.path.basename(input_key))
+            download_file_from_s3(bucket_name, input_key, input_file_path)
+
+            # Construct command
+            command = [
+                "python",
+                "app.py",
+                "--input_file", input_file_path,
+                "--ocr_method", ocr_method,
+                "--pii_detector", pii_detector,
+                "--page_min", page_min,
+                "--page_max", page_max,
+                "--output_dir", output_dir,
+            ]
+
+            # Add allow_list only if provided
+            if allow_list:
+                allow_list_path = os.path.join(TMP_DIR, "allow_list.csv")
+                download_file_from_s3(bucket_name, allow_list, allow_list_path)
+                command.extend(["--allow_list", allow_list_path])
+
+            try:
+                result = subprocess.run(command, capture_output=True, text=True, check=True)
+                print("Processing succeeded:", result.stdout)
+            except subprocess.CalledProcessError as e:
+                print("Error during processing:", e.stderr)
+                raise e
+
+            # Upload output files back to S3
+            for root, _, files in os.walk(output_dir):
+                for file_name in files:
+                    local_file_path = os.path.join(root, file_name)
+                    output_key = f"{os.path.dirname(input_key)}/output/{file_name}"
+                    upload_file_to_s3(local_file_path, bucket_name, output_key)
 
     return {"statusCode": 200, "body": "Processing complete."}
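For reference, the direct-processing branch above reads only a handful of fields from the incoming event. A sketch of a payload that would exercise it, assuming RUN_DIRECT_MODE is set to something other than "0"; the bucket name, object key and allow-list key are placeholders, and the argument values are examples matching the handler's .get() defaults, not values from this commit:

example_event = {
    "Records": [
        {
            "s3": {
                "bucket": {"name": "my-redaction-bucket"},    # placeholder bucket
                "object": {"key": "input/scanned_form.pdf"},  # placeholder object key
            }
        }
    ],
    "arguments": {
        "ocr_method": "Complex image analysis - docs with handwriting/signatures (AWS Textract)",
        "pii_detector": "AWS Comprehend",
        "page_min": 1,
        "page_max": 5,
        "allow_list": "config/allow_list.csv",  # S3 key, downloaded to /tmp/allow_list.csv
        "output_dir": "/tmp/output",
    },
}

With an event like this, the handler downloads input/scanned_form.pdf to /tmp/input/, runs app.py as a subprocess with the matching --input_file, --ocr_method, --pii_detector, --page_min, --page_max, --output_dir and --allow_list flags, and then uploads everything found under the output directory back to the same bucket under input/output/.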