Commit · 6622361
1 Parent(s): a3ba5e2

Switched the start .py file run by the Dockerfile to lambda_entrypoint.py. Added the Gradio app launch from this .py.

Files changed:
- Dockerfile +1 -1
- lambda_entrypoint.py +83 -67
Dockerfile CHANGED
@@ -69,6 +69,6 @@ WORKDIR $HOME/app
 COPY --chown=user . $HOME/app
 
 # Keep the default entrypoint as flexible
-ENTRYPOINT ["python", "-u", "
+ENTRYPOINT ["python", "-u", "lambda_entrypoint.py"]
 
 #CMD ["python", "app.py"]
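With this change the container now starts lambda_entrypoint.py instead of app.py. The hunks below show only the handler body of that file, not the code that actually runs when python -u lambda_entrypoint.py is executed as the container entrypoint. As a minimal sketch, assuming the file simply ends with an ordinary run-as-script guard that calls the handler defined above (the test_event contents are placeholders and are not taken from this commit):

if __name__ == "__main__":
    # Illustrative only: a fake S3-style event so lambda_handler can be exercised
    # outside AWS Lambda; the bucket name and object key are placeholders.
    test_event = {
        "Records": [
            {"s3": {"bucket": {"name": "example-bucket"},
                    "object": {"key": "input/example.pdf"}}}
        ],
        "arguments": {"page_min": 0, "page_max": 0},
    }
    # Which branch runs depends on RUN_DIRECT_MODE, read at module import:
    # "0" launches the Gradio app, any other value processes the event records.
    lambda_handler(test_event, None)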
lambda_entrypoint.py CHANGED
@@ -13,6 +13,8 @@ except Exception as e:
 
 TMP_DIR = "/tmp/"
 
+run_direct_mode = os.getenv("RUN_DIRECT_MODE", "0")
+
 def download_file_from_s3(bucket_name, key, download_path):
     """Download a file from S3 to the local filesystem."""
     s3_client.download_file(bucket_name, key, download_path)
@@ -24,73 +26,87 @@ def upload_file_to_s3(file_path, bucket_name, key):
     print(f"Uploaded {file_path} to {key}")
 
 def lambda_handler(event, context):
+
     print("In lambda_handler function")
-    [67 lines removed here: the previous lambda_handler body, whose content is not shown in this view]
+
+    if run_direct_mode == "0":
+        # Gradio App execution
+        from app import app, max_queue_size, max_file_size # Replace with actual import if needed
+        from tools.auth import authenticate_user
+
+        if os.getenv("COGNITO_AUTH", "0") == "1":
+            app.queue(max_size=max_queue_size).launch(show_error=True, auth=authenticate_user, max_file_size=max_file_size)
+        else:
+            app.queue(max_size=max_queue_size).launch(show_error=True, inbrowser=True, max_file_size=max_file_size)
+
+    else:
+
+        # Create necessary directories
+        os.makedirs(os.path.join(TMP_DIR, "input"), exist_ok=True)
+        os.makedirs(os.path.join(TMP_DIR, "output"), exist_ok=True)
+
+        print("Got to record loop")
+        print("Event records is:", event["Records"])
+
+        # Extract S3 bucket and object key from the Records
+        for record in event.get("Records", [{}]):
+            bucket_name = record.get("s3", {}).get("bucket", {}).get("name")
+            input_key = record.get("s3", {}).get("object", {}).get("key")
+            print(f"Processing file {input_key} from bucket {bucket_name}")
+
+            # Extract additional arguments
+            arguments = event.get("arguments", {})
+
+            if not input_key:
+                input_key = arguments.get("input_file", "")
+
+            ocr_method = arguments.get("ocr_method", "Complex image analysis - docs with handwriting/signatures (AWS Textract)")
+            pii_detector = arguments.get("pii_detector", "AWS Comprehend")
+            page_min = str(arguments.get("page_min", 0))
+            page_max = str(arguments.get("page_max", 0))
+            allow_list = arguments.get("allow_list", None)
+            output_dir = arguments.get("output_dir", os.path.join(TMP_DIR, "output"))
+
+            print(f"OCR Method: {ocr_method}")
+            print(f"PII Detector: {pii_detector}")
+            print(f"Page Range: {page_min} - {page_max}")
+            print(f"Allow List: {allow_list}")
+            print(f"Output Directory: {output_dir}")
+
+            # Download input file
+            input_file_path = os.path.join(TMP_DIR, "input", os.path.basename(input_key))
+            download_file_from_s3(bucket_name, input_key, input_file_path)
+
+            # Construct command
+            command = [
+                "python",
+                "app.py",
+                "--input_file", input_file_path,
+                "--ocr_method", ocr_method,
+                "--pii_detector", pii_detector,
+                "--page_min", page_min,
+                "--page_max", page_max,
+                "--output_dir", output_dir,
+            ]
+
+            # Add allow_list only if provided
+            if allow_list:
+                allow_list_path = os.path.join(TMP_DIR, "allow_list.csv")
+                download_file_from_s3(bucket_name, allow_list, allow_list_path)
+                command.extend(["--allow_list", allow_list_path])
+
+            try:
+                result = subprocess.run(command, capture_output=True, text=True, check=True)
+                print("Processing succeeded:", result.stdout)
+            except subprocess.CalledProcessError as e:
+                print("Error during processing:", e.stderr)
+                raise e
+
+            # Upload output files back to S3
+            for root, _, files in os.walk(output_dir):
+                for file_name in files:
+                    local_file_path = os.path.join(root, file_name)
+                    output_key = f"{os.path.dirname(input_key)}/output/{file_name}"
+                    upload_file_to_s3(local_file_path, bucket_name, output_key)
 
     return {"statusCode": 200, "body": "Processing complete."}
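For reference, the direct-processing branch above reads only a handful of fields from the incoming event. A sketch of a payload that would exercise it, assuming RUN_DIRECT_MODE is set to something other than "0"; the bucket name, object key and allow-list key are placeholders, and the argument values are examples matching the handler's .get() defaults, not values from this commit:

example_event = {
    "Records": [
        {
            "s3": {
                "bucket": {"name": "my-redaction-bucket"},    # placeholder bucket
                "object": {"key": "input/scanned_form.pdf"},  # placeholder object key
            }
        }
    ],
    "arguments": {
        "ocr_method": "Complex image analysis - docs with handwriting/signatures (AWS Textract)",
        "pii_detector": "AWS Comprehend",
        "page_min": 1,
        "page_max": 5,
        "allow_list": "config/allow_list.csv",  # S3 key, downloaded to /tmp/allow_list.csv
        "output_dir": "/tmp/output",
    },
}

With an event like this, the handler downloads input/scanned_form.pdf to /tmp/input/, runs app.py as a subprocess with the matching --input_file, --ocr_method, --pii_detector, --page_min, --page_max, --output_dir and --allow_list flags, and then uploads everything found under the output directory back to the same bucket under input/output/.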