seanpedrickcase committed on
Commit
6622361
·
1 Parent(s): a3ba5e2

Switched the Dockerfile entrypoint from entrypoint_router.py to lambda_entrypoint.py. Added Gradio launch logic to this .py file.

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. lambda_entrypoint.py +83 -67
Dockerfile CHANGED
@@ -69,6 +69,6 @@ WORKDIR $HOME/app
69
  COPY --chown=user . $HOME/app
70
 
71
  # Keep the default entrypoint as flexible
72
- ENTRYPOINT ["python", "-u", "entrypoint_router.py"]
73
 
74
  #CMD ["python", "app.py"]
 
69
  COPY --chown=user . $HOME/app
70
 
71
  # Keep the default entrypoint as flexible
72
+ ENTRYPOINT ["python", "-u", "lambda_entrypoint.py"]
73
 
74
  #CMD ["python", "app.py"]
lambda_entrypoint.py CHANGED
@@ -13,6 +13,8 @@ except Exception as e:
13
 
14
  TMP_DIR = "/tmp/"
15
 
 
 
16
  def download_file_from_s3(bucket_name, key, download_path):
17
  """Download a file from S3 to the local filesystem."""
18
  s3_client.download_file(bucket_name, key, download_path)
@@ -24,73 +26,87 @@ def upload_file_to_s3(file_path, bucket_name, key):
24
  print(f"Uploaded {file_path} to {key}")
25
 
26
  def lambda_handler(event, context):
 
27
  print("In lambda_handler function")
28
- # Create necessary directories
29
- os.makedirs(os.path.join(TMP_DIR, "input"), exist_ok=True)
30
- os.makedirs(os.path.join(TMP_DIR, "output"), exist_ok=True)
31
-
32
- print("Got to record loop")
33
- print("Event records is:", event["Records"])
34
-
35
- # Extract S3 bucket and object key from the Records
36
- for record in event.get("Records", [{}]):
37
- bucket_name = record.get("s3", {}).get("bucket", {}).get("name")
38
- input_key = record.get("s3", {}).get("object", {}).get("key")
39
- print(f"Processing file {input_key} from bucket {bucket_name}")
40
-
41
- # Extract additional arguments
42
- arguments = event.get("arguments", {})
43
-
44
- if not input_key:
45
- input_key = arguments.get("input_file", "")
46
-
47
- ocr_method = arguments.get("ocr_method", "Complex image analysis - docs with handwriting/signatures (AWS Textract)")
48
- pii_detector = arguments.get("pii_detector", "AWS Comprehend")
49
- page_min = str(arguments.get("page_min", 0))
50
- page_max = str(arguments.get("page_max", 0))
51
- allow_list = arguments.get("allow_list", None)
52
- output_dir = arguments.get("output_dir", os.path.join(TMP_DIR, "output"))
53
-
54
- print(f"OCR Method: {ocr_method}")
55
- print(f"PII Detector: {pii_detector}")
56
- print(f"Page Range: {page_min} - {page_max}")
57
- print(f"Allow List: {allow_list}")
58
- print(f"Output Directory: {output_dir}")
59
-
60
- # Download input file
61
- input_file_path = os.path.join(TMP_DIR, "input", os.path.basename(input_key))
62
- download_file_from_s3(bucket_name, input_key, input_file_path)
63
-
64
- # Construct command
65
- command = [
66
- "python",
67
- "app.py",
68
- "--input_file", input_file_path,
69
- "--ocr_method", ocr_method,
70
- "--pii_detector", pii_detector,
71
- "--page_min", page_min,
72
- "--page_max", page_max,
73
- "--output_dir", output_dir,
74
- ]
75
-
76
- # Add allow_list only if provided
77
- if allow_list:
78
- allow_list_path = os.path.join(TMP_DIR, "allow_list.csv")
79
- download_file_from_s3(bucket_name, allow_list, allow_list_path)
80
- command.extend(["--allow_list", allow_list_path])
81
-
82
- try:
83
- result = subprocess.run(command, capture_output=True, text=True, check=True)
84
- print("Processing succeeded:", result.stdout)
85
- except subprocess.CalledProcessError as e:
86
- print("Error during processing:", e.stderr)
87
- raise e
88
-
89
- # Upload output files back to S3
90
- for root, _, files in os.walk(output_dir):
91
- for file_name in files:
92
- local_file_path = os.path.join(root, file_name)
93
- output_key = f"{os.path.dirname(input_key)}/output/{file_name}"
94
- upload_file_to_s3(local_file_path, bucket_name, output_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  return {"statusCode": 200, "body": "Processing complete."}
 
13
 
14
  TMP_DIR = "/tmp/"
15
 
16
+ run_direct_mode = os.getenv("RUN_DIRECT_MODE", "0")
17
+
18
  def download_file_from_s3(bucket_name, key, download_path):
19
  """Download a file from S3 to the local filesystem."""
20
  s3_client.download_file(bucket_name, key, download_path)
 
26
  print(f"Uploaded {file_path} to {key}")
27
 
28
def lambda_handler(event, context):
    """AWS Lambda entry point for the redaction pipeline.

    When the module-level RUN_DIRECT_MODE flag is "0", launches the Gradio
    app instead of doing batch processing. Otherwise, for each S3 record in
    *event*: downloads the input file, runs ``app.py`` as a subprocess with
    the arguments carried in ``event["arguments"]``, and uploads everything
    found under the output directory back to the source bucket.

    Parameters
    ----------
    event : dict
        Lambda event. Expected keys: ``Records`` (S3 event records) and an
        optional ``arguments`` mapping (ocr_method, pii_detector, page_min,
        page_max, allow_list, output_dir, input_file).
    context : object
        Lambda context object (unused).

    Returns
    -------
    dict
        ``{"statusCode": 200, "body": "Processing complete."}``.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``app.py`` subprocess exits non-zero.
    """
    print("In lambda_handler function")

    if run_direct_mode == "0":
        # Gradio app execution; imports are local so batch mode never pays
        # the Gradio import cost.
        from app import app, max_queue_size, max_file_size
        from tools.auth import authenticate_user

        if os.getenv("COGNITO_AUTH", "0") == "1":
            app.queue(max_size=max_queue_size).launch(
                show_error=True, auth=authenticate_user, max_file_size=max_file_size
            )
        else:
            app.queue(max_size=max_queue_size).launch(
                show_error=True, inbrowser=True, max_file_size=max_file_size
            )

    else:
        # Create necessary directories
        os.makedirs(os.path.join(TMP_DIR, "input"), exist_ok=True)
        os.makedirs(os.path.join(TMP_DIR, "output"), exist_ok=True)

        print("Got to record loop")
        # Fix: use .get() here — the original indexed event["Records"] and
        # raised KeyError on events without Records, even though the loop
        # below already tolerates a missing key.
        print("Event records is:", event.get("Records", []))

        # The extra arguments apply to every record, so read them once
        # (the original re-read them on each iteration).
        arguments = event.get("arguments", {})

        # Extract S3 bucket and object key from the Records
        for record in event.get("Records", [{}]):
            bucket_name = record.get("s3", {}).get("bucket", {}).get("name")
            input_key = record.get("s3", {}).get("object", {}).get("key")
            print(f"Processing file {input_key} from bucket {bucket_name}")

            if not input_key:
                input_key = arguments.get("input_file", "")

            if not bucket_name or not input_key:
                # Fix: the original fell through with None values and failed
                # with an opaque boto3 error inside download_file_from_s3;
                # skip such records with a clear message instead.
                print("Skipping record: no S3 bucket/key and no input_file argument.")
                continue

            ocr_method = arguments.get(
                "ocr_method",
                "Complex image analysis - docs with handwriting/signatures (AWS Textract)",
            )
            pii_detector = arguments.get("pii_detector", "AWS Comprehend")
            page_min = str(arguments.get("page_min", 0))
            page_max = str(arguments.get("page_max", 0))
            allow_list = arguments.get("allow_list", None)
            output_dir = arguments.get("output_dir", os.path.join(TMP_DIR, "output"))

            print(f"OCR Method: {ocr_method}")
            print(f"PII Detector: {pii_detector}")
            print(f"Page Range: {page_min} - {page_max}")
            print(f"Allow List: {allow_list}")
            print(f"Output Directory: {output_dir}")

            # Download input file
            input_file_path = os.path.join(TMP_DIR, "input", os.path.basename(input_key))
            download_file_from_s3(bucket_name, input_key, input_file_path)

            # Construct command (list form, shell=False: no injection risk)
            command = [
                "python",
                "app.py",
                "--input_file", input_file_path,
                "--ocr_method", ocr_method,
                "--pii_detector", pii_detector,
                "--page_min", page_min,
                "--page_max", page_max,
                "--output_dir", output_dir,
            ]

            # Add allow_list only if provided
            if allow_list:
                allow_list_path = os.path.join(TMP_DIR, "allow_list.csv")
                download_file_from_s3(bucket_name, allow_list, allow_list_path)
                command.extend(["--allow_list", allow_list_path])

            try:
                result = subprocess.run(command, capture_output=True, text=True, check=True)
                print("Processing succeeded:", result.stdout)
            except subprocess.CalledProcessError as e:
                print("Error during processing:", e.stderr)
                # Fix: bare raise preserves the original traceback
                # ("raise e" resets it to this frame).
                raise

            # Upload output files back to S3, mirroring the input key's prefix.
            key_prefix = os.path.dirname(input_key)
            for root, _, files in os.walk(output_dir):
                for file_name in files:
                    local_file_path = os.path.join(root, file_name)
                    # Fix: the original produced keys with a leading "/" when
                    # the input object sat at the bucket root (dirname == "").
                    if key_prefix:
                        output_key = f"{key_prefix}/output/{file_name}"
                    else:
                        output_key = f"output/{file_name}"
                    upload_file_to_s3(local_file_path, bucket_name, output_key)

    return {"statusCode": 200, "body": "Processing complete."}