File size: 2,829 Bytes
59d3355 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
"""
Purpose
An AWS lambda function that analyzes documents with Amazon Textract.
"""
import json
import base64
import logging
import boto3
from botocore.exceptions import ClientError
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Textract client, created once when the module is imported and used by
# lambda_handler for every invocation.
textract_client = boto3.client("textract")
def _resolve_document(event):
    """
    Build the ``Document`` argument for Textract from the Lambda event.

    The request payload is looked up first in the ``body`` of the event
    (a JSON string, or an already-decoded dict) and then in the event itself.
    Within one payload, ``image`` takes precedence over ``S3Object``.

    param: event: The event object for the Lambda function.
    return: ``{"Bytes": ...}`` for a base 64 encoded image, or
    ``{"S3Object": {"Bucket": ..., "Name": ...}}`` for an S3 reference.
    raises: ValueError: If the body is not a JSON object, the image is not
    valid base 64 text, the S3Object is incomplete, or no source is given.
    """
    invalid_source = (
        "Invalid source. Only image base 64 encoded image bytes or S3Object are supported."
    )
    if not isinstance(event, dict):
        raise ValueError(invalid_source)

    # Candidate payloads, in order of precedence.
    sources = []
    body = event.get("body")
    if isinstance(body, dict):
        sources.append(body)
    elif body:
        try:
            # json.JSONDecodeError is a ValueError, so malformed JSON is
            # reported through the same path as every other bad input.
            parsed = json.loads(body)
        except TypeError as err:
            raise ValueError("Request body must be a JSON string.") from err
        if not isinstance(parsed, dict):
            raise ValueError("Request body must be a JSON object.")
        sources.append(parsed)
    sources.append(event)

    for source in sources:
        if "image" in source:
            encoded = source["image"]
            if isinstance(encoded, str):
                encoded = encoded.encode("utf-8")
            elif not isinstance(encoded, (bytes, bytearray)):
                raise ValueError("The image must be a base 64 encoded string.")
            # binascii.Error (bad padding) is also a ValueError.
            return {"Bytes": base64.b64decode(encoded)}
        if "S3Object" in source:
            s3_object = source["S3Object"]
            try:
                return {
                    "S3Object": {
                        "Bucket": s3_object["Bucket"],
                        "Name": s3_object["Name"],
                    }
                }
            except (KeyError, TypeError) as err:
                raise ValueError(
                    "S3Object must contain Bucket and Name."
                ) from err

    raise ValueError(invalid_source)


def lambda_handler(event, context):
    """
    Lambda handler function

    Detects text in the document referenced by the event and returns the
    result wrapped in an HTTP-style response.

    param: event: The event object for the Lambda function. The document is
    given either as ``image`` (base 64 encoded bytes) or as ``S3Object``
    (``Bucket`` and ``Name``), inside the JSON ``body`` or on the event itself.
    param: context: The context object for the lambda function.
    return: A dict with ``statusCode``, ``headers`` and ``body``. ``body`` is a
    JSON string holding an inner response: on success ``statusCode`` 200 and a
    JSON-encoded list of Block objects; on failure ``statusCode`` 400 and an
    ``Error`` / ``ErrorMessage`` pair.
    """
    # The context is only needed for log messages; do not let a missing one
    # turn a handled error into a crash.
    function_arn = getattr(context, "invoked_function_arn", "unknown")

    try:
        # Determine document source.
        image = _resolve_document(event)
        # Analyze the document.
        response = textract_client.detect_document_text(Document=image)
        # Get the Blocks
        lambda_response = {
            "statusCode": 200,
            "body": json.dumps(response["Blocks"]),
        }
    except ValueError as val_error:
        lambda_response = {
            "statusCode": 400,
            "body": {"Error": "ValueError", "ErrorMessage": format(val_error)},
        }
        logger.error("Error function %s: %s", function_arn, format(val_error))
    except ClientError as err:
        error_message = "Couldn't analyze image. " + err.response["Error"]["Message"]
        lambda_response = {
            "statusCode": 400,
            "body": {
                "Error": err.response["Error"]["Code"],
                "ErrorMessage": error_message,
            },
        }
        logger.error("Error function %s: %s", function_arn, error_message)

    # Create return body
    # NOTE(review): the outer status is always 200 and the real outcome is the
    # inner statusCode; kept as-is because existing clients parse the body.
    return {
        "statusCode": 200,
        "headers": {"Content-Type": "application/json"},
        "body": json.dumps(lambda_response),
    }
|