aws-samples · ganesrar · Aug 14, 2025
diff --git a/industry-specific-pocs/financial-services/PDF_2_WebForm/DEPLOYMENT.md b/industry-specific-pocs/financial-services/PDF_2_WebForm/DEPLOYMENT.md
@@ -0,0 +1,128 @@
+# Deployment Guide
+
+## Prerequisites
+- AWS CLI configured with appropriate permissions
+- Node.js 14+ and npm installed
+- Access to AWS services: S3, Lambda, API Gateway, Bedrock
+
+## Frontend Deployment
+
+### Local Development
+```bash
+npm install
+npm start
+```
+
+### Production Build
+```bash
+npm run build
+# Deploy build/ directory to S3 static website hosting or CloudFront
+```
+
+## Lambda Function Deployment
+
+### 1. Package Dependencies
+```bash
+cd Lambda/
+pip install -r requirements.txt -t .
+```
+
+### 2. Create Deployment Packages
+```bash
+# For each Lambda function
+zip -r generateWebFormStream.zip generateWebFormStream.py boto3/ botocore/ other_dependencies/
+zip -r getPdfContent.zip getPdfContent.py boto3/ botocore/ other_dependencies/
+zip -r listS3Objects.zip listS3Objects.py boto3/ botocore/ other_dependencies/
+zip -r getDocumentFields.zip getDocumentFields.py boto3/ botocore/ other_dependencies/
+```
+
+### 3. Deploy to AWS Lambda
+```bash
+# Using AWS CLI
+aws lambda create-function \
+  --function-name generateWebFormStream \
+  --runtime python3.9 \
+  --role arn:aws:iam::ACCOUNT:role/lambda-execution-role \
+  --handler generateWebFormStream.lambda_handler \
+  --zip-file fileb://generateWebFormStream.zip
+
+# Repeat for other functions
+```
+
+## API Gateway Configuration
+
+### REST API Endpoints
+1. Create REST API in API Gateway
+2. Create resources and methods
+3. Integrate with Lambda functions
+4. Enable CORS
+5. Deploy to stage
+
+### WebSocket API
+1. Create WebSocket API
+2. Configure routes: $connect, $disconnect, generate
+3. Integrate with generateWebFormStream Lambda
+4. Deploy to stage
+
+## Required IAM Permissions
+
+### Lambda Execution Role
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "logs:CreateLogGroup",
+        "logs:CreateLogStream",
+        "logs:PutLogEvents"
+      ],
+      "Resource": "arn:aws:logs:*:*:*"
+    },
+    {
+      "Effect": "Allow",
+      "Action": [
+        "s3:GetObject",
+        "s3:ListBucket"
+      ],
+      "Resource": [
+        "arn:aws:s3:::your-bucket/*",
+        "arn:aws:s3:::your-bucket"
+      ]
+    },
+    {
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel",
+        "bedrock:InvokeModelWithResponseStream"
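+      ],
+      "Resource": [
+        "arn:aws:bedrock:*:*:inference-profile/us.anthropic.claude-3-5-sonnet-20241022-v2:0",
+        "arn:aws:bedrock:*::foundation-model/anthropic.claude-3-5-sonnet-20241022-v2:0"
+      ]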
+    },
+    {
+      "Effect": "Allow",
+      "Action": [
+        "execute-api:ManageConnections"
+      ],
+      "Resource": "arn:aws:execute-api:*:*:*/production/POST/@connections/*"
+    }
+  ]
+}
+```
+
+## Environment Variables
+Set these in Lambda function configuration:
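+- `WEBSOCKET_ENDPOINT`: WebSocket API Gateway connection-management endpoint for the deployed stage, in HTTPS form (`https://{api-id}.execute-api.{region}.amazonaws.com/{stage}`)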
+- `S3_BUCKET_PREFIX`: Optional bucket name filter
+
+## Testing
+1. Test Lambda functions individually
+2. Test API Gateway endpoints
+3. Test WebSocket connections
+4. Test end-to-end workflow
+
+## Monitoring
+- CloudWatch Logs for Lambda functions
+- API Gateway metrics
+- WebSocket connection metrics
+- Bedrock usage metrics
diff --git a/industry-specific-pocs/financial-services/PDF_2_WebForm/Lambda/generateWebFormStream.py b/industry-specific-pocs/financial-services/PDF_2_WebForm/Lambda/generateWebFormStream.py
@@ -0,0 +1,133 @@
+import json
+import boto3
+from botocore.exceptions import ClientError
+
+"""
+WebSocket Lambda Function for Real-time Code Generation
+
+This function handles WebSocket connections and generates code using AWS Bedrock Claude 3.5 Sonnet.
+It supports streaming responses for real-time code generation in multiple programming languages.
+
+Supported Languages:
+- React.js
+- HTML
+- Java
+- Python
+- C#
+
+WebSocket Routes:
+- $connect: Handle new WebSocket connections
+- $disconnect: Handle WebSocket disconnections
+- generate: Process code generation requests with streaming responses
+"""
+
+def _error_response(status_code, message):
+    """Build an error result in the shape this handler returns on failure.
+
+    Args:
+        status_code: Value for the ``statusCode`` field.
+        message: Description serialized into the body as ``{"error": ...}``.
+
+    Returns:
+        dict with ``statusCode``, the CORS ``headers`` and a JSON ``body``.
+    """
+    return {
+        'statusCode': status_code,
+        'headers': {
+            'Access-Control-Allow-Origin': '*',
+            'Access-Control-Allow-Headers': 'Content-Type',
+            'Access-Control-Allow-Methods': 'GET,POST,OPTIONS'
+        },
+        'body': json.dumps({'error': message})
+    }
+
+
+def _build_prompt(metadata_json, custom_prompt):
+    """Return the prompt text sent to the model.
+
+    A non-empty ``custom_prompt`` is used as-is, with every ``{metadata}``
+    placeholder replaced by the pretty-printed metadata. Otherwise the
+    default HTML-form prompt is used.
+    """
+    metadata_text = json.dumps(metadata_json, indent=2)
+    if custom_prompt:
+        return custom_prompt.replace('{metadata}', metadata_text)
+
+    # The whitespace inside this literal is part of the prompt; keep it as-is.
+    return f"""
+                Generate ONLY clean HTML form code for the following PDF metadata fields.
+
+                Metadata: {metadata_text}
+
+                Requirements:
+                - Generate a very professional looking HTML form elements (no explanations, no markdown)
+                - Include inline CSS styles for proper formatting
+                - Create appropriate input types for each field
+                - Add form validation attributes
+                - Include a submit button
+                - Start directly with <form> tag
+
+                Return ONLY the HTML form code that can be directly rendered in a browser.
+                """
+
+
+def _management_endpoint_url():
+    """Return the endpoint URL for the API Gateway Management API client.
+
+    The ``WEBSOCKET_ENDPOINT`` environment variable wins when set. Otherwise
+    the endpoint this function was originally hard-wired to is used, so an
+    existing deployment behaves the same without new configuration.
+    """
+    import os  # local import keeps this block independent of file-level imports
+
+    endpoint = (
+        os.environ.get('WEBSOCKET_ENDPOINT')
+        or 'https://bb4bk15ec3.execute-api.us-east-1.amazonaws.com/production'
+    )
+    # Clients connect with wss://, but the management client takes an
+    # https:// URL; accept either form in the configuration.
+    if endpoint.startswith('wss://'):
+        endpoint = 'https://' + endpoint[len('wss://'):]
+    return endpoint
+
+
+def lambda_handler(event, context):
+    """Handle an API Gateway WebSocket event for streamed form generation.
+
+    Routes (taken from ``event['requestContext']['routeKey']``):
+      * ``$connect`` / ``$disconnect``: acknowledged with status 200.
+      * ``generate``: builds a prompt from the JSON message body
+        (``metadata``, optional ``prompt``), streams the model output to the
+        calling connection as ``content_block_delta`` messages, then sends a
+        final ``message_delta`` message.
+      * anything else: rejected with status 400.
+
+    The ``language`` field of the message is not used when building the
+    prompt.
+
+    Args:
+        event: API Gateway WebSocket proxy event.
+        context: Lambda context object (unused).
+
+    Returns:
+        ``{'statusCode': 200}`` on success; otherwise a dict with
+        ``statusCode`` 400 (bad request) or 500 (AWS ``ClientError``), CORS
+        headers and a JSON body ``{"error": ...}``.
+    """
+    request_context = event.get('requestContext', {})
+    route_key = request_context.get('routeKey')
+
+    # Connection lifecycle events only need to be acknowledged.
+    if route_key in ('$connect', '$disconnect'):
+        return {'statusCode': 200}
+
+    # Any other route used to fall through and implicitly return None.
+    if route_key != 'generate':
+        return _error_response(400, f'Unsupported route: {route_key}')
+
+    raw_body = event.get('body')
+    if not raw_body:
+        return _error_response(400, 'Missing metadata or language')
+
+    # Validate the request before creating any AWS client, so malformed input
+    # yields a 400 instead of an unhandled exception.
+    try:
+        body = json.loads(raw_body)
+    except (TypeError, ValueError):
+        return _error_response(400, 'Request body is not valid JSON')
+    if not isinstance(body, dict):
+        return _error_response(400, 'Request body must be a JSON object')
+
+    custom_prompt = body.get('prompt') or ''
+    if not isinstance(custom_prompt, str):
+        return _error_response(400, 'prompt must be a string')
+
+    connection_id = request_context.get('connectionId')
+    if not connection_id:
+        return _error_response(400, 'Missing connectionId')
+
+    prompt = _build_prompt(body.get('metadata', {}), custom_prompt)
+
+    try:
+        # Client used to push messages back over the caller's connection.
+        apigateway_client = boto3.client(
+            'apigatewaymanagementapi',
+            endpoint_url=_management_endpoint_url()
+        )
+
+        # Invoke Claude 3.5 Sonnet through Bedrock with a streamed response.
+        bedrock = boto3.client('bedrock-runtime', region_name='us-east-1')
+        response = bedrock.invoke_model_with_response_stream(
+            modelId='us.anthropic.claude-3-5-sonnet-20241022-v2:0',
+            body=json.dumps({
+                'anthropic_version': 'bedrock-2023-05-31',
+                'max_tokens': 4000,
+                'messages': [{
+                    'role': 'user',
+                    'content': prompt
+                }]
+            })
+        )
+
+        # Relay each text delta to the client as soon as it arrives.
+        for stream_event in response['body']:
+            payload = stream_event.get('chunk', {}).get('bytes')
+            if not payload:
+                continue
+            chunk = json.loads(payload)
+            if chunk.get('type') != 'content_block_delta':
+                continue
+            # Tolerate deltas without a 'text' key rather than raising.
+            text = chunk.get('delta', {}).get('text')
+            if text:
+                apigateway_client.post_to_connection(
+                    ConnectionId=connection_id,
+                    Data=json.dumps({
+                        'type': 'content_block_delta',
+                        'delta': {'text': text}
+                    })
+                )
+
+        # Tell the client the stream is complete.
+        apigateway_client.post_to_connection(
+            ConnectionId=connection_id,
+            Data=json.dumps({'type': 'message_delta', 'delta': {'stop_reason': 'end_turn'}})
+        )
+
+        return {'statusCode': 200}
+    except ClientError as e:
+        return _error_response(500, str(e))
diff --git a/industry-specific-pocs/financial-services/PDF_2_WebForm/Lambda/getDocumentFields.py b/industry-specific-pocs/financial-services/PDF_2_WebForm/Lambda/getDocumentFields.py
@@ -0,0 +1,93 @@
+import json
+import boto3
+from botocore.exceptions import ClientError
+
+"""
+Alternative Document Fields Extraction Lambda Function
+
+This function uses AWS Bedrock Data Automation to extract form fields from PDF documents.
+It's an alternative to the main metadata extraction approach used in the application.
+
+Note: This function is not currently used in the main application (App.js) but is kept
+for potential future use or as an alternative implementation.
+
+Features:
+- Uses Bedrock Data Automation service
+- Extracts structured field data from PDFs
+- Asynchronous processing with job tracking
+- Returns field names and job IDs
+
+Input:
+- S3 event structure with bucket and object key
+
+Output:
+- field_names: Array of extracted field names
+- job_id: Bedrock Data Automation job identifier
+"""
+
+def _json_response(status_code, payload):
+    """Wrap ``payload`` in the response shape this handler returns.
+
+    Args:
+        status_code: Value for the ``statusCode`` field.
+        payload: JSON-serializable dict placed in ``body``.
+
+    Returns:
+        dict with ``statusCode``, the CORS ``headers`` and a JSON ``body``.
+    """
+    return {
+        'statusCode': status_code,
+        'headers': {
+            'Access-Control-Allow-Origin': '*',
+            'Access-Control-Allow-Headers': 'Content-Type',
+            'Access-Control-Allow-Methods': 'GET,POST,OPTIONS'
+        },
+        'body': json.dumps(payload)
+    }
+
+
+def _parse_s3_location(event):
+    """Return ``(bucket, key)`` from the first S3 record of ``event``.
+
+    The S3 event structure is read from the JSON ``body`` when one is
+    present, otherwise from the event itself.
+
+    Raises:
+        KeyError, IndexError, TypeError, ValueError: if the body is not valid
+            JSON or the expected ``Records[0].s3`` structure is missing.
+    """
+    if 'body' in event and event['body']:
+        source = json.loads(event['body'])
+    else:
+        source = event
+    s3_record = source['Records'][0]['s3']
+    return s3_record['bucket']['name'], s3_record['object']['key']
+
+
+def lambda_handler(event, context):
+    """Start a Bedrock Data Automation job for the PDF named in an S3 event.
+
+    Args:
+        event: Either an S3 event structure, or a request whose JSON ``body``
+            contains one. Only the first record is used.
+        context: Lambda context object (unused).
+
+    Returns:
+        dict with ``statusCode``, CORS ``headers`` and a JSON ``body``:
+          * 200: ``field_names`` (list) and ``job_id`` (the ``invocationArn``
+            from the service response, or ``''``).
+          * 400: ``error`` and empty ``field_names`` when the event does not
+            contain a usable S3 record.
+          * 500: ``error`` and empty ``field_names`` on an AWS ``ClientError``.
+    """
+    # Validate the input first, so a malformed event produces a 400 instead
+    # of an unhandled KeyError/IndexError/JSONDecodeError.
+    try:
+        bucket, key = _parse_s3_location(event)
+    except (KeyError, IndexError, TypeError, ValueError) as e:
+        return _json_response(400, {
+            'error': f'Malformed S3 event: {e!r}',
+            'field_names': []
+        })
+
+    import os  # local import keeps this block independent of file-level imports
+
+    # Allow the project to be configured per deployment; fall back to the
+    # previously hard-coded ARN so existing deployments are unaffected.
+    project_arn = (
+        os.environ.get('DATA_AUTOMATION_PROJECT_ARN')
+        or 'arn:aws:bedrock:us-east-1:905418369822:data-automation-project/5f408970390c'
+    )
+
+    try:
+        # NOTE(review): the boto3 documentation appears to list
+        # invoke_data_automation_async under the 'bedrock-data-automation-runtime'
+        # client, with a different parameter shape than the one used below.
+        # Confirm against the API reference before relying on this function.
+        bedrock_agent = boto3.client('bedrock-data-automation')
+        job_response = bedrock_agent.invoke_data_automation_async(
+            projectArn=project_arn,
+            inputConfiguration={
+                'document': {
+                    's3Uri': f's3://{bucket}/{key}'
+                }
+            },
+            outputConfiguration={
+                's3Uri': f's3://{bucket}/output/'
+            }
+        )
+    except ClientError as e:
+        return _json_response(500, {
+            'error': str(e),
+            'field_names': []
+        })
+
+    # Collect field names if the response carries extraction results; when
+    # several results qualify, the last one wins (unchanged behaviour).
+    field_names = []
+    for result in job_response.get('extractionResults', []):
+        if 'document' in result and 'content' in result['document']:
+            content = result['document']['content']
+            if isinstance(content, dict) and 'fields' in content:
+                field_names = list(content['fields'].keys())
+
+    return _json_response(200, {
+        'field_names': field_names,
+        'job_id': job_response.get('invocationArn', '')
+    })