Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions industry-specific-pocs/financial-services/PDF_2_WebForm/DEPLOYMENT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Deployment Guide

## Prerequisites
- AWS CLI configured with appropriate permissions
- Node.js 14+ and npm installed
- Access to AWS services: S3, Lambda, API Gateway, Bedrock

## Frontend Deployment

### Local Development
```bash
npm install
npm start
```

### Production Build
```bash
npm run build
# Deploy build/ directory to S3 static website hosting or CloudFront
```

## Lambda Function Deployment

### 1. Package Dependencies
```bash
cd Lambda/
pip install -r requirements.txt -t .
```

### 2. Create Deployment Packages
```bash
# For each Lambda function
zip -r generateWebFormStream.zip generateWebFormStream.py boto3/ botocore/ other_dependencies/
zip -r getPdfContent.zip getPdfContent.py boto3/ botocore/ other_dependencies/
zip -r listS3Objects.zip listS3Objects.py boto3/ botocore/ other_dependencies/
zip -r getDocumentFields.zip getDocumentFields.py boto3/ botocore/ other_dependencies/
```

### 3. Deploy to AWS Lambda
```bash
# Using AWS CLI
aws lambda create-function \
--function-name generateWebFormStream \
--runtime python3.9 \
--role arn:aws:iam::ACCOUNT:role/lambda-execution-role \
--handler generateWebFormStream.lambda_handler \
--zip-file fileb://generateWebFormStream.zip

# Repeat for other functions
```

## API Gateway Configuration

### REST API Endpoints
1. Create REST API in API Gateway
2. Create resources and methods
3. Integrate with Lambda functions
4. Enable CORS
5. Deploy to stage

### WebSocket API
1. Create WebSocket API
2. Configure routes: $connect, $disconnect, generate
3. Integrate with generateWebFormStream Lambda
4. Deploy to stage

## Required IAM Permissions

### Lambda Execution Role
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": "arn:aws:logs:*:*:*"
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:ListBucket"
],
"Resource": [
"arn:aws:s3:::your-bucket/*",
"arn:aws:s3:::your-bucket"
]
},
{
"Effect": "Allow",
"Action": [
"bedrock:InvokeModel",
"bedrock:InvokeModelWithResponseStream"
],
"Resource": [
"arn:aws:bedrock:*:*:inference-profile/us.anthropic.claude-3-5-sonnet-20241022-v2:0",
"arn:aws:bedrock:*::foundation-model/anthropic.claude-3-5-sonnet-20241022-v2:0"
]
},
{
"Effect": "Allow",
"Action": [
"execute-api:ManageConnections"
],
"Resource": "arn:aws:execute-api:*:*:*/production/POST/@connections/*"
}
]
}
```

## Environment Variables
Set these in Lambda function configuration:
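- `WEBSOCKET_ENDPOINT`: HTTPS callback endpoint of the deployed WebSocket API stage, used by the Lambda to post messages back to connected clients (e.g. `https://{api-id}.execute-api.{region}.amazonaws.com/{stage}`)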
- `S3_BUCKET_PREFIX`: Optional bucket name filter

## Testing
1. Test Lambda functions individually
2. Test API Gateway endpoints
3. Test WebSocket connections
4. Test end-to-end workflow

## Monitoring
- CloudWatch Logs for Lambda functions
- API Gateway metrics
- WebSocket connection metrics
- Bedrock usage metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import json
import boto3
from botocore.exceptions import ClientError

"""
WebSocket Lambda Function for Real-time Code Generation

This function handles WebSocket connections and generates code using AWS Bedrock Claude 3.5 Sonnet.
It supports streaming responses for real-time code generation in multiple programming languages.

Supported Languages:
- React.js
- HTML
- Java
- Python
- C#

WebSocket Routes:
- $connect: Handle new WebSocket connections
- $disconnect: Handle WebSocket disconnections
- generate: Process code generation requests with streaming responses
"""

def _ws_error_response(status_code, message):
    """Build the JSON error response (with CORS headers) returned on failure."""
    return {
        'statusCode': status_code,
        'headers': {
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Headers': 'Content-Type',
            'Access-Control-Allow-Methods': 'GET,POST,OPTIONS'
        },
        'body': json.dumps({'error': message})
    }


def _build_generation_prompt(metadata_json, custom_prompt):
    """Return the custom prompt with ``{metadata}`` filled in, or the default HTML-form prompt."""
    metadata_text = json.dumps(metadata_json, indent=2)
    if custom_prompt:
        return custom_prompt.replace('{metadata}', metadata_text)
    # The template body is intentionally flush-left: its whitespace is part of
    # the text sent to the model.
    return f"""
Generate ONLY clean HTML form code for the following PDF metadata fields.

Metadata: {metadata_text}

Requirements:
- Generate a very professional looking HTML form elements (no explanations, no markdown)
- Include inline CSS styles for proper formatting
- Create appropriate input types for each field
- Add form validation attributes
- Include a submit button
- Start directly with <form> tag

Return ONLY the HTML form code that can be directly rendered in a browser.
"""


def _resolve_callback_endpoint(request_context):
    """Return the HTTPS endpoint used to post to WebSocket connections, or None if unknown."""
    # Local import keeps this block self-contained; the file does not import os.
    import os

    configured = os.environ.get('WEBSOCKET_ENDPOINT', '').strip()
    if configured:
        # The management API is served over HTTPS even though clients connect
        # with wss://, so tolerate a wss:// value in the configuration.
        if configured.startswith('wss://'):
            configured = 'https://' + configured[len('wss://'):]
        return configured

    # Fall back to the API that delivered this event.
    domain = request_context.get('domainName')
    stage = request_context.get('stage')
    if domain and stage:
        return f'https://{domain}/{stage}'
    return None


def lambda_handler(event, context):
    """Handle an API Gateway WebSocket event and stream generated code to the client.

    Routes (taken from ``event['requestContext']['routeKey']``):
      * ``$connect`` / ``$disconnect``: acknowledged with ``{'statusCode': 200}``.
      * ``generate``: the JSON message body is parsed, a prompt is built from its
        ``metadata`` and optional ``prompt`` fields, the Bedrock model is invoked
        with response streaming, and every text delta is forwarded to the
        originating connection, followed by a ``message_delta`` completion frame.
        The ``language`` field of the message is currently not used.
      * anything else: rejected with a 400 response.

    Args:
        event: API Gateway WebSocket proxy event (dict).
        context: Lambda context object (unused).

    Returns:
        dict: ``{'statusCode': 200}`` on success, or an error response with
        ``statusCode``, CORS ``headers`` and a JSON ``body`` holding ``error``.
    """
    request_context = event.get('requestContext') or {}
    route_key = request_context.get('routeKey')

    # Connection lifecycle routes need no work beyond acknowledging them.
    if route_key in ('$connect', '$disconnect'):
        return {'statusCode': 200}

    if route_key != 'generate':
        return _ws_error_response(400, f'Unsupported route: {route_key}')

    # Validate the message before any AWS client is created.
    raw_body = event.get('body')
    if not raw_body:
        return _ws_error_response(400, 'Missing metadata or language')
    try:
        body = json.loads(raw_body)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass.
        return _ws_error_response(400, 'Request body is not valid JSON')
    if not isinstance(body, dict):
        return _ws_error_response(400, 'Request body must be a JSON object')

    custom_prompt = body.get('prompt') or ''
    if not isinstance(custom_prompt, str):
        return _ws_error_response(400, 'prompt must be a string')
    prompt = _build_generation_prompt(body.get('metadata', {}), custom_prompt)

    connection_id = request_context.get('connectionId')
    if not connection_id:
        return _ws_error_response(400, 'Missing connectionId')

    endpoint_url = _resolve_callback_endpoint(request_context)
    if not endpoint_url:
        return _ws_error_response(500, 'WebSocket endpoint is not configured')

    try:
        # Client used to push frames back over the WebSocket connection.
        apigateway_client = boto3.client(
            'apigatewaymanagementapi',
            endpoint_url=endpoint_url
        )

        # Invoke Claude 3.5 Sonnet with a streamed response.
        bedrock = boto3.client('bedrock-runtime', region_name='us-east-1')
        response = bedrock.invoke_model_with_response_stream(
            modelId='us.anthropic.claude-3-5-sonnet-20241022-v2:0',
            body=json.dumps({
                'anthropic_version': 'bedrock-2023-05-31',
                'max_tokens': 4000,
                'messages': [{
                    'role': 'user',
                    'content': prompt
                }]
            })
        )

        # Relay text deltas to the client as they arrive.
        for event_chunk in response['body']:
            payload = event_chunk.get('chunk')
            if not payload:
                continue
            chunk = json.loads(payload['bytes'])
            if chunk.get('type') != 'content_block_delta':
                continue
            # Not every delta carries text, so avoid a KeyError on those.
            text = (chunk.get('delta') or {}).get('text')
            if text:
                apigateway_client.post_to_connection(
                    ConnectionId=connection_id,
                    Data=json.dumps({
                        'type': 'content_block_delta',
                        'delta': {'text': text}
                    })
                )

        # Tell the client the stream is complete.
        apigateway_client.post_to_connection(
            ConnectionId=connection_id,
            Data=json.dumps({'type': 'message_delta', 'delta': {'stop_reason': 'end_turn'}})
        )

        return {'statusCode': 200}
    except ClientError as e:
        return _ws_error_response(500, str(e))
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import json
import boto3
from botocore.exceptions import ClientError

"""
Alternative Document Fields Extraction Lambda Function

This function uses AWS Bedrock Data Automation to extract form fields from PDF documents.
It's an alternative to the main metadata extraction approach used in the application.

Note: This function is not currently used in the main application (App.js) but is kept
for potential future use or as an alternative implementation.

Features:
- Uses Bedrock Data Automation service
- Extracts structured field data from PDFs
- Asynchronous processing with job tracking
- Returns field names and job IDs

Input:
- S3 event structure with bucket and object key

Output:
- field_names: Array of extracted field names
- job_id: Bedrock Data Automation job identifier
"""

def _fields_response(status_code, payload):
    """Wrap ``payload`` in the API Gateway proxy response shape with CORS headers."""
    return {
        'statusCode': status_code,
        'headers': {
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Headers': 'Content-Type',
            'Access-Control-Allow-Methods': 'GET,POST,OPTIONS'
        },
        'body': json.dumps(payload)
    }


def _read_s3_location(event):
    """Return ``(bucket, key)`` from the first S3 record in the request body or the event."""
    # API Gateway delivers the S3 event structure as a JSON string in 'body';
    # a direct invocation carries it at the top level.
    source = json.loads(event['body']) if event.get('body') else event
    s3_record = source['Records'][0]['s3']
    return s3_record['bucket']['name'], s3_record['object']['key']


def _collect_field_names(job_response):
    """Return field names found under ``extractionResults`` in ``job_response`` (last match wins)."""
    field_names = []
    for result in job_response.get('extractionResults', []):
        if 'document' in result and 'content' in result['document']:
            content = result['document']['content']
            if isinstance(content, dict) and 'fields' in content:
                field_names = list(content['fields'].keys())
    return field_names


def lambda_handler(event, context):
    """Start a Bedrock Data Automation job for the PDF referenced by an S3 event.

    The bucket and key are read from ``Records[0].s3`` of either the JSON
    request body or the event itself. The job output is written under
    ``s3://<bucket>/output/``.

    Configuration (environment variables, all optional):
      * ``DATA_AUTOMATION_PROJECT_ARN``: project to run; defaults to the
        project ARN that was previously hard-coded here.
      * ``DATA_AUTOMATION_PROFILE_ARN``: data automation profile; by default it
        is derived from the project ARN's region and account.

    Args:
        event: S3 event structure, directly or JSON-encoded in ``event['body']``.
        context: Lambda context object (unused).

    Returns:
        dict: proxy response whose JSON body holds ``field_names`` and
        ``job_id`` (the invocation ARN) on success, or ``error`` and an empty
        ``field_names`` list with status 400 (malformed event) or 500
        (AWS client error).
    """
    # Local import keeps this block self-contained; the file does not import os.
    import os

    # Reject malformed events up front instead of crashing with KeyError etc.
    try:
        bucket, key = _read_s3_location(event)
    except (AttributeError, IndexError, KeyError, TypeError, ValueError) as exc:
        return _fields_response(400, {
            'error': f'Invalid S3 event structure: {exc!r}',
            'field_names': []
        })

    project_arn = os.environ.get(
        'DATA_AUTOMATION_PROJECT_ARN',
        'arn:aws:bedrock:us-east-1:905418369822:data-automation-project/5f408970390c'
    )
    profile_arn = os.environ.get('DATA_AUTOMATION_PROFILE_ARN')
    if not profile_arn:
        # ARN layout: arn:aws:bedrock:<region>:<account>:data-automation-project/<id>
        # NOTE(review): the 'us.' profile prefix matches the us-east-1 default
        # project; set DATA_AUTOMATION_PROFILE_ARN for other geographies.
        arn_parts = project_arn.split(':')
        if len(arn_parts) < 6:
            return _fields_response(500, {
                'error': 'DATA_AUTOMATION_PROJECT_ARN is not a valid ARN',
                'field_names': []
            })
        profile_arn = (
            f'arn:aws:bedrock:{arn_parts[3]}:{arn_parts[4]}'
            ':data-automation-profile/us.data-automation-v1'
        )

    try:
        # invoke_data_automation_async is an operation of the *runtime* client;
        # the 'bedrock-data-automation' client only manages projects/blueprints.
        bda_runtime = boto3.client('bedrock-data-automation-runtime')

        job_response = bda_runtime.invoke_data_automation_async(
            inputConfiguration={
                's3Uri': f's3://{bucket}/{key}'
            },
            outputConfiguration={
                's3Uri': f's3://{bucket}/output/'
            },
            dataAutomationConfiguration={
                'dataAutomationProjectArn': project_arn
            },
            dataAutomationProfileArn=profile_arn
        )

        # NOTE(review): the asynchronous call is documented to return only
        # 'invocationArn'; extracted fields are written to the S3 output
        # location, so field_names is expected to be empty here. The lookup is
        # kept so that any inline results would still be reported.
        field_names = _collect_field_names(job_response)

        return _fields_response(200, {
            'field_names': field_names,
            'job_id': job_response.get('invocationArn', '')
        })

    except ClientError as e:
        return _fields_response(500, {
            'error': str(e),
            'field_names': []
        })
Loading