forked from awslabs/aws-athena-query-federation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathathena-google-bigquery.yaml
125 lines (125 loc) · 5.4 KB
/
athena-google-bigquery.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Declares the AWS SAM transform; required because this template uses an
# AWS::Serverless::* resource type.
Transform: AWS::Serverless-2016-10-31

# Publishing metadata for the AWS Serverless Application Repository listing.
Metadata:
  AWS::ServerlessRepo::Application:
    Name: AthenaGoogleBigQueryConnector
    Description: "This connector enables Amazon Athena to communicate with Big Query using Google SDK"
    Author: "default author"
    SpdxLicenseId: Apache-2.0
    # LicenseUrl and ReadmeUrl are given as bare file names, not full URLs.
    LicenseUrl: LICENSE.txt
    ReadmeUrl: README.md
    Labels:
      - Big-Query
      - Athena-Federation
      - Google-SDK
    HomePageUrl: "https://github.com/awslabs/aws-athena-query-federation"
    # Quoted so the version can never be mistaken for a number.
    SemanticVersion: "2025.7.1"
    SourceCodeUrl: "https://github.com/awslabs/aws-athena-query-federation"
# Deployment-time inputs. Parameters that declare an empty-string Default are
# optional.
Parameters:
  LambdaFunctionName:
    Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$'
    Type: String
    AllowedPattern: '^[a-z0-9-_]{1,64}$'
  GCPProjectID:
    Description: "The project ID within Google Cloud Platform."
    Type: String
  BigQueryEndpoint:
    Description: "(Optional) BigQuery Private Endpoint"
    Default: ''
    Type: String
  SecretNamePrefix:
    Description: "The secret name within AWS Secrets Manager that contains your Google Cloud Platform Credentials."
    Type: String
  SpillBucket:
    Description: 'The name of the bucket where this function can spill data.'
    Type: String
  SpillPrefix:
    Description: 'The prefix within SpillBucket where this function can spill data.'
    Type: String
    Default: athena-spill
  LambdaTimeout:
    Description: 'Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)'
    Default: 900
    Type: Number
    # Enforce the range the description already documents, so an out-of-range
    # value is rejected at parameter validation instead of mid-deployment.
    MinValue: 1
    MaxValue: 900
  LambdaMemory:
    Description: 'Lambda memory in MB (min 128 - 3008 max).'
    Default: 3008
    Type: Number
    # Only the lower bound is enforced; no MaxValue is set so that existing
    # deployments passing a larger value are not rejected by this template.
    MinValue: 128
  DisableSpillEncryption:
    Description: "If set to 'false' data spilled to S3 is encrypted with AES GCM"
    Default: 'false'
    Type: String
  SecurityGroupIds:
    Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)'
    Type: CommaDelimitedList
    Default: ""
  SubnetIds:
    Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access your data source. (e.g. subnet1,subnet2)'
    Type: CommaDelimitedList
    Default: ""
  PermissionsBoundaryARN:
    Description: "(Optional) An IAM policy ARN to use as the PermissionsBoundary for the created Lambda function's execution role"
    Default: ''
    Type: String
# Flags derived from the parameters and the deployment region.
Conditions:
  # True when a permissions-boundary ARN was supplied.
  HasPermissionsBoundary: !Not
    - !Equals
      - !Ref PermissionsBoundaryARN
      - ""
  # The list parameters are joined into one string so that an empty list
  # can be detected by comparing against "".
  HasSecurityGroups: !Not
    - !Equals
      - !Join
        - ""
        - !Ref SecurityGroupIds
      - ""
  HasSubnets: !Not
    - !Equals
      - !Join
        - ""
        - !Ref SubnetIds
      - ""
  # Region checks: true when the stack is deployed in the named region.
  IsRegionBAH: !Equals
    - !Ref "AWS::Region"
    - "me-south-1"
  IsRegionHKG: !Equals
    - !Ref "AWS::Region"
    - "ap-east-1"
Resources:
  # The connector Lambda function, deployed from a container image.
  AthenaBigQueryConnector:
    Type: 'AWS::Serverless::Function'
    Properties:
      Environment:
        Variables:
          # Values passed straight through from the template parameters.
          disable_spill_encryption: !Ref DisableSpillEncryption
          spill_bucket: !Ref SpillBucket
          spill_prefix: !Ref SpillPrefix
          secret_manager_gcp_creds_name: !Ref SecretNamePrefix
          gcp_project_id: !Ref GCPProjectID
          big_query_endpoint: !Ref BigQueryEndpoint
          # NOTE(review): presumably where the connector writes the GCP
          # service-account key at runtime - confirm against the connector code.
          GOOGLE_APPLICATION_CREDENTIALS: '/tmp/service-account.json'
      FunctionName: !Ref LambdaFunctionName
      PackageType: "Image"
      ImageUri: !Sub
        - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-google-bigquery:2025.7.1'
        # The registry account differs for me-south-1 and ap-east-1.
        # Account IDs are quoted so they are always treated as strings: left
        # bare, a parser may type them as numbers, dropping the leading zero of
        # 084828588479 or re-serialising the value in a numeric format.
        - Account: !If [IsRegionBAH, '084828588479', !If [IsRegionHKG, '183295418215', '292517598671']]
      Description: "Enables Amazon Athena to communicate with BigQuery using Google SDK"
      Timeout: !Ref LambdaTimeout
      MemorySize: !Ref LambdaMemory
      # Attached only when a boundary ARN was supplied; otherwise the property is omitted.
      PermissionsBoundary: !If [HasPermissionsBoundary, !Ref PermissionsBoundaryARN, !Ref "AWS::NoValue"]
      Policies:
        # Read access to secrets whose name starts with SecretNamePrefix.
        - Statement:
            - Action:
                - secretsmanager:GetSecretValue
              Effect: Allow
              Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretNamePrefix}*'
          Version: '2012-10-17'
        # Log group creation anywhere in this account and region.
        - Statement:
            - Action:
                - logs:CreateLogGroup
              Effect: Allow
              Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*'
          Version: '2012-10-17'
        # Log stream creation and writes, scoped to this function's own log group.
        - Statement:
            - Action:
                - logs:CreateLogStream
                - logs:PutLogEvents
              Effect: Allow
              Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*'
          Version: '2012-10-17'
        - Statement:
            - Action:
                - athena:GetQueryExecution
              Effect: Allow
              Resource: '*'
          Version: '2012-10-17'
        # S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy
        # with one that is more restrictive and can only 'put' but not read, delete, or overwrite files.
        - S3CrudPolicy:
            BucketName: !Ref SpillBucket
        # VPCAccessPolicy allows our connector to run in a VPC so that it can access your data source.
        - VPCAccessPolicy: {}
      VpcConfig:
        # Each list is omitted entirely when the corresponding parameter was left empty.
        SecurityGroupIds: !If [HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue"]
        SubnetIds: !If [HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue"]