Skip to content

Commit 4cbe0ac

Browse files
HADOOP-19654. Upgrade AWS SDK to 2.35.4 (#7882)
AWS SDK upgraded to 2.35.4. This SDK has changed checksum/checksum headers handling significantly, causing problems with third party stores, and, in some combinations, AWS S3 itself. The S3A connector has retained old behavior; options to change these settings are now available. The default settings are chosen for maximum compatibility and performance. fs.s3a.request.md5.header: true fs.s3a.checksum.generation: false fs.s3a.create.checksum.algorithm: "" Consult the documentation for more details. Contributed by Steve Loughran
1 parent 38f9c1c commit 4cbe0ac

File tree

65 files changed

+1313
-210
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+1313
-210
lines changed

LICENSE-binary

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ io.reactivex:rxnetty:0.4.20
295295
io.swagger:swagger-annotations:1.5.4
296296
javax.inject:javax.inject:1
297297
net.java.dev.jna:jna:5.2.0
298-
net.minidev:accessors-smart:1.2
298+
net.minidev:accessors-smart:1.21
299299
org.apache.avro:avro:1.11.4
300300
org.apache.commons:commons-compress:1.26.1
301301
org.apache.commons:commons-configuration2:2.10.1
@@ -360,7 +360,7 @@ org.objenesis:objenesis:2.6
360360
org.xerial.snappy:snappy-java:1.1.10.4
361361
org.yaml:snakeyaml:2.0
362362
org.wildfly.openssl:wildfly-openssl:2.2.5.Final
363-
software.amazon.awssdk:bundle:2.29.52
363+
software.amazon.awssdk:bundle:2.35.4
364364
software.amazon.s3.analyticsaccelerator:analyticsaccelerator-s3:1.3.0
365365

366366
--------------------------------------------------------------------------------

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractUnbufferTest.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hadoop.fs.contract;
2020

21+
import org.assertj.core.api.Assertions;
2122
import org.junit.Test;
2223

2324
import java.io.IOException;
@@ -28,6 +29,7 @@
2829

2930
import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
3031
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
32+
import static org.apache.hadoop.fs.contract.ContractTestUtils.readNBytes;
3133

3234
/**
3335
* Contract tests for {@link org.apache.hadoop.fs.CanUnbuffer#unbuffer}.
@@ -136,10 +138,12 @@ protected void validateFileContents(FSDataInputStream stream, int length,
136138
int startIndex)
137139
throws IOException {
138140
byte[] streamData = new byte[length];
139-
assertEquals("failed to read expected number of bytes from "
140-
+ "stream. This may be transient",
141-
length, stream.read(streamData));
141+
final int read = readNBytes(stream, streamData, 0, length);
142+
Assertions.assertThat(read)
143+
.describedAs("failed to read expected number of bytes from stream. %s", stream)
144+
.isEqualTo(length);
142145
byte[] validateFileBytes;
146+
143147
if (startIndex == 0 && length == fileBytes.length) {
144148
validateFileBytes = fileBytes;
145149
} else {

hadoop-project/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@
202202
<make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
203203
<surefire.fork.timeout>900</surefire.fork.timeout>
204204
<aws-java-sdk.version>1.12.720</aws-java-sdk.version>
205-
<aws-java-sdk-v2.version>2.29.52</aws-java-sdk-v2.version>
205+
<aws-java-sdk-v2.version>2.35.4</aws-java-sdk-v2.version>
206206
<amazon-s3-encryption-client-java.version>3.1.1</amazon-s3-encryption-client-java.version>
207207
<amazon-s3-analyticsaccelerator-s3.version>1.3.0</amazon-s3-analyticsaccelerator-s3.version>
208208
<aws.eventstream.version>1.0.1</aws.eventstream.version>

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,8 @@ public String getMessage() {
5454
public boolean retryable() {
5555
return getCause().retryable();
5656
}
57+
58+
public String getOperation() {
59+
return operation;
60+
}
5761
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
* Status code 443, no response from server. This is considered idempotent.
2525
*/
2626
public class AWSNoResponseException extends AWSServiceIOException {
27+
28+
/**
29+
* Constructor.
30+
* @param operation operation in progress.
31+
* @param cause inner cause
32+
*/
2733
public AWSNoResponseException(String operation,
2834
AwsServiceException cause) {
2935
super(operation, cause);

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.hadoop.classification.InterfaceAudience;
2222
import org.apache.hadoop.classification.InterfaceStability;
2323
import org.apache.hadoop.fs.Options;
24+
import org.apache.hadoop.fs.s3a.impl.ChecksumSupport;
2425
import org.apache.hadoop.fs.s3a.impl.streams.StreamIntegration;
2526
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
2627

@@ -1777,15 +1778,53 @@ private Constants() {
17771778
*/
17781779
public static final boolean CHECKSUM_VALIDATION_DEFAULT = false;
17791780

1781+
/**
1782+
* Should checksums always be generated?
1783+
* Not all third-party stores like this being enabled for every request.
1784+
* Value: {@value}.
1785+
*/
1786+
public static final String CHECKSUM_GENERATION =
1787+
"fs.s3a.checksum.generation";
1788+
1789+
/**
1790+
* Default value of {@link #CHECKSUM_GENERATION}.
1791+
* Value: {@value}.
1792+
*/
1793+
public static final boolean DEFAULT_CHECKSUM_GENERATION = false;
1794+
17801795
/**
17811796
* Indicates the algorithm used to create the checksum for the object
17821797
* to be uploaded to S3. Unset by default. It supports the following values:
1783-
* 'CRC32', 'CRC32C', 'SHA1', and 'SHA256'
1798+
* 'CRC32', 'CRC32C', 'SHA1', 'SHA256', 'CRC64_NVME', 'NONE', ''.
1799+
* When checksum calculation is enabled this MUST be set to a valid algorithm.
17841800
* value:{@value}
17851801
*/
17861802
public static final String CHECKSUM_ALGORITHM =
17871803
"fs.s3a.create.checksum.algorithm";
17881804

1805+
/**
1806+
* Default checksum algorithm: {@code "NONE"}.
1807+
*/
1808+
public static final String DEFAULT_CHECKSUM_ALGORITHM =
1809+
ChecksumSupport.NONE;
1810+
1811+
/**
1812+
* Send a {@code Content-MD5} header with every request.
1813+
* This is required when performing some operations with third party stores
1814+
* (for example: bulk delete).
1815+
* It is supported by AWS S3, though has unexpected behavior with AWS S3 Express storage.
1816+
* See https://github.com/aws/aws-sdk-java-v2/issues/6459 for details.
1817+
*/
1818+
public static final String REQUEST_MD5_HEADER =
1819+
"fs.s3a.request.md5.header";
1820+
1821+
/**
1822+
* Default value of {@link #REQUEST_MD5_HEADER}.
1823+
* Value: {@value}.
1824+
*/
1825+
public static final boolean DEFAULT_REQUEST_MD5_HEADER = true;
1826+
1827+
17891828
/**
17901829
* Are extensions classes, such as {@code fs.s3a.aws.credentials.provider},
17911830
* going to be loaded from the same classloader that loaded

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
import org.slf4j.LoggerFactory;
3131

3232
import software.amazon.awssdk.awscore.util.AwsHostNameUtils;
33+
import software.amazon.awssdk.core.checksums.RequestChecksumCalculation;
34+
import software.amazon.awssdk.core.checksums.ResponseChecksumValidation;
3335
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
3436
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
3537
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
@@ -40,6 +42,7 @@
4042
import software.amazon.awssdk.identity.spi.AwsCredentialsIdentity;
4143
import software.amazon.awssdk.metrics.LoggingMetricPublisher;
4244
import software.amazon.awssdk.regions.Region;
45+
import software.amazon.awssdk.services.s3.LegacyMd5Plugin;
4346
import software.amazon.awssdk.services.s3.S3AsyncClient;
4447
import software.amazon.awssdk.services.s3.S3AsyncClientBuilder;
4548
import software.amazon.awssdk.services.s3.S3BaseClientBuilder;
@@ -194,9 +197,32 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> Build
194197

195198
configureEndpointAndRegion(builder, parameters, conf);
196199

200+
// add a plugin to add a Content-MD5 header.
201+
// this is required when performing some operations with third party stores
202+
// (for example: bulk delete), and is somewhat harmless when working with AWS S3.
203+
if (parameters.isMd5HeaderEnabled()) {
204+
LOG.debug("MD5 header enabled");
205+
builder.addPlugin(LegacyMd5Plugin.create());
206+
}
207+
208+
//when to calculate request checksums.
209+
final RequestChecksumCalculation checksumCalculation =
210+
parameters.isChecksumCalculationEnabled()
211+
? RequestChecksumCalculation.WHEN_SUPPORTED
212+
: RequestChecksumCalculation.WHEN_REQUIRED;
213+
LOG.debug("Using checksum calculation policy: {}", checksumCalculation);
214+
builder.requestChecksumCalculation(checksumCalculation);
215+
216+
// response checksum validation. Slow, even with CRC32 checksums.
217+
final ResponseChecksumValidation checksumValidation;
218+
checksumValidation = parameters.isChecksumValidationEnabled()
219+
? ResponseChecksumValidation.WHEN_SUPPORTED
220+
: ResponseChecksumValidation.WHEN_REQUIRED;
221+
LOG.debug("Using checksum validation policy: {}", checksumValidation);
222+
builder.responseChecksumValidation(checksumValidation);
223+
197224
S3Configuration serviceConfiguration = S3Configuration.builder()
198225
.pathStyleAccessEnabled(parameters.isPathStyleAccess())
199-
.checksumValidationEnabled(parameters.isChecksumValidationEnabled())
200226
.build();
201227

202228
final ClientOverrideConfiguration.Builder override =

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,10 +1182,15 @@ private ClientManager createClientManager(URI fsURI, boolean dtEnabled) throws I
11821182
.withTransferManagerExecutor(unboundedThreadPool)
11831183
.withRegion(configuredRegion)
11841184
.withFipsEnabled(fipsEnabled)
1185+
.withS3ExpressStore(s3ExpressStore)
11851186
.withExpressCreateSession(
11861187
conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT))
11871188
.withChecksumValidationEnabled(
11881189
conf.getBoolean(CHECKSUM_VALIDATION, CHECKSUM_VALIDATION_DEFAULT))
1190+
.withChecksumCalculationEnabled(
1191+
conf.getBoolean(CHECKSUM_GENERATION, DEFAULT_CHECKSUM_GENERATION))
1192+
.withMd5HeaderEnabled(conf.getBoolean(REQUEST_MD5_HEADER,
1193+
DEFAULT_REQUEST_MD5_HEADER))
11891194
.withClientSideEncryptionEnabled(isCSEEnabled)
11901195
.withClientSideEncryptionMaterials(cseMaterials)
11911196
.withAnalyticsAcceleratorEnabled(isAnalyticsAcceleratorEnabled)

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hadoop.fs.s3a;
2020

21+
import software.amazon.awssdk.awscore.exception.AwsErrorDetails;
2122
import software.amazon.awssdk.awscore.exception.AwsServiceException;
2223
import software.amazon.awssdk.core.exception.AbortedException;
2324
import software.amazon.awssdk.core.exception.ApiCallAttemptTimeoutException;
@@ -239,8 +240,13 @@ public static IOException translateException(@Nullable String operation,
239240
? (S3Exception) ase
240241
: null;
241242
int status = ase.statusCode();
242-
if (ase.awsErrorDetails() != null) {
243-
message = message + ":" + ase.awsErrorDetails().errorCode();
243+
// error details, may be null
244+
final AwsErrorDetails errorDetails = ase.awsErrorDetails();
245+
// error code, will be the empty string if errorDetails is null
246+
String errorCode = "";
247+
if (errorDetails != null) {
248+
errorCode = errorDetails.errorCode();
249+
message = message + ":" + errorCode;
244250
}
245251

246252
// big switch on the HTTP status code.
@@ -307,6 +313,8 @@ public static IOException translateException(@Nullable String operation,
307313
// precondition failure: the object is there, but the precondition
308314
// (e.g. etag) didn't match. Assume remote file change during
309315
// rename or status passed in to openfile had an etag which didn't match.
316+
// See the SC_200 handler for the treatment of the S3 Express failure
317+
// variant.
310318
case SC_412_PRECONDITION_FAILED:
311319
ioe = new RemoteFileChangedException(path, message, "", ase);
312320
break;
@@ -351,6 +359,16 @@ public static IOException translateException(@Nullable String operation,
351359
return ((MultiObjectDeleteException) exception)
352360
.translateException(message);
353361
}
362+
if (PRECONDITION_FAILED.equals(errorCode)) {
363+
// S3 Express stores report conflict in conditional writes
364+
// as a 200 + an error code of "PreconditionFailed".
365+
// This is mapped to RemoteFileChangedException for consistency
366+
// with SC_412_PRECONDITION_FAILED handling.
367+
return new RemoteFileChangedException(path,
368+
operation,
369+
exception.getMessage(),
370+
exception);
371+
}
354372
// other 200: FALL THROUGH
355373

356374
default:

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,11 @@ final class S3ClientCreationParameters {
187187
*/
188188
private String region;
189189

190+
/**
191+
* Is this an S3 Express store?
192+
*/
193+
private boolean s3ExpressStore;
194+
190195
/**
191196
* Enable S3Express create session.
192197
*/
@@ -207,6 +212,17 @@ final class S3ClientCreationParameters {
207212
*/
208213
private boolean isAnalyticsAcceleratorEnabled;
209214

215+
/**
216+
* Is the MD5 Header Enabled?
217+
*/
218+
private boolean md5HeaderEnabled;
219+
220+
/**
221+
* Is Checksum calculation Enabled?
222+
*/
223+
private boolean checksumCalculationEnabled;
224+
225+
210226
/**
211227
* List of execution interceptors to include in the chain
212228
* of interceptors in the SDK.
@@ -255,10 +271,18 @@ public S3ClientCreationParameters withRequesterPays(
255271
return this;
256272
}
257273

274+
/**
275+
* Is this a requester pays bucket?
276+
* @return true if the bucket is requester pays.
277+
*/
258278
public boolean isRequesterPays() {
259279
return requesterPays;
260280
}
261281

282+
/**
283+
* Get the credentials.
284+
* @return the credential provider.
285+
*/
262286
public AwsCredentialsProvider getCredentialSet() {
263287
return credentialSet;
264288
}
@@ -275,6 +299,10 @@ public S3ClientCreationParameters withCredentialSet(
275299
return this;
276300
}
277301

302+
/**
303+
* Get UA suffix.
304+
* @return suffix.
305+
*/
278306
public String getUserAgentSuffix() {
279307
return userAgentSuffix;
280308
}
@@ -536,6 +564,20 @@ public String getKmsRegion() {
536564
return kmsRegion;
537565
}
538566

567+
public boolean isS3ExpressStore() {
568+
return s3ExpressStore;
569+
}
570+
571+
/**
572+
* Set builder value.
573+
* @param value new value
574+
* @return the builder
575+
*/
576+
public S3ClientCreationParameters withS3ExpressStore(final boolean value) {
577+
s3ExpressStore = value;
578+
return this;
579+
}
580+
539581
/**
540582
* Should s3express createSession be called?
541583
* @return true if the client should enable createSession.
@@ -564,10 +606,46 @@ public S3ClientCreationParameters withChecksumValidationEnabled(final boolean va
564606
return this;
565607
}
566608

609+
/**
610+
* Is checksum validation on every request enabled?
611+
* @return true if validation is on every request.
612+
*/
567613
public boolean isChecksumValidationEnabled() {
568614
return checksumValidationEnabled;
569615
}
570616

617+
/**
618+
* Should MD5 headers be added?
619+
* @return true to always add an MD5 header.
620+
*/
621+
public boolean isMd5HeaderEnabled() {
622+
return md5HeaderEnabled;
623+
}
624+
625+
/**
626+
* Set builder value.
627+
* @param value new value
628+
* @return the builder
629+
*/
630+
public S3ClientCreationParameters withMd5HeaderEnabled(final boolean value) {
631+
md5HeaderEnabled = value;
632+
return this;
633+
}
634+
635+
public boolean isChecksumCalculationEnabled() {
636+
return checksumCalculationEnabled;
637+
}
638+
639+
/**
640+
* Set builder value.
641+
* @param value new value
642+
* @return the builder
643+
*/
644+
public S3ClientCreationParameters withChecksumCalculationEnabled(final boolean value) {
645+
checksumCalculationEnabled = value;
646+
return this;
647+
}
648+
571649
@Override
572650
public String toString() {
573651
return "S3ClientCreationParameters{" +
@@ -580,8 +658,10 @@ public String toString() {
580658
", multiPartThreshold=" + multiPartThreshold +
581659
", multipartCopy=" + multipartCopy +
582660
", region='" + region + '\'' +
661+
", s3ExpressStore=" + s3ExpressStore +
583662
", expressCreateSession=" + expressCreateSession +
584663
", checksumValidationEnabled=" + checksumValidationEnabled +
664+
", md5HeaderEnabled=" + md5HeaderEnabled +
585665
'}';
586666
}
587667

0 commit comments

Comments
 (0)