
Commit 7543f3a

HADOOP-19317. S3A: fs.s3a.connection.expect.continue controls 100 CONTINUE behavior (#7134)
New option: fs.s3a.connection.expect.continue.

This controls whether or not a PUT request to the S3 store sets the "Expect: 100-continue" header and awaits a 100 CONTINUE response before uploading any data. This allows throttling and other problems to be detected quickly. The default is "true": the header is sent.

Contributed by Steve Loughran
1 parent 317db31 commit 7543f3a
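The option can be set like any other S3A configuration key before the filesystem is created. A minimal sketch of trying it out (the class name, bucket URI and the choice to disable the header are illustrative, not part of the commit):

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public final class ExpectContinueExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Default is true: PUT requests send "Expect: 100-continue" and wait for
    // the 100 CONTINUE response before uploading any data.
    conf.setBoolean("fs.s3a.connection.expect.continue", false);
    try (FileSystem fs = FileSystem.newInstance(new URI("s3a://example-bucket/"), conf)) {
      System.out.println("Opened " + fs.getUri() + " with expect-continue disabled");
    }
  }
}
```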

5 files changed: +130 -7 lines

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java (+14)

@@ -444,6 +444,20 @@ private Constants() {
   public static final Duration DEFAULT_CONNECTION_IDLE_TIME_DURATION =
       Duration.ofSeconds(60);
 
+  /**
+   * Should PUT requests await a 100 CONTINUE responses before uploading
+   * data?
+   * <p>
+   * Value: {@value}.
+   */
+  public static final String CONNECTION_EXPECT_CONTINUE =
+      "fs.s3a.connection.expect.continue";
+
+  /**
+   * Default value for {@link #CONNECTION_EXPECT_CONTINUE}.
+   */
+  public static final boolean CONNECTION_EXPECT_CONTINUE_DEFAULT = true;
+
   // socket send buffer to be used in Amazon client
   public static final String SOCKET_SEND_BUFFER = "fs.s3a.socket.send.buffer";
   public static final int DEFAULT_SOCKET_SEND_BUFFER = 8 * 1024;

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java (+18, -3)

@@ -45,6 +45,8 @@
 import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_ACQUISITION_TIMEOUT;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS;
+import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE;
+import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE_DEFAULT;
 import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_IDLE_TIME;
 import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_KEEPALIVE;
 import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_TTL;
@@ -149,6 +151,7 @@ public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration con
         .connectionMaxIdleTime(conn.getMaxIdleTime())
         .connectionTimeout(conn.getEstablishTimeout())
         .connectionTimeToLive(conn.getConnectionTTL())
+        .expectContinueEnabled(conn.isExpectContinueEnabled())
         .maxConnections(conn.getMaxConnections())
         .socketTimeout(conn.getSocketTimeout())
         .tcpKeepAlive(conn.isKeepAlive())
@@ -491,14 +494,15 @@ public String toString() {
    * All the connection settings, wrapped as a class for use by
    * both the sync and async client.
    */
-  static class ConnectionSettings {
+  static final class ConnectionSettings {
     private final int maxConnections;
     private final boolean keepAlive;
     private final Duration acquisitionTimeout;
     private final Duration connectionTTL;
     private final Duration establishTimeout;
     private final Duration maxIdleTime;
     private final Duration socketTimeout;
+    private final boolean expectContinueEnabled;
 
     private ConnectionSettings(
         final int maxConnections,
@@ -507,14 +511,16 @@ private ConnectionSettings(
         final Duration connectionTTL,
         final Duration establishTimeout,
         final Duration maxIdleTime,
-        final Duration socketTimeout) {
+        final Duration socketTimeout,
+        final boolean expectContinueEnabled) {
       this.maxConnections = maxConnections;
       this.keepAlive = keepAlive;
       this.acquisitionTimeout = acquisitionTimeout;
       this.connectionTTL = connectionTTL;
       this.establishTimeout = establishTimeout;
       this.maxIdleTime = maxIdleTime;
       this.socketTimeout = socketTimeout;
+      this.expectContinueEnabled = expectContinueEnabled;
     }
 
     int getMaxConnections() {
@@ -545,6 +551,10 @@ Duration getSocketTimeout() {
       return socketTimeout;
     }
 
+    boolean isExpectContinueEnabled() {
+      return expectContinueEnabled;
+    }
+
     @Override
     public String toString() {
       return "ConnectionSettings{" +
@@ -555,6 +565,7 @@ public String toString() {
           ", establishTimeout=" + establishTimeout +
           ", maxIdleTime=" + maxIdleTime +
           ", socketTimeout=" + socketTimeout +
+          ", expectContinueEnabled=" + expectContinueEnabled +
           '}';
     }
   }
@@ -615,14 +626,18 @@ static ConnectionSettings createConnectionSettings(Configuration conf) {
         DEFAULT_SOCKET_TIMEOUT_DURATION, TimeUnit.MILLISECONDS,
         minimumOperationDuration);
 
+    final boolean expectContinueEnabled = conf.getBoolean(CONNECTION_EXPECT_CONTINUE,
+        CONNECTION_EXPECT_CONTINUE_DEFAULT);
+
     return new ConnectionSettings(
         maxConnections,
         keepAlive,
         acquisitionTimeout,
         connectionTTL,
         establishTimeout,
         maxIdleTime,
-        socketTimeout);
+        socketTimeout,
+        expectContinueEnabled);
   }
 
   /**
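For context, `createHttpClientBuilder()` passes the flag straight through to the AWS SDK v2 Apache HTTP client. A minimal standalone sketch of that wiring (the class name and the timeout/pool values are illustrative, not the S3A defaults):

```java
import java.time.Duration;

import software.amazon.awssdk.http.SdkHttpClient;
import software.amazon.awssdk.http.apache.ApacheHttpClient;

public final class ExpectContinueHttpClientSketch {
  /** Build an SDK HTTP client with the expect-continue behavior chosen by the caller. */
  public static SdkHttpClient build(boolean expectContinueEnabled) {
    return ApacheHttpClient.builder()
        .expectContinueEnabled(expectContinueEnabled) // send "Expect: 100-continue" on PUTs?
        .connectionTimeout(Duration.ofSeconds(30))    // illustrative values only
        .socketTimeout(Duration.ofSeconds(200))
        .maxConnections(96)
        .build();
  }
}
```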

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md (+76, -1)

@@ -150,7 +150,19 @@ If you are working with third party stores, please check [third party stores in
 
 See [Timeouts](performance.html#timeouts).
 
-### <a name="networking"></a> Low-level Network Options
+### <a name="networking"></a> Low-level Network/Http Options
+
+The S3A connector uses [Apache HttpClient](https://hc.apache.org/index.html) to connect to
+S3 Stores.
+The client is configured to create a pool of HTTP connections with S3, so that once
+the initial set of connections have been made they can be re-used for followup operations.
+
+Core aspects of pool settings are:
+* The pool size is set by `fs.s3a.connection.maximum` -if a process asks for more connections than this then
+threads will be blocked until they are available.
+* The time blocked before an exception is raised is set in `fs.s3a.connection.acquisition.timeout`.
+* The time an idle connection will be kept in the pool is set by `fs.s3a.connection.idle.time`.
+* The time limit for even a non-idle connection to be kept open is set in `fs.s3a.connection.ttl`.
 
 ```xml
 
@@ -163,6 +175,69 @@ See [Timeouts](performance.html#timeouts).
   </description>
 </property>
 
+<property>
+  <name>fs.s3a.connection.acquisition.timeout</name>
+  <value>60s</value>
+  <description>
+    Time to wait for an HTTP connection from the pool.
+    Too low: operations fail on a busy process.
+    When high, it isn't obvious that the connection pool is overloaded,
+    simply that jobs are slow.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.connection.request.timeout</name>
+  <value>60s</value>
+  <description>
+    Total time for a single request to take from the HTTP verb to the
+    response from the server.
+    0 means "no limit"
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.connection.part.upload.timeout</name>
+  <value>15m</value>
+  <description>
+    Timeout for uploading all of a small object or a single part
+    of a larger one.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.connection.ttl</name>
+  <value>5m</value>
+  <description>
+    Expiration time of an Http connection from the connection pool:
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.connection.idle.time</name>
+  <value>60s</value>
+  <description>
+    Time for an idle HTTP connection to be kept the HTTP connection
+    pool before being closed.
+    Too low: overhead of creating connections.
+    Too high, risk of stale connections and inability to use the
+    adaptive load balancing of the S3 front end.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.connection.expect.continue</name>
+  <value>true</value>
+  <description>
+    Should PUT requests await a 100 CONTINUE responses before uploading
+    data?
+    This should normally be left alone unless a third party store which
+    does not support it is encountered, or file upload over long
+    distance networks time out.
+    (see HADOOP-19317 as an example)
+  </description>
+</property>
+
 <property>
   <name>fs.s3a.connection.ssl.enabled</name>
   <value>true</value>
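The pool options documented above can also be set programmatically before the filesystem is instantiated. A minimal sketch (the class name and values are illustrative, not recommendations):

```java
import org.apache.hadoop.conf.Configuration;

public final class S3APoolTuningSketch {
  /** Apply illustrative connection-pool settings to a Hadoop configuration. */
  public static Configuration tunePool(Configuration conf) {
    conf.setInt("fs.s3a.connection.maximum", 200);             // pool size
    conf.set("fs.s3a.connection.acquisition.timeout", "30s");  // max wait for a pooled connection
    conf.set("fs.s3a.connection.idle.time", "60s");            // how long idle connections are kept
    conf.set("fs.s3a.connection.ttl", "5m");                   // upper bound on any connection's lifetime
    return conf;
  }
}
```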

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java (+16, -3)

@@ -29,6 +29,8 @@
 import org.apache.hadoop.fs.contract.AbstractFSContract;
 import org.apache.hadoop.fs.s3a.S3ATestUtils;
 
+import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
 import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags;
 
 /**
@@ -47,8 +49,8 @@ public class ITestS3AContractCreate extends AbstractContractCreateTest {
   @Parameterized.Parameters
   public static Collection<Object[]> params() {
     return Arrays.asList(new Object[][]{
-        {false},
-        {true}
+        {false, false},
+        {true, true}
     });
   }
 
@@ -57,8 +59,15 @@ public static Collection<Object[]> params() {
    */
   private final boolean createPerformance;
 
-  public ITestS3AContractCreate(final boolean createPerformance) {
+  /**
+   * Expect a 100-continue response?
+   */
+  private final boolean expectContinue;
+
+  public ITestS3AContractCreate(final boolean createPerformance,
+      final boolean expectContinue) {
     this.createPerformance = createPerformance;
+    this.expectContinue = expectContinue;
   }
 
   @Override
@@ -71,6 +80,10 @@ protected Configuration createConfiguration() {
     final Configuration conf = setPerformanceFlags(
         super.createConfiguration(),
         createPerformance ? "create" : "");
+    removeBaseAndBucketOverrides(
+        conf,
+        CONNECTION_EXPECT_CONTINUE);
+    conf.setBoolean(CONNECTION_EXPECT_CONTINUE, expectContinue);
     S3ATestUtils.disableFilesystemCaching(conf);
     return conf;
   }

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java (+6)

@@ -24,6 +24,7 @@
 import org.apache.hadoop.fs.s3a.Constants;
 
 import static org.apache.hadoop.fs.contract.ContractTestUtils.IO_CHUNK_BUFFER_SIZE;
+import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE;
 import static org.apache.hadoop.fs.s3a.Constants.MIN_MULTIPART_THRESHOLD;
 import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_MIN_SIZE;
 import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE;
@@ -69,18 +70,23 @@ private boolean isMultipartCopyEnabled() {
    * Create a configuration without multipart upload,
    * and a long request timeout to allow for a very slow
    * PUT in close.
+   * <p>
+   * 100-continue is disabled so as to verify the behavior
+   * on a large PUT.
    * @return the configuration to create the test FS with.
    */
   @Override
   protected Configuration createScaleConfiguration() {
     Configuration conf = super.createScaleConfiguration();
     removeBaseAndBucketOverrides(conf,
+        CONNECTION_EXPECT_CONTINUE,
         IO_CHUNK_BUFFER_SIZE,
         MIN_MULTIPART_THRESHOLD,
         MULTIPART_UPLOADS_ENABLED,
         MULTIPART_SIZE,
         PART_UPLOAD_TIMEOUT,
         REQUEST_TIMEOUT);
+    conf.setBoolean(CONNECTION_EXPECT_CONTINUE, false);
     conf.setInt(IO_CHUNK_BUFFER_SIZE, 655360);
     conf.setInt(MIN_MULTIPART_THRESHOLD, MULTIPART_MIN_SIZE);
     conf.setInt(MULTIPART_SIZE, MULTIPART_MIN_SIZE);
