-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Support S3 unsigned body #3323
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: version-3
Are you sure you want to change the base?
Support S3 unsigned body #3323
Changes from all commits
da5692a
20bd2b8
392d536
1a761ec
c09bb65
dc9b603
192d651
5d453b3
81dfafd
14b7f34
06e08f2
ad14d42
de72cf1
4abf3f8
4fbff2c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,10 +9,10 @@ class ServiceEnumerator | |
| MANIFEST_PATH = File.expand_path('../../services.json', __FILE__) | ||
|
|
||
| # Minimum `aws-sdk-core` version for new gem builds | ||
| MINIMUM_CORE_VERSION = "3.239.1" | ||
| MINIMUM_CORE_VERSION = "3.240.0" | ||
|
|
||
| # Minimum `aws-sdk-core` version for new S3 gem builds | ||
| MINIMUM_CORE_VERSION_S3 = "3.234.0" | ||
| MINIMUM_CORE_VERSION_S3 = "3.240.0" | ||
|
Comment on lines
-12
to
+15
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Self-reminder to update the minimum core version when this is ready for release. |
||
|
|
||
| EVENTSTREAM_PLUGIN = "Aws::Plugins::EventStreamConfiguration" | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ module Plugins | |
| # @api private | ||
| class ChecksumAlgorithm < Seahorse::Client::Plugin | ||
| CHUNK_SIZE = 1 * 1024 * 1024 # one MB | ||
| MIN_CHUNK_SIZE = 16_384 # 16 KB | ||
|
|
||
| # determine the set of supported client side checksum algorithms | ||
| # CRC32c requires aws-crt (optional sdk dependency) for support | ||
|
|
@@ -162,9 +163,7 @@ def call(context) | |
| context[:http_checksum] ||= {} | ||
|
|
||
| # Set validation mode to enabled when supported. | ||
| if context.config.response_checksum_validation == 'when_supported' | ||
| enable_request_validation_mode(context) | ||
| end | ||
| enable_request_validation_mode(context) if context.config.response_checksum_validation == 'when_supported' | ||
|
|
||
| @handler.call(context) | ||
| end | ||
|
|
@@ -194,9 +193,7 @@ def call(context) | |
| calculate_request_checksum(context, request_algorithm) | ||
| end | ||
|
|
||
| if should_verify_response_checksum?(context) | ||
| add_verify_response_checksum_handlers(context) | ||
| end | ||
| add_verify_response_checksum_handlers(context) if should_verify_response_checksum?(context) | ||
|
|
||
| with_metrics(context.config, algorithm) { @handler.call(context) } | ||
| end | ||
|
|
@@ -346,16 +343,16 @@ def calculate_request_checksum(context, checksum_properties) | |
|
|
||
| def apply_request_checksum(context, headers, checksum_properties) | ||
| header_name = checksum_properties[:name] | ||
| body = context.http_request.body_contents | ||
| headers[header_name] = calculate_checksum( | ||
| checksum_properties[:algorithm], | ||
| body | ||
| context.http_request.body | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Was this change from
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Great question. And yes! This was the root cause of why the memory was high during testing. Calling
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need to do a rewind after calculating as well? |
||
| ) | ||
| end | ||
|
|
||
| def calculate_checksum(algorithm, body) | ||
| digest = ChecksumAlgorithm.digest_for_algorithm(algorithm) | ||
| if body.respond_to?(:read) | ||
| body.rewind | ||
| update_in_chunks(digest, body) | ||
| else | ||
| digest.update(body) | ||
|
|
@@ -388,13 +385,15 @@ def apply_request_trailer_checksum(context, headers, checksum_properties) | |
| unless context.http_request.body.respond_to?(:size) | ||
| raise Aws::Errors::ChecksumError, 'Could not determine length of the body' | ||
| end | ||
|
|
||
| headers['X-Amz-Decoded-Content-Length'] = context.http_request.body.size | ||
|
|
||
| context.http_request.body = AwsChunkedTrailerDigestIO.new( | ||
| context.http_request.body, | ||
| checksum_properties[:algorithm], | ||
| location_name | ||
| ) | ||
| context.http_request.body = | ||
| AwsChunkedTrailerDigestIO.new( | ||
| io: context.http_request.body, | ||
| algorithm: checksum_properties[:algorithm], | ||
| location_name: location_name | ||
| ) | ||
| end | ||
|
|
||
| def should_verify_response_checksum?(context) | ||
|
|
@@ -417,10 +416,7 @@ def add_verify_response_headers_handler(context, checksum_context) | |
| context[:http_checksum][:validation_list] = validation_list | ||
|
|
||
| context.http_response.on_headers do |_status, headers| | ||
| header_name, algorithm = response_header_to_verify( | ||
| headers, | ||
| validation_list | ||
| ) | ||
| header_name, algorithm = response_header_to_verify(headers, validation_list) | ||
| next unless header_name | ||
|
|
||
| expected = headers[header_name] | ||
|
|
@@ -466,52 +462,65 @@ def response_header_to_verify(headers, validation_list) | |
| # Wrapper for request body that implements application-layer | ||
| # chunking with Digest computed on chunks + added as a trailer | ||
| class AwsChunkedTrailerDigestIO | ||
| CHUNK_SIZE = 16_384 | ||
|
|
||
| def initialize(io, algorithm, location_name) | ||
| @io = io | ||
| @location_name = location_name | ||
| @algorithm = algorithm | ||
| @digest = ChecksumAlgorithm.digest_for_algorithm(algorithm) | ||
| @trailer_io = nil | ||
| def initialize(options = {}) | ||
| @io = options.delete(:io) | ||
| @location_name = options.delete(:location_name) | ||
| @algorithm = options.delete(:algorithm) | ||
| @digest = ChecksumAlgorithm.digest_for_algorithm(@algorithm) | ||
| @chunk_size = Thread.current[:net_http_override_body_stream_chunk] || MIN_CHUNK_SIZE | ||
| @overhead_bytes = calculate_overhead(@chunk_size) | ||
| @max_chunk_size = @chunk_size - @overhead_bytes | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I find |
||
| @current_chunk = ''.b | ||
| @eof = false | ||
| end | ||
|
|
||
| # the size of the application layer aws-chunked + trailer body | ||
| def size | ||
| # compute the number of chunks | ||
| # a full chunk has 4 + 4 bytes overhead, a partial chunk is len.to_s(16).size + 4 | ||
| orig_body_size = @io.size | ||
| n_full_chunks = orig_body_size / CHUNK_SIZE | ||
| partial_bytes = orig_body_size % CHUNK_SIZE | ||
| chunked_body_size = n_full_chunks * (CHUNK_SIZE + 8) | ||
| chunked_body_size += partial_bytes.to_s(16).size + partial_bytes + 4 unless partial_bytes.zero? | ||
| n_full_chunks = orig_body_size / @max_chunk_size | ||
| partial_bytes = orig_body_size % @max_chunk_size | ||
|
|
||
| chunked_body_size = n_full_chunks * (@max_chunk_size + @max_chunk_size.to_s(16).size + 4) | ||
| chunked_body_size += partial_bytes.to_s(16).size + partial_bytes + 4 unless partial_bytes.zero? | ||
| trailer_size = ChecksumAlgorithm.trailer_length(@algorithm, @location_name) | ||
| chunked_body_size + trailer_size | ||
| end | ||
|
|
||
| def rewind | ||
| @io.rewind | ||
| @current_chunk.clear | ||
| @eof = false | ||
| end | ||
|
|
||
| def read(length, buf = nil) | ||
| # account for possible leftover bytes at the end, if we have trailer bytes, send them | ||
| if @trailer_io | ||
| return @trailer_io.read(length, buf) | ||
| end | ||
| def read(_length = nil, buf = nil) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The length parameter is ignored since we always expect 16KB (default) or whatever is set by the thread-local override. I could make this more dynamic, but that gets tricky... the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What would be the benefit of it being dynamic? If we're always expecting 16KB and this is an established expectation, I think it's fine that we don't use the length input. Not sure how feasible this is, but we could consider removing the parameter altogether in the future so as not to cause confusion when using this method.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well, I don't think we could remove the parameter altogether due to the interfaces... this IO-like object is being read by the HTTP client (net-http in our case), and length is a required parameter. Removing the parameter will break.
In the scenario where we don't use the patch, net-http could change the way their reads work — like increasing the chunk size, etc. We are capping the chunk size at 16KB here. We won't know if things have changed unless net-http checks the length on their end. |
||
| return if @eof | ||
|
|
||
| buf&.clear | ||
| output_buffer = buf || ''.b | ||
| fill_chunk if @current_chunk.empty? && !@eof | ||
|
|
||
| output_buffer << @current_chunk | ||
| @current_chunk.clear | ||
| output_buffer | ||
| end | ||
|
|
||
| private | ||
|
|
||
| def calculate_overhead(chunk_size) | ||
| chunk_size.to_s(16).size + 4 # hex_length + "\r\n\r\n" | ||
| end | ||
|
|
||
| chunk = @io.read(length) | ||
| def fill_chunk | ||
| chunk = @io.read(@max_chunk_size) | ||
| if chunk | ||
| chunk.force_encoding('ASCII-8BIT') | ||
| @digest.update(chunk) | ||
| application_chunked = "#{chunk.bytesize.to_s(16)}\r\n#{chunk}\r\n" | ||
| return StringIO.new(application_chunked).read(application_chunked.size, buf) | ||
| @current_chunk << "#{chunk.bytesize.to_s(16)}\r\n#{chunk}\r\n".b | ||
| else | ||
| trailers = {} | ||
| trailers[@location_name] = @digest.base64digest | ||
| trailers = trailers.map { |k,v| "#{k}:#{v}" }.join("\r\n") | ||
| @trailer_io = StringIO.new("0\r\n#{trailers}\r\n\r\n") | ||
| chunk = @trailer_io.read(length, buf) | ||
| trailer_str = { @location_name => @digest.base64digest }.map { |k, v| "#{k}:#{v}" }.join("\r\n") | ||
| @current_chunk << "0\r\n#{trailer_str}\r\n\r\n".b | ||
| @eof = true | ||
| end | ||
| chunk | ||
| end | ||
| end | ||
| end | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,28 +6,61 @@ module Seahorse | |
| module Client | ||
| # @api private | ||
| module NetHttp | ||
|
|
||
| # @api private | ||
| module Patches | ||
|
|
||
| def self.apply! | ||
| Net::HTTPGenericRequest.prepend(PatchDefaultContentType) | ||
| Net::HTTPGenericRequest.prepend(RequestPatches) | ||
| end | ||
|
|
||
| # For requests with bodies, Net::HTTP sets a default content type of: | ||
| # 'application/x-www-form-urlencoded' | ||
| # There are cases where we should not send content type at all. | ||
| # Even when no body is supplied, Net::HTTP uses a default empty body | ||
| # and sets it anyway. This patch disables the behavior when a Thread | ||
| # local variable is set. | ||
| module PatchDefaultContentType | ||
| # Patches intended to override Net::HTTP functionality | ||
| module RequestPatches | ||
| # For requests with bodies, Net::HTTP sets a default content type of: | ||
| # 'application/x-www-form-urlencoded' | ||
| # There are cases where we should not send content type at all. | ||
| # Even when no body is supplied, Net::HTTP uses a default empty body | ||
| # and sets it anyway. This patch disables the behavior when a Thread | ||
| # local variable is set. | ||
| # See: https://github.com/ruby/net-http/issues/205 | ||
| def supply_default_content_type | ||
| return if Thread.current[:net_http_skip_default_content_type] | ||
|
Comment on lines
+23
to
25
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we could eventually get rid of this - given that |
||
|
|
||
| super | ||
| end | ||
| end | ||
|
|
||
| # IO.copy_stream is capped at 16KB buffer so this patch intends to | ||
| # increase its chunk size for better performance. | ||
| # Only intended to use for S3 TM implementation. | ||
| # See: https://github.com/ruby/net-http/blob/master/lib/net/http/generic_request.rb#L292 | ||
| def send_request_with_body_stream(sock, ver, path, f) | ||
| return super unless (chunk_size = Thread.current[:net_http_override_body_stream_chunk]) | ||
|
|
||
| unless content_length || chunked? | ||
| raise ArgumentError, 'Content-Length not given and Transfer-Encoding is not `chunked`' | ||
| end | ||
|
|
||
| supply_default_content_type | ||
| write_header(sock, ver, path) | ||
| wait_for_continue sock, ver if sock.continue_timeout | ||
| if chunked? | ||
| chunker = Chunker.new(sock) | ||
| RequestIO.custom_stream(f, chunker, chunk_size) | ||
| chunker.finish | ||
| else | ||
| RequestIO.custom_stream(f, sock, chunk_size) | ||
| end | ||
| end | ||
|
|
||
| class RequestIO | ||
| def self.custom_stream(src, dst, chunk_size) | ||
| copied = 0 | ||
| while (chunk = src.read(chunk_size)) | ||
| dst.write(chunk) | ||
| copied += chunk.bytesize | ||
| end | ||
| copied | ||
| end | ||
| end | ||
| end | ||
| end | ||
| end | ||
| end | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't add test cases for unsigned body since it's already covered by our test cases here: https://github.com/aws/aws-sdk-ruby/blob/version-3/gems/aws-sdk-core/spec/aws/plugins/checksum_algorithm_spec.rb#L24