diff --git a/.gitignore b/.gitignore index d13151b1..e40fa236 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ pkg spec/examples.txt tmp Gemfile.lock +.idea/ diff --git a/README.md b/README.md index 900557fd..e1b3ed4e 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,9 @@ protocol natively and outsources the parsing to native extensions. - **Performance**: using native parsers and a clean, lightweight implementation, http.rb achieves high performance while implementing HTTP in Ruby instead of C. +- **Proxy Support**: http.rb supports both HTTP and SOCKS5 proxies, with or without + authentication. + ## Installation @@ -104,6 +107,30 @@ and call `#readpartial` on it repeatedly until it returns `nil`: => nil ``` +### Using Proxies + +http.rb supports both HTTP and SOCKS5 proxies, with or without authentication. + +#### HTTP Proxy + +```ruby +# Using an HTTP proxy without authentication +response = HTTP.via("proxy.example.com", 8080).get("https://github.com") + +# Using an HTTP proxy with authentication +response = HTTP.via("proxy.example.com", 8080, "username", "password").get("https://github.com") +``` + +#### SOCKS5 Proxy + +```ruby +# Using a SOCKS5 proxy without authentication +response = HTTP.via_socks5("proxy.example.com", 1080).get("https://github.com") + +# Using a SOCKS5 proxy with authentication +response = HTTP.via_socks5("proxy.example.com", 1080, "username", "password").get("https://github.com") +``` + ## Supported Ruby Versions This library aims to support and is [tested against][build-link] diff --git a/bin/console b/bin/console new file mode 100755 index 00000000..5ca54377 --- /dev/null +++ b/bin/console @@ -0,0 +1,11 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "http" + +# You can add fixtures and/or initialization code here to make experimenting +# with your gem easier. You can also use a different console, if you like.
# Make a request through an HTTP proxy
#
# Positional arguments are interpreted by position and type:
# address (String), port (Integer), then optionally a username and
# password (Strings) and/or a Hash of extra proxy headers.
#
# @param [Array] proxy
# @raise [Request::Error] if HTTP proxy is invalid
def via(*proxy)
  branch default_options.with_proxy(checked_proxy_hash(proxy, type: :http))
end
alias through via

# Make a request through a SOCKS5 proxy
#
# Accepts the same positional arguments as {#via}.
#
# @param [Array] proxy
# @raise [Request::Error] if SOCKS5 proxy is invalid
def via_socks5(*proxy)
  branch default_options.with_proxy(checked_proxy_hash(proxy, type: :socks5))
end

# Build a proxy hash from the given arguments
#
# Arguments of an unexpected type are ignored rather than rejected, so the
# caller is responsible for checking that the required keys are present.
#
# @param [Array] proxy
# @param [Symbol] type The proxy type (:http or :socks5)
# @return [Hash] The proxy hash
def build_proxy_hash(*proxy, type:)
  address, port, third, password, fifth = proxy

  proxy_hash = {}
  proxy_hash[:proxy_address]  = address  if address.is_a?(String)
  proxy_hash[:proxy_port]     = port     if port.is_a?(Integer)
  proxy_hash[:proxy_username] = third    if third.is_a?(String)
  proxy_hash[:proxy_password] = password if password.is_a?(String)

  # Headers may be given third (no credentials) or fifth (after the
  # credentials); the fifth position wins when both are hashes.
  headers = [fifth, third].find { |candidate| candidate.is_a?(Hash) }
  proxy_hash[:proxy_headers] = headers if headers

  # HTTP is the default proxy type (a missing :proxy_type is treated as
  # HTTP by Request#using_http_proxy?), so only non-HTTP proxies are tagged.
  # Tagging HTTP proxies too would push "address + port + headers" to four
  # keys, which Request#using_authenticated_proxy? (a key-count check)
  # would mistake for an authenticated proxy.
  proxy_hash[:proxy_type] = type unless type == :http

  proxy_hash
end

# Build the proxy hash and make sure it names both an address and a port.
#
# @param [Array] proxy positional proxy arguments as given to the caller
# @param [Symbol] type The proxy type (:http or :socks5)
# @return [Hash] The validated proxy hash
# @raise [Request::Error] if the address or the port is missing
private def checked_proxy_hash(proxy, type:)
  proxy_hash = build_proxy_hash(*proxy, type: type)
  return proxy_hash if proxy_hash[:proxy_address] && proxy_hash[:proxy_port]

  raise(RequestError, "invalid #{type.to_s.upcase} proxy: must provide both address and port")
end
require "ipaddr"

module HTTP
  # Client side of the SOCKS5 negotiation (RFC 1928) with optional
  # username/password authentication (RFC 1929).
  #
  # The object drives the handshake over an already-open connection to the
  # proxy. The socket only needs to respond to `write(data)` and
  # `readpartial(size)`. Every failure marks the proxy as failed (see
  # {#failed_connect?}) and raises HTTP::ConnectionError.
  # rubocop:disable Metrics/ClassLength
  class SOCKS5Proxy
    SOCKS_VERSION         = 0x05
    AUTH_VERSION          = 0x01
    NO_AUTH               = 0x00
    USER_PASS_AUTH        = 0x02
    NO_ACCEPTABLE_METHODS = 0xFF
    CMD_CONNECT           = 0x01
    ATYP_IPV4             = 0x01
    ATYP_DOMAIN           = 0x03
    ATYP_IPV6             = 0x04

    # Every variable-length SOCKS5 field is prefixed by a single length byte.
    MAX_FIELD_LENGTH = 255

    # Human-readable text for the reply codes defined by RFC 1928, section 6.
    REPLY_MESSAGES = {
      0x01 => "general SOCKS server failure",
      0x02 => "connection not allowed by ruleset",
      0x03 => "Network unreachable",
      0x04 => "Host unreachable",
      0x05 => "Connection refused",
      0x06 => "TTL expired",
      0x07 => "Command not supported",
      0x08 => "Address type not supported"
    }.freeze

    # @param [Socket] socket The socket to use for the connection
    def initialize(socket)
      @socket = socket
      @failed_connect = false
    end

    # Connect to the target host through the SOCKS5 proxy
    # @param [HTTP::Request] req The request to connect
    # @return [void]
    # @raise [HTTP::ConnectionError] if the connection fails
    def connect(req)
      auth_method = perform_handshake(req)

      case auth_method
      when NO_AUTH
        # nothing to negotiate
      when USER_PASS_AUTH
        # A server may only pick a method we offered; picking
        # username/password without credentials cannot succeed.
        unless req.using_authenticated_proxy?
          fail_connect "SOCKS5 proxy server requires authentication but no credentials were provided"
        end

        authenticate(req)
      else
        fail_connect "SOCKS5 proxy server selected unsupported authentication method: #{auth_method}"
      end

      send_connection_request(req)
      nil
    end

    # Perform the initial SOCKS5 handshake
    # @param [HTTP::Request] req The request to connect
    # @return [Integer] The authentication method selected by the server
    # @raise [HTTP::ConnectionError] if the handshake fails
    def perform_handshake(req)
      validate_handshake_response(send_handshake_request(get_auth_methods(req)))
    end

    # Get the authentication methods to offer to the server
    # @param [HTTP::Request] req The request to connect
    # @return [Array] The authentication methods
    def get_auth_methods(req)
      methods = [NO_AUTH]
      methods << USER_PASS_AUTH if req.using_authenticated_proxy?
      methods
    end

    # Send the handshake request and get the server's response
    # @param [Array] auth_methods The authentication methods to offer
    # @return [Array] The version and authentication method selected by the server
    # @raise [HTTP::ConnectionError] if the server closes the connection early
    def send_handshake_request(auth_methods)
      @socket.write([SOCKS_VERSION, auth_methods.size, *auth_methods].pack("C*"))

      version, auth_method = read_exact(2).unpack("C*")
      [version, auth_method]
    end

    # Validate the handshake response from the server
    # @param [Array] response The version and authentication method from the server
    # @return [Integer] The authentication method selected by the server
    # @raise [HTTP::ConnectionError] if the handshake fails
    def validate_handshake_response(response)
      version, auth_method = response

      fail_connect "SOCKS5 proxy server returned invalid version: #{version}" if version != SOCKS_VERSION

      if auth_method == NO_ACCEPTABLE_METHODS
        fail_connect "SOCKS5 proxy server doesn't support any of our authentication methods"
      end

      auth_method
    end

    # @return [Boolean] whenever proxy connect failed
    def failed_connect?
      @failed_connect
    end

    private

    # Authenticate with the SOCKS5 proxy using username and password
    # @param [HTTP::Request] req The request containing proxy credentials
    # @return [void]
    # @raise [HTTP::ConnectionError] if authentication fails
    def authenticate(req)
      username = req.proxy[:proxy_username].to_s
      password = req.proxy[:proxy_password].to_s
      ensure_fits_length_byte("username", username)
      ensure_fits_length_byte("password", password)

      @socket.write([AUTH_VERSION, username.bytesize, username, password.bytesize, password].pack("CCa*Ca*"))

      auth_version, auth_status = read_exact(2).unpack("C*")
      return if auth_version == AUTH_VERSION && auth_status.zero?

      fail_connect "SOCKS5 proxy authentication failed"
    end

    # Send a connection request to the SOCKS5 proxy
    # @param [HTTP::Request] req The request to connect
    # @return [void]
    # @raise [HTTP::ConnectionError] if the connection fails
    def send_connection_request(req)
      port = req.uri.port || req.uri.default_port
      atyp, addr = format_address(req.uri.host)

      send_request(atyp, addr, port)

      # The reply ends with the proxy's bound address; it must be drained so
      # the next bytes read from the socket belong to the tunnelled stream.
      skip_bound_address(process_response)
    end

    # Format the address for SOCKS5 protocol
    #
    # IP literals (IPv4 or IPv6, the latter with or without URI brackets) are
    # sent in binary form; anything else is sent as a domain name for the
    # proxy to resolve.
    #
    # @param [String] host The host to connect to
    # @return [Array] The address type and formatted address
    # @raise [HTTP::ConnectionError] if the host name is empty or too long
    def format_address(host)
      name = host.to_s
      ip = parse_ip(name.delete_prefix("[").delete_suffix("]"))
      return [ip.ipv4? ? ATYP_IPV4 : ATYP_IPV6, ip.hton] if ip

      unless (1..MAX_FIELD_LENGTH).cover?(name.bytesize)
        fail_connect "SOCKS5 destination host name must be between 1 and #{MAX_FIELD_LENGTH} bytes"
      end

      [ATYP_DOMAIN, [name.bytesize, name].pack("Ca*")]
    end

    # Parse an IP literal, returning nil for anything that is not one.
    # @param [String] literal Candidate address without brackets
    # @return [IPAddr, nil]
    def parse_ip(literal)
      # IPAddr would read "a.b.c.d/len" as a network; that is never a host.
      return nil if literal.empty? || literal.include?("/")

      IPAddr.new(literal)
    rescue IPAddr::Error
      nil
    end

    # Send the connection request to the SOCKS5 proxy
    # @param [Integer] atyp The address type
    # @param [String] addr The formatted address
    # @param [Integer] port The port to connect to
    # @return [void]
    def send_request(atyp, addr, port)
      @socket.write([SOCKS_VERSION, CMD_CONNECT, 0x00, atyp, addr, port].pack("CCCCa*n"))
    end

    # Process the server's response to the connection request
    # @return [Integer] The address type in the response
    # @raise [HTTP::ConnectionError] if the connection fails
    def process_response
      version, reply, _reserved, atyp = read_exact(4).unpack("C*")

      fail_connect "SOCKS5 proxy server returned invalid version: #{version}" if version != SOCKS_VERSION

      handle_reply_code(reply)

      atyp
    end

    # Handle the reply code from the SOCKS5 proxy
    # @param [Integer] reply The reply code
    # @raise [HTTP::ConnectionError] if the reply indicates an error
    def handle_reply_code(reply)
      return if reply.zero?

      fail_connect "SOCKS5 proxy connection failed: #{get_error_message(reply)}"
    end

    # Get the error message for a SOCKS5 reply code
    # @param [Integer] reply The reply code
    # @return [String] The error message
    def get_error_message(reply)
      REPLY_MESSAGES.fetch(reply, "Unknown error (code: #{reply})")
    end

    # Skip the bound address and port in the response
    # @param [Integer] atyp The address type
    # @return [void]
    # @raise [HTTP::ConnectionError] if the address type is unknown
    def skip_bound_address(atyp)
      case atyp
      when ATYP_IPV4
        read_exact(4 + 2) # 4 bytes for IPv4 + 2 bytes for port
      when ATYP_DOMAIN
        domain_len = read_exact(1).unpack1("C")
        read_exact(domain_len + 2) # domain + 2 bytes for port
      when ATYP_IPV6
        read_exact(16 + 2) # 16 bytes for IPv6 + 2 bytes for port
      else
        # Without a known length the rest of the reply cannot be drained.
        fail_connect "SOCKS5 proxy server returned unknown address type: #{atyp}"
      end
      nil
    end

    # Read exactly `length` bytes from the socket.
    #
    # `readpartial` may legitimately return fewer bytes than requested, so
    # keep reading until the full field has arrived. End of stream is handled
    # defensively in every form a socket wrapper might report it: EOFError,
    # nil, :eof or an empty chunk.
    #
    # @param [Integer] length Number of bytes to read
    # @return [String] Binary string of exactly `length` bytes
    # @raise [HTTP::ConnectionError] if the stream ends first
    def read_exact(length)
      buffer = String.new(capacity: length)

      while buffer.bytesize < length
        chunk = @socket.readpartial(length - buffer.bytesize)
        if chunk.nil? || chunk == :eof || chunk.empty?
          fail_connect "SOCKS5 proxy server closed the connection unexpectedly"
        end

        buffer << chunk.b
      end

      buffer
    rescue EOFError
      fail_connect "SOCKS5 proxy server closed the connection unexpectedly"
    end

    # Reject values whose byte length cannot be encoded in one length byte.
    # @param [String] label Field name used in the error message
    # @param [String] value Field value
    # @raise [HTTP::ConnectionError] if the value is longer than 255 bytes
    def ensure_fits_length_byte(label, value)
      return if value.bytesize <= MAX_FIELD_LENGTH

      fail_connect "SOCKS5 proxy #{label} is too long (max #{MAX_FIELD_LENGTH} bytes)"
    end

    # Record the failure and raise.
    # @param [String] message The error message
    # @raise [HTTP::ConnectionError] always
    def fail_connect(message)
      @failed_connect = true
      raise ConnectionError, message
    end
  end
  # rubocop:enable Metrics/ClassLength
end
stubbed.call(request) : super end def stubs diff --git a/spec/lib/http/headers_spec.rb b/spec/lib/http/headers_spec.rb index e4ac774d..d86294a8 100644 --- a/spec/lib/http/headers_spec.rb +++ b/spec/lib/http/headers_spec.rb @@ -282,7 +282,7 @@ before { headers.set :set_cookie, %w[hoo=ray woo=hoo] } - it { is_expected.to eq '#<HTTP::Headers {"Set-Cookie"=>["hoo=ray", "woo=hoo"]}>' } + it { is_expected.to eq "#<HTTP::Headers {\"Set-Cookie\":[\"hoo=ray\",\"woo=hoo\"]}>" } end describe "#keys" do diff --git a/spec/lib/http/options/socks5_proxy_spec.rb b/spec/lib/http/options/socks5_proxy_spec.rb new file mode 100644 index 00000000..91adb2f8 --- /dev/null +++ b/spec/lib/http/options/socks5_proxy_spec.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +RSpec.describe HTTP::Options, "#proxy" do + let(:opts) { described_class.new } + + it "defaults to no proxy" do + expect(opts.proxy).to eq({}) + end + + it "may be specified with with_proxy" do + opts2 = opts.with_proxy( + proxy_address: "127.0.0.1", + proxy_port: 8080, + proxy_type: :socks5 + ) + expect(opts.proxy).to eq({}) + expect(opts2.proxy).to eq( + proxy_address: "127.0.0.1", + proxy_port: 8080, + proxy_type: :socks5 + ) + end + + it "may be specified with with_proxy including username and password" do + opts2 = opts.with_proxy( + proxy_address: "127.0.0.1", + proxy_port: 8080, + proxy_username: "username", + proxy_password: "password", + proxy_type: :socks5 + ) + expect(opts.proxy).to eq({}) + expect(opts2.proxy).to eq( + proxy_address: "127.0.0.1", + proxy_port: 8080, + proxy_username: "username", + proxy_password: "password", + proxy_type: :socks5 + ) + end +end diff --git a/spec/lib/http/response_spec.rb b/spec/lib/http/response_spec.rb index 322e25b3..133af5fb 100644 --- a/spec/lib/http/response_spec.rb +++ b/spec/lib/http/response_spec.rb @@ -146,7 +146,7 @@ let(:headers) { {content_type: "text/plain"} } let(:body) { double to_s: "foobar" } - it { is_expected.to eq '#<HTTP::Response/1.1 200 OK {"Content-Type"=>"text/plain"}>' } + it { is_expected.to eq "#<HTTP::Response/1.1 200 OK {\"Content-Type\":\"text/plain\"}>" } end describe "#cookies" do diff --git a/spec/support/ssl_helper.rb
b/spec/support/ssl_helper.rb index 47f7bfb0..715fa127 100644 --- a/spec/support/ssl_helper.rb +++ b/spec/support/ssl_helper.rb @@ -11,7 +11,7 @@ class RootCertificate < ::CertificateAuthority::Certificate EXTENSIONS = {"keyUsage" => {"usage" => %w[critical keyCertSign]}}.freeze def initialize - super() + super subject.common_name = "honestachmed.com" serial_number.number = 1