Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 1 | // Copyright 2020 The Chromium OS Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "system-proxy/proxy_connect_job.h" |
| 6 | |
| 7 | #include <algorithm> |
| 8 | #include <utility> |
| 9 | #include <vector> |
| 10 | |
| 11 | #include <curl/curl.h> |
| 12 | #include <curl/easy.h> |
| 13 | |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 14 | #include <base/base64.h> |
| 15 | #include <base/bind.h> |
| 16 | #include <base/bind_helpers.h> |
| 17 | #include <base/callback_helpers.h> |
| 18 | #include <base/files/file_util.h> |
| 19 | #include <base/strings/stringprintf.h> |
| 20 | #include <base/strings/string_split.h> |
| 21 | #include <base/strings/string_util.h> |
| 22 | #include <base/time/time.h> |
| 23 | #include <brillo/http/http_transport.h> |
Garrick Evans | cd8c297 | 2020-04-14 14:35:52 +0900 | [diff] [blame] | 24 | #include <chromeos/patchpanel/net_util.h> |
| 25 | #include <chromeos/patchpanel/socket.h> |
| 26 | #include <chromeos/patchpanel/socket_forwarder.h> |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 27 | |
| 28 | #include "system-proxy/curl_socket.h" |
| 29 | |
// The libarcnetwork-util library overloads << for socket data structures.
// By C++'s argument-dependent lookup rules, operators defined in a
// different namespace are not visible. We need the using declaration to make
// the overload available in this namespace.
Garrick Evans | 3388a03 | 2020-03-24 11:25:55 +0900 | [diff] [blame] | 34 | using patchpanel::operator<<; |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 35 | |
| 36 | namespace { |
| 37 | // There's no RFC recomandation for the max size of http request headers but |
| 38 | // popular http server implementations (Apache, IIS, Tomcat) set the lower limit |
| 39 | // to 8000. |
| 40 | constexpr int kMaxHttpRequestHeadersSize = 8000; |
| 41 | constexpr char kConnectMethod[] = "CONNECT"; |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 42 | constexpr base::TimeDelta kCurlConnectTimeout = base::TimeDelta::FromMinutes(2); |
| 43 | constexpr size_t kMaxBadRequestPrintSize = 120; |
// This sequence is used to identify the end of an HTTP header, which should be
// an empty line. Note: all HTTP header lines end with CRLF. HTTP CONNECT
// requests don't have a body, so the end of the header is the end of the
// request.
constexpr std::string_view kCrlfCrlf = "\r\n\r\n";

// HTTP error codes and messages with origin information for debugging
// (RFC 7231, section 6.1).
constexpr std::string_view kHttpBadRequest =
    "HTTP/1.1 400 Bad Request - Origin: local proxy\r\n\r\n";
constexpr std::string_view kHttpInternalServerError =
    "HTTP/1.1 500 Internal Server Error - Origin: local proxy\r\n\r\n";
constexpr std::string_view kHttpBadGateway =
    "HTTP/1.1 502 Bad Gateway - Origin: local proxy\r\n\r\n";

// Verifies if the HTTP headers are ending with an HTTP empty line, meaning a
// line that contains only CRLF preceded by a line ending with CRLF.
//
// |headers| points to |headers_size| readable bytes. Returns false for a null
// buffer or a negative size instead of reading out of bounds.
bool IsEndingWithHttpEmptyLine(const char* headers, int headers_size) {
  if (headers == nullptr || headers_size < 0)
    return false;

  // Work with an unsigned size from here on so that the comparison with
  // kCrlfCrlf.size() never relies on an implicit signed-to-unsigned conversion.
  const size_t size = static_cast<size_t>(headers_size);
  // Strictly greater: the terminating sequence has to follow at least one byte
  // of header data.
  if (size <= kCrlfCrlf.size())
    return false;

  const char* const tail = headers + (size - kCrlfCrlf.size());
  return std::memcmp(kCrlfCrlf.data(), tail, kCrlfCrlf.size()) == 0;
}
| 66 | |
| 67 | // CURLOPT_HEADERFUNCTION callback implementation that only returns the headers |
| 68 | // from the last response sent by the sever. This is to make sure that we |
| 69 | // send back valid HTTP replies and auhentication data from the HTTP messages is |
| 70 | // not being leaked to the client. |userdata| is set on the libcurl CURL handle |
| 71 | // used to configure the request, using the the CURLOPT_HEADERDATA option. Note, |
| 72 | // from the libcurl documentation: This callback is being called for all the |
| 73 | // responses received from the proxy server after intiating the connection |
| 74 | // request. Multiple responses can be received in an authentication sequence. |
| 75 | // Only the last response's headers should be forwarded to the System-proxy |
| 76 | // client. The header callback will be called once for each header and only |
| 77 | // complete header lines are passed on to the callback. |
| 78 | static size_t WriteHeadersCallback(char* contents, |
| 79 | size_t size, |
| 80 | size_t nmemb, |
| 81 | void* userdata) { |
| 82 | std::vector<char>* vec = (std::vector<char>*)userdata; |
| 83 | |
| 84 | // Check if we are receiving a new HTTP message (after the last one was |
| 85 | // terminated with an empty line). |
| 86 | if (IsEndingWithHttpEmptyLine(vec->data(), vec->size())) { |
| 87 | VLOG(1) << "Removing the http reply headers from the server " |
| 88 | << base::StringPiece(vec->data(), vec->size()); |
| 89 | vec->clear(); |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 90 | } |
Andreea Costinas | a224659 | 2020-04-12 23:24:01 +0200 | [diff] [blame^] | 91 | vec->insert(vec->end(), contents, contents + (nmemb * size)); |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 92 | return size * nmemb; |
| 93 | } |
| 94 | |
// CONNECT requests may have a reply body. This callback captures the reply
// body and appends it to |userdata|. |userdata| is set on the libcurl CURL
// handle used to configure the request, using the CURLOPT_WRITEDATA option,
// and must point to a std::vector<char>.
//
// Returns the number of bytes consumed, which is always |size| * |nmemb|.
static size_t WriteCallback(char* contents,
                            size_t size,
                            size_t nmemb,
                            void* userdata) {
  auto* body = static_cast<std::vector<char>*>(userdata);
  const size_t byte_count = size * nmemb;
  body->insert(body->end(), contents, contents + byte_count);
  return byte_count;
}
| 106 | |
| 107 | // Parses the first line of the http CONNECT request and extracts the URI |
| 108 | // authority, defined in RFC3986, section 3.2, as the host name and port number |
| 109 | // separated by a colon. The destination URI is specified in the request line |
| 110 | // (RFC2817, section 5.2): |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 111 | // CONNECT server.example.com:80 HTTP/1.1 |
| 112 | // If the first line in |raw_request| (the Request-Line) is a correctly formed |
Andreea Costinas | a224659 | 2020-04-12 23:24:01 +0200 | [diff] [blame^] | 113 | // CONNECT request, it will return the destination URI as host:port, otherwise |
| 114 | // it will return an empty string. |
| 115 | std::string GetUriAuthorityFromHttpHeader( |
| 116 | const std::vector<char>& raw_request) { |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 117 | base::StringPiece request(raw_request.data(), raw_request.size()); |
| 118 | // Request-Line ends with CRLF (RFC2616, section 5.1). |
| 119 | size_t i = request.find_first_of("\r\n"); |
| 120 | if (i == base::StringPiece::npos) |
| 121 | return std::string(); |
| 122 | // Elements are delimited by non-breaking space (SP). |
| 123 | auto pieces = |
| 124 | base::SplitString(request.substr(0, i), " ", base::TRIM_WHITESPACE, |
| 125 | base::SPLIT_WANT_NONEMPTY); |
| 126 | // Request-Line has the format: Method SP Request-URI SP HTTP-Version CRLF. |
| 127 | if (pieces.size() < 3) |
| 128 | return std::string(); |
| 129 | if (pieces[0] != kConnectMethod) |
| 130 | return std::string(); |
| 131 | |
Andreea Costinas | a224659 | 2020-04-12 23:24:01 +0200 | [diff] [blame^] | 132 | return pieces[1]; |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 133 | } |
| 134 | } // namespace |
| 135 | |
| 136 | namespace system_proxy { |
| 137 | |
| 138 | ProxyConnectJob::ProxyConnectJob( |
Garrick Evans | 3388a03 | 2020-03-24 11:25:55 +0900 | [diff] [blame] | 139 | std::unique_ptr<patchpanel::Socket> socket, |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 140 | const std::string& credentials, |
| 141 | ResolveProxyCallback resolve_proxy_callback, |
| 142 | OnConnectionSetupFinishedCallback setup_finished_callback) |
| 143 | : credentials_(credentials), |
| 144 | resolve_proxy_callback_(std::move(resolve_proxy_callback)), |
| 145 | setup_finished_callback_(std::move(setup_finished_callback)) { |
| 146 | client_socket_ = std::move(socket); |
| 147 | } |
| 148 | |
| 149 | ProxyConnectJob::~ProxyConnectJob() = default; |
| 150 | |
| 151 | bool ProxyConnectJob::Start() { |
| 152 | // Make the socket non-blocking. |
| 153 | if (!base::SetNonBlocking(client_socket_->fd())) { |
| 154 | PLOG(ERROR) << *this << " Failed to mark the socket as non-blocking."; |
| 155 | client_socket_->SendTo(kHttpInternalServerError.data(), |
| 156 | kHttpInternalServerError.size()); |
| 157 | return false; |
| 158 | } |
| 159 | read_watcher_ = base::FileDescriptorWatcher::WatchReadable( |
| 160 | client_socket_->fd(), |
| 161 | base::Bind(&ProxyConnectJob::OnClientReadReady, base::Unretained(this))); |
| 162 | return true; |
| 163 | } |
| 164 | |
| 165 | void ProxyConnectJob::OnClientReadReady() { |
| 166 | // Stop watching. |
| 167 | read_watcher_.reset(); |
| 168 | // The first message should be a HTTP CONNECT request. |
| 169 | std::vector<char> connect_request; |
| 170 | if (!TryReadHttpHeader(&connect_request)) { |
| 171 | std::string encoded; |
| 172 | base::Base64Encode( |
| 173 | base::StringPiece(connect_request.data(), connect_request.size()), |
| 174 | &encoded); |
| 175 | LOG(ERROR) << *this |
| 176 | << " Failure to read proxy CONNECT request. Base 64 encoded " |
| 177 | "request message from client: " |
| 178 | << encoded; |
| 179 | OnError(kHttpBadRequest); |
| 180 | return; |
| 181 | } |
| 182 | |
Andreea Costinas | a224659 | 2020-04-12 23:24:01 +0200 | [diff] [blame^] | 183 | target_url_ = GetUriAuthorityFromHttpHeader(connect_request); |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 184 | if (target_url_.empty()) { |
| 185 | LOG(ERROR) |
| 186 | << *this |
| 187 | << " Failed to extract target url from the HTTP CONNECT request."; |
| 188 | OnError(kHttpBadRequest); |
| 189 | return; |
| 190 | } |
| 191 | |
| 192 | std::move(resolve_proxy_callback_) |
| 193 | .Run(target_url_, base::Bind(&ProxyConnectJob::OnProxyResolution, |
| 194 | base::Unretained(this))); |
| 195 | } |
| 196 | |
| 197 | bool ProxyConnectJob::TryReadHttpHeader(std::vector<char>* raw_request) { |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 198 | size_t read_byte_count = 0; |
| 199 | raw_request->resize(kMaxHttpRequestHeadersSize); |
| 200 | |
| 201 | // Read byte-by-byte and stop when reading an empty line (only CRLF) or when |
| 202 | // exceeding the max buffer size. |
| 203 | // TODO(acostinas, chromium:1064536) This may have some measurable performance |
| 204 | // impact. We should read larger blocks of data, consume the HTTP headers, |
| 205 | // cache the tunneled payload that may have already been included (e.g. TLS |
| 206 | // ClientHello) and send it to server after the connection is established. |
| 207 | while (read_byte_count < kMaxHttpRequestHeadersSize) { |
| 208 | if (client_socket_->RecvFrom(raw_request->data() + read_byte_count, 1) <= |
| 209 | 0) { |
| 210 | raw_request->resize(std::min(read_byte_count, kMaxBadRequestPrintSize)); |
| 211 | return false; |
| 212 | } |
| 213 | ++read_byte_count; |
| 214 | |
Andreea Costinas | a224659 | 2020-04-12 23:24:01 +0200 | [diff] [blame^] | 215 | if (IsEndingWithHttpEmptyLine(raw_request->data(), read_byte_count)) { |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 216 | raw_request->resize(read_byte_count); |
| 217 | return true; |
| 218 | } |
| 219 | } |
| 220 | return false; |
| 221 | } |
| 222 | |
| 223 | void ProxyConnectJob::OnProxyResolution( |
| 224 | const std::list<std::string>& proxy_servers) { |
| 225 | proxy_servers_ = proxy_servers; |
| 226 | DoCurlServerConnection(proxy_servers.front()); |
| 227 | } |
| 228 | |
| 229 | void ProxyConnectJob::DoCurlServerConnection(const std::string& proxy_url) { |
| 230 | CURL* easyhandle = curl_easy_init(); |
| 231 | CURLcode res; |
Andreea Costinas | a224659 | 2020-04-12 23:24:01 +0200 | [diff] [blame^] | 232 | curl_socket_t newSocket = -1; |
| 233 | std::vector<char> server_header_reply, server_body_reply; |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 234 | |
| 235 | if (!easyhandle) { |
| 236 | // Unfortunately it's not possible to get the failure reason. |
| 237 | LOG(ERROR) << *this << " Failure to create curl handle."; |
| 238 | curl_easy_cleanup(easyhandle); |
| 239 | OnError(kHttpInternalServerError); |
| 240 | return; |
| 241 | } |
| 242 | curl_easy_setopt(easyhandle, CURLOPT_URL, target_url_.c_str()); |
| 243 | |
| 244 | if (proxy_url != brillo::http::kDirectProxy) { |
| 245 | curl_easy_setopt(easyhandle, CURLOPT_PROXY, proxy_url.c_str()); |
| 246 | curl_easy_setopt(easyhandle, CURLOPT_HTTPPROXYTUNNEL, 1L); |
| 247 | curl_easy_setopt(easyhandle, CURLOPT_CONNECT_ONLY, 1); |
| 248 | // Allow libcurl to pick authentication method. Curl will use the most |
| 249 | // secure one the remote site claims to support. |
| 250 | curl_easy_setopt(easyhandle, CURLOPT_PROXYAUTH, CURLAUTH_ANY); |
| 251 | curl_easy_setopt(easyhandle, CURLOPT_PROXYUSERPWD, credentials_.c_str()); |
| 252 | } |
| 253 | curl_easy_setopt(easyhandle, CURLOPT_CONNECTTIMEOUT_MS, |
| 254 | kCurlConnectTimeout.InMilliseconds()); |
Andreea Costinas | a224659 | 2020-04-12 23:24:01 +0200 | [diff] [blame^] | 255 | curl_easy_setopt(easyhandle, CURLOPT_HEADERFUNCTION, WriteHeadersCallback); |
| 256 | curl_easy_setopt(easyhandle, CURLOPT_HEADERDATA, &server_header_reply); |
| 257 | curl_easy_setopt(easyhandle, CURLOPT_WRITEFUNCTION, WriteCallback); |
| 258 | curl_easy_setopt(easyhandle, CURLOPT_WRITEDATA, &server_body_reply); |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 259 | |
| 260 | res = curl_easy_perform(easyhandle); |
| 261 | |
| 262 | if (res != CURLE_OK) { |
| 263 | LOG(ERROR) << *this << " curl_easy_perform() failed with error: ", |
| 264 | curl_easy_strerror(res); |
| 265 | curl_easy_cleanup(easyhandle); |
Andreea Costinas | a224659 | 2020-04-12 23:24:01 +0200 | [diff] [blame^] | 266 | |
| 267 | if (server_header_reply.size() > 0) { |
| 268 | // Send the error message from the remote server back to the client. |
| 269 | OnError(std::string_view(server_header_reply.data(), |
| 270 | server_header_reply.size())); |
| 271 | } else { |
| 272 | OnError(kHttpInternalServerError); |
| 273 | } |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 274 | return; |
| 275 | } |
| 276 | // Extract the socket from the curl handle. |
| 277 | res = curl_easy_getinfo(easyhandle, CURLINFO_ACTIVESOCKET, &newSocket); |
| 278 | if (res != CURLE_OK) { |
| 279 | LOG(ERROR) << *this << " Failed to get socket from curl with error: " |
| 280 | << curl_easy_strerror(res); |
| 281 | curl_easy_cleanup(easyhandle); |
| 282 | OnError(kHttpBadGateway); |
| 283 | return; |
| 284 | } |
| 285 | |
| 286 | ScopedCurlEasyhandle scoped_handle(easyhandle, FreeCurlEasyhandle()); |
| 287 | auto server_conn = std::make_unique<CurlSocket>(base::ScopedFD(newSocket), |
| 288 | std::move(scoped_handle)); |
| 289 | |
| 290 | // Send the server reply to the client. If the connection is successful, the |
Andreea Costinas | a224659 | 2020-04-12 23:24:01 +0200 | [diff] [blame^] | 291 | // reply headers should be "HTTP/1.1 200 Connection Established". |
| 292 | if (client_socket_->SendTo(server_header_reply.data(), |
| 293 | server_header_reply.size()) != |
| 294 | server_header_reply.size()) { |
| 295 | PLOG(ERROR) << *this << " Failed to send HTTP reply headers to client: " |
| 296 | << base::StringPiece(server_header_reply.data(), |
| 297 | server_header_reply.size()); |
| 298 | OnError(kHttpInternalServerError); |
| 299 | return; |
| 300 | } |
| 301 | // HTTP CONNECT responses can have a payload body which should be forwarded to |
| 302 | // the client. |
| 303 | if (server_body_reply.size() > 0) { |
| 304 | // TODO(acostinas, chromium:1064536) Resend the reply body in case of EAGAIN |
| 305 | // or EWOULDBLOCK errors. |
| 306 | if (client_socket_->SendTo(server_body_reply.data(), |
| 307 | server_body_reply.size()) != |
| 308 | server_body_reply.size()) { |
| 309 | PLOG(ERROR) << *this |
| 310 | << " Failed to send HTTP CONNECT reply body to client: " |
| 311 | << base::StringPiece(server_body_reply.data(), |
| 312 | server_body_reply.size()); |
| 313 | } |
| 314 | } |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 315 | |
Garrick Evans | 3388a03 | 2020-03-24 11:25:55 +0900 | [diff] [blame] | 316 | auto fwd = std::make_unique<patchpanel::SocketForwarder>( |
Andreea Costinas | e45d54b | 2020-03-10 09:21:14 +0100 | [diff] [blame] | 317 | base::StringPrintf("%d-%d", client_socket_->fd(), server_conn->fd()), |
| 318 | std::move(client_socket_), std::move(server_conn)); |
| 319 | // Start forwarding data between sockets. |
| 320 | fwd->Start(); |
| 321 | std::move(setup_finished_callback_).Run(std::move(fwd), this); |
| 322 | } |
| 323 | |
| 324 | void ProxyConnectJob::OnError(const std::string_view& http_error_message) { |
| 325 | client_socket_->SendTo(http_error_message.data(), http_error_message.size()); |
| 326 | std::move(setup_finished_callback_).Run(nullptr, this); |
| 327 | } |
| 328 | |
| 329 | std::ostream& operator<<(std::ostream& stream, const ProxyConnectJob& job) { |
| 330 | stream << "{fd: " << job.client_socket_->fd(); |
| 331 | if (!job.target_url_.empty()) { |
| 332 | stream << ", url: " << job.target_url_; |
| 333 | } |
| 334 | stream << "}"; |
| 335 | return stream; |
| 336 | } |
| 337 | |
| 338 | } // namespace system_proxy |