blob: 120d90cd64a75f33c8f2d6daa5d6c530f879027a [file] [log] [blame]
Andreea Costinase45d54b2020-03-10 09:21:14 +01001// Copyright 2020 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "system-proxy/proxy_connect_job.h"
6
7#include <algorithm>
8#include <utility>
9#include <vector>
10
11#include <curl/curl.h>
12#include <curl/easy.h>
13
Andreea Costinase45d54b2020-03-10 09:21:14 +010014#include <base/base64.h>
15#include <base/bind.h>
16#include <base/bind_helpers.h>
17#include <base/callback_helpers.h>
18#include <base/files/file_util.h>
19#include <base/strings/stringprintf.h>
20#include <base/strings/string_split.h>
21#include <base/strings/string_util.h>
22#include <base/time/time.h>
Andreea Costinas08a5d182020-04-29 22:12:47 +020023#include <base/threading/thread.h>
24#include <base/threading/thread_task_runner_handle.h>
Andreea Costinase45d54b2020-03-10 09:21:14 +010025#include <brillo/http/http_transport.h>
Garrick Evanscd8c2972020-04-14 14:35:52 +090026#include <chromeos/patchpanel/net_util.h>
27#include <chromeos/patchpanel/socket.h>
28#include <chromeos/patchpanel/socket_forwarder.h>
Andreea Costinase45d54b2020-03-10 09:21:14 +010029
30#include "system-proxy/curl_socket.h"
31
// The libpatchpanel-util library overloads << for socket data structures.
// By C++'s argument-dependent lookup rules, operators defined in a
// different namespace are not visible. We need the using-declaration below to
// make the overload available in this namespace.
Garrick Evans3388a032020-03-24 11:25:55 +090036using patchpanel::operator<<;
Andreea Costinase45d54b2020-03-10 09:21:14 +010037
38namespace {
39// There's no RFC recomandation for the max size of http request headers but
40// popular http server implementations (Apache, IIS, Tomcat) set the lower limit
41// to 8000.
42constexpr int kMaxHttpRequestHeadersSize = 8000;
43constexpr char kConnectMethod[] = "CONNECT";
Andreea Costinase45d54b2020-03-10 09:21:14 +010044constexpr base::TimeDelta kCurlConnectTimeout = base::TimeDelta::FromMinutes(2);
Andreea Costinas08a5d182020-04-29 22:12:47 +020045constexpr base::TimeDelta kWaitClientConnectTimeout =
46 base::TimeDelta::FromMinutes(2);
Andreea Costinase45d54b2020-03-10 09:21:14 +010047constexpr size_t kMaxBadRequestPrintSize = 120;
// The elements in this array are used to identify the end of a HTTP header
// which should be an empty line. Note: all HTTP header lines end with CRLF.
// RFC7230, section 3.5 allows LF (without CR) as a valid end of header. HTTP
// connect requests don't have a body so end of header is end of request.
// The entries are views over string literals so that the array is a compile
// time constant and needs no dynamic initialization or destruction.
constexpr std::array<std::string_view, 2> kValidHttpHeaderEnd = {"\r\n\n",
                                                                 "\r\n\r\n"};

// HTTP error codes and messages with origin information for debugging
// (RFC7231, section 6.1).
constexpr std::string_view kHttpBadRequest =
    "HTTP/1.1 400 Bad Request - Origin: local proxy\r\n\r\n";
constexpr std::string_view kHttpConnectionTimeout =
    "HTTP/1.1 408 Request Timeout - Origin: local proxy\r\n\r\n";
constexpr std::string_view kHttpInternalServerError =
    "HTTP/1.1 500 Internal Server Error - Origin: local proxy\r\n\r\n";
constexpr std::string_view kHttpBadGateway =
    "HTTP/1.1 502 Bad Gateway - Origin: local proxy\r\n\r\n";
65
Andreea Costinasa2246592020-04-12 23:24:01 +020066// Verifies if the http headers are ending with an http empty line, meaning a
Andreea Costinased3f9782020-05-20 17:09:46 +020067// line that contains only CRLF or LF preceded by a line ending with CRLF.
Andreea Costinasa2246592020-04-12 23:24:01 +020068bool IsEndingWithHttpEmptyLine(const char* headers, int headers_size) {
Andreea Costinased3f9782020-05-20 17:09:46 +020069 for (const auto& header_end : kValidHttpHeaderEnd) {
70 if (headers_size > header_end.size() &&
71 std::memcmp(header_end.data(),
72 headers + headers_size - header_end.size(),
73 header_end.size()) == 0) {
74 return true;
75 }
76 }
77 return false;
Andreea Costinasa2246592020-04-12 23:24:01 +020078}
79
80// CURLOPT_HEADERFUNCTION callback implementation that only returns the headers
81// from the last response sent by the sever. This is to make sure that we
82// send back valid HTTP replies and auhentication data from the HTTP messages is
83// not being leaked to the client. |userdata| is set on the libcurl CURL handle
84// used to configure the request, using the the CURLOPT_HEADERDATA option. Note,
85// from the libcurl documentation: This callback is being called for all the
86// responses received from the proxy server after intiating the connection
87// request. Multiple responses can be received in an authentication sequence.
88// Only the last response's headers should be forwarded to the System-proxy
89// client. The header callback will be called once for each header and only
90// complete header lines are passed on to the callback.
91static size_t WriteHeadersCallback(char* contents,
92 size_t size,
93 size_t nmemb,
94 void* userdata) {
95 std::vector<char>* vec = (std::vector<char>*)userdata;
96
97 // Check if we are receiving a new HTTP message (after the last one was
98 // terminated with an empty line).
99 if (IsEndingWithHttpEmptyLine(vec->data(), vec->size())) {
100 VLOG(1) << "Removing the http reply headers from the server "
101 << base::StringPiece(vec->data(), vec->size());
102 vec->clear();
Andreea Costinase45d54b2020-03-10 09:21:14 +0100103 }
Andreea Costinasa2246592020-04-12 23:24:01 +0200104 vec->insert(vec->end(), contents, contents + (nmemb * size));
Andreea Costinase45d54b2020-03-10 09:21:14 +0100105 return size * nmemb;
106}
107
// CONNECT requests may have a reply body. This method will capture the reply
// and save it in |userdata|. |userdata| is set on the libcurl CURL handle
// used to configure the request, using the CURLOPT_WRITEDATA option, and must
// point to a std::vector<char>. The |size| * |nmemb| bytes from |contents| are
// appended to that vector.
// Returns the number of bytes consumed, which is always |size| * |nmemb|.
static size_t WriteCallback(char* contents,
                            size_t size,
                            size_t nmemb,
                            void* userdata) {
  auto* vec = static_cast<std::vector<char>*>(userdata);
  const size_t byte_count = size * nmemb;
  vec->insert(vec->end(), contents, contents + byte_count);
  return byte_count;
}
119
120// Parses the first line of the http CONNECT request and extracts the URI
121// authority, defined in RFC3986, section 3.2, as the host name and port number
122// separated by a colon. The destination URI is specified in the request line
123// (RFC2817, section 5.2):
Andreea Costinase45d54b2020-03-10 09:21:14 +0100124// CONNECT server.example.com:80 HTTP/1.1
125// If the first line in |raw_request| (the Request-Line) is a correctly formed
Andreea Costinasa2246592020-04-12 23:24:01 +0200126// CONNECT request, it will return the destination URI as host:port, otherwise
127// it will return an empty string.
128std::string GetUriAuthorityFromHttpHeader(
129 const std::vector<char>& raw_request) {
Andreea Costinase45d54b2020-03-10 09:21:14 +0100130 base::StringPiece request(raw_request.data(), raw_request.size());
131 // Request-Line ends with CRLF (RFC2616, section 5.1).
132 size_t i = request.find_first_of("\r\n");
133 if (i == base::StringPiece::npos)
134 return std::string();
135 // Elements are delimited by non-breaking space (SP).
136 auto pieces =
137 base::SplitString(request.substr(0, i), " ", base::TRIM_WHITESPACE,
138 base::SPLIT_WANT_NONEMPTY);
139 // Request-Line has the format: Method SP Request-URI SP HTTP-Version CRLF.
140 if (pieces.size() < 3)
141 return std::string();
142 if (pieces[0] != kConnectMethod)
143 return std::string();
144
Andreea Costinasa2246592020-04-12 23:24:01 +0200145 return pieces[1];
Andreea Costinase45d54b2020-03-10 09:21:14 +0100146}
147} // namespace
148
149namespace system_proxy {
150
151ProxyConnectJob::ProxyConnectJob(
Garrick Evans3388a032020-03-24 11:25:55 +0900152 std::unique_ptr<patchpanel::Socket> socket,
Andreea Costinase45d54b2020-03-10 09:21:14 +0100153 const std::string& credentials,
154 ResolveProxyCallback resolve_proxy_callback,
155 OnConnectionSetupFinishedCallback setup_finished_callback)
156 : credentials_(credentials),
157 resolve_proxy_callback_(std::move(resolve_proxy_callback)),
Andreea Costinas08a5d182020-04-29 22:12:47 +0200158 setup_finished_callback_(std::move(setup_finished_callback)),
159 // Safe to use |base::Unretained| because the callback will be canceled
160 // when it goes out of scope.
161 client_connect_timeout_callback_(base::Bind(
162 &ProxyConnectJob::OnClientConnectTimeout, base::Unretained(this))) {
Andreea Costinase45d54b2020-03-10 09:21:14 +0100163 client_socket_ = std::move(socket);
164}
165
166ProxyConnectJob::~ProxyConnectJob() = default;
167
168bool ProxyConnectJob::Start() {
169 // Make the socket non-blocking.
170 if (!base::SetNonBlocking(client_socket_->fd())) {
171 PLOG(ERROR) << *this << " Failed to mark the socket as non-blocking.";
172 client_socket_->SendTo(kHttpInternalServerError.data(),
173 kHttpInternalServerError.size());
174 return false;
175 }
Andreea Costinas08a5d182020-04-29 22:12:47 +0200176 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
177 FROM_HERE, client_connect_timeout_callback_.callback(),
178 kWaitClientConnectTimeout);
Andreea Costinase45d54b2020-03-10 09:21:14 +0100179 read_watcher_ = base::FileDescriptorWatcher::WatchReadable(
180 client_socket_->fd(),
181 base::Bind(&ProxyConnectJob::OnClientReadReady, base::Unretained(this)));
182 return true;
183}
184
185void ProxyConnectJob::OnClientReadReady() {
Andreea Costinas08a5d182020-04-29 22:12:47 +0200186 if (!read_watcher_) {
187 // The connection has timed out while waiting for the client's HTTP CONNECT
188 // request. See |OnClientConnectTimeout|.
189 return;
190 }
191 client_connect_timeout_callback_.Cancel();
Andreea Costinase45d54b2020-03-10 09:21:14 +0100192 // Stop watching.
193 read_watcher_.reset();
194 // The first message should be a HTTP CONNECT request.
195 std::vector<char> connect_request;
196 if (!TryReadHttpHeader(&connect_request)) {
197 std::string encoded;
198 base::Base64Encode(
199 base::StringPiece(connect_request.data(), connect_request.size()),
200 &encoded);
201 LOG(ERROR) << *this
202 << " Failure to read proxy CONNECT request. Base 64 encoded "
203 "request message from client: "
204 << encoded;
205 OnError(kHttpBadRequest);
206 return;
207 }
208
Andreea Costinasa2246592020-04-12 23:24:01 +0200209 target_url_ = GetUriAuthorityFromHttpHeader(connect_request);
Andreea Costinase45d54b2020-03-10 09:21:14 +0100210 if (target_url_.empty()) {
211 LOG(ERROR)
212 << *this
213 << " Failed to extract target url from the HTTP CONNECT request.";
214 OnError(kHttpBadRequest);
215 return;
216 }
217
Andreea Costinasa89309d2020-05-08 15:51:12 +0200218 // The proxy resolution service in Chrome expects a proper URL, formatted as
219 // scheme://host:port. It's safe to assume only https will be used for the
220 // target url.
Andreea Costinase45d54b2020-03-10 09:21:14 +0100221 std::move(resolve_proxy_callback_)
Andreea Costinasa89309d2020-05-08 15:51:12 +0200222 .Run(base::StringPrintf("https://%s", target_url_.c_str()),
223 base::Bind(&ProxyConnectJob::OnProxyResolution,
224 base::Unretained(this)));
Andreea Costinase45d54b2020-03-10 09:21:14 +0100225}
226
227bool ProxyConnectJob::TryReadHttpHeader(std::vector<char>* raw_request) {
Andreea Costinase45d54b2020-03-10 09:21:14 +0100228 size_t read_byte_count = 0;
229 raw_request->resize(kMaxHttpRequestHeadersSize);
230
231 // Read byte-by-byte and stop when reading an empty line (only CRLF) or when
232 // exceeding the max buffer size.
233 // TODO(acostinas, chromium:1064536) This may have some measurable performance
234 // impact. We should read larger blocks of data, consume the HTTP headers,
235 // cache the tunneled payload that may have already been included (e.g. TLS
236 // ClientHello) and send it to server after the connection is established.
237 while (read_byte_count < kMaxHttpRequestHeadersSize) {
238 if (client_socket_->RecvFrom(raw_request->data() + read_byte_count, 1) <=
239 0) {
240 raw_request->resize(std::min(read_byte_count, kMaxBadRequestPrintSize));
241 return false;
242 }
243 ++read_byte_count;
244
Andreea Costinasa2246592020-04-12 23:24:01 +0200245 if (IsEndingWithHttpEmptyLine(raw_request->data(), read_byte_count)) {
Andreea Costinase45d54b2020-03-10 09:21:14 +0100246 raw_request->resize(read_byte_count);
247 return true;
248 }
249 }
250 return false;
251}
252
253void ProxyConnectJob::OnProxyResolution(
254 const std::list<std::string>& proxy_servers) {
255 proxy_servers_ = proxy_servers;
256 DoCurlServerConnection(proxy_servers.front());
257}
258
259void ProxyConnectJob::DoCurlServerConnection(const std::string& proxy_url) {
260 CURL* easyhandle = curl_easy_init();
261 CURLcode res;
Andreea Costinasa2246592020-04-12 23:24:01 +0200262 curl_socket_t newSocket = -1;
263 std::vector<char> server_header_reply, server_body_reply;
Andreea Costinase45d54b2020-03-10 09:21:14 +0100264
265 if (!easyhandle) {
266 // Unfortunately it's not possible to get the failure reason.
267 LOG(ERROR) << *this << " Failure to create curl handle.";
268 curl_easy_cleanup(easyhandle);
269 OnError(kHttpInternalServerError);
270 return;
271 }
272 curl_easy_setopt(easyhandle, CURLOPT_URL, target_url_.c_str());
273
274 if (proxy_url != brillo::http::kDirectProxy) {
275 curl_easy_setopt(easyhandle, CURLOPT_PROXY, proxy_url.c_str());
276 curl_easy_setopt(easyhandle, CURLOPT_HTTPPROXYTUNNEL, 1L);
277 curl_easy_setopt(easyhandle, CURLOPT_CONNECT_ONLY, 1);
278 // Allow libcurl to pick authentication method. Curl will use the most
279 // secure one the remote site claims to support.
280 curl_easy_setopt(easyhandle, CURLOPT_PROXYAUTH, CURLAUTH_ANY);
281 curl_easy_setopt(easyhandle, CURLOPT_PROXYUSERPWD, credentials_.c_str());
282 }
283 curl_easy_setopt(easyhandle, CURLOPT_CONNECTTIMEOUT_MS,
284 kCurlConnectTimeout.InMilliseconds());
Andreea Costinasa2246592020-04-12 23:24:01 +0200285 curl_easy_setopt(easyhandle, CURLOPT_HEADERFUNCTION, WriteHeadersCallback);
286 curl_easy_setopt(easyhandle, CURLOPT_HEADERDATA, &server_header_reply);
287 curl_easy_setopt(easyhandle, CURLOPT_WRITEFUNCTION, WriteCallback);
288 curl_easy_setopt(easyhandle, CURLOPT_WRITEDATA, &server_body_reply);
Andreea Costinase45d54b2020-03-10 09:21:14 +0100289
290 res = curl_easy_perform(easyhandle);
291
292 if (res != CURLE_OK) {
293 LOG(ERROR) << *this << " curl_easy_perform() failed with error: ",
294 curl_easy_strerror(res);
295 curl_easy_cleanup(easyhandle);
Andreea Costinasa2246592020-04-12 23:24:01 +0200296
297 if (server_header_reply.size() > 0) {
298 // Send the error message from the remote server back to the client.
299 OnError(std::string_view(server_header_reply.data(),
300 server_header_reply.size()));
301 } else {
302 OnError(kHttpInternalServerError);
303 }
Andreea Costinase45d54b2020-03-10 09:21:14 +0100304 return;
305 }
306 // Extract the socket from the curl handle.
307 res = curl_easy_getinfo(easyhandle, CURLINFO_ACTIVESOCKET, &newSocket);
308 if (res != CURLE_OK) {
309 LOG(ERROR) << *this << " Failed to get socket from curl with error: "
310 << curl_easy_strerror(res);
311 curl_easy_cleanup(easyhandle);
312 OnError(kHttpBadGateway);
313 return;
314 }
315
316 ScopedCurlEasyhandle scoped_handle(easyhandle, FreeCurlEasyhandle());
317 auto server_conn = std::make_unique<CurlSocket>(base::ScopedFD(newSocket),
318 std::move(scoped_handle));
319
320 // Send the server reply to the client. If the connection is successful, the
Andreea Costinasa2246592020-04-12 23:24:01 +0200321 // reply headers should be "HTTP/1.1 200 Connection Established".
322 if (client_socket_->SendTo(server_header_reply.data(),
323 server_header_reply.size()) !=
324 server_header_reply.size()) {
325 PLOG(ERROR) << *this << " Failed to send HTTP reply headers to client: "
326 << base::StringPiece(server_header_reply.data(),
327 server_header_reply.size());
328 OnError(kHttpInternalServerError);
329 return;
330 }
331 // HTTP CONNECT responses can have a payload body which should be forwarded to
332 // the client.
333 if (server_body_reply.size() > 0) {
334 // TODO(acostinas, chromium:1064536) Resend the reply body in case of EAGAIN
335 // or EWOULDBLOCK errors.
336 if (client_socket_->SendTo(server_body_reply.data(),
337 server_body_reply.size()) !=
338 server_body_reply.size()) {
339 PLOG(ERROR) << *this
Andreea Costinas08a5d182020-04-29 22:12:47 +0200340 << " Failed to send HTTP CONNECT reply body to client: "
Andreea Costinasa2246592020-04-12 23:24:01 +0200341 << base::StringPiece(server_body_reply.data(),
342 server_body_reply.size());
343 }
344 }
Andreea Costinase45d54b2020-03-10 09:21:14 +0100345
Garrick Evans3388a032020-03-24 11:25:55 +0900346 auto fwd = std::make_unique<patchpanel::SocketForwarder>(
Andreea Costinase45d54b2020-03-10 09:21:14 +0100347 base::StringPrintf("%d-%d", client_socket_->fd(), server_conn->fd()),
348 std::move(client_socket_), std::move(server_conn));
349 // Start forwarding data between sockets.
350 fwd->Start();
351 std::move(setup_finished_callback_).Run(std::move(fwd), this);
352}
353
354void ProxyConnectJob::OnError(const std::string_view& http_error_message) {
355 client_socket_->SendTo(http_error_message.data(), http_error_message.size());
356 std::move(setup_finished_callback_).Run(nullptr, this);
357}
358
Andreea Costinas08a5d182020-04-29 22:12:47 +0200359void ProxyConnectJob::OnClientConnectTimeout() {
360 // Stop listening for client connect requests.
361 read_watcher_.reset();
362 LOG(ERROR) << *this
363 << " Connection timed out while waiting for the client to send a "
364 "connect request.";
365 OnError(kHttpConnectionTimeout);
366}
367
Andreea Costinase45d54b2020-03-10 09:21:14 +0100368std::ostream& operator<<(std::ostream& stream, const ProxyConnectJob& job) {
369 stream << "{fd: " << job.client_socket_->fd();
370 if (!job.target_url_.empty()) {
371 stream << ", url: " << job.target_url_;
372 }
373 stream << "}";
374 return stream;
375}
376
377} // namespace system_proxy