// Copyright 2020 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "system-proxy/proxy_connect_job.h"
6
7#include <algorithm>
8#include <utility>
9#include <vector>
10
11#include <curl/curl.h>
12#include <curl/easy.h>
13
Andreea Costinase45d54b2020-03-10 09:21:14 +010014#include <base/base64.h>
15#include <base/bind.h>
16#include <base/bind_helpers.h>
17#include <base/callback_helpers.h>
18#include <base/files/file_util.h>
19#include <base/strings/stringprintf.h>
20#include <base/strings/string_split.h>
21#include <base/strings/string_util.h>
22#include <base/time/time.h>
23#include <brillo/http/http_transport.h>
Garrick Evanscd8c2972020-04-14 14:35:52 +090024#include <chromeos/patchpanel/net_util.h>
25#include <chromeos/patchpanel/socket.h>
26#include <chromeos/patchpanel/socket_forwarder.h>
Andreea Costinase45d54b2020-03-10 09:21:14 +010027
28#include "system-proxy/curl_socket.h"
29
// The libarcnetwork-util library overloads << for socket data structures.
// By C++'s argument-dependent lookup rules, operators defined in a
// different namespace are not visible. We need the using-declaration to make
// the overload available in this namespace.
Garrick Evans3388a032020-03-24 11:25:55 +090034using patchpanel::operator<<;
Andreea Costinase45d54b2020-03-10 09:21:14 +010035
36namespace {
37// There's no RFC recomandation for the max size of http request headers but
38// popular http server implementations (Apache, IIS, Tomcat) set the lower limit
39// to 8000.
40constexpr int kMaxHttpRequestHeadersSize = 8000;
41constexpr char kConnectMethod[] = "CONNECT";
Andreea Costinase45d54b2020-03-10 09:21:14 +010042constexpr base::TimeDelta kCurlConnectTimeout = base::TimeDelta::FromMinutes(2);
43constexpr size_t kMaxBadRequestPrintSize = 120;
// This sequence is used to identify the end of an HTTP header, which should be
// an empty line. Note: all HTTP header lines end with CRLF. HTTP CONNECT
// requests don't have a body, so the end of the header is the end of the
// request.
constexpr std::string_view kCrlfCrlf = "\r\n\r\n";
Andreea Costinase45d54b2020-03-10 09:21:14 +010048
// HTTP error replies sent to the client when the failure originates in this
// proxy. The status line carries origin information for debugging. Status codes
// are defined in RFC 7231, section 6.1. Every reply is terminated by an empty
// line so that it forms a complete, body-less HTTP message.
constexpr std::string_view kHttpBadRequest =
    "HTTP/1.1 400 Bad Request - Origin: local proxy\r\n\r\n";
constexpr std::string_view kHttpInternalServerError =
    "HTTP/1.1 500 Internal Server Error - Origin: local proxy\r\n\r\n";
constexpr std::string_view kHttpBadGateway =
    "HTTP/1.1 502 Bad Gateway - Origin: local proxy\r\n\r\n";
57
Andreea Costinasa2246592020-04-12 23:24:01 +020058// Verifies if the http headers are ending with an http empty line, meaning a
59// line that contains only CR LF preceded by a line ending with CRLF.
60bool IsEndingWithHttpEmptyLine(const char* headers, int headers_size) {
61 return headers_size > kCrlfCrlf.size() &&
62 std::memcmp(kCrlfCrlf.data(),
63 headers + headers_size - kCrlfCrlf.size(),
64 kCrlfCrlf.size()) == 0;
65}
66
67// CURLOPT_HEADERFUNCTION callback implementation that only returns the headers
68// from the last response sent by the sever. This is to make sure that we
69// send back valid HTTP replies and auhentication data from the HTTP messages is
70// not being leaked to the client. |userdata| is set on the libcurl CURL handle
71// used to configure the request, using the the CURLOPT_HEADERDATA option. Note,
72// from the libcurl documentation: This callback is being called for all the
73// responses received from the proxy server after intiating the connection
74// request. Multiple responses can be received in an authentication sequence.
75// Only the last response's headers should be forwarded to the System-proxy
76// client. The header callback will be called once for each header and only
77// complete header lines are passed on to the callback.
78static size_t WriteHeadersCallback(char* contents,
79 size_t size,
80 size_t nmemb,
81 void* userdata) {
82 std::vector<char>* vec = (std::vector<char>*)userdata;
83
84 // Check if we are receiving a new HTTP message (after the last one was
85 // terminated with an empty line).
86 if (IsEndingWithHttpEmptyLine(vec->data(), vec->size())) {
87 VLOG(1) << "Removing the http reply headers from the server "
88 << base::StringPiece(vec->data(), vec->size());
89 vec->clear();
Andreea Costinase45d54b2020-03-10 09:21:14 +010090 }
Andreea Costinasa2246592020-04-12 23:24:01 +020091 vec->insert(vec->end(), contents, contents + (nmemb * size));
Andreea Costinase45d54b2020-03-10 09:21:14 +010092 return size * nmemb;
93}
94
// CURLOPT_WRITEFUNCTION callback. CONNECT requests may have a reply body; this
// callback appends every received chunk to the std::vector<char> passed in
// |userdata|, which is registered on the libcurl handle with the
// CURLOPT_WRITEDATA option.
//
// Returns the number of bytes consumed, which is always |size| * |nmemb|.
static size_t WriteCallback(char* contents,
                            size_t size,
                            size_t nmemb,
                            void* userdata) {
  const size_t chunk_size = size * nmemb;
  auto* body = static_cast<std::vector<char>*>(userdata);
  body->insert(body->end(), contents, contents + chunk_size);
  return chunk_size;
}
106
107// Parses the first line of the http CONNECT request and extracts the URI
108// authority, defined in RFC3986, section 3.2, as the host name and port number
109// separated by a colon. The destination URI is specified in the request line
110// (RFC2817, section 5.2):
Andreea Costinase45d54b2020-03-10 09:21:14 +0100111// CONNECT server.example.com:80 HTTP/1.1
112// If the first line in |raw_request| (the Request-Line) is a correctly formed
Andreea Costinasa2246592020-04-12 23:24:01 +0200113// CONNECT request, it will return the destination URI as host:port, otherwise
114// it will return an empty string.
115std::string GetUriAuthorityFromHttpHeader(
116 const std::vector<char>& raw_request) {
Andreea Costinase45d54b2020-03-10 09:21:14 +0100117 base::StringPiece request(raw_request.data(), raw_request.size());
118 // Request-Line ends with CRLF (RFC2616, section 5.1).
119 size_t i = request.find_first_of("\r\n");
120 if (i == base::StringPiece::npos)
121 return std::string();
122 // Elements are delimited by non-breaking space (SP).
123 auto pieces =
124 base::SplitString(request.substr(0, i), " ", base::TRIM_WHITESPACE,
125 base::SPLIT_WANT_NONEMPTY);
126 // Request-Line has the format: Method SP Request-URI SP HTTP-Version CRLF.
127 if (pieces.size() < 3)
128 return std::string();
129 if (pieces[0] != kConnectMethod)
130 return std::string();
131
Andreea Costinasa2246592020-04-12 23:24:01 +0200132 return pieces[1];
Andreea Costinase45d54b2020-03-10 09:21:14 +0100133}
134} // namespace
135
136namespace system_proxy {
137
138ProxyConnectJob::ProxyConnectJob(
Garrick Evans3388a032020-03-24 11:25:55 +0900139 std::unique_ptr<patchpanel::Socket> socket,
Andreea Costinase45d54b2020-03-10 09:21:14 +0100140 const std::string& credentials,
141 ResolveProxyCallback resolve_proxy_callback,
142 OnConnectionSetupFinishedCallback setup_finished_callback)
143 : credentials_(credentials),
144 resolve_proxy_callback_(std::move(resolve_proxy_callback)),
145 setup_finished_callback_(std::move(setup_finished_callback)) {
146 client_socket_ = std::move(socket);
147}
148
149ProxyConnectJob::~ProxyConnectJob() = default;
150
151bool ProxyConnectJob::Start() {
152 // Make the socket non-blocking.
153 if (!base::SetNonBlocking(client_socket_->fd())) {
154 PLOG(ERROR) << *this << " Failed to mark the socket as non-blocking.";
155 client_socket_->SendTo(kHttpInternalServerError.data(),
156 kHttpInternalServerError.size());
157 return false;
158 }
159 read_watcher_ = base::FileDescriptorWatcher::WatchReadable(
160 client_socket_->fd(),
161 base::Bind(&ProxyConnectJob::OnClientReadReady, base::Unretained(this)));
162 return true;
163}
164
165void ProxyConnectJob::OnClientReadReady() {
166 // Stop watching.
167 read_watcher_.reset();
168 // The first message should be a HTTP CONNECT request.
169 std::vector<char> connect_request;
170 if (!TryReadHttpHeader(&connect_request)) {
171 std::string encoded;
172 base::Base64Encode(
173 base::StringPiece(connect_request.data(), connect_request.size()),
174 &encoded);
175 LOG(ERROR) << *this
176 << " Failure to read proxy CONNECT request. Base 64 encoded "
177 "request message from client: "
178 << encoded;
179 OnError(kHttpBadRequest);
180 return;
181 }
182
Andreea Costinasa2246592020-04-12 23:24:01 +0200183 target_url_ = GetUriAuthorityFromHttpHeader(connect_request);
Andreea Costinase45d54b2020-03-10 09:21:14 +0100184 if (target_url_.empty()) {
185 LOG(ERROR)
186 << *this
187 << " Failed to extract target url from the HTTP CONNECT request.";
188 OnError(kHttpBadRequest);
189 return;
190 }
191
192 std::move(resolve_proxy_callback_)
193 .Run(target_url_, base::Bind(&ProxyConnectJob::OnProxyResolution,
194 base::Unretained(this)));
195}
196
197bool ProxyConnectJob::TryReadHttpHeader(std::vector<char>* raw_request) {
Andreea Costinase45d54b2020-03-10 09:21:14 +0100198 size_t read_byte_count = 0;
199 raw_request->resize(kMaxHttpRequestHeadersSize);
200
201 // Read byte-by-byte and stop when reading an empty line (only CRLF) or when
202 // exceeding the max buffer size.
203 // TODO(acostinas, chromium:1064536) This may have some measurable performance
204 // impact. We should read larger blocks of data, consume the HTTP headers,
205 // cache the tunneled payload that may have already been included (e.g. TLS
206 // ClientHello) and send it to server after the connection is established.
207 while (read_byte_count < kMaxHttpRequestHeadersSize) {
208 if (client_socket_->RecvFrom(raw_request->data() + read_byte_count, 1) <=
209 0) {
210 raw_request->resize(std::min(read_byte_count, kMaxBadRequestPrintSize));
211 return false;
212 }
213 ++read_byte_count;
214
Andreea Costinasa2246592020-04-12 23:24:01 +0200215 if (IsEndingWithHttpEmptyLine(raw_request->data(), read_byte_count)) {
Andreea Costinase45d54b2020-03-10 09:21:14 +0100216 raw_request->resize(read_byte_count);
217 return true;
218 }
219 }
220 return false;
221}
222
223void ProxyConnectJob::OnProxyResolution(
224 const std::list<std::string>& proxy_servers) {
225 proxy_servers_ = proxy_servers;
226 DoCurlServerConnection(proxy_servers.front());
227}
228
229void ProxyConnectJob::DoCurlServerConnection(const std::string& proxy_url) {
230 CURL* easyhandle = curl_easy_init();
231 CURLcode res;
Andreea Costinasa2246592020-04-12 23:24:01 +0200232 curl_socket_t newSocket = -1;
233 std::vector<char> server_header_reply, server_body_reply;
Andreea Costinase45d54b2020-03-10 09:21:14 +0100234
235 if (!easyhandle) {
236 // Unfortunately it's not possible to get the failure reason.
237 LOG(ERROR) << *this << " Failure to create curl handle.";
238 curl_easy_cleanup(easyhandle);
239 OnError(kHttpInternalServerError);
240 return;
241 }
242 curl_easy_setopt(easyhandle, CURLOPT_URL, target_url_.c_str());
243
244 if (proxy_url != brillo::http::kDirectProxy) {
245 curl_easy_setopt(easyhandle, CURLOPT_PROXY, proxy_url.c_str());
246 curl_easy_setopt(easyhandle, CURLOPT_HTTPPROXYTUNNEL, 1L);
247 curl_easy_setopt(easyhandle, CURLOPT_CONNECT_ONLY, 1);
248 // Allow libcurl to pick authentication method. Curl will use the most
249 // secure one the remote site claims to support.
250 curl_easy_setopt(easyhandle, CURLOPT_PROXYAUTH, CURLAUTH_ANY);
251 curl_easy_setopt(easyhandle, CURLOPT_PROXYUSERPWD, credentials_.c_str());
252 }
253 curl_easy_setopt(easyhandle, CURLOPT_CONNECTTIMEOUT_MS,
254 kCurlConnectTimeout.InMilliseconds());
Andreea Costinasa2246592020-04-12 23:24:01 +0200255 curl_easy_setopt(easyhandle, CURLOPT_HEADERFUNCTION, WriteHeadersCallback);
256 curl_easy_setopt(easyhandle, CURLOPT_HEADERDATA, &server_header_reply);
257 curl_easy_setopt(easyhandle, CURLOPT_WRITEFUNCTION, WriteCallback);
258 curl_easy_setopt(easyhandle, CURLOPT_WRITEDATA, &server_body_reply);
Andreea Costinase45d54b2020-03-10 09:21:14 +0100259
260 res = curl_easy_perform(easyhandle);
261
262 if (res != CURLE_OK) {
263 LOG(ERROR) << *this << " curl_easy_perform() failed with error: ",
264 curl_easy_strerror(res);
265 curl_easy_cleanup(easyhandle);
Andreea Costinasa2246592020-04-12 23:24:01 +0200266
267 if (server_header_reply.size() > 0) {
268 // Send the error message from the remote server back to the client.
269 OnError(std::string_view(server_header_reply.data(),
270 server_header_reply.size()));
271 } else {
272 OnError(kHttpInternalServerError);
273 }
Andreea Costinase45d54b2020-03-10 09:21:14 +0100274 return;
275 }
276 // Extract the socket from the curl handle.
277 res = curl_easy_getinfo(easyhandle, CURLINFO_ACTIVESOCKET, &newSocket);
278 if (res != CURLE_OK) {
279 LOG(ERROR) << *this << " Failed to get socket from curl with error: "
280 << curl_easy_strerror(res);
281 curl_easy_cleanup(easyhandle);
282 OnError(kHttpBadGateway);
283 return;
284 }
285
286 ScopedCurlEasyhandle scoped_handle(easyhandle, FreeCurlEasyhandle());
287 auto server_conn = std::make_unique<CurlSocket>(base::ScopedFD(newSocket),
288 std::move(scoped_handle));
289
290 // Send the server reply to the client. If the connection is successful, the
Andreea Costinasa2246592020-04-12 23:24:01 +0200291 // reply headers should be "HTTP/1.1 200 Connection Established".
292 if (client_socket_->SendTo(server_header_reply.data(),
293 server_header_reply.size()) !=
294 server_header_reply.size()) {
295 PLOG(ERROR) << *this << " Failed to send HTTP reply headers to client: "
296 << base::StringPiece(server_header_reply.data(),
297 server_header_reply.size());
298 OnError(kHttpInternalServerError);
299 return;
300 }
301 // HTTP CONNECT responses can have a payload body which should be forwarded to
302 // the client.
303 if (server_body_reply.size() > 0) {
304 // TODO(acostinas, chromium:1064536) Resend the reply body in case of EAGAIN
305 // or EWOULDBLOCK errors.
306 if (client_socket_->SendTo(server_body_reply.data(),
307 server_body_reply.size()) !=
308 server_body_reply.size()) {
309 PLOG(ERROR) << *this
310 << " Failed to send HTTP CONNECT reply body to client: "
311 << base::StringPiece(server_body_reply.data(),
312 server_body_reply.size());
313 }
314 }
Andreea Costinase45d54b2020-03-10 09:21:14 +0100315
Garrick Evans3388a032020-03-24 11:25:55 +0900316 auto fwd = std::make_unique<patchpanel::SocketForwarder>(
Andreea Costinase45d54b2020-03-10 09:21:14 +0100317 base::StringPrintf("%d-%d", client_socket_->fd(), server_conn->fd()),
318 std::move(client_socket_), std::move(server_conn));
319 // Start forwarding data between sockets.
320 fwd->Start();
321 std::move(setup_finished_callback_).Run(std::move(fwd), this);
322}
323
324void ProxyConnectJob::OnError(const std::string_view& http_error_message) {
325 client_socket_->SendTo(http_error_message.data(), http_error_message.size());
326 std::move(setup_finished_callback_).Run(nullptr, this);
327}
328
329std::ostream& operator<<(std::ostream& stream, const ProxyConnectJob& job) {
330 stream << "{fd: " << job.client_socket_->fd();
331 if (!job.target_url_.empty()) {
332 stream << ", url: " << job.target_url_;
333 }
334 stream << "}";
335 return stream;
336}
337
338} // namespace system_proxy