blob: b4104772eabba34bc55c2b772297915bc658e7c9 [file] [log] [blame]
/*
 *  Copyright 2004 The WebRTC Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "rtc_base/stringencode.h"
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000012
13#include <stdio.h>
14#include <stdlib.h>
15
Jonas Olsson6b1985d2018-07-05 11:59:48 +020016#include "rtc_base/arraysize.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "rtc_base/checks.h"
18#include "rtc_base/stringutils.h"
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000019
20namespace rtc {
21
/////////////////////////////////////////////////////////////////////////////
// String Encoding Utilities
/////////////////////////////////////////////////////////////////////////////
25
Yves Gerey665174f2018-06-19 15:03:05 +020026size_t url_decode(char* buffer,
27 size_t buflen,
28 const char* source,
29 size_t srclen) {
deadbeef37f5ecf2017-02-27 14:06:41 -080030 if (nullptr == buffer)
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000031 return srclen + 1;
32 if (buflen <= 0)
33 return 0;
34
35 unsigned char h1, h2;
36 size_t srcpos = 0, bufpos = 0;
37 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
38 unsigned char ch = source[srcpos++];
39 if (ch == '+') {
40 buffer[bufpos++] = ' ';
Yves Gerey665174f2018-06-19 15:03:05 +020041 } else if ((ch == '%') && (srcpos + 1 < srclen) &&
42 hex_decode(source[srcpos], &h1) &&
43 hex_decode(source[srcpos + 1], &h2)) {
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000044 buffer[bufpos++] = (h1 << 4) | h2;
45 srcpos += 2;
46 } else {
47 buffer[bufpos++] = ch;
48 }
49 }
50 buffer[bufpos] = '\0';
51 return bufpos;
52}
53
// Decodes the first UTF-8 sequence found in |source| (at most |srclen| bytes)
// and stores the resulting code point in |*value|.
// Returns the number of bytes consumed (1 to 4). Returns 0, leaving |*value|
// untouched, when |srclen| is 0, when the sequence is truncated, when a
// trailer byte is not of the form 10xxxxxx, or when the lead byte does not
// introduce a 1- to 4-byte sequence.
// Note: overlong encodings and surrogate code points are not rejected.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value) {
  // With no input there is no lead byte; reading s[0] would be out of bounds.
  if (srclen == 0) {
    return 0;
  }
  const unsigned char* s = reinterpret_cast<const unsigned char*>(source);
  if ((s[0] & 0x80) == 0x00) {  // Check s[0] == 0xxxxxxx
    *value = s[0];
    return 1;
  }
  if ((srclen < 2) || ((s[1] & 0xC0) != 0x80)) {  // Check s[1] != 10xxxxxx
    return 0;
  }
  // Accumulate the payload bits of the trailer bytes, and combine them with
  // the relevant bits from s[0] once the sequence length is known.
  unsigned long trailer_bits = (s[1] & 0x3F);
  if ((s[0] & 0xE0) == 0xC0) {  // Check s[0] == 110xxxxx
    *value = ((s[0] & 0x1F) << 6) | trailer_bits;
    return 2;
  }
  if ((srclen < 3) || ((s[2] & 0xC0) != 0x80)) {  // Check s[2] != 10xxxxxx
    return 0;
  }
  trailer_bits = (trailer_bits << 6) | (s[2] & 0x3F);
  if ((s[0] & 0xF0) == 0xE0) {  // Check s[0] == 1110xxxx
    *value = ((s[0] & 0x0F) << 12) | trailer_bits;
    return 3;
  }
  if ((srclen < 4) || ((s[3] & 0xC0) != 0x80)) {  // Check s[3] != 10xxxxxx
    return 0;
  }
  trailer_bits = (trailer_bits << 6) | (s[3] & 0x3F);
  if ((s[0] & 0xF8) == 0xF0) {  // Check s[0] == 11110xxx
    *value = ((s[0] & 0x07) << 18) | trailer_bits;
    return 4;
  }
  return 0;
}
88
// Writes the UTF-8 encoding of |value| into |buffer| (no null terminator).
// Returns the number of bytes written (1 to 4), or 0 if |value| exceeds
// 0x1FFFFF or |buflen| is too small for the encoding.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value) {
  // Pick the sequence length and the marker bits of the lead byte.
  size_t length;
  unsigned char lead_marker;
  if (value <= 0x7F) {
    length = 1;
    lead_marker = 0x00;
  } else if (value <= 0x7FF) {
    length = 2;
    lead_marker = 0xC0;
  } else if (value <= 0xFFFF) {
    length = 3;
    lead_marker = 0xE0;
  } else if (value <= 0x1FFFFF) {
    length = 4;
    lead_marker = 0xF0;
  } else {
    return 0;
  }
  if (buflen < length) {
    return 0;
  }

  // Fill trailer bytes back to front, six payload bits at a time; whatever
  // remains in |value| afterwards belongs to the lead byte.
  for (size_t i = length - 1; i > 0; --i) {
    buffer[i] = static_cast<char>(0x80 | (value & 0x3F));
    value >>= 6;
  }
  buffer[0] = static_cast<char>(lead_marker | value);
  return length;
}
114
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000115static const char HEX[] = "0123456789abcdef";
116
117char hex_encode(unsigned char val) {
henrikg91d6ede2015-09-17 00:24:34 -0700118 RTC_DCHECK_LT(val, 16);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000119 return (val < 16) ? HEX[val] : '!';
120}
121
// Converts a single hexadecimal digit |ch| ('0'-'9', 'A'-'F' or 'a'-'f') to
// its numeric value (0-15) in |*val|.
// Returns true on success. Returns false, leaving |*val| untouched, when |ch|
// is not a hexadecimal digit.
bool hex_decode(char ch, unsigned char* val) {
  if ((ch >= '0') && (ch <= '9')) {
    *val = ch - '0';
  } else if ((ch >= 'A') && (ch <= 'F')) {
    // Only A-F are hex digits; anything beyond 'F' would decode to a value
    // above 15 and corrupt the byte assembled by the caller.
    *val = (ch - 'A') + 10;
  } else if ((ch >= 'a') && (ch <= 'f')) {
    *val = (ch - 'a') + 10;
  } else {
    return false;
  }
  return true;
}
134
Yves Gerey665174f2018-06-19 15:03:05 +0200135size_t hex_encode(char* buffer,
136 size_t buflen,
137 const char* csource,
138 size_t srclen) {
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000139 return hex_encode_with_delimiter(buffer, buflen, csource, srclen, 0);
140}
141
Yves Gerey665174f2018-06-19 15:03:05 +0200142size_t hex_encode_with_delimiter(char* buffer,
143 size_t buflen,
144 const char* csource,
145 size_t srclen,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000146 char delimiter) {
Henrik Grunell84879882018-03-23 15:33:03 +0100147 RTC_DCHECK(buffer); // TODO(kwiberg): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000148 if (buflen == 0)
149 return 0;
150
151 // Init and check bounds.
152 const unsigned char* bsource =
153 reinterpret_cast<const unsigned char*>(csource);
154 size_t srcpos = 0, bufpos = 0;
155 size_t needed = delimiter ? (srclen * 3) : (srclen * 2 + 1);
156 if (buflen < needed)
157 return 0;
158
159 while (srcpos < srclen) {
160 unsigned char ch = bsource[srcpos++];
Yves Gerey665174f2018-06-19 15:03:05 +0200161 buffer[bufpos] = hex_encode((ch >> 4) & 0xF);
162 buffer[bufpos + 1] = hex_encode((ch)&0xF);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000163 bufpos += 2;
164
165 // Don't write a delimiter after the last byte.
166 if (delimiter && (srcpos < srclen)) {
167 buffer[bufpos] = delimiter;
168 ++bufpos;
169 }
170 }
171
172 // Null terminate.
173 buffer[bufpos] = '\0';
174 return bufpos;
175}
176
Peter Thatcher1cf6f812015-05-15 10:40:45 -0700177std::string hex_encode(const std::string& str) {
178 return hex_encode(str.c_str(), str.size());
179}
180
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000181std::string hex_encode(const char* source, size_t srclen) {
182 return hex_encode_with_delimiter(source, srclen, 0);
183}
184
Yves Gerey665174f2018-06-19 15:03:05 +0200185std::string hex_encode_with_delimiter(const char* source,
186 size_t srclen,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000187 char delimiter) {
188 const size_t kBufferSize = srclen * 3;
189 char* buffer = STACK_ARRAY(char, kBufferSize);
Yves Gerey665174f2018-06-19 15:03:05 +0200190 size_t length =
191 hex_encode_with_delimiter(buffer, kBufferSize, source, srclen, delimiter);
henrikg91d6ede2015-09-17 00:24:34 -0700192 RTC_DCHECK(srclen == 0 || length > 0);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000193 return std::string(buffer, length);
194}
195
Yves Gerey665174f2018-06-19 15:03:05 +0200196size_t hex_decode(char* cbuffer,
197 size_t buflen,
198 const char* source,
199 size_t srclen) {
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000200 return hex_decode_with_delimiter(cbuffer, buflen, source, srclen, 0);
201}
202
Yves Gerey665174f2018-06-19 15:03:05 +0200203size_t hex_decode_with_delimiter(char* cbuffer,
204 size_t buflen,
205 const char* source,
206 size_t srclen,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000207 char delimiter) {
Henrik Grunell84879882018-03-23 15:33:03 +0100208 RTC_DCHECK(cbuffer); // TODO(kwiberg): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000209 if (buflen == 0)
210 return 0;
211
212 // Init and bounds check.
213 unsigned char* bbuffer = reinterpret_cast<unsigned char*>(cbuffer);
214 size_t srcpos = 0, bufpos = 0;
215 size_t needed = (delimiter) ? (srclen + 1) / 3 : srclen / 2;
216 if (buflen < needed)
217 return 0;
218
219 while (srcpos < srclen) {
220 if ((srclen - srcpos) < 2) {
221 // This means we have an odd number of bytes.
222 return 0;
223 }
224
225 unsigned char h1, h2;
226 if (!hex_decode(source[srcpos], &h1) ||
227 !hex_decode(source[srcpos + 1], &h2))
228 return 0;
229
230 bbuffer[bufpos++] = (h1 << 4) | h2;
231 srcpos += 2;
232
233 // Remove the delimiter if needed.
234 if (delimiter && (srclen - srcpos) > 1) {
235 if (source[srcpos] != delimiter)
236 return 0;
237 ++srcpos;
238 }
239 }
240
241 return bufpos;
242}
243
244size_t hex_decode(char* buffer, size_t buflen, const std::string& source) {
245 return hex_decode_with_delimiter(buffer, buflen, source, 0);
246}
Yves Gerey665174f2018-06-19 15:03:05 +0200247size_t hex_decode_with_delimiter(char* buffer,
248 size_t buflen,
249 const std::string& source,
250 char delimiter) {
251 return hex_decode_with_delimiter(buffer, buflen, source.c_str(),
252 source.length(), delimiter);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000253}
254
Yves Gerey665174f2018-06-19 15:03:05 +0200255size_t transform(std::string& value,
256 size_t maxlen,
257 const std::string& source,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000258 Transform t) {
259 char* buffer = STACK_ARRAY(char, maxlen + 1);
260 size_t length = t(buffer, maxlen + 1, source.data(), source.length());
261 value.assign(buffer, length);
262 return length;
263}
264
265std::string s_transform(const std::string& source, Transform t) {
Yves Gerey665174f2018-06-19 15:03:05 +0200266 // Ask transformation function to approximate the destination size (returns
267 // upper bound)
deadbeef37f5ecf2017-02-27 14:06:41 -0800268 size_t maxlen = t(nullptr, 0, source.data(), source.length());
Yves Gerey665174f2018-06-19 15:03:05 +0200269 char* buffer = STACK_ARRAY(char, maxlen);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000270 size_t len = t(buffer, maxlen, source.data(), source.length());
271 std::string result(buffer, len);
272 return result;
273}
274
// Splits |source| at every |delimiter| and stores the non-empty pieces in
// |fields|, replacing its previous contents. Returns the number of pieces.
size_t tokenize(const std::string& source,
                char delimiter,
                std::vector<std::string>* fields) {
  fields->clear();
  const std::string::size_type total = source.length();
  std::string::size_type begin = 0;
  while (begin < total) {
    std::string::size_type end = source.find(delimiter, begin);
    if (end == std::string::npos) {
      end = total;
    }
    // Consecutive delimiters produce empty pieces, which are skipped.
    if (end > begin) {
      fields->push_back(source.substr(begin, end - begin));
    }
    begin = end + 1;
  }
  return fields->size();
}
293
// Splits |source| at every |delimiter| and stores all pieces, including
// empty ones, in |fields|, replacing its previous contents. The result
// always holds one more piece than there are delimiters in |source|.
// Returns the number of pieces.
size_t tokenize_with_empty_tokens(const std::string& source,
                                  char delimiter,
                                  std::vector<std::string>* fields) {
  fields->clear();
  std::string::size_type begin = 0;
  for (;;) {
    const std::string::size_type end = source.find(delimiter, begin);
    if (end == std::string::npos) {
      break;
    }
    fields->push_back(source.substr(begin, end - begin));
    begin = end + 1;
  }
  // Whatever follows the last delimiter (possibly nothing) is the last piece.
  fields->push_back(source.substr(begin));
  return fields->size();
}
308
Yves Gerey665174f2018-06-19 15:03:05 +0200309size_t tokenize_append(const std::string& source,
310 char delimiter,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000311 std::vector<std::string>* fields) {
Yves Gerey665174f2018-06-19 15:03:05 +0200312 if (!fields)
313 return 0;
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000314
315 std::vector<std::string> new_fields;
316 tokenize(source, delimiter, &new_fields);
317 fields->insert(fields->end(), new_fields.begin(), new_fields.end());
318 return fields->size();
319}
320
Yves Gerey665174f2018-06-19 15:03:05 +0200321size_t tokenize(const std::string& source,
322 char delimiter,
323 char start_mark,
324 char end_mark,
325 std::vector<std::string>* fields) {
326 if (!fields)
327 return 0;
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000328 fields->clear();
329
330 std::string remain_source = source;
331 while (!remain_source.empty()) {
332 size_t start_pos = remain_source.find(start_mark);
Yves Gerey665174f2018-06-19 15:03:05 +0200333 if (std::string::npos == start_pos)
334 break;
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000335 std::string pre_mark;
336 if (start_pos > 0) {
337 pre_mark = remain_source.substr(0, start_pos - 1);
338 }
339
340 ++start_pos;
341 size_t end_pos = remain_source.find(end_mark, start_pos);
Yves Gerey665174f2018-06-19 15:03:05 +0200342 if (std::string::npos == end_pos)
343 break;
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000344
345 // We have found the matching marks. First tokenize the pre-mask. Then add
346 // the marked part as a single field. Finally, loop back for the post-mark.
347 tokenize_append(pre_mark, delimiter, fields);
348 fields->push_back(remain_source.substr(start_pos, end_pos - start_pos));
349 remain_source = remain_source.substr(end_pos + 1);
350 }
351
352 return tokenize_append(remain_source, delimiter, fields);
353}
354
// Splits |source| at the first run of |delimiter| characters.
// On success stores the text before the run in |*token| and the text after
// the run in |*rest|, and returns true. Returns false, leaving both outputs
// untouched, when |source| does not contain |delimiter|.
// |token| or |rest| may point at the same string as |source|.
bool tokenize_first(const std::string& source,
                    const char delimiter,
                    std::string* token,
                    std::string* rest) {
  // Find the first delimiter.
  const std::string::size_type left_pos = source.find(delimiter);
  if (left_pos == std::string::npos) {
    return false;
  }

  // Skip additional occurrences of delimiter. The search stops at the end
  // of the string, so it stays in bounds even when |delimiter| is '\0'.
  const std::string::size_type right_pos =
      source.find_first_not_of(delimiter, left_pos + 1);

  // Build both results before writing either output, so that an output
  // aliasing |source| cannot change the text that is still being read.
  std::string head = source.substr(0, left_pos);
  std::string tail;
  if (right_pos != std::string::npos) {
    tail = source.substr(right_pos);
  }
  token->swap(head);
  rest->swap(tail);
  return true;
}
375
// Concatenates the strings in |source|, inserting |delimiter| between
// consecutive elements. Returns an empty string for an empty vector.
std::string join(const std::vector<std::string>& source, char delimiter) {
  std::string joined;
  if (source.empty()) {
    return joined;
  }

  // Pre-allocate: all element characters plus one delimiter per gap.
  size_t total_length = source.size() - 1;
  for (const std::string& piece : source) {
    total_length += piece.length();
  }
  joined.reserve(total_length);

  // The first element takes no delimiter; every later one is preceded by it.
  std::vector<std::string>::const_iterator it = source.begin();
  joined += *it;
  for (++it; it != source.end(); ++it) {
    joined += delimiter;
    joined += *it;
  }
  return joined;
}
397
Yves Gerey665174f2018-06-19 15:03:05 +0200398size_t split(const std::string& source,
399 char delimiter,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000400 std::vector<std::string>* fields) {
henrikg91d6ede2015-09-17 00:24:34 -0700401 RTC_DCHECK(fields);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000402 fields->clear();
403 size_t last = 0;
404 for (size_t i = 0; i < source.length(); ++i) {
405 if (source[i] == delimiter) {
406 fields->push_back(source.substr(last, i - last));
407 last = i + 1;
408 }
409 }
410 fields->push_back(source.substr(last, source.length() - last));
411 return fields->size();
412}
413
// Returns "true" or "false" for |b|.
std::string ToString(const bool b) {
  if (b) {
    return std::string("true");
  }
  return std::string("false");
}
417
// Returns a std::string copy of the null-terminated string |s|.
std::string ToString(const char* const s) {
  std::string text(s);
  return text;
}
// Returns a copy of |s|.
std::string ToString(const std::string s) {
  std::string text(s);
  return text;
}
424
425std::string ToString(const short s) {
426 char buf[32];
427 const int len = std::snprintf(&buf[0], arraysize(buf), "%hd", s);
428 RTC_DCHECK_LE(len, arraysize(buf));
429 return std::string(&buf[0], len);
430}
431std::string ToString(const unsigned short s) {
432 char buf[32];
433 const int len = std::snprintf(&buf[0], arraysize(buf), "%hu", s);
434 RTC_DCHECK_LE(len, arraysize(buf));
435 return std::string(&buf[0], len);
436}
437std::string ToString(const int s) {
438 char buf[32];
439 const int len = std::snprintf(&buf[0], arraysize(buf), "%d", s);
440 RTC_DCHECK_LE(len, arraysize(buf));
441 return std::string(&buf[0], len);
442}
443std::string ToString(const unsigned int s) {
444 char buf[32];
445 const int len = std::snprintf(&buf[0], arraysize(buf), "%u", s);
446 RTC_DCHECK_LE(len, arraysize(buf));
447 return std::string(&buf[0], len);
448}
449std::string ToString(const long int s) {
450 char buf[32];
451 const int len = std::snprintf(&buf[0], arraysize(buf), "%ld", s);
452 RTC_DCHECK_LE(len, arraysize(buf));
453 return std::string(&buf[0], len);
454}
455std::string ToString(const unsigned long int s) {
456 char buf[32];
457 const int len = std::snprintf(&buf[0], arraysize(buf), "%lu", s);
458 RTC_DCHECK_LE(len, arraysize(buf));
459 return std::string(&buf[0], len);
460}
461std::string ToString(const long long int s) {
462 char buf[32];
463 const int len = std::snprintf(&buf[0], arraysize(buf), "%lld", s);
464 RTC_DCHECK_LE(len, arraysize(buf));
465 return std::string(&buf[0], len);
466}
467std::string ToString(const unsigned long long int s) {
468 char buf[32];
469 const int len = std::snprintf(&buf[0], arraysize(buf), "%llu", s);
470 RTC_DCHECK_LE(len, arraysize(buf));
471 return std::string(&buf[0], len);
472}
473
474std::string ToString(const double d) {
475 char buf[32];
476 const int len = std::snprintf(&buf[0], arraysize(buf), "%g", d);
477 RTC_DCHECK_LE(len, arraysize(buf));
478 return std::string(&buf[0], len);
479}
480
481std::string ToString(const void* const p) {
482 char buf[32];
483 const int len = std::snprintf(&buf[0], arraysize(buf), "%p", p);
484 RTC_DCHECK_LE(len, arraysize(buf));
485 return std::string(&buf[0], len);
486}
487
// Parses |s| as a boolean. Only the exact strings "true" and "false" are
// accepted. On success stores the value in |*b| and returns true; otherwise
// returns false and leaves |*b| untouched.
bool FromString(const std::string& s, bool* b) {
  const bool is_true = (s == "true");
  if (!is_true && s != "false") {
    return false;
  }
  *b = is_true;
  return true;
}
499
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000500} // namespace rtc