Add script to generate ssl_roots.h from Mozilla's CA bundle

Bug: webrtc:11710
Change-Id: I6eac35a111f89f3c74ce8c2ffc0ab607ad881be2
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/253220
Reviewed-by: Christoffer Jansson <jansson@google.com>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36131}
diff --git a/tools_webrtc/sslroots/generate_sslroots.py b/tools_webrtc/sslroots/generate_sslroots.py
index 2cd0c89..e2da029 100644
--- a/tools_webrtc/sslroots/generate_sslroots.py
+++ b/tools_webrtc/sslroots/generate_sslroots.py
@@ -11,19 +11,22 @@
 """This is a tool to transform a crt file into a C/C++ header.
 
 Usage:
-generate_sslroots.py cert_file.crt [--verbose | -v] [--full_cert | -f]
+python3 generate_sslroots.py certfile.pem [--verbose | -v] [--full_cert | -f]
 
 Arguments:
   -v  Print output while running.
   -f  Add public key and certificate name.  Default is to skip and reduce
       generated file size.
+
+The supported cert files are:
+  - Google: https://pki.goog/roots.pem
+  - Mozilla: https://curl.se/docs/caextract.html
 """
 
 import subprocess
 from optparse import OptionParser
 import os
 import re
-import string
 
 _GENERATED_FILE = 'ssl_roots.h'
 _PREFIX = '__generated__'
@@ -38,6 +41,7 @@
 _INT_TYPE = 'size_t'
 _CHAR_TYPE = 'unsigned char* const'
 _VERBOSE = 'verbose'
+_MOZILLA_BUNDLE_CHECK = '## Certificate data from Mozilla as of:'
 
 
 def main():
@@ -49,24 +53,34 @@
   if len(args) < 1:
     parser.error('No crt file specified.')
     return
-  root_dir = _SplitCrt(args[0], options)
-  _GenCFiles(root_dir, options)
+  root_dir, bundle_type = _SplitCrt(args[0], options)
+  _GenCFiles(root_dir, options, bundle_type)
   _Cleanup(root_dir)
 
 
 def _SplitCrt(source_file, options):
   sub_file_blocks = []
   label_name = ''
+  prev_line = None
   root_dir = os.path.dirname(os.path.abspath(source_file)) + '/'
   _PrintOutput(root_dir, options)
-  f = open(source_file)
-  for line in f:
-    if line.startswith('# Label: '):
+  lines = None
+  with open(source_file) as f:
+    lines = f.readlines()
+  mozilla_bundle = any(l.startswith(_MOZILLA_BUNDLE_CHECK) for l in lines)
+  for line in lines:
+    if line.startswith('#'):
+      if mozilla_bundle:
+        continue
+      if line.startswith('# Label: '):
+        sub_file_blocks.append(line)
+        label = re.search(r'\".*\"', line)
+        temp_label = label.group(0)
+        end = len(temp_label) - 1
+        label_name = _SafeName(temp_label[1:end])
+    if mozilla_bundle and line.startswith('==='):
       sub_file_blocks.append(line)
-      label = re.search(r'\".*\"', line)
-      temp_label = label.group(0)
-      end = len(temp_label) - 1
-      label_name = _SafeName(temp_label[1:end])
+      label_name = _SafeName(prev_line)
     elif line.startswith('-----END CERTIFICATE-----'):
       sub_file_blocks.append(line)
       new_file_name = root_dir + _PREFIX + label_name + _EXTENSION
@@ -78,13 +92,13 @@
       sub_file_blocks = []
     else:
       sub_file_blocks.append(line)
-  f.close()
-  return root_dir
+    prev_line = line
+  return root_dir, 'Mozilla' if mozilla_bundle else 'Google'
 
 
-def _GenCFiles(root_dir, options):
+def _GenCFiles(root_dir, options, bundle_type):
   output_header_file = open(root_dir + _GENERATED_FILE, 'w')
-  output_header_file.write(_CreateOutputHeader())
+  output_header_file.write(_CreateOutputHeader(bundle_type))
   if options.full_cert:
     subject_name_list = _CreateArraySectionHeader(_SUBJECT_NAME_VARIABLE,
                                                   _CHAR_TYPE, options)
@@ -151,7 +165,7 @@
   return filtered_output, cert_size
 
 
-def _CreateOutputHeader():
+def _CreateOutputHeader(bundle_type):
   output = ('/*\n'
             ' *  Copyright 2004 The WebRTC Project Authors. All rights '
             'reserved.\n'
@@ -166,20 +180,18 @@
             ' */\n\n'
             '#ifndef RTC_BASE_SSL_ROOTS_H_\n'
             '#define RTC_BASE_SSL_ROOTS_H_\n\n'
-            '// This file is the root certificates in C form that are needed to'
-            ' connect to\n// Google.\n\n'
-            '// It was generated with the following command line:\n'
-            '// > vpython3 tools_webrtc/sslroots/generate_sslroots.py'
-            '\n//    https://pki.goog/roots.pem\n\n'
+            '// This file is the root certificates in C form.\n\n'
+            '// It was generated with the following script:\n'
+            '// tools_webrtc/sslroots/generate_sslroots.py'
+            ' %s_CA_bundle.pem\n\n'
             '// clang-format off\n'
             '// Don\'t bother formatting generated code,\n'
-            '// also it would breaks subject/issuer lines.\n\n')
+            '// also it would breaks subject/issuer lines.\n\n' % bundle_type)
   return output
 
 
 def _CreateOutputFooter():
-  output = ('// clang-format on\n\n#endif  // RTC_BASE_SSL_ROOTS_H_\n')
-  return output
+  return '// clang-format on\n\n#endif  // RTC_BASE_SSL_ROOTS_H_\n'
 
 
 def _CreateArraySectionHeader(type_name, type_type, options):
@@ -197,11 +209,11 @@
 
 
 def _SafeName(original_file_name):
-  bad_chars = ' -./\\()áéíőú'
+  bad_chars = ' -./\\()áéíőú\r\n'
   replacement_chars = ''
   for _ in bad_chars:
     replacement_chars += '_'
-  translation_table = string.maketrans(bad_chars, replacement_chars)
+  translation_table = str.maketrans(bad_chars, replacement_chars)
   return original_file_name.translate(translation_table)