Add scripts that will export the content for the site.

This copies over the scripts from /experimental/website
that export the content from Google Sites; they will
enable us to check the Markdown content into this repo.

However, the first pass of the exported content will
land in a follow-on CL.

Bug: 1260171
Change-Id: Ia257cbbfde089385ca11a391eb4779a0b9bed0ee
Reviewed-on: https://chromium-review.googlesource.com/c/website/+/3258094
Reviewed-by: Struan Shrimpton <sshrimp@google.com>
Commit-Queue: Dirk Pranke <dpranke@google.com>
diff --git a/scripts/common.py b/scripts/common.py
new file mode 100644
index 0000000..5780139
--- /dev/null
+++ b/scripts/common.py
@@ -0,0 +1,232 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
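+"""Common utilities shared by the scripts that export the site content."""
+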
+import multiprocessing
+import os
+import queue
+import sys
+import threading
+import time
+
+
+site = 'https://www.chromium.org'
+
+REPO_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+SOURCE_DIR = 'site'
+BUILD_DIR = 'build'
+DEFAULT_TEMPLATE = '/_includes/page.html'
+
+
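+# Other URL prefixes that can refer to the same pages as `site`; the
+# exporters presumably use these to recognize links back into the
+# site's own content. The gstatic.com entries appear to be the
+# static-asset mirrors that Google Sites uses.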
+alternates = [
+    site,
+    'http://dev.chromium.org',
+    'https://dev.chromium.org',
+    'https://sites.google.com/a/chromium.org/dev',
+    'https://ssl.gstatic.com/sites/p/058338',
+    'http://www.gstatic.com/sites/p/058338',
+]
+
+
+def cpu_count():
+    return multiprocessing.cpu_count()
+
+
+def read_text_file(path):
+    return read_binary_file(path).decode('utf-8')
+
+
+def read_binary_file(path):
+    with open(path, 'rb') as fp:
+        return fp.read()
+
+
+def write_binary_file(path, content):
+    with open(path, 'wb') as fp:
+        return fp.write(content)
+
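+
+# read_paths() reads a plain-text list of paths, one per line; a '#'
+# starts a comment and blank lines are ignored. A hypothetical input
+# file:
+#
+#     /developers/design-documents   # already migrated
+#     /chromium-os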
+def read_paths(path):
+    paths = set()
+    with open(path) as fp:
+        for line in fp.readlines():
+            idx = line.find('#')
+            if idx != -1:
+                line = line[:idx]
+            line = line.strip()
+            if line:
+                paths.add(line)
+    return paths
+
+
+def to_path(page, top=SOURCE_DIR, ext='.md'):
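+    """Map a page URL path to the corresponding file path under `top`.
+
+    For example (hypothetical layout): '/foo' maps to '/foo/index.md'
+    when site/foo is a directory, stays '/foo' when that exact file
+    exists, maps to '/foo.md' when only that file exists, and is
+    returned unchanged when nothing matches.
+    """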
+    page = page.strip()
+    if page == '/':
+        page = ''
+    if os.path.isdir(top + page):
+        return page + '/index' + ext
+    if os.path.exists(top + page):
+        return page
+    if os.path.exists(top + page + ext):
+        return page + ext
+    return page
+
+
+def walk(top, skip=None):
+    skip = skip or set()
+    paths = set()
+    for dirpath, dnames, fnames in os.walk(top):
+        # Iterate over a copy, since removing entries from dnames while
+        # iterating over it directly would skip the entry after each
+        # removal. os.walk() honors in-place edits to dnames to prune
+        # the traversal.
+        for dname in list(dnames):
+            rpath = os.path.relpath(os.path.join(dirpath, dname), top)
+            if rpath in skip or dname.startswith('.'):
+                dnames.remove(dname)
+        for fname in fnames:
+            rpath = os.path.relpath(os.path.join(dirpath, fname), top)
+            if rpath in skip or fname.startswith('.'):
+                continue
+            paths.add(rpath)
+    return sorted(paths)
+
+
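+# Skipping the write when the content is unchanged keeps the file's
+# mtime stable, which presumably keeps should_update() from reporting
+# spurious rebuilds.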
+def write_if_changed(path, content):
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    if os.path.exists(path):
+        with open(path, 'rb') as fp:
+            old_content = fp.read()
+            if content == old_content:
+                return False
+    write_binary_file(path, content)
+    return True
+
+
+def should_update(dest_page, source_pages):
+    if not os.path.exists(dest_page):
+        return True
+
+    max_source_mtime = max(os.stat(p).st_mtime for p in source_pages)
+    return max_source_mtime > os.stat(dest_page).st_mtime
+
+
+class JobQueue:
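+    """Farms tasks out to a pool of worker threads or processes.
+
+    Progress is printed to stdout as tasks finish. The results()
+    generator must be consumed to completion: the workers are only
+    told to exit after every requested task has finished.
+
+    A minimal usage sketch (`handle_page` is a hypothetical handler;
+    a handler is called as handler(task, obj) and returns an
+    (error, result) pair, where a falsy error means success):
+
+        q = JobQueue(handler=handle_page, jobs=cpu_count())
+        for page in pages:
+            q.request(page, page)
+        for task, err, resp in q.results():
+            if err:
+                failures.append(task)
+    """
+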
+    def __init__(self, handler, jobs, multiprocess=None):
+        self.handler = handler
+        self.jobs = jobs
+        self.pending = set()
+        self.started = set()
+        self.finished = set()
+        if multiprocess is None:
+            # Default to separate processes when running jobs in
+            # parallel; threads would serialize CPU-bound work on the
+            # GIL. A single job can run on a plain thread.
+            self.multiprocess = (jobs > 1)
+        else:
+            self.multiprocess = multiprocess
+        if self.multiprocess:
+            self._request_q = multiprocessing.Queue()
+            self._response_q = multiprocessing.Queue()
+        else:
+            self._request_q = queue.Queue()
+            self._response_q = queue.Queue()
+        self._start_time = None
+        self._threads = []
+        self._last_msg = None
+        self._isatty = sys.stdout.isatty()
+
+    def all_tasks(self):
+        return self.pending | self.started | self.finished
+
+    def request(self, task, obj):
+        self.pending.add(task)
+        self._request_q.put(('handle', task, obj))
+
+    def results(self):
+        self._start_time = time.time()
+        self._spawn()
+
+        while self.pending | self.started:
+            msg, task, res, obj = self._response_q.get()
+
+            if msg == 'started':
+                self._mark_started(task)
+            elif msg == 'finished':
+                self._mark_finished(task, res)
+                yield (task, res, obj)
+            else:
+                raise AssertionError
+
+        for _ in self._threads:
+            self._request_q.put(('exit', None, None))
+        for thread in self._threads:
+            thread.join()
+        if self._isatty:
+            print()
+
+    def _spawn(self):
+        args = (self._request_q, self._response_q, self.handler)
+        for i in range(self.jobs):
+            if self.multiprocess:
+                thread = multiprocessing.Process(target=_worker,
+                                                 name='worker-%d' % i,
+                                                 args=args)
+            else:
+                thread = threading.Thread(target=_worker,
+                                          name='worker-%d' % i,
+                                          args=args)
+            self._threads.append(thread)
+            thread.start()
+
+    def _mark_started(self, task):
+        self.pending.remove(task)
+        self.started.add(task)
+
+    def _mark_finished(self, task, res):
+        self.started.remove(task)
+        self.finished.add(task)
+        if res:
+            self._print('%s failed:' % task, truncate=False)
+            print()
+            print(res)
+        else:
+            self._print('%s' % task)
+        sys.stdout.flush()
+
+    def _print(self, msg, truncate=True):
+        if not self._isatty:
+            print('[%d/%d] %s' % (len(self.finished), len(self.all_tasks()),
+                                  msg))
+            return
+
+        if len(msg) > 76 and truncate:
+            msg = msg[:76] + '...'
+        if self._last_msg is not None:
+            print('\r', end='')
+        msg = '[%d/%d] %s' % (len(self.finished), len(self.all_tasks()), msg)
+        # stdout is known to be a tty here (see the early return
+        # above), so never append a newline.
+        print(msg, end='')
+        if self._last_msg is not None and len(self._last_msg) > len(msg):
+            print(' ' * (len(self._last_msg) - len(msg)), end='')
+            print('\r', end='')
+        self._last_msg = msg
+
+
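+# Worker loop shared by the thread and process backends. Requests are
+# ('handle', task, obj) or ('exit', None, None) tuples; the worker
+# posts ('started', task, '', None) when it picks a task up and
+# ('finished', task, res, resp) when the handler returns.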
+def _worker(request_q, response_q, handler):
+    while True:
+        message, task, obj = request_q.get()
+        if message == 'exit':
+            break
+        elif message == 'handle':
+            response_q.put(('started', task, '', None))
+            res, resp = handler(task, obj)
+            response_q.put(('finished', task, res, resp))
+        else:
+            raise AssertionError