Import latest content as of 12/7 and clean up export.
Bug: 1267345
Change-Id: Ied6c3371e0a9ad38861fe82188f9bf371f22304d
Reviewed-on: https://chromium-review.googlesource.com/c/website/+/3322308
Auto-Submit: Dirk Pranke <dpranke@google.com>
Reviewed-by: Gary Tong <gatong@chromium.org>
Commit-Queue: Gary Tong <gatong@chromium.org>
Commit-Queue: Dirk Pranke <dpranke@google.com>
diff --git a/scripts/export.py b/scripts/export.py
index 45cacea..4e53aea 100755
--- a/scripts/export.py
+++ b/scripts/export.py
@@ -63,6 +63,7 @@
parser.add_argument('-v', '--verbose', action='count')
parser.add_argument('--max_results', type=int, default=5000)
parser.add_argument('--start-index', type=int, default=1)
+ parser.add_argument('--paths-to-skip')
parser.add_argument('--path-list')
parser.add_argument('path', nargs='*')
args = parser.parse_args()
@@ -78,6 +79,13 @@
else:
paths_to_export = []
+ if args.paths_to_skip:
+ paths_to_skip = set(common.read_paths(args.paths_to_skip))
+ else:
+ paths_to_skip = set(
+ common.read_paths(os.path.join(common.REPO_DIR,
+ 'scripts', 'paths_to_skip.txt')))
+
max_input_mtime = max(os.stat(__file__).st_mtime,
os.stat(common.__file__).st_mtime,
os.stat(html2markdown.__file__).st_mtime)
@@ -110,11 +118,16 @@
'announcementspage', 'filecabinet'):
metadata = _metadata(entry, entries)
path = _path(entry, entries)
- exported_pages.add(path.rstrip('/') or '/')
+
+ if path in paths_to_skip:
+ continue
+ exported_pages.add(path)
elif entry['kind'] == 'attachment':
metadata = {}
path = entry['url'].replace(
- 'https://sites.google.com/a/chromium.org/dev/', '/')
+ 'https://sites.google.com/a/chromium.org/dev/', '/').rstrip('/')
+ if path in paths_to_skip:
+ continue
else:
continue
if not paths_to_export or (path in paths_to_export):
@@ -128,10 +141,6 @@
if did_update:
updated += 1
- if ret == 0:
- common.write_text_file(
- os.path.join(common.SITE_DIR, 'pages.json'),
- json.dumps(sorted(exported_pages), indent=2) + '\n')
print('updated %d entries' % updated)
return ret
@@ -197,6 +206,9 @@
md_sio.write('\n\n')
_write_listitems(md_sio, entry)
content = md_sio.getvalue()
+ content = content.replace(
+ 'chromium.googlesource.com/chromium/src/+/master/',
+ 'chromium.googlesource.com/chromium/src/+/HEAD/')
content = content.replace(' \b\b\b\b', '')
did_update = common.write_if_changed(path, content, mode='w')
@@ -209,9 +221,10 @@
# TODO: implement me.
pass
elif entry['kind'] == 'attachment':
- if ':' in task:
- task = _URLConverter().Translate(task)
path = '%s%s' % (common.SITE_DIR, task)
+ path = path.replace(':', '_')
+ path = path.replace('%20', ' ')
+ path = path.replace('%2B', '+')
if task in (
'/developers/design-documents/network-stack/cookiemonster/CM-method-calls-new.png',
'/developers/design-documents/cookie-split-loading/objects.png',
@@ -289,7 +302,8 @@
path = entries[parent_id]['page_name'] + '/' + path
parent_id = entries[parent_id].get('parent_id')
- return '/' + path
+ path = ('/' + path).rstrip('/') or '/'
+ return path
def _metadata(entry, entries):