GS Cache: list member of a tar file
This change adds a feature of GS cache server to list all members of a
tar file as lines of CSV, e.g.
<filename>,<record start>,<record size>,<record end>,<content start>,<content size>,<content end>
...
This feature is useful when we extract files from a tar by using HTTP
Range header, e.g.
curl -r <content start>-<content end> http://gs-cache/download/path/to/file.tar
Another usecase is extracting some of files and re-create another tar
file:
for f in file_info_list:
reader.seek(f.record_start)
writer.write(reader.read(f.record_size))
BUG=chromium:824580
TEST=Ran unit tests.
Change-Id: I2630a04795e16eb35dcdee46c17db64ba380ca09
Reviewed-on: https://chromium-review.googlesource.com/1047959
Commit-Ready: ChromeOS CL Exonerator Bot <chromiumos-cl-exonerator@appspot.gserviceaccount.com>
Tested-by: Congbin Guo <guocb@chromium.org>
Reviewed-by: Congbin Guo <guocb@chromium.org>
diff --git a/bin/gs_archive_server b/bin/gs_archive_server
index 06cd7ec..a0b5e56 100755
--- a/bin/gs_archive_server
+++ b/bin/gs_archive_server
@@ -9,10 +9,4 @@
readonly homedir=$(cd "$bindir"/../gs_cache; pwd)
export PYTHONPATH=$homedir
-# Run the server, or run tests
-if [[ $(basename "$0") == gs_archive_server_test ]]; then
- exec vpython -vpython-spec $homedir/.vpython -m pytest \
- "$homedir"/gs_archive_server_test.py "$@"
-else
- exec vpython -vpython-spec $homedir/.vpython -m gs_archive_server "$@"
-fi
+exec vpython -vpython-spec $homedir/.vpython -m gs_archive_server "$@"
diff --git a/bin/gs_archive_server_test b/bin/gs_archive_server_test
deleted file mode 120000
index d349ad7..0000000
--- a/bin/gs_archive_server_test
+++ /dev/null
@@ -1 +0,0 @@
-gs_archive_server
\ No newline at end of file
diff --git a/bin/gs_archive_server_test b/bin/gs_archive_server_test
new file mode 100755
index 0000000..e551eaa
--- /dev/null
+++ b/bin/gs_archive_server_test
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Run Google Storage archive server from inside virtual environment.
+set -eu
+readonly bindir=$(dirname -- "$(readlink -e -- "$0")")
+readonly homedir=$(cd "$bindir"/../gs_cache; pwd)
+export PYTHONPATH=$homedir
+
+exec vpython -vpython-spec $homedir/.vpython -m pytest \
+ "$homedir"/*.py "$homedir"/tests "$@"
diff --git a/gs_cache/.vpython b/gs_cache/.vpython
index 95ff3f5..2855ba9 100644
--- a/gs_cache/.vpython
+++ b/gs_cache/.vpython
@@ -21,16 +21,31 @@
>
wheel: <
+ name: "infra/python/wheels/coverage/${vpython_platform}"
+ version: "version:4.5.1"
+>
+
+wheel: <
name: "infra/python/wheels/funcsigs-py2_py3"
version: "version:1.0.2"
>
wheel: <
+ name: "infra/python/wheels/mock-py2_py3"
+ version: "version:2.0.0"
+>
+
+wheel: <
name: "infra/python/wheels/more-itertools-py2_py3"
version: "version:4.1.0"
>
wheel: <
+ name: "infra/python/wheels/pbr-py2_py3"
+ version: "version:3.0.0"
+>
+
+wheel: <
name: "infra/python/wheels/pluggy-py2_py3"
version: "version:0.6.0"
>
@@ -51,11 +66,21 @@
>
wheel: <
+ name: "infra/python/wheels/pytest-cov-py2_py3"
+ version: "version:2.5.1"
+>
+
+wheel: <
name: "infra/python/wheels/pytz-py2_py3"
version: "version:2018.4"
>
wheel: <
+ name: "infra/python/wheels/requests-py2_py3"
+ version: "version:2.13.0"
+>
+
+wheel: <
name: "infra/python/wheels/six-py2_py3"
version: "version:1.11.0"
>
diff --git a/gs_cache/gs_archive_server.py b/gs_cache/gs_archive_server.py
index 958904f..5884cee 100644
--- a/gs_cache/gs_archive_server.py
+++ b/gs_cache/gs_archive_server.py
@@ -17,19 +17,35 @@
from __future__ import print_function
import argparse
+import contextlib
+import functools
import os
+import StringIO
+import subprocess
import sys
+import tempfile
+import urllib
+import urlparse
import cherrypy
+import requests
+import tarfile_utils
from chromite.lib import cros_logging as logging
from chromite.lib import gs
# some http status codes
+_HTTP_BAD_REQUEST = 400
_HTTP_UNAUTHORIZED = 401
_HTTP_NOT_FOUND = 404
_HTTP_SERVICE_UNAVAILABLE = 503
+_READ_BUFFER_SIZE_BYTES = 1024 * 1024 # 1 MB
+_WRITE_BUFFER_SIZE_BYTES = 1024 * 1024 # 1 MB
+
+# The max size of temporary spool file in memory.
+_SPOOL_FILE_SIZE_BYTES = 100 * 1024 * 1024 # 100 MB
+
_logger = logging.getLogger(__file__)
@@ -39,13 +55,175 @@
_logger.log(level, extra=cherrypy.request.headers, *args, **kwargs)
-class GSArchiveServer(object):
+def _check_file_extension(filename, ext_names=None):
+ """Check the file name and, optionally, the ext name.
+
+ Args:
+ filename: The file name to be checked.
+    ext_names: The valid extension names that |filename| should have.
+
+ Returns:
+ The filename if the check is good.
+
+ Raises:
+ ValueError: Raised if the checking failed.
+ """
+ if not filename:
+ raise ValueError('File name is required.')
+
+ for ext_name in ext_names or []:
+ if filename.endswith(ext_name):
+ break
+ else:
+ raise ValueError("Extension name of '%s' isn't in %s" % (filename,
+ ext_names))
+ return filename
+
+
+def _to_cherrypy_error(func):
+ """A decorator to convert Exceptions raised to proper cherrypy.HTTPError."""
+ @functools.wraps(func)
+ def func_wrapper(*args, **kwargs):
+ try:
+ return func(*args, **kwargs)
+ except requests.HTTPError as err:
+ # cherrypy.HTTPError wraps the error messages with HTML tags. But
+ # requests.HTTPError also do same work. So return the error message
+ # directly.
+ cherrypy.response.status = err.response.status_code
+ return err.response.content
+ except ValueError as err:
+ # The exception message is just a plain text, so wrap it with
+ # cherrypy.HTTPError to have necessary HTML tags
+ raise cherrypy.HTTPError(_HTTP_BAD_REQUEST, err.message)
+ return func_wrapper
+
+
+class _CachingServer(object):
+ r"""The interface of caching server for GsArchiveServer.
+
+ This class provides an interface to work with the caching server (usually a
+ reversed http proxy server) which caches all intermediate results, e.g.
+ downloaded files, etc. and serves to GsArchiveServer.
+
+ The relationship of this class and other components is:
+ /-------------(python function call)-----------------------\
+ | |
+ v |
+ _CachingServer --(http/socket)--> NGINX --(http/socket)--> GsArchiveServer
+ ^ |
+ | (https)
+ End user, DUTs ---(http)------------/ |
+ V
+ GoogleStorage
+ """
+
+ def __init__(self, url):
+ """Constructor
+
+ Args:
+ url: A tuple of URL scheme and netloc.
+
+ Raises:
+ ValueError: Raised when input URL in wrong format.
+ """
+ self._url = url
+
+ def _call(self, action, path, args=None, headers=None):
+ """Helper function to generate all RPC calls to the proxy server."""
+ url = urlparse.urlunsplit(self._url + ('%s/%s' % (action, path),
+ urllib.urlencode(args or {}), None))
+ _log('Sending request to proxy: %s', url)
+ rsp = requests.get(url, headers=headers, stream=True)
+ _log('Proxy response %s', rsp.status_code)
+ rsp.raise_for_status()
+ return rsp
+
+ def download(self, path, headers=None):
+ """Call download RPC."""
+ return self._call('download', path, headers=headers)
+
+
+class GsArchiveServer(object):
"""The backend of Google Storage Cache server."""
- def __init__(self):
+ def __init__(self, caching_server):
self._gsutil = gs.GSContext()
+ self._caching_server = caching_server
@cherrypy.expose
+ @_to_cherrypy_error
+ def list_member(self, *args):
+    """Get file list of a tar archive in CSV format.
+
+ An example, GET /list_member/bucket/path/to/file.tar
+ The output is in format of:
+ <file name>,<data1>,<data2>,...<data6>
+ <file name>,<data1>,<data2>,...<data6>
+ ...
+
+ Details:
+ <file name>: The file name of the member, in URL percent encoding, e.g.
+ path/to/file,name -> path/to/file%2Cname.
+ <data1>: File record start offset, in bytes.
+ <data2>: File record size, in bytes.
+ <data3>: File record end offset, in bytes.
+ <data4>: File content start offset, in bytes.
+ <data5>: File content size, in bytes.
+ <data6>: File content end offset, in bytes.
+
+ This is an internal RPC and shouldn't be called by end user!
+
+ Args:
+ *args: All parts of tar file name (must end with '.tar').
+
+ Returns:
+ The generator of CSV stream.
+ """
+ # TODO(guocb): new parameter to filter the list
+
+ archive = _check_file_extension('/'.join(args), ext_names=['.tar'])
+ rsp = self._caching_server.download(archive, cherrypy.request.headers)
+ cherrypy.response.headers['Content-Type'] = 'text/csv'
+
+ # We run tar command to get member list of a tar file (python tarfile module
+ # is too slow). Option '--block-number/-R' of tar prints out the starting
+ # block number for each file record.
+ _log('list member of the tar %s', archive)
+ tar_tv = tempfile.SpooledTemporaryFile(max_size=_SPOOL_FILE_SIZE_BYTES)
+ tar = subprocess.Popen(['tar', 'tv', '--block-number'],
+ stdin=subprocess.PIPE, stdout=tar_tv)
+ for chunk in rsp.iter_content(_READ_BUFFER_SIZE_BYTES):
+ tar.stdin.write(chunk)
+
+ tar.wait()
+
+ def _tar_member_list():
+ with tar_tv, contextlib.closing(StringIO.StringIO()) as stream:
+ tar_tv.seek(0)
+ for info in tarfile_utils.list_tar_members(tar_tv):
+ # some pre-computation for easier use of clients
+ content_end = info.content_start + info.size - 1
+ record_end = info.record_start + info.record_size - 1
+
+ # encode file name using URL percent encoding, so ',' => '%2C'
+ stream.write('%s,%d,%d,%d,%d,%d,%d\n' % (
+ urllib.quote(info.filename), info.record_start, info.record_size,
+ record_end, info.content_start, info.size, content_end))
+
+ if stream.tell() > _WRITE_BUFFER_SIZE_BYTES:
+ yield stream.getvalue()
+ stream.seek(0)
+
+ if stream.tell():
+ yield stream.getvalue()
+
+ _log('list_member done')
+
+ return _tar_member_list()
+
+ @cherrypy.expose
+ @_to_cherrypy_error
def download(self, *args):
"""Download a file from Google Storage.
@@ -58,7 +236,7 @@
Returns:
The stream of downloaded file.
"""
- path = 'gs://%s' % '/'.join(args)
+ path = 'gs://%s' % _check_file_extension('/'.join(args))
_log('Downloading %s', path, level=logging.INFO)
try:
@@ -85,14 +263,40 @@
# pylint:disable=protected-access
download._cp_config = {'response.stream': True}
+ list_member._cp_config = {'response.stream': True}
+
+
+def _url_type(input_string):
+ """Ensure |input_string| is a valid URL and convert to target type.
+
+ The target type is a tuple of (scheme, netloc).
+ """
+ split_result = urlparse.urlsplit(input_string)
+ if not split_result.scheme:
+ input_string = 'http://%s' % input_string
+
+ split_result = urlparse.urlsplit(input_string)
+ if not split_result.scheme or not split_result.netloc:
+ raise argparse.ArgumentTypeError('Wrong URL format: %s' % input_string)
+
+ return split_result.scheme, split_result.netloc
def parse_args(argv):
"""Parse arguments."""
- parser = argparse.ArgumentParser(description=__doc__)
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=__doc__)
parser.add_argument('-s', '--socket', help='Unix domain socket to bind')
parser.add_argument('-p', '--port', type=int, default=8080,
help='Port number to listen, default: %(default)s.')
+ # TODO(guocb): support Unix domain socket
+ parser.add_argument(
+ '-c', '--caching-server', required=True, type=_url_type,
+ help='URL of the proxy server. Valid format is '
+ '[http://]{<hostname>|<IP>}[:<port_number>]. When skipped, the default '
+ 'scheme is http and port number is 80. Any other components in URL are '
+ 'ignored.')
return parser.parse_args(argv)
@@ -124,7 +328,7 @@
cherrypy.config.update({'server.socket_port': args.port,
'server.socket_host': '127.0.0.1'})
- cherrypy.quickstart(GSArchiveServer())
+ cherrypy.quickstart(GsArchiveServer(_CachingServer(args.caching_server)))
if __name__ == '__main__':
diff --git a/gs_cache/gs_archive_server_test.py b/gs_cache/gs_archive_server_test.py
deleted file mode 100644
index c6047f5..0000000
--- a/gs_cache/gs_archive_server_test.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright 2018 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-
-"""Tests for gs_archive_server."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import unittest
-
-import cherrypy
-from cherrypy.test import helper
-
-import gs_archive_server
-from chromite.lib import gs
-
-_DIR = '/gs_archive_server_test'
-# some REAL files and info on Google Storage
-_TEST_DATA = {
- 'a_plain_file': {
- 'path': '%s/README.md' % _DIR,
- 'mime': 'application/octet-stream',
- 'size': 139
- },
-}
-
-
-def access_to_gs():
- """Skip some tests if we cannot access google storage."""
- return gs.GSContext()._TestGSLs() # pylint:disable=protected-access
-
-
-@unittest.skipUnless(access_to_gs(), 'Have no access to google storage')
-class UnmockedGSArchiveServerTest(helper.CPWebCase):
- """Some integration tests using cherrypy test framework."""
- @staticmethod
- def setup_server():
- """An API used by cherrypy to setup test environment."""
- cherrypy.tree.mount(gs_archive_server.GSArchiveServer())
-
- def test_download_a_file(self):
- """Test normal files downloading."""
- tested_file = _TEST_DATA['a_plain_file']
- self.getPage('/download%(path)s' % tested_file)
- self.assertStatus(200)
- self.assertHeader('Content-Type', tested_file['mime'])
- self.assertEquals(len(self.body), tested_file['size'])
-
- def test_download_a_non_existing_file(self):
- """Test downloading non-existing files."""
- self.getPage('/download/chromeos-images-archive/existing/file')
- self.assertStatus(404)
-
- def test_download_against_unauthorized_bucket(self):
- """Test downloading from unauthorized bucket."""
- self.getPage('/download/another_bucket/file')
- self.assertStatus(401)
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/gs_cache/pytest.ini b/gs_cache/pytest.ini
new file mode 100644
index 0000000..3cf8346
--- /dev/null
+++ b/gs_cache/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+addopts =
+ --doctest-modules
+ --cov gs_cache
diff --git a/gs_cache/tarfile_utils.py b/gs_cache/tarfile_utils.py
new file mode 100644
index 0000000..200ceb5
--- /dev/null
+++ b/gs_cache/tarfile_utils.py
@@ -0,0 +1,162 @@
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Utils for manipulating tar format archives.
+
+We use tar command to manipulate tar file other than using Python tarfile module
+because that module is very slow in the case of large file.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+
+from chromite.lib import cros_logging as logging
+
+_logger = logging.getLogger(__name__)
+
+
+def _round_up_to_512(number):
+ """Up round the given |number| to smallest multiple of 512.
+
+ Examples:
+ >>> for n in (0, 1, 512, 1025):
+ ... _round_up_to_512(n)
+ 0
+ 512
+ 512
+ 1536
+
+ Args:
+ number: Zero or positive integer.
+
+ Returns:
+ The smallest multiple of 512.
+ """
+ return (number + 511) & -512
+
+
+def _get_command_result_from_tar_tvR(an_output_line):
+ """Get an object of _TarListCommandResult from one line of `tar tvR` output.
+
+ Args:
+ an_output_line: One line of `tar tvR` output. Trailing '\n' is acceptable.
+ The last line of `tar tvR` is acceptable.
+
+ Returns:
+ An object of _TarListCommandResult.
+ """
+ separators = re.compile('[ \t:]+')
+ fields_num = len(_TarListCommandResult._fields)
+ fields = re.split(separators, an_output_line.rstrip('\n'),
+ maxsplit=fields_num - 1)
+ try:
+ return _TarListCommandResult._make(fields)
+ except TypeError:
+ # The last line of `tar tvR` hasn't enough fields. Fill with fake data.
+ _logger.debug('This should be the last line of `tar tvR`: %s',
+ an_output_line)
+ fields.extend(_TarListCommandResult._fields[len(fields):])
+ return _TarListCommandResult._make(fields)
+
+
+def _block_to_bytes(block_num):
+ """Get offset of the block |block_num| in bytes, i.e. times 512"""
+ return block_num << 9 # * 512
+
+
+# The tuple of tar member information to be returned to caller.
+# Fields:
+# filename: The file name of the tar member.
+# record_start: The zero-based start offset of the file record, in bytes.
+# record_size: The size of the file record, in bytes.
+# content_start: The zero-based start offset of the file content, in bytes.
+# size: The size of the file content, in bytes.
+TarMemberInfo = collections.namedtuple(
+ 'TarMemberInfo', ('filename', 'record_start', 'record_size',
+ 'content_start', 'size'))
+
+
+class _TarListCommandResult(collections.namedtuple(
+ '_TarListCommandResult', ('block', 'block_num', 'mode', 'ownership',
+ 'size_str', 'date', 'hour', 'min', 'filename'))):
+ """Information of each member in a Tar archive.
+
+ This class using the output of command `tar tvR` to compute more information
+ we need, e.g. file content start offset, etc.
+
+ The output of `tar tvR` is like:
+ block 0: -rw-r--r-- user/group <size> <date> <time> <file name>
+ ...
+ block 7: ** Block of NULs **
+ """
+
+ @property
+ def record_start(self):
+ """Start offset of the file record, in bytes."""
+ return _block_to_bytes(int(self.block_num))
+
+ @property
+ def size(self):
+ return int(self.size_str)
+
+
+def _get_prev_content_start(cur_record_start, prev_file):
+ """Deduct prev file content information from current file record information.
+
+ In tar format, each file record has a header and followed by file content.
+ Both header and file content are rounded up to 512 Bytes. The header length is
+ variable, but we can get the current file content starting offset by
+ subtracting up rounded file size from next file header starting offset, i.e.
+
+ current_offset = block(next_file) * 512 - round_up_to_512(current_size)
+
+ |********|************************.......|********|****
+ | header | content | header |
+ | |<----- prev_size ----->|
+ | |<- prev_size round up to 512 ->|
+ ^prev_content_start ^cur_record_start
+
+ Args:
+ cur_record_start: The zero-based start position of current file record, in
+ bytes.
+ prev_file: An instance of _TarListCommandResult which has size of the
+ previous file.
+
+ Returns:
+ The zero-based start position of previous file content, in bytes.
+ """
+ return cur_record_start - _round_up_to_512(prev_file.size)
+
+
+def list_tar_members(tar_tvR_output):
+ """List the members of a tar with information.
+
+ Yield each member of the tar archive with information of record start/size,
+ content start/size, etc.
+
+ Args:
+ tar_tvR_output: The output of command 'tar tvR'. Option 'R' print out the
+ starting block number of the file record.
+
+ Yields:
+ A tuple of data described above in the same order.
+ """
+ prev_file = _get_command_result_from_tar_tvR(tar_tvR_output.readline())
+
+ for line in tar_tvR_output:
+ cur_file = _get_command_result_from_tar_tvR(line)
+
+ prev_content_start = _get_prev_content_start(cur_file.record_start,
+ prev_file)
+ prev_record_size = cur_file.record_start - prev_file.record_start
+
+ yield TarMemberInfo(prev_file.filename,
+ prev_file.record_start, prev_record_size,
+ prev_content_start, prev_file.size)
+
+ prev_file = cur_file
diff --git a/gs_cache/tests/conftest.py b/gs_cache/tests/conftest.py
new file mode 100644
index 0000000..a43dcfe
--- /dev/null
+++ b/gs_cache/tests/conftest.py
@@ -0,0 +1,31 @@
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""The configuration python file for Pytest.
+
+In this file, we add below customized command line option:
+  --network: Run tests that depend on good network connectivity.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import pytest
+
+
+def pytest_addoption(parser):
+ parser.addoption("--network", action="store_true", default=False,
+ help="Run tests that depend on good network connectivity")
+
+
+def pytest_collection_modifyitems(config, items):
+ if config.getoption("--network"):
+ # run network tests
+ return
+ skip_network_tests = pytest.mark.skip(
+ reason="Skipping network test (re-run w/--network)")
+ for item in items:
+ if "network" in item.keywords:
+ item.add_marker(skip_network_tests)
diff --git a/gs_cache/tests/gs_archive_server_test.py b/gs_cache/tests/gs_archive_server_test.py
new file mode 100644
index 0000000..ae07a39
--- /dev/null
+++ b/gs_cache/tests/gs_archive_server_test.py
@@ -0,0 +1,170 @@
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for gs_archive_server."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import base64
+import gzip
+import md5
+import os
+import StringIO
+import unittest
+
+import cherrypy
+import mock
+import pytest
+import requests
+from cherrypy.test import helper
+
+import gs_archive_server
+from chromite.lib import cros_logging as logging
+
+_TESTING_SERVER = 'http://127.0.0.1:8888'
+_DIR = '/gs_archive_server_test'
+# Some REAL files and info on Google Storage.
+_TEST_DATA = {
+ 'a_plain_file': {
+ 'path': '%s/README.md' % _DIR,
+ 'mime': 'application/octet-stream',
+ 'size': 139,
+ },
+ 'a_tar_file': {
+ 'path': '%s/control_files.tar' % _DIR,
+ 'members_md5': '0d5d60e9f10d41c60dd85a7f0081de5d',
+ }
+}
+
+# a tgz file with only one file "bar" which content is "foo\n"
+_A_TGZ_FILE = base64.b64decode(
+ 'H4sIAC8VyFoAA+3OMQ7CMAxGYc+cIkdw3DQ9T4pExYBSuc3A7WlhR2JoWd43+vfwxuJyNN3klC'
+ 'R2McWcRK0feuve9499s2yqQ9r3aKZZgh5etmnLWjwEmVq9jl/+Zr8/ij8nr20+o+skt1ov/24A'
+ 'AAAAAAAAAAAAAAAAAPzuBWP9bg8AKAAA'
+)
+_A_TAR_FILE = gzip.GzipFile(fileobj=StringIO.StringIO(_A_TGZ_FILE)).read()
+
+
+@pytest.mark.network
+class UnmockedGSArchiveServerTest(helper.CPWebCase):
+ """Some integration tests using cherrypy test framework."""
+ @staticmethod
+ def setup_server():
+ """An API used by cherrypy to setup test environment."""
+ cherrypy.tree.mount(gs_archive_server.GsArchiveServer(''))
+
+ def test_download_a_file(self):
+ """Test normal files downloading."""
+ tested_file = _TEST_DATA['a_plain_file']
+ self.getPage('/download%(path)s' % tested_file)
+ self.assertStatus(200)
+ self.assertHeader('Content-Type', tested_file['mime'])
+ self.assertEquals(len(self.body), tested_file['size'])
+
+ def test_download_a_non_existing_file(self):
+ """Test downloading non-existing files."""
+ self.getPage('/download/chromeos-images-archive/existing/file')
+ self.assertStatus(404)
+
+ def test_download_against_unauthorized_bucket(self):
+ """Test downloading from unauthorized bucket."""
+ self.getPage('/download/another_bucket/file')
+ self.assertStatus(401)
+
+
+class MockedGSArchiveServerTest(unittest.TestCase):
+ """Unit test of GsArchiveServer using mock objects."""
+
+ def setUp(self):
+ """Setup method."""
+ self.server = gs_archive_server.GsArchiveServer('')
+
+ def test_list_member(self):
+ """Test list_member RPC."""
+ with mock.patch.object(self.server, '_caching_server') as caching_server:
+ rsp = mock.MagicMock()
+ caching_server.download.return_value = rsp
+ rsp.iter_content.return_value = (_A_TAR_FILE[:100], _A_TAR_FILE[100:])
+ csv = list(self.server.list_member('baz.tar'))
+ self.assertEquals(len(csv), 1)
+ (filename, record_start, record_size, record_end,
+ content_start, size, content_end) = csv[0].split(',')
+ self.assertEquals(filename, 'bar')
+ self.assertEquals(record_start, '0')
+ self.assertEquals(record_size, '1024')
+ self.assertEquals(record_end, '1023') # 1024 - 1
+ self.assertEquals(content_start, '512')
+ self.assertEquals(size, '4')
+ self.assertEquals(content_end, '515\n') # 512 + 4 - 1
+
+ # test char quoting in file name
+ with gzip.open(os.path.join(os.path.dirname(__file__),
+ 'index_tar_member_testing.tgz')) as f:
+ rsp.iter_content.return_value = f.read()
+ members = next(self.server.list_member('baz.tar'))
+ for csv in members.rstrip('\n').split('\n'):
+ # each line can be split into 7 elements, even ',' in filename
+ elements = csv.split(',')
+ self.assertEquals(len(elements), 7)
+ # elements from 1 to 6 are integers
+ _ = [int(d) for d in elements[1:7]]
+
+
+def testing_server_setup():
+ """Check if testing server is setup."""
+ try:
+ rsp = requests.get(_TESTING_SERVER)
+ if rsp.status_code >= 500:
+ logging.warn(
+ 'Testing server %s has internal errors. Some tests are skipped!',
+ _TESTING_SERVER)
+ return False
+ return True
+ except Exception:
+ logging.warn('No testings server detected. Some tests are skipped!')
+ return False
+
+
+@unittest.skipUnless(testing_server_setup(), 'Testing servers not available!')
+class GsCacheBackendFunctionalTest(unittest.TestCase):
+ """This is a functional blackbox test
+
+ These tests depend on a full setup of the server and proxy server.
+ If either of they is not available, all tests in this class are skipped.
+ """
+
+ def _get_page(self, url, headers=None, expect_status=200):
+ headers = headers.copy() if headers else {}
+ if not os.environ.get('WITH_CACHE', None):
+ headers['x-no-cache'] = '1' # bypass all caching to test the whole flow
+
+ rsp = requests.get('%s%s' % (_TESTING_SERVER, url), headers=headers,
+ stream=True)
+ self.assertEquals(rsp.status_code, expect_status)
+ return rsp
+
+ def _verify_md5(self, content, expected_md5):
+    """Verify the md5 sum of input content equals the expected value."""
+ m = md5.new()
+ m.update(content)
+ self.assertEquals(m.hexdigest(), expected_md5)
+
+ def test_download_plain_file(self):
+ """Test download RPC."""
+ tested_file = _TEST_DATA['a_plain_file']
+ rsp = self._get_page('/download%(path)s' % tested_file)
+ self.assertEquals(rsp.headers['Content-Length'], str(tested_file['size']))
+
+ def test_list_member(self):
+ """Test list member of a tar file."""
+ tested_file = _TEST_DATA['a_tar_file']
+ rsp = self._get_page('/list_member%(path)s' % tested_file)
+ self.assertEquals(rsp.headers['Content-Type'], 'text/csv;charset=utf-8')
+ self._verify_md5(rsp.content, tested_file['members_md5'])
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/gs_cache/tests/index_tar_member_testing.tgz b/gs_cache/tests/index_tar_member_testing.tgz
new file mode 100644
index 0000000..ea4b2fc
--- /dev/null
+++ b/gs_cache/tests/index_tar_member_testing.tgz
Binary files differ
diff --git a/gs_cache/tests/tarfile_utils_test.py b/gs_cache/tests/tarfile_utils_test.py
new file mode 100644
index 0000000..5a821a7
--- /dev/null
+++ b/gs_cache/tests/tarfile_utils_test.py
@@ -0,0 +1,70 @@
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for tarfile_utils."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import StringIO
+import subprocess
+import tarfile
+import unittest
+
+import tarfile_utils
+
+
+class TarfileUtilsTest(unittest.TestCase):
+ """Tests of tarfile_utils."""
+
+ def test_list_tar_members_empty_file(self):
+ """Test listing file member of an empty tar."""
+ tar_tvR = StringIO.StringIO('block 0: ** Block of NULs **\n')
+ self.assertFalse(list(tarfile_utils.list_tar_members(tar_tvR)))
+
+ def test_list_tar_members_non_empty_file(self):
+ """Test listing file member of an non-empty tar."""
+ tar_tvR = StringIO.StringIO(
+ 'block 0: mode owner 1 date hour:min\tfilename\n'
+ 'block 2: mode owner 123 date hour:min\tfile name with spaces\n'
+ 'block 4: mode owner 0 date hour:min\tdirectory/\n'
+ 'block 5: mode owner 0 date hour:min\tdirectory/symbol link -> filename'
+ '\n'
+ 'block 6: ** Block of NULs **\n'
+ )
+ result = list(tarfile_utils.list_tar_members(tar_tvR))
+ self.assertEquals(result, [
+ ('filename', 0, 1024, 512, 1),
+ ('file name with spaces', 512 * 2, 1024, 512 * 3, 123),
+ ('directory/', 512 * 4, 512, 512 * 5, 0),
+ ('directory/symbol link -> filename', 512 * 5, 512, 512 * 6, 0)
+ ])
+
+ def test_list_tar_member_with_real_tar_file(self):
+ """Using a real tar file to test listing tar member."""
+ tar_name = os.path.join(os.path.dirname(__file__),
+ 'index_tar_member_testing.tgz')
+ tar_tvR = StringIO.StringIO(
+ subprocess.check_output(['tar', 'tvRzf', tar_name]))
+ members = tarfile_utils.list_tar_members(tar_tvR)
+ with tarfile.open(tar_name, 'r:gz') as tar:
+ for tar_info, result in zip(tar, members):
+ if tar_info.isreg():
+ name = tar_info.name
+
+ if tar_info.isdir():
+ name = '%s/' % tar_info.name
+
+ if tar_info.issym():
+ name = '%s -> %s' % (tar_info.name, tar_info.linkname)
+
+ self.assertEquals(name, result.filename)
+ self.assertEquals(tar_info.offset_data, result.content_start)
+ self.assertEquals(tar_info.size, result.size)
+
+
+if __name__ == '__main__':
+ unittest.main()