Blame - src/utils.cc - android.googlesource.com/platform/external/puffin

blob: 7eae38e5dedab4a1bab96803ffbcc89763abfa8c [file] [log] [blame]

Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	1	// Copyright 2017 The Chromium OS Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "puffin/src/include/puffin/utils.h"
				6
				7	#include <inttypes.h>
				8
Sen Jiang	5eb33e8	2018-05-01 15:01:11 -0700	[diff] [blame]	9	#include <algorithm>
				10	#include <set>
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	11	#include <string>
				12	#include <vector>
				13
Tianjie Xu	1194223	2018-01-18 18:31:42 -0800	[diff] [blame]	14	#include <zlib.h>
				15
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	16	#include "puffin/src/bit_reader.h"
Amin Hassani	00f0832	2017-10-18 11:55:10 -0700	[diff] [blame]	17	#include "puffin/src/file_stream.h"
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	18	#include "puffin/src/include/puffin/common.h"
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	19	#include "puffin/src/include/puffin/puffer.h"
Amin Hassani	e2e9cb0	2018-03-15 14:14:58 -0700	[diff] [blame]	20	#include "puffin/src/logging.h"
Tianjie Xu	1194223	2018-01-18 18:31:42 -0800	[diff] [blame]	21	#include "puffin/src/memory_stream.h"
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	22	#include "puffin/src/puff_writer.h"
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	23
Sen Jiang	5eb33e8	2018-05-01 15:01:11 -0700	[diff] [blame]	24	using std::set;
Amin Hassani	10b869c	2018-03-15 13:22:32 -0700	[diff] [blame]	25	using std::string;
				26	using std::vector;
				27
Tianjie Xu	1194223	2018-01-18 18:31:42 -0800	[diff] [blame]	28	namespace {
				29	// Use memcpy to access the unaligned data of type \|T\|.
				30	template <typename T>
				31	inline T get_unaligned(const void* address) {
				32	T result;
				33	memcpy(&result, address, sizeof(T));
				34	return result;
				35	}
				36
				37	// Calculate both the compressed size and uncompressed size of the deflate
				38	// block that starts from the offset \|start\| of buffer \|data\|.
				39	bool CalculateSizeOfDeflateBlock(const puffin::Buffer& data,
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	40	uint64_t start,
				41	uint64_t* compressed_size,
				42	uint64_t* uncompressed_size) {
Tianjie Xu	1194223	2018-01-18 18:31:42 -0800	[diff] [blame]	43	TEST_AND_RETURN_FALSE(compressed_size != nullptr &&
				44	uncompressed_size != nullptr);
				45
				46	TEST_AND_RETURN_FALSE(start < data.size());
				47
				48	z_stream strm = {};
				49	strm.avail_in = data.size() - start;
				50	strm.next_in = data.data() + start;
				51
				52	// -15 means we are decoding a 'raw' stream without zlib headers.
				53	if (inflateInit2(&strm, -15)) {
				54	LOG(ERROR) << "Failed to initialize inflate: " << strm.msg;
				55	return false;
				56	}
				57
				58	const unsigned int kBufferSize = 32768;
				59	std::vector<uint8_t> uncompressed_data(kBufferSize);
				60	*uncompressed_size = 0;
				61	int status = Z_OK;
				62	do {
				63	// Overwrite the same buffer since we don't need the uncompressed data.
				64	strm.avail_out = kBufferSize;
				65	strm.next_out = uncompressed_data.data();
				66	status = inflate(&strm, Z_NO_FLUSH);
				67	if (status < 0) {
				68	LOG(ERROR) << "Inflate failed: " << strm.msg << ", has decompressed "
				69	<< *uncompressed_size << " bytes.";
				70	return false;
				71	}
				72	*uncompressed_size += kBufferSize - strm.avail_out;
				73	} while (status != Z_STREAM_END);
				74
				75	*compressed_size = data.size() - start - strm.avail_in;
Amin Hassani	da66410	2018-01-30 16:32:43 -0800	[diff] [blame]	76	TEST_AND_RETURN_FALSE(inflateEnd(&strm) == Z_OK);
Tianjie Xu	1194223	2018-01-18 18:31:42 -0800	[diff] [blame]	77	return true;
				78	}
				79
Sen Jiang	5eb33e8	2018-05-01 15:01:11 -0700	[diff] [blame]	80	struct ExtentData {
				81	puffin::BitExtent extent;
				82	uint64_t byte_offset;
				83	uint64_t byte_length;
				84	const puffin::Buffer& data;
				85
				86	ExtentData(const puffin::BitExtent& in_extent, const puffin::Buffer& in_data)
				87	: extent(in_extent), data(in_data) {
				88	// Round start offset up and end offset down to exclude bits not in this
				89	// extent. We simply ignore the bits at start and end that's not on byte
				90	// boundary because as long as the majority of the bytes are the same,
				91	// bsdiff will be able to reference it.
				92	byte_offset = (extent.offset + 7) / 8;
				93	uint64_t byte_end_offset = (extent.offset + extent.length) / 8;
				94	CHECK(byte_end_offset <= data.size());
				95	if (byte_end_offset > byte_offset) {
				96	byte_length = byte_end_offset - byte_offset;
				97	} else {
				98	byte_length = 0;
				99	}
				100	}
				101
				102	int Compare(const ExtentData& other) const {
				103	if (extent.length != other.extent.length) {
				104	return extent.length < other.extent.length ? -1 : 1;
				105	}
				106	return memcmp(data.data() + byte_offset,
				107	other.data.data() + other.byte_offset,
				108	std::min(byte_length, other.byte_length));
				109	}
				110	bool operator<(const ExtentData& other) const { return Compare(other) < 0; }
				111	bool operator==(const ExtentData& other) const { return Compare(other) == 0; }
				112	};
				113
Tianjie Xu	1194223	2018-01-18 18:31:42 -0800	[diff] [blame]	114	} // namespace
				115
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	116	namespace puffin {
				117
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	118	// This function uses RFC1950 (https://www.ietf.org/rfc/rfc1950.txt) for the
Amin Hassani	75a7f2c	2018-02-21 11:51:28 -0800	[diff] [blame]	119	// definition of a zlib stream. For finding the deflate blocks, we relying on
				120	// the proper size of the zlib stream in \|data\|. Basically the size of the zlib
				121	// stream should be known before hand. Otherwise we need to parse the stream and
				122	// find the location of compressed blocks using CalculateSizeOfDeflateBlock().
				123	bool LocateDeflatesInZlib(const Buffer& data,
				124	std::vector<ByteExtent>* deflate_blocks) {
				125	// A zlib stream has the following format:
				126	// 0 1 compression method and flag
				127	// 1 1 flag
				128	// 2 4 preset dictionary (optional)
				129	// 2 or 6 n compressed data
				130	// n+(2 or 6) 4 Adler-32 checksum
				131	TEST_AND_RETURN_FALSE(data.size() >= 6 + 4); // Header + Footer
				132	uint16_t cmf = data[0];
				133	auto compression_method = cmf & 0x0F;
				134	// For deflate compression_method should be 8.
				135	TEST_AND_RETURN_FALSE(compression_method == 8);
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	136
Amin Hassani	75a7f2c	2018-02-21 11:51:28 -0800	[diff] [blame]	137	auto cinfo = (cmf & 0xF0) >> 4;
				138	// Value greater than 7 is not allowed in deflate.
				139	TEST_AND_RETURN_FALSE(cinfo <= 7);
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	140
Amin Hassani	75a7f2c	2018-02-21 11:51:28 -0800	[diff] [blame]	141	auto flag = data[1];
				142	TEST_AND_RETURN_FALSE(((cmf << 8) + flag) % 31 == 0);
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	143
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	144	uint64_t header_len = 2;
Amin Hassani	75a7f2c	2018-02-21 11:51:28 -0800	[diff] [blame]	145	if (flag & 0x20) {
				146	header_len += 4; // 4 bytes for the preset dictionary.
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	147	}
Amin Hassani	75a7f2c	2018-02-21 11:51:28 -0800	[diff] [blame]	148
				149	// 4 is for ADLER32.
				150	deflate_blocks->emplace_back(header_len, data.size() - header_len - 4);
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	151	return true;
				152	}
				153
				154	bool FindDeflateSubBlocks(const UniqueStreamPtr& src,
				155	const vector<ByteExtent>& deflates,
				156	vector<BitExtent>* subblock_deflates) {
				157	Puffer puffer;
				158	Buffer deflate_buffer;
				159	for (const auto& deflate : deflates) {
				160	TEST_AND_RETURN_FALSE(src->Seek(deflate.offset));
				161	// Read from src into deflate_buffer.
				162	deflate_buffer.resize(deflate.length);
				163	TEST_AND_RETURN_FALSE(src->Read(deflate_buffer.data(), deflate.length));
				164
				165	// Find all the subblocks.
				166	BufferBitReader bit_reader(deflate_buffer.data(), deflate.length);
Amin Hassani	b8325c2	2018-05-22 14:57:22 -0700	[diff] [blame]	167	// The uncompressed blocks will be ignored since we are passing a null
				168	// buffered puff writer and a valid deflate locations output array. This
				169	// should not happen in the puffdiff or anywhere else by default.
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	170	BufferPuffWriter puff_writer(nullptr, 0);
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	171	vector<BitExtent> subblocks;
				172	TEST_AND_RETURN_FALSE(
Amin Hassani	e2e9cb0	2018-03-15 14:14:58 -0700	[diff] [blame]	173	puffer.PuffDeflate(&bit_reader, &puff_writer, &subblocks));
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	174	TEST_AND_RETURN_FALSE(deflate.length == bit_reader.Offset());
				175	for (const auto& subblock : subblocks) {
				176	subblock_deflates->emplace_back(subblock.offset + deflate.offset * 8,
				177	subblock.length);
				178	}
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	179	}
				180	return true;
				181	}
				182
Amin Hassani	00f0832	2017-10-18 11:55:10 -0700	[diff] [blame]	183	bool LocateDeflatesInZlibBlocks(const string& file_path,
				184	const vector<ByteExtent>& zlibs,
				185	vector<BitExtent>* deflates) {
				186	auto src = FileStream::Open(file_path, true, false);
				187	TEST_AND_RETURN_FALSE(src);
Amin Hassani	75a7f2c	2018-02-21 11:51:28 -0800	[diff] [blame]	188
				189	Buffer buffer;
				190	for (auto& zlib : zlibs) {
				191	buffer.resize(zlib.length);
				192	TEST_AND_RETURN_FALSE(src->Seek(zlib.offset));
				193	TEST_AND_RETURN_FALSE(src->Read(buffer.data(), buffer.size()));
				194
				195	vector<ByteExtent> deflate_blocks;
				196	TEST_AND_RETURN_FALSE(LocateDeflatesInZlib(buffer, &deflate_blocks));
				197
				198	vector<BitExtent> deflate_subblocks;
				199	auto zlib_blc_src = MemoryStream::CreateForRead(buffer);
				200	TEST_AND_RETURN_FALSE(
				201	FindDeflateSubBlocks(zlib_blc_src, deflate_blocks, &deflate_subblocks));
				202
				203	// Relocated based on the offset of the zlib.
				204	for (const auto& def : deflate_subblocks) {
				205	deflates->emplace_back(zlib.offset * 8 + def.offset, def.length);
				206	}
				207	}
				208	return true;
Amin Hassani	00f0832	2017-10-18 11:55:10 -0700	[diff] [blame]	209	}
				210
Amin Hassani	4a212ed	2018-02-15 10:31:28 -0800	[diff] [blame]	211	// For more information about gzip format, refer to RFC 1952 located at:
				212	// https://www.ietf.org/rfc/rfc1952.txt
				213	bool LocateDeflatesInGzip(const Buffer& data,
				214	vector<ByteExtent>* deflate_blocks) {
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	215	uint64_t member_start = 0;
Amin Hassani	4a212ed	2018-02-15 10:31:28 -0800	[diff] [blame]	216	while (member_start < data.size()) {
				217	// Each member entry has the following format
				218	// 0 1 0x1F
				219	// 1 1 0x8B
				220	// 2 1 compression method (8 denotes deflate)
				221	// 3 1 set of flags
				222	// 4 4 modification time
				223	// 8 1 extra flags
				224	// 9 1 operating system
				225	TEST_AND_RETURN_FALSE(member_start + 10 <= data.size());
				226	TEST_AND_RETURN_FALSE(data[member_start + 0] == 0x1F);
				227	TEST_AND_RETURN_FALSE(data[member_start + 1] == 0x8B);
				228	TEST_AND_RETURN_FALSE(data[member_start + 2] == 8);
				229
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	230	uint64_t offset = member_start + 10;
Amin Hassani	4a212ed	2018-02-15 10:31:28 -0800	[diff] [blame]	231	int flag = data[member_start + 3];
				232	// Extra field
				233	if (flag & 4) {
				234	TEST_AND_RETURN_FALSE(offset + 2 <= data.size());
				235	uint16_t extra_length = data[offset++];
				236	extra_length \|= static_cast<uint16_t>(data[offset++]) << 8;
				237	TEST_AND_RETURN_FALSE(offset + extra_length <= data.size());
				238	offset += extra_length;
				239	}
				240	// File name field
				241	if (flag & 8) {
				242	while (true) {
				243	TEST_AND_RETURN_FALSE(offset + 1 <= data.size());
				244	if (data[offset++] == 0) {
				245	break;
				246	}
				247	}
				248	}
				249	// File comment field
				250	if (flag & 16) {
				251	while (true) {
				252	TEST_AND_RETURN_FALSE(offset + 1 <= data.size());
				253	if (data[offset++] == 0) {
				254	break;
				255	}
				256	}
				257	}
				258	// CRC16 field
				259	if (flag & 2) {
				260	offset += 2;
				261	}
				262
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	263	uint64_t compressed_size, uncompressed_size;
Amin Hassani	4a212ed	2018-02-15 10:31:28 -0800	[diff] [blame]	264	TEST_AND_RETURN_FALSE(CalculateSizeOfDeflateBlock(
				265	data, offset, &compressed_size, &uncompressed_size));
				266	TEST_AND_RETURN_FALSE(offset + compressed_size <= data.size());
				267	deflate_blocks->push_back(ByteExtent(offset, compressed_size));
				268	offset += compressed_size;
				269
				270	// Ignore CRC32;
				271	TEST_AND_RETURN_FALSE(offset + 8 <= data.size());
				272	offset += 4;
				273	uint32_t u_size = 0;
				274	for (size_t i = 0; i < 4; i++) {
				275	u_size \|= static_cast<uint32_t>(data[offset++]) << (i * 8);
				276	}
				277	TEST_AND_RETURN_FALSE(uncompressed_size % (1 << 31) == u_size);
				278	member_start = offset;
				279	}
				280	return true;
				281	}
				282
Tianjie Xu	1194223	2018-01-18 18:31:42 -0800	[diff] [blame]	283	// For more information about the zip format, refer to
				284	// https://support.pkware.com/display/PKZIP/APPNOTE
				285	bool LocateDeflatesInZipArchive(const Buffer& data,
				286	vector<ByteExtent>* deflate_blocks) {
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	287	uint64_t pos = 0;
Tianjie Xu	1194223	2018-01-18 18:31:42 -0800	[diff] [blame]	288	while (pos <= data.size() - 30) {
				289	// TODO(xunchang) add support for big endian system when searching for
				290	// magic numbers.
				291	if (get_unaligned<uint32_t>(data.data() + pos) != 0x04034b50) {
				292	pos++;
				293	continue;
				294	}
				295
				296	// local file header format
				297	// 0 4 0x04034b50
				298	// 4 2 minimum version needed to extract
				299	// 6 2 general purpose bit flag
				300	// 8 2 compression method
				301	// 10 4 file last modification date & time
				302	// 14 4 CRC-32
				303	// 18 4 compressed size
				304	// 22 4 uncompressed size
				305	// 26 2 file name length
				306	// 28 2 extra field length
				307	// 30 n file name
				308	// 30+n m extra field
				309	auto compression_method = get_unaligned<uint16_t>(data.data() + pos + 8);
				310	if (compression_method != 8) { // non-deflate type
				311	pos += 4;
				312	continue;
				313	}
				314
				315	auto compressed_size = get_unaligned<uint32_t>(data.data() + pos + 18);
				316	auto uncompressed_size = get_unaligned<uint32_t>(data.data() + pos + 22);
				317	auto file_name_length = get_unaligned<uint16_t>(data.data() + pos + 26);
				318	auto extra_field_length = get_unaligned<uint16_t>(data.data() + pos + 28);
				319	uint64_t header_size = 30 + file_name_length + extra_field_length;
				320
				321	// sanity check
				322	if (static_cast<uint64_t>(header_size) + compressed_size > data.size() \|\|
				323	pos > data.size() - header_size - compressed_size) {
				324	pos += 4;
				325	continue;
				326	}
				327
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	328	uint64_t calculated_compressed_size;
				329	uint64_t calculated_uncompressed_size;
Tianjie Xu	1194223	2018-01-18 18:31:42 -0800	[diff] [blame]	330	if (!CalculateSizeOfDeflateBlock(data, pos + header_size,
				331	&calculated_compressed_size,
				332	&calculated_uncompressed_size)) {
				333	LOG(ERROR) << "Failed to decompress the zip entry starting from: " << pos
				334	<< ", skip adding deflates for this entry.";
				335	pos += 4;
				336	continue;
				337	}
				338
				339	// Double check the compressed size and uncompressed size if they are
				340	// available in the file header.
				341	if (compressed_size > 0 && compressed_size != calculated_compressed_size) {
				342	LOG(WARNING) << "Compressed size in the file header: " << compressed_size
				343	<< " doesn't equal the real size: "
				344	<< calculated_compressed_size;
				345	}
				346
				347	if (uncompressed_size > 0 &&
				348	uncompressed_size != calculated_uncompressed_size) {
				349	LOG(WARNING) << "Uncompressed size in the file header: "
				350	<< uncompressed_size << " doesn't equal the real size: "
				351	<< calculated_uncompressed_size;
				352	}
				353
				354	deflate_blocks->emplace_back(pos + header_size, calculated_compressed_size);
				355	pos += header_size + calculated_compressed_size;
				356	}
				357
				358	return true;
				359	}
				360
				361	bool LocateDeflateSubBlocksInZipArchive(const Buffer& data,
				362	vector<BitExtent>* deflates) {
				363	vector<ByteExtent> deflate_blocks;
				364	if (!LocateDeflatesInZipArchive(data, &deflate_blocks)) {
				365	return false;
				366	}
				367
				368	auto src = MemoryStream::CreateForRead(data);
				369	return FindDeflateSubBlocks(src, deflate_blocks, deflates);
				370	}
				371
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	372	bool FindPuffLocations(const UniqueStreamPtr& src,
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	373	const vector<BitExtent>& deflates,
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	374	vector<ByteExtent>* puffs,
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	375	uint64_t* out_puff_size) {
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	376	Puffer puffer;
				377	Buffer deflate_buffer;
				378
				379	// Here accumulate the size difference between each corresponding deflate and
				380	// puff. At the end we add this cummulative size difference to the size of the
				381	// deflate stream to get the size of the puff stream. We use signed size
				382	// because puff size could be smaller than deflate size.
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	383	int64_t total_size_difference = 0;
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	384	for (auto deflate = deflates.begin(); deflate != deflates.end(); ++deflate) {
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	385	// Read from src into deflate_buffer.
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	386	auto start_byte = deflate->offset / 8;
				387	auto end_byte = (deflate->offset + deflate->length + 7) / 8;
				388	deflate_buffer.resize(end_byte - start_byte);
				389	TEST_AND_RETURN_FALSE(src->Seek(start_byte));
				390	TEST_AND_RETURN_FALSE(
				391	src->Read(deflate_buffer.data(), deflate_buffer.size()));
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	392	// Find the size of the puff.
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	393	BufferBitReader bit_reader(deflate_buffer.data(), deflate_buffer.size());
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	394	uint64_t bits_to_skip = deflate->offset % 8;
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	395	TEST_AND_RETURN_FALSE(bit_reader.CacheBits(bits_to_skip));
				396	bit_reader.DropBits(bits_to_skip);
				397
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	398	BufferPuffWriter puff_writer(nullptr, 0);
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	399	TEST_AND_RETURN_FALSE(
Amin Hassani	e2e9cb0	2018-03-15 14:14:58 -0700	[diff] [blame]	400	puffer.PuffDeflate(&bit_reader, &puff_writer, nullptr));
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	401	TEST_AND_RETURN_FALSE(deflate_buffer.size() == bit_reader.Offset());
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	402
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	403	// 1 if a deflate ends at the same byte that the next deflate starts and
				404	// there is a few bits gap between them. In practice this may never happen,
				405	// but it is a good idea to support it anyways. If there is a gap, the value
				406	// of the gap will be saved as an integer byte to the puff stream. The parts
				407	// of the byte that belogs to the deflates are shifted out.
				408	int gap = 0;
				409	if (deflate != deflates.begin()) {
				410	auto prev_deflate = std::prev(deflate);
				411	if ((prev_deflate->offset + prev_deflate->length == deflate->offset)
				412	// If deflates are on byte boundary the gap will not be counted later,
				413	// so we won't worry about it.
				414	&& (deflate->offset % 8 != 0)) {
				415	gap = 1;
				416	}
				417	}
				418
				419	start_byte = ((deflate->offset + 7) / 8);
				420	end_byte = (deflate->offset + deflate->length) / 8;
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	421	int64_t deflate_length_in_bytes = end_byte - start_byte;
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	422
				423	// If there was no gap bits between the current and previous deflates, there
				424	// will be no extra gap byte, so the offset will be shifted one byte back.
				425	auto puff_offset = start_byte - gap + total_size_difference;
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	426	auto puff_size = puff_writer.Size();
Amin Hassani	7074da6	2017-09-30 17:14:06 -0700	[diff] [blame]	427	// Add the location into puff.
				428	puffs->emplace_back(puff_offset, puff_size);
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	429	total_size_difference +=
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	430	static_cast<int64_t>(puff_size) - deflate_length_in_bytes - gap;
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	431	}
				432
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	433	uint64_t src_size;
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	434	TEST_AND_RETURN_FALSE(src->GetSize(&src_size));
Amin Hassani	d7768d5	2018-02-28 15:34:21 -0800	[diff] [blame]	435	auto final_size = static_cast<int64_t>(src_size) + total_size_difference;
Amin Hassani	26bcfdd	2017-09-29 17:54:15 -0700	[diff] [blame]	436	TEST_AND_RETURN_FALSE(final_size >= 0);
				437	*out_puff_size = final_size;
				438	return true;
				439	}
				440
Sen Jiang	5eb33e8	2018-05-01 15:01:11 -0700	[diff] [blame]	441	void RemoveEqualBitExtents(const Buffer& data1,
				442	const Buffer& data2,
				443	std::vector<BitExtent>* extents1,
				444	std::vector<BitExtent>* extents2) {
				445	set<ExtentData> extent1_set, equal_extents;
				446	for (const BitExtent& ext : *extents1) {
				447	extent1_set.emplace(ext, data1);
				448	}
				449
				450	auto new_extents2_end = extents2->begin();
				451	for (const BitExtent& ext : *extents2) {
				452	ExtentData extent_data(ext, data2);
				453	if (extent1_set.find(extent_data) != extent1_set.end()) {
				454	equal_extents.insert(extent_data);
				455	} else {
				456	*new_extents2_end++ = ext;
				457	}
				458	}
				459	extents2->erase(new_extents2_end, extents2->end());
				460	extents1->erase(
				461	std::remove_if(extents1->begin(), extents1->end(),
				462	[&equal_extents, &data1](const BitExtent& ext) {
				463	return equal_extents.find(ExtentData(ext, data1)) !=
				464	equal_extents.end();
				465	}),
				466	extents1->end());
				467	}
Amin Hassani	c3e6b53	2017-03-07 17:47:25 -0800	[diff] [blame]	468	} // namespace puffin