/**************************************************************************
 *
 * Copyright 2014-2016 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 **************************************************************************/
25
26
#include "memtrace.hpp"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>

#include "crc32c.hpp"
36
37
/*
 * Compile-time SIMD feature selection.
 *
 * HAVE_SSE2 is defined for x86-64 targets, for 32-bit x86 MSVC targets, and
 * for 32-bit x86 GCC-compatible targets that were built with SSE2 enabled
 * (__SSE2__).  On those targets HAVE_SSE41 / HAVE_SSE42 are force-disabled,
 * because nothing here checks at run time that the CPU supports them.
 */
#if \
    (defined(__i386__) && defined(__SSE2__)) /* gcc */ || \
    defined(_M_IX86) /* msvc */ || \
    defined(__x86_64__) /* gcc */ || \
    defined(_M_X64) /* msvc */ || \
    defined(_M_AMD64) /* msvc */

#  define HAVE_SSE2

// TODO: Detect and leverage SSE 4.1 and 4.2 at runtime
#  undef HAVE_SSE41
#  undef HAVE_SSE42

#endif


/* Pull in the intrinsics header matching the highest enabled SSE level. */
#if defined(HAVE_SSE42)
#  include <nmmintrin.h>
#elif defined(HAVE_SSE41)
#  include <smmintrin.h>
#elif defined(HAVE_SSE2)
#  include <emmintrin.h>
#endif
61
62
/*
 * Hashing granularity.
 *
 * BLOCK_ALIGN is the alignment (in bytes) that every hashed block must start
 * on; BLOCK_SIZE is the number of bytes covered by one hash.  BLOCK_SIZE must
 * be a multiple of BLOCK_ALIGN so that consecutive blocks stay aligned.
 */
#define BLOCK_ALIGN 64
#define BLOCK_SIZE 512
65
66
/**
 * Round a pointer down to the previous multiple of @p alignment.
 *
 * @param p          pointer to align (returned unchanged if already aligned)
 * @param alignment  alignment in bytes; must be a non-zero power of two,
 *                   because the rounding is done by masking off low bits
 * @return the greatest address <= p that is a multiple of alignment
 */
template< class T >
static inline T *
lAlignPtr(T *p, uintptr_t alignment)
{
    // The mask trick below is only valid for power-of-two alignments.
    assert(alignment != 0 && (alignment & (alignment - 1)) == 0);

    const uintptr_t address = reinterpret_cast<uintptr_t>(p);
    return reinterpret_cast<T *>(address & ~(alignment - 1));
}
73
74
/**
 * Round a pointer up to the next multiple of @p alignment.
 *
 * @param p          pointer to align (returned unchanged if already aligned)
 * @param alignment  alignment in bytes; must be a non-zero power of two,
 *                   because the rounding is done by masking off low bits
 * @return the smallest address >= p that is a multiple of alignment
 */
template< class T >
static inline T *
rAlignPtr(T *p, uintptr_t alignment)
{
    // The mask trick below is only valid for power-of-two alignments.
    assert(alignment != 0 && (alignment & (alignment - 1)) == 0);

    const uintptr_t mask = alignment - 1;
    const uintptr_t address = reinterpret_cast<uintptr_t>(p);
    return reinterpret_cast<T *>((address + mask) & ~mask);
}
81
82
#ifdef HAVE_SSE2

/*
 * Wrappers that pick the SSE4.1 form of an operation when it is enabled and
 * an SSE2-only equivalent otherwise:
 *
 *  - mm_stream_load_si128: 128-bit load from a 16-byte aligned address.
 *  - mm_extract_epi32_N:   extract 32-bit lane N (0..3) of a 128-bit vector.
 *    Without SSE4.1 the lane is first broadcast with a shuffle and then read
 *    from the low element.
 */
#ifdef HAVE_SSE41
#  define mm_stream_load_si128 _mm_stream_load_si128
#  define mm_extract_epi32_0(x) _mm_extract_epi32(x, 0)
#  define mm_extract_epi32_1(x) _mm_extract_epi32(x, 1)
#  define mm_extract_epi32_2(x) _mm_extract_epi32(x, 2)
#  define mm_extract_epi32_3(x) _mm_extract_epi32(x, 3)
#else /* !HAVE_SSE41 */
#  define mm_stream_load_si128 _mm_load_si128
#  define mm_extract_epi32_0(x) _mm_cvtsi128_si32(x)
#  define mm_extract_epi32_1(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(1,1,1,1)))
#  define mm_extract_epi32_2(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(2,2,2,2)))
#  define mm_extract_epi32_3(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(3,3,3,3)))
#endif /* !HAVE_SSE41 */

#ifdef HAVE_SSE42

/* Use the hardware CRC32 instruction directly. */
#define mm_crc32_u32 _mm_crc32_u32

#else /* !HAVE_SSE42 */

/**
 * Table-driven stand-in used when the SSE4.2 _mm_crc32_u32 intrinsic is not
 * enabled.
 *
 * Folds one 32-bit word into a running CRC by looking up each of the four
 * bytes of (current ^ crc) in the first four slices of crc32c_8x256_table
 * and XOR-ing the results together.
 *
 * @param crc      running CRC value
 * @param current  next 32-bit word of input
 * @return the updated CRC value
 */
static inline uint32_t
mm_crc32_u32(uint32_t crc, uint32_t current)
{
    const uint32_t one = current ^ crc;

    // Most significant byte goes through slice 0, least significant
    // through slice 3.
    return crc32c_8x256_table[0][ one >> 24        ] ^
           crc32c_8x256_table[1][(one >> 16) & 0xff] ^
           crc32c_8x256_table[2][(one >>  8) & 0xff] ^
           crc32c_8x256_table[3][ one        & 0xff];
}

#endif /* !HAVE_SSE42 */

#endif /* HAVE_SSE2 */
119
120
121uint32_t
122hashBlock(const void *p)
123{
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100124 assert(lAlignPtr(p, BLOCK_ALIGN) == p);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100125
126 uint32_t crc;
127
128#ifdef HAVE_SSE2
129 crc = 0;
130
131 __m128i *q = (__m128i *)(void *)p;
132
133 crc = ~crc;
134
135 for (unsigned c = BLOCK_SIZE / (4 * sizeof *q); c; --c) {
136 __m128i m0 = mm_stream_load_si128(q++);
137 __m128i m1 = mm_stream_load_si128(q++);
138 __m128i m2 = mm_stream_load_si128(q++);
139 __m128i m3 = mm_stream_load_si128(q++);
140
141 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m0));
142 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m0));
143 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m0));
144 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m0));
145
146 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m1));
147 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m1));
148 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m1));
149 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m1));
150
151 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m2));
152 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m2));
153 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m2));
154 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m2));
155
156 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m3));
157 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m3));
158 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m3));
159 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m3));
160 }
161
162 crc = ~crc;
163
164#else /* !HAVE_SSE2 */
165
166 crc = crc32c_8bytes(p, BLOCK_SIZE);
167
168#endif
169
170 return crc;
171}
172
173
174// We must reset the data on discard, otherwise the old data could match just
175// by chance.
176//
177// XXX: if the appplication writes 0xCDCDCDCD at the start or the end of the
178// buffer range, we'll fail to detect. The only way to be 100% sure things
179// won't fall through would be to setup memory traps.
180void MemoryShadow::zero(void *_ptr, size_t _size)
181{
182 memset(_ptr, 0xCD, _size);
183}
184
185
186void MemoryShadow::cover(void *_ptr, size_t _size, bool _discard)
187{
188 assert(_ptr);
189
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100190 const uint8_t *ptr = static_cast<const uint8_t *>(_ptr);
191 const uint8_t *basePtr = lAlignPtr(ptr, BLOCK_ALIGN);
192
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100193 if (_size != size) {
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100194 static_assert(BLOCK_SIZE % BLOCK_ALIGN == 0, "inconsistent block align/size");
195 nBlocks = (ptr + _size - basePtr + BLOCK_SIZE - 1)/BLOCK_SIZE;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100196
197 hashPtr = (uint32_t *)realloc(hashPtr, nBlocks * sizeof *hashPtr);
198 size = _size;
199 }
200
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100201 realPtr = ptr;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100202
203 if (_discard) {
204 zero(_ptr, size);
205 }
206
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100207 const uint8_t *blockPtr = basePtr;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100208 if (_discard) {
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100209 hashPtr[0] = hashBlock(blockPtr);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100210 for (size_t i = 1; i < nBlocks; ++i) {
211 hashPtr[i] = hashPtr[0];
212 }
213 } else {
214 for (size_t i = 0; i < nBlocks; ++i) {
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100215 hashPtr[i] = hashBlock(blockPtr);
216 blockPtr += BLOCK_SIZE;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100217 }
218 }
219}
220
221
222void MemoryShadow::update(Callback callback) const
223{
224 const uint8_t *realStart = realPtr + size;
225 const uint8_t *realStop = realPtr;
226
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100227 const uint8_t *blockPtr = lAlignPtr(realPtr, BLOCK_ALIGN);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100228 for (size_t i = 0; i < nBlocks; ++i) {
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100229 uint32_t crc = hashBlock(blockPtr);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100230 if (crc != hashPtr[i]) {
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100231 realStart = std::min(realStart, blockPtr);
232 realStop = std::max(realStop, blockPtr + BLOCK_SIZE);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100233 }
Jose Fonsecaf1977be2016-05-11 01:07:10 +0100234 blockPtr += BLOCK_SIZE;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100235 }
236
237 realStart = std::max(realStart, realPtr);
238 realStop = std::min(realStop, realPtr + size);
239
240 // Update the rest
241 if (realStart < realStop) {
242 callback(realStart, realStop - realStart);
243 }
244}