/**************************************************************************
 *
 * Copyright 2014-2016 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 **************************************************************************/
25
26
27#include "memtrace.hpp"
28
29#include <assert.h>
30#include <string.h>
31#include <stdio.h>
32
33#include <algorithm>
34
35#include "crc32c.hpp"
36
37
38#if \
39 defined(__i386__) /* gcc */ || defined(_M_IX86) /* msvc */ || \
40 defined(__x86_64__) /* gcc */ || defined(_M_X64) /* msvc */ || defined(_M_AMD64) /* msvc */
41
42#define HAVE_SSE2
43
44// TODO: Detect and leverage SSE 4.1 and 4.2 at runtime
45#undef HAVE_SSE41
46#undef HAVE_SSE42
47
48#endif
49
50
51#if defined(HAVE_SSE42)
52# include <nmmintrin.h>
53#elif defined(HAVE_SSE41)
54# include <smmintrin.h>
55#elif defined(HAVE_SSE2)
56# include <emmintrin.h>
57#endif
58
59
60#define BLOCK_SIZE 512
61
62
/**
 * Round @a p down to the previous multiple of @a alignment.
 *
 * @param p          pointer to align
 * @param alignment  alignment in bytes; must be a non-zero power of two,
 *                   since the mask trick below only works for those
 * @return the largest pointer <= @a p that is a multiple of @a alignment
 */
template< class T >
static inline T *
lAlignPtr(T *p, uintptr_t alignment)
{
    // ~(alignment - 1) is only a valid alignment mask for powers of two.
    assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
    return reinterpret_cast<T *>(reinterpret_cast<uintptr_t>(p) & ~(alignment - 1));
}
69
70
/**
 * Round @a p up to the next multiple of @a alignment.
 *
 * @param p          pointer to align
 * @param alignment  alignment in bytes; must be a non-zero power of two,
 *                   since the mask trick below only works for those
 * @return the smallest pointer >= @a p that is a multiple of @a alignment
 */
template< class T >
static inline T *
rAlignPtr(T *p, uintptr_t alignment)
{
    // ~(alignment - 1) is only a valid alignment mask for powers of two.
    assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
    return reinterpret_cast<T *>((reinterpret_cast<uintptr_t>(p) + alignment - 1) & ~(alignment - 1));
}
77
78
79#ifdef HAVE_SSE2
80
81#ifdef HAVE_SSE41
82 #define mm_stream_load_si128 _mm_stream_load_si128
83 #define mm_extract_epi32_0(x) _mm_extract_epi32(x, 0)
84 #define mm_extract_epi32_1(x) _mm_extract_epi32(x, 1)
85 #define mm_extract_epi32_2(x) _mm_extract_epi32(x, 2)
86 #define mm_extract_epi32_3(x) _mm_extract_epi32(x, 3)
87#else /* !HAVE_SSE41 */
88 #define mm_stream_load_si128 _mm_load_si128
89 #define mm_extract_epi32_0(x) _mm_cvtsi128_si32(x)
90 #define mm_extract_epi32_1(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(1,1,1,1)))
91 #define mm_extract_epi32_2(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(2,2,2,2)))
92 #define mm_extract_epi32_3(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(3,3,3,3)))
93#endif /* !HAVE_SSE41 */
94
95#ifdef HAVE_SSE42
96
97#define mm_crc32_u32 _mm_crc32_u32
98
99#else /* !HAVE_SSE42 */
100
101static inline uint32_t
102mm_crc32_u32(uint32_t crc, uint32_t current)
103{
104 uint32_t one = current ^ crc;
105 crc = crc32c_8x256_table[0][ one >> 24 ] ^
106 crc32c_8x256_table[1][(one >> 16) & 0xff] ^
107 crc32c_8x256_table[2][(one >> 8) & 0xff] ^
108 crc32c_8x256_table[3][ one & 0xff];
109 return crc;
110}
111
112#endif /* !HAVE_SSE42 */
113
114#endif /* HAVE_SSE2 */
115
116
/**
 * Hash one BLOCK_SIZE-byte block starting at @a p.
 *
 * @param p  start of the block; must be BLOCK_SIZE-aligned (asserted)
 * @return 32-bit hash of the block — CRC-32C judging by the crc32c_*
 *         helpers and the SSE4.2 crc32 fallback table used below
 */
uint32_t
hashBlock(const void *p)
{
    assert((intptr_t)p % BLOCK_SIZE == 0);

    uint32_t crc;

#ifdef HAVE_SSE2
    crc = 0;

    // C-style cast deliberately drops const: mm_stream_load_si128 (i.e.
    // _mm_stream_load_si128 / _mm_load_si128) takes a non-const __m128i *.
    __m128i *q = (__m128i *)(void *)p;

    // Standard CRC pre-conditioning: start from all-ones.
    crc = ~crc;

    // Consume 64 bytes (four 16-byte vectors) per iteration; BLOCK_SIZE
    // (512) is a multiple of 64 so there is no tail to handle.
    for (unsigned c = BLOCK_SIZE / (4 * sizeof *q); c; --c) {
        __m128i m0 = mm_stream_load_si128(q++);
        __m128i m1 = mm_stream_load_si128(q++);
        __m128i m2 = mm_stream_load_si128(q++);
        __m128i m3 = mm_stream_load_si128(q++);

        // Fold each 32-bit lane of every vector into the CRC, strictly in
        // memory order — the fold order is part of the hash value.
        crc = mm_crc32_u32(crc, mm_extract_epi32_0(m0));
        crc = mm_crc32_u32(crc, mm_extract_epi32_1(m0));
        crc = mm_crc32_u32(crc, mm_extract_epi32_2(m0));
        crc = mm_crc32_u32(crc, mm_extract_epi32_3(m0));

        crc = mm_crc32_u32(crc, mm_extract_epi32_0(m1));
        crc = mm_crc32_u32(crc, mm_extract_epi32_1(m1));
        crc = mm_crc32_u32(crc, mm_extract_epi32_2(m1));
        crc = mm_crc32_u32(crc, mm_extract_epi32_3(m1));

        crc = mm_crc32_u32(crc, mm_extract_epi32_0(m2));
        crc = mm_crc32_u32(crc, mm_extract_epi32_1(m2));
        crc = mm_crc32_u32(crc, mm_extract_epi32_2(m2));
        crc = mm_crc32_u32(crc, mm_extract_epi32_3(m2));

        crc = mm_crc32_u32(crc, mm_extract_epi32_0(m3));
        crc = mm_crc32_u32(crc, mm_extract_epi32_1(m3));
        crc = mm_crc32_u32(crc, mm_extract_epi32_2(m3));
        crc = mm_crc32_u32(crc, mm_extract_epi32_3(m3));
    }

    // Standard CRC post-conditioning.
    crc = ~crc;

#else /* !HAVE_SSE2 */

    // Scalar fallback from crc32c.hpp.
    crc = crc32c_8bytes(p, BLOCK_SIZE);

#endif

    return crc;
}
168
169
// We must reset the data on discard, otherwise the old data could match just
// by chance.
//
// XXX: if the application writes 0xCDCDCDCD at the start or the end of the
// buffer range, we'll fail to detect it. The only way to be 100% sure things
// won't fall through would be to set up memory traps.
176void MemoryShadow::zero(void *_ptr, size_t _size)
177{
178 memset(_ptr, 0xCD, _size);
179}
180
181
/**
 * (Re)attach the shadow to the range [_ptr, _ptr + _size) and record one
 * hash per BLOCK_SIZE-aligned block overlapping that range.
 *
 * @param _ptr      start of the tracked memory (need not be block-aligned)
 * @param _size     size of the tracked memory in bytes
 * @param _discard  true when the old contents were discarded; the range is
 *                  then poisoned via zero() before hashing
 */
void MemoryShadow::cover(void *_ptr, size_t _size, bool _discard)
{
    assert(_ptr);

    if (_size != size) {
        // Count of BLOCK_SIZE-aligned blocks overlapping the range; the
        // first/last blocks may extend beyond [_ptr, _ptr + _size).
        // NOTE(review): nBlocks also depends on _ptr's offset within a
        // block, but is only recomputed when the size changes — a call
        // with the same size and a differently-aligned pointer would keep
        // a stale nBlocks. Confirm callers never do that.
        nBlocks = ((intptr_t)_ptr + _size + BLOCK_SIZE - 1)/BLOCK_SIZE - (intptr_t)_ptr/BLOCK_SIZE;

        // NOTE(review): realloc return value is unchecked; on failure the
        // old buffer leaks and hashPtr becomes NULL.
        hashPtr = (uint32_t *)realloc(hashPtr, nBlocks * sizeof *hashPtr);
        size = _size;
    }

    realPtr = (const uint8_t *)_ptr;

    if (_discard) {
        zero(_ptr, size);
    }

    const uint8_t *p = lAlignPtr((const uint8_t *)_ptr, BLOCK_SIZE);
    if (_discard) {
        // Every block was just filled with the same poison pattern, so
        // hash one block and replicate that value.
        // NOTE(review): if _ptr or _ptr + size is not block-aligned, the
        // first/last blocks contain bytes outside the poisoned range and
        // their true hashes differ from hashPtr[0]; update() would then
        // report those blocks dirty immediately. Conservative (over-copy,
        // never under-copy), but verify it is intended.
        hashPtr[0] = hashBlock(p);
        for (size_t i = 1; i < nBlocks; ++i) {
            hashPtr[i] = hashPtr[0];
        }
    } else {
        // Hash each overlapping block of the live contents.
        for (size_t i = 0; i < nBlocks; ++i) {
            hashPtr[i] = hashBlock(p);
            p += BLOCK_SIZE;
        }
    }
}
212
213
214void MemoryShadow::update(Callback callback) const
215{
216 const uint8_t *realStart = realPtr + size;
217 const uint8_t *realStop = realPtr;
218
219 const uint8_t *p = lAlignPtr(realPtr, BLOCK_SIZE);
220 for (size_t i = 0; i < nBlocks; ++i) {
221 uint32_t crc = hashBlock(p);
222 if (crc != hashPtr[i]) {
223 realStart = std::min(realStart, p);
224 realStop = std::max(realStop, p + BLOCK_SIZE);
225 }
226 p += BLOCK_SIZE;
227 }
228
229 realStart = std::max(realStart, realPtr);
230 realStop = std::min(realStop, realPtr + size);
231
232 // Update the rest
233 if (realStart < realStop) {
234 callback(realStart, realStop - realStart);
235 }
236}