blob: fc89a8db4d28a2b40aefbe7f97c543aa9a340539 [file] [log] [blame]
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +01001/**************************************************************************
2 *
3 * Copyright 2014-2016 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 *
24 **************************************************************************/
25
26
#include "memtrace.hpp"

#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>

#include "crc32c.hpp"
36
37
// Compile-time SIMD feature selection.
//
// HAVE_SSE2 is defined when building for x86-64, or for 32-bit x86 with either
// GCC-style SSE2 enabled (__SSE2__) or MSVC (_M_IX86).  HAVE_SSE41 and
// HAVE_SSE42 are explicitly undefined, so the SSE2-only fallbacks further down
// in this file are always the ones that get compiled.
#if \
    (defined(__i386__) && defined(__SSE2__)) /* gcc */ || \
    defined(_M_IX86) /* msvc */ || \
    defined(__x86_64__) /* gcc */ || \
    defined(_M_X64) /* msvc */ || \
    defined(_M_AMD64) /* msvc */

#  define HAVE_SSE2

// TODO: Detect and leverage SSE 4.1 and 4.2 at runtime
#  undef HAVE_SSE41
#  undef HAVE_SSE42

#endif
52
53
54#if defined(HAVE_SSE42)
55# include <nmmintrin.h>
56#elif defined(HAVE_SSE41)
57# include <smmintrin.h>
58#elif defined(HAVE_SSE2)
59# include <emmintrin.h>
60#endif
61
62
// Size in bytes of the blocks that are hashed and compared individually.
// lAlignPtr() masks addresses with (BLOCK_SIZE - 1), so this needs to be a
// power of two, and the SSE2 loop in hashBlock() consumes four 16-byte vectors
// per iteration, so it also needs to be a multiple of 64.
#define BLOCK_SIZE 512
64
65
/**
 * Round a pointer down ("left") to an alignment boundary.
 *
 * The low bits selected by (alignment - 1) are cleared, so the result is a
 * multiple of alignment only when alignment is a power of two.
 */
template< class T >
static inline T *
lAlignPtr(T *p, uintptr_t alignment)
{
    const uintptr_t lowBits = alignment - 1;
    const uintptr_t address = reinterpret_cast<uintptr_t>(p);
    return reinterpret_cast<T *>(address & ~lowBits);
}
72
73
/**
 * Round a pointer up ("right") to an alignment boundary.
 *
 * (alignment - 1) is added before the low bits are cleared, so an address that
 * is already on a boundary is returned unchanged.  The result is a multiple of
 * alignment only when alignment is a power of two.
 */
template< class T >
static inline T *
rAlignPtr(T *p, uintptr_t alignment)
{
    const uintptr_t lowBits = alignment - 1;
    const uintptr_t bumped = reinterpret_cast<uintptr_t>(p) + lowBits;
    return reinterpret_cast<T *>(bumped & ~lowBits);
}
80
81
82#ifdef HAVE_SSE2
83
// Shims for the vector load and per-lane extraction used by hashBlock().
// With HAVE_SSE41 they map directly onto the SSE4.1 intrinsics; otherwise they
// are expressed with SSE2 intrinsics only.
#ifdef HAVE_SSE41
    #define mm_stream_load_si128 _mm_stream_load_si128
    #define mm_extract_epi32_0(x) _mm_extract_epi32(x, 0)
    #define mm_extract_epi32_1(x) _mm_extract_epi32(x, 1)
    #define mm_extract_epi32_2(x) _mm_extract_epi32(x, 2)
    #define mm_extract_epi32_3(x) _mm_extract_epi32(x, 3)
#else /* !HAVE_SSE41 */
    // Regular aligned load stands in for the streaming load.
    #define mm_stream_load_si128 _mm_load_si128
    // Lane 0 is read directly; lanes 1-3 are first replicated across the
    // vector with a shuffle so that the wanted value ends up in lane 0.
    #define mm_extract_epi32_0(x) _mm_cvtsi128_si32(x)
    #define mm_extract_epi32_1(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(1,1,1,1)))
    #define mm_extract_epi32_2(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(2,2,2,2)))
    #define mm_extract_epi32_3(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(3,3,3,3)))
#endif /* !HAVE_SSE41 */
97
98#ifdef HAVE_SSE42
99
100#define mm_crc32_u32 _mm_crc32_u32
101
102#else /* !HAVE_SSE42 */
103
104static inline uint32_t
105mm_crc32_u32(uint32_t crc, uint32_t current)
106{
107 uint32_t one = current ^ crc;
108 crc = crc32c_8x256_table[0][ one >> 24 ] ^
109 crc32c_8x256_table[1][(one >> 16) & 0xff] ^
110 crc32c_8x256_table[2][(one >> 8) & 0xff] ^
111 crc32c_8x256_table[3][ one & 0xff];
112 return crc;
113}
114
115#endif /* !HAVE_SSE42 */
116
117#endif /* HAVE_SSE2 */
118
119
120uint32_t
121hashBlock(const void *p)
122{
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100123 assert((intptr_t)p % BLOCK_SIZE == 0);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100124
125 uint32_t crc;
126
127#ifdef HAVE_SSE2
128 crc = 0;
129
130 __m128i *q = (__m128i *)(void *)p;
131
132 crc = ~crc;
133
134 for (unsigned c = BLOCK_SIZE / (4 * sizeof *q); c; --c) {
135 __m128i m0 = mm_stream_load_si128(q++);
136 __m128i m1 = mm_stream_load_si128(q++);
137 __m128i m2 = mm_stream_load_si128(q++);
138 __m128i m3 = mm_stream_load_si128(q++);
139
140 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m0));
141 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m0));
142 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m0));
143 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m0));
144
145 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m1));
146 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m1));
147 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m1));
148 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m1));
149
150 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m2));
151 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m2));
152 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m2));
153 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m2));
154
155 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m3));
156 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m3));
157 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m3));
158 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m3));
159 }
160
161 crc = ~crc;
162
163#else /* !HAVE_SSE2 */
164
165 crc = crc32c_8bytes(p, BLOCK_SIZE);
166
167#endif
168
169 return crc;
170}
171
172
173// We must reset the data on discard, otherwise the old data could match just
174// by chance.
175//
176// XXX: if the appplication writes 0xCDCDCDCD at the start or the end of the
177// buffer range, we'll fail to detect. The only way to be 100% sure things
178// won't fall through would be to setup memory traps.
179void MemoryShadow::zero(void *_ptr, size_t _size)
180{
181 memset(_ptr, 0xCD, _size);
182}
183
184
185void MemoryShadow::cover(void *_ptr, size_t _size, bool _discard)
186{
187 assert(_ptr);
188
189 if (_size != size) {
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100190 nBlocks = ((intptr_t)_ptr + _size + BLOCK_SIZE - 1)/BLOCK_SIZE - (intptr_t)_ptr/BLOCK_SIZE;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100191
192 hashPtr = (uint32_t *)realloc(hashPtr, nBlocks * sizeof *hashPtr);
193 size = _size;
194 }
195
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100196 realPtr = (const uint8_t *)_ptr;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100197
198 if (_discard) {
199 zero(_ptr, size);
200 }
201
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100202 const uint8_t *p = lAlignPtr((const uint8_t *)_ptr, BLOCK_SIZE);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100203 if (_discard) {
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100204 hashPtr[0] = hashBlock(p);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100205 for (size_t i = 1; i < nBlocks; ++i) {
206 hashPtr[i] = hashPtr[0];
207 }
208 } else {
209 for (size_t i = 0; i < nBlocks; ++i) {
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100210 hashPtr[i] = hashBlock(p);
211 p += BLOCK_SIZE;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100212 }
213 }
214}
215
216
217void MemoryShadow::update(Callback callback) const
218{
219 const uint8_t *realStart = realPtr + size;
220 const uint8_t *realStop = realPtr;
221
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100222 const uint8_t *p = lAlignPtr(realPtr, BLOCK_SIZE);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100223 for (size_t i = 0; i < nBlocks; ++i) {
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100224 uint32_t crc = hashBlock(p);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100225 if (crc != hashPtr[i]) {
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100226 realStart = std::min(realStart, p);
227 realStop = std::max(realStop, p + BLOCK_SIZE);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100228 }
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100229 p += BLOCK_SIZE;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100230 }
231
232 realStart = std::max(realStart, realPtr);
233 realStop = std::min(realStop, realPtr + size);
234
235 // Update the rest
236 if (realStart < realStop) {
237 callback(realStart, realStop - realStart);
238 }
239}