blob: fa48a51b481a822faead9c360874bba0298b4bd0 [file] [log] [blame]
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +01001/**************************************************************************
2 *
3 * Copyright 2014-2016 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 *
24 **************************************************************************/
25
26
27#include "memtrace.hpp"
28
29#include <assert.h>
30#include <string.h>
31#include <stdio.h>
32
33#include <algorithm>
34
35#include "crc32c.hpp"
36
37
38#if \
Jose Fonsecaeccf4232016-05-06 10:18:40 +010039 (defined(__i386__) && defined(__SSE2__)) /* gcc */ || \
40 defined(_M_IX86) /* msvc */ || \
41 defined(__x86_64__) /* gcc */ || \
42 defined(_M_X64) /* msvc */ || \
43 defined(_M_AMD64) /* msvc */
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +010044
Jose Fonsecaeccf4232016-05-06 10:18:40 +010045# define HAVE_SSE2
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +010046
47// TODO: Detect and leverage SSE 4.1 and 4.2 at runtime
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +010048
49#endif
50
51
52#if defined(HAVE_SSE42)
53# include <nmmintrin.h>
54#elif defined(HAVE_SSE41)
55# include <smmintrin.h>
56#elif defined(HAVE_SSE2)
57# include <emmintrin.h>
58#endif
59
60
// Size, in bytes, of the unit that gets hashed: hashBlock() checksums exactly
// BLOCK_SIZE bytes and asserts that its argument is aligned to BLOCK_SIZE.
// The pointer alignment helpers mask with (alignment - 1), so this value is
// expected to be a power of two.
61#define BLOCK_SIZE 512
62
63
// Round the pointer p down to the nearest multiple of alignment.
//
// alignment must be a non-zero power of two: the result is computed by
// clearing the low bits of the address, which is only meaningful for such
// values.  A pointer that is already aligned is returned unchanged.
template< class T >
static inline T *
lAlignPtr(T *p, uintptr_t alignment)
{
    assert(alignment != 0 && (alignment & (alignment - 1)) == 0);

    const uintptr_t lowBits = alignment - 1;
    const uintptr_t address = reinterpret_cast<uintptr_t>(p);

    return reinterpret_cast<T *>(address & ~lowBits);
}
70
71
// Round the pointer p up to the nearest multiple of alignment.
//
// alignment must be a non-zero power of two: the result is computed by
// adding (alignment - 1) and then clearing the low bits of the address.  A
// pointer that is already aligned is returned unchanged.
template< class T >
static inline T *
rAlignPtr(T *p, uintptr_t alignment)
{
    assert(alignment != 0 && (alignment & (alignment - 1)) == 0);

    const uintptr_t lowBits = alignment - 1;
    const uintptr_t bumped = reinterpret_cast<uintptr_t>(p) + lowBits;

    return reinterpret_cast<T *>(bumped & ~lowBits);
}
78
79
80#ifdef HAVE_SSE2
81
// Thin wrappers so that hashBlock() can be written once for both instruction
// set levels:
//   mm_stream_load_si128(p)  - load one __m128i from p
//   mm_extract_epi32_N(x)    - read 32-bit lane N (0..3) of x
82#ifdef HAVE_SSE41
 // With SSE 4.1 both operations map directly onto intrinsics.
83 #define mm_stream_load_si128 _mm_stream_load_si128
84 #define mm_extract_epi32_0(x) _mm_extract_epi32(x, 0)
85 #define mm_extract_epi32_1(x) _mm_extract_epi32(x, 1)
86 #define mm_extract_epi32_2(x) _mm_extract_epi32(x, 2)
87 #define mm_extract_epi32_3(x) _mm_extract_epi32(x, 3)
88#else /* !HAVE_SSE41 */
 // SSE2 only: use the ordinary aligned load, and read lane N by replicating
 // it across the vector with a shuffle and then taking the low 32 bits.
89 #define mm_stream_load_si128 _mm_load_si128
90 #define mm_extract_epi32_0(x) _mm_cvtsi128_si32(x)
91 #define mm_extract_epi32_1(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(1,1,1,1)))
92 #define mm_extract_epi32_2(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(2,2,2,2)))
93 #define mm_extract_epi32_3(x) _mm_cvtsi128_si32(_mm_shuffle_epi32(x,_MM_SHUFFLE(3,3,3,3)))
94#endif /* !HAVE_SSE41 */
95
96#ifdef HAVE_SSE42
97
98#define mm_crc32_u32 _mm_crc32_u32
99
100#else /* !HAVE_SSE42 */
101
102static inline uint32_t
103mm_crc32_u32(uint32_t crc, uint32_t current)
104{
105 uint32_t one = current ^ crc;
106 crc = crc32c_8x256_table[0][ one >> 24 ] ^
107 crc32c_8x256_table[1][(one >> 16) & 0xff] ^
108 crc32c_8x256_table[2][(one >> 8) & 0xff] ^
109 crc32c_8x256_table[3][ one & 0xff];
110 return crc;
111}
112
113#endif /* !HAVE_SSE42 */
114
115#endif /* HAVE_SSE2 */
116
117
118uint32_t
119hashBlock(const void *p)
120{
Danylo Piliaievd107f892019-05-14 18:38:26 +0300121 assert((uintptr_t)p % BLOCK_SIZE == 0);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100122
123 uint32_t crc;
124
125#ifdef HAVE_SSE2
126 crc = 0;
127
128 __m128i *q = (__m128i *)(void *)p;
129
130 crc = ~crc;
131
132 for (unsigned c = BLOCK_SIZE / (4 * sizeof *q); c; --c) {
133 __m128i m0 = mm_stream_load_si128(q++);
134 __m128i m1 = mm_stream_load_si128(q++);
135 __m128i m2 = mm_stream_load_si128(q++);
136 __m128i m3 = mm_stream_load_si128(q++);
137
138 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m0));
139 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m0));
140 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m0));
141 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m0));
142
143 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m1));
144 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m1));
145 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m1));
146 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m1));
147
148 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m2));
149 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m2));
150 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m2));
151 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m2));
152
153 crc = mm_crc32_u32(crc, mm_extract_epi32_0(m3));
154 crc = mm_crc32_u32(crc, mm_extract_epi32_1(m3));
155 crc = mm_crc32_u32(crc, mm_extract_epi32_2(m3));
156 crc = mm_crc32_u32(crc, mm_extract_epi32_3(m3));
157 }
158
159 crc = ~crc;
160
161#else /* !HAVE_SSE2 */
162
163 crc = crc32c_8bytes(p, BLOCK_SIZE);
164
165#endif
166
167 return crc;
168}
169
170
// We must reset the data on discard, otherwise the old data could match just
// by chance.
//
// XXX: if the application writes 0xCDCDCDCD at the start or the end of the
// buffer range, we'll fail to detect it.  The only way to be 100% sure things
// won't fall through would be to set up memory traps.
177void MemoryShadow::zero(void *_ptr, size_t _size)
178{
179 memset(_ptr, 0xCD, _size);
180}
181
182
183void MemoryShadow::cover(void *_ptr, size_t _size, bool _discard)
184{
185 assert(_ptr);
186
Andrii Kryvytskyi9bda0642019-05-29 16:19:18 +0300187 if (_size != size || _ptr != realPtr) {
Danylo Piliaievd107f892019-05-14 18:38:26 +0300188 nBlocks = ((uintptr_t)_ptr + _size + BLOCK_SIZE - 1)/BLOCK_SIZE - (uintptr_t)_ptr/BLOCK_SIZE;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100189
190 hashPtr = (uint32_t *)realloc(hashPtr, nBlocks * sizeof *hashPtr);
191 size = _size;
192 }
193
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100194 realPtr = (const uint8_t *)_ptr;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100195
196 if (_discard) {
197 zero(_ptr, size);
198 }
199
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100200 const uint8_t *p = lAlignPtr((const uint8_t *)_ptr, BLOCK_SIZE);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100201 if (_discard) {
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100202 hashPtr[0] = hashBlock(p);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100203 for (size_t i = 1; i < nBlocks; ++i) {
204 hashPtr[i] = hashPtr[0];
205 }
206 } else {
207 for (size_t i = 0; i < nBlocks; ++i) {
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100208 hashPtr[i] = hashBlock(p);
209 p += BLOCK_SIZE;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100210 }
211 }
212}
213
214
215void MemoryShadow::update(Callback callback) const
216{
217 const uint8_t *realStart = realPtr + size;
218 const uint8_t *realStop = realPtr;
219
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100220 const uint8_t *p = lAlignPtr(realPtr, BLOCK_SIZE);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100221 for (size_t i = 0; i < nBlocks; ++i) {
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100222 uint32_t crc = hashBlock(p);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100223 if (crc != hashPtr[i]) {
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100224 realStart = std::min(realStart, p);
225 realStop = std::max(realStop, p + BLOCK_SIZE);
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100226 }
Jose Fonsecabac0ea02017-06-09 15:23:37 +0100227 p += BLOCK_SIZE;
Jose Fonseca1ee7c4c2016-05-05 16:48:29 +0100228 }
229
230 realStart = std::max(realStart, realPtr);
231 realStop = std::min(realStop, realPtr + size);
232
233 // Update the rest
234 if (realStart < realStop) {
235 callback(realStart, realStop - realStart);
236 }
237}