// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

// There is no common header that defines the page size on both x86 and ARM,
// so define it explicitly here for convenience.
#define PAGE_SHIFT 12
#define PAGE_SIZE (1 << PAGE_SHIFT)

// IPC handshake signals
#define IPC_GOAHEAD "1"
#define IPC_DONE "2"
#define IPC_DONE_C '2'
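// The two processes take turns: each blocks on a pipe read until its peer
// writes IPC_GOAHEAD, and IPC_DONE tells the peer that this side has finished
// so no further handshaking is needed.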

// Add 's' after nouns when the count is not exactly one
#define CHECK_PLURAL(x) ((x) == 1 ? "" : "s")

// Accumulator used to suppress compiler optimizations
int g;

// Walk through the pages starting at the given page index for the specified
// number of iterations. Return the next page index and record the elapsed
// time.
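//   buf          - the buffer whose pages are accessed
//   start_idx    - page index at which to start walking
//   iters        - number of pages to touch in this call
//   num_pages    - total number of pages in buf (used to wrap around)
//   wait_time    - seconds to sleep after the walk (0 to skip)
//   elapsed_time - out parameter for the clock() ticks spent in the loop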
size_t walk_through_pages(char *buf, size_t start_idx, size_t iters,
                          size_t num_pages, unsigned int wait_time,
                          clock_t *elapsed_time)
{
  size_t idx = start_idx;

  clock_t start, end;
  start = clock();
  for (size_t i = 0; i < iters; i++) {
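    // Touch one byte per page; accumulating the result into the global g
    // keeps the access from being optimized away.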
    g += buf[idx << PAGE_SHIFT];
    idx += 1;
    // Wrap around idx
    if (idx >= num_pages)
      idx = 0;
  }
  end = clock();
  if (elapsed_time)
    *elapsed_time = end - start;
  if (wait_time)
    sleep(wait_time);
  return idx;
}

// Initialize the buffer with contents
void initialize_pages(char *buf, size_t bytes)
{
  clock_t start, end;
  start = clock();
  // Writing every byte is too slow. We just need to populate some values, and
  // writing every 4th byte seems to be enough.
  for (size_t i = 0; i < bytes; i += 4)
    buf[i] = (char)i;
  end = clock();
  printf("Initializing %zu pages took %.2f seconds\n",
         bytes / PAGE_SIZE, ((double)(end - start) / CLOCKS_PER_SEC));
}

// Iterate over the pages in the buffer repeat_count times to measure the
// throughput. When do_fork is true, run two processes concurrently to
// generate enough memory pressure, in case a single process cannot consume
// enough memory (e.g. 32-bit userspace on a 64-bit kernel).
// Instead of accessing all the pages non-stop, slice the pages into chunks
// and use IPC to coordinate the accesses.
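//   pages_per_chunk - pages accessed per handshake turn
//   repeat_count    - number of full passes over the buffer
//   wait_time       - seconds to sleep after each chunk
//   do_fork         - nonzero to fork a second process for extra pressure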
void measure_zram_throughput(char *buf, size_t bytes, size_t num_pages,
                             unsigned int pages_per_chunk,
                             unsigned int repeat_count, unsigned int wait_time,
                             int do_fork)
{
  clock_t elapsed_time, total_time = 0;
  int fd1[2], fd2[2];
  pid_t childpid;
  int receiver = -1, sender = -1;
  char sync;
  int r;

  // When forking, set up the pipes and let the child process start accessing
  // pages first.
  if (do_fork) {
    if (pipe(fd1) == -1 || pipe(fd2) == -1) {
      perror("pipe failure");
      exit(1);
    }
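    // fd1 carries handshake tokens from the child to the parent; fd2 carries
    // tokens from the parent to the child.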

    if ((childpid = fork()) == -1) {
      perror("fork failure");
      exit(1);
    }

    if (childpid == 0) {
      // Child process
      close(fd1[0]);
      close(fd2[1]);
      sender = fd1[1];
      receiver = fd2[0];

    } else {
      // Parent process
      close(fd2[0]);
      close(fd1[1]);
      sender = fd2[1];
      receiver = fd1[0];
      // Let the child process fetch pages first
      r = write(sender, IPC_GOAHEAD, 1);
    }

    // Dirty the pages again to trigger copy-on-write so that we have enough
    // memory pressure from both processes.
    // NOTE: Re-initializing in the parent as well reduces the difference
    // between the parent's and the child's results.
    initialize_pages(buf, bytes);
  }

  // Warmup: touch every page in the buffer to thrash memory first
  walk_through_pages(buf, 0, num_pages, num_pages, 0, &elapsed_time);

  size_t total_pages = num_pages * repeat_count;

  size_t page_idx = 0;
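  // Handshake with the peer only when a second process actually exists.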
  bool need_to_sync = do_fork;

  // Iterate until the specified number of pages have all been accessed
  while (total_pages != 0) {
    // Synchronize parent and child page accesses
    if (need_to_sync) {
      r = read(receiver, &sync, 1);
      // The other process is done - no need to wait anymore.
      if (sync == IPC_DONE_C)
        need_to_sync = false;
    }

    // Determine the number of pages to access in this iteration
    size_t num_pages_chunk = total_pages >= pages_per_chunk ?
                             pages_per_chunk : total_pages;

    page_idx = walk_through_pages(buf, page_idx, num_pages_chunk, num_pages,
                                  wait_time, &elapsed_time);
    total_time += elapsed_time;
    total_pages -= num_pages_chunk;

    // Let the other process proceed
    if (need_to_sync) {
      if (total_pages)
        r = write(sender, IPC_GOAHEAD, 1);
      else
        r = write(sender, IPC_DONE, 1);
    }
  }

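  // total_time only accumulates the clock() ticks spent inside the access
  // loops; the sleeps between chunks are excluded from the average below.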
  printf("Average page access speed: %.0f pages/sec\n",
         (num_pages * repeat_count) / ((double)(total_time) / CLOCKS_PER_SEC));
}

void print_usage(char *name)
{
  printf(
      "Usage: %s --size MB [--speed] [--fork] [--repeat N] [--chunk P]\n"
      "       [--wait S]\n", name);
  printf(
      "  --size MB: required; the size of the buffer in MB\n");
  printf(
      "  --speed: measure the page access throughput\n");
  printf(
      "  --fork: fork a child process to double the memory usage\n");
  printf(
      "  --repeat N: access the pages in the buffer N times\n");
  printf(
      "  --chunk P: access P pages in the buffer, then wait for S seconds\n");
  printf(
      "  --wait S: wait S seconds between chunks of page accesses\n");
}
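
// Example invocation (the binary name is illustrative):
//   ./memory-eater --size 512 --speed --repeat 4 --chunk 256 --wait 1 --fork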

int main(int argc, char* argv[])
{
  // Control flags
  int opt_do_fork = 0;
  int opt_measure_speed = 0;
  size_t opt_size_mb = 0;
  unsigned opt_repeat_count = 0;
  unsigned opt_pages_per_chunk = 0;
  unsigned opt_wait_time = 0;
  if (argc < 2) {
    print_usage(argv[0]);
    exit(1);
  }

  while (1) {
    static struct option long_options[] = {
      {"help", no_argument, 0, 'h'},
      {"fork", no_argument, 0, 'f'},
      {"speed", no_argument, 0, 'd'},
      {"size", required_argument, 0, 's'},
      {"repeat", required_argument, 0, 'r'},
      {"chunk", required_argument, 0, 'c'},
      {"wait", required_argument, 0, 'w'},
      {0, 0, 0, 0}
    };

    /* getopt_long stores the option index here. */
    int option_index = 0;

    int c = getopt_long(argc, argv, "s:r:c:w:hfd", long_options, &option_index);

    /* Detect the end of the options. */
    if (c == -1)
      break;

    switch (c) {
      case 0:
        break;
      case 'd':
        opt_measure_speed = 1;
        break;
      case 'f':
        opt_do_fork = 1;
        break;
      case 's':
        opt_size_mb = atol(optarg);
        break;
      case 'r':
        opt_repeat_count = atol(optarg);
        break;
      case 'c':
        opt_pages_per_chunk = atol(optarg);
        break;
      case 'w':
        opt_wait_time = atol(optarg);
        break;
      case 'h':
      default:
        print_usage(argv[0]);
        exit(1);
    }
  }

  if (opt_size_mb == 0) {
    fprintf(stderr, "Buffer size cannot be zero\n");
    print_usage(argv[0]);
    exit(1);
  }
  printf("The test is configured as follows:\n");
  printf("- Size of buffer: %zu MB\n", opt_size_mb);
  printf("- The test will sleep for %u second%s after accessing %u page%s\n",
         opt_wait_time, CHECK_PLURAL(opt_wait_time),
         opt_pages_per_chunk, CHECK_PLURAL(opt_pages_per_chunk));
  if (opt_measure_speed)
    printf("- Pages in the buffer will be accessed %u time%s\n",
           opt_repeat_count, CHECK_PLURAL(opt_repeat_count));
  if (opt_do_fork)
    printf("- The test will fork a child process to double the memory usage\n");

  // Since the size is given in MB, bytes is always a multiple of PAGE_SIZE
  size_t bytes = opt_size_mb * 1024 * 1024;
  size_t num_pages = bytes / PAGE_SIZE;

  char *buf = (char*) malloc(bytes);
  if (buf == NULL) {
    fprintf(stderr, "Buffer allocation failed\n");
    exit(1);
  }
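  // NOTE: on Linux this allocation typically succeeds via overcommit; the
  // pages are not physically backed until initialize_pages() writes to them.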

  // First, populate the memory buffer
  initialize_pages(buf, bytes);

  // Measure the throughput (pages/sec)
  if (opt_measure_speed) {
    fflush(stdout);
    measure_zram_throughput(buf, bytes, num_pages, opt_pages_per_chunk,
                            opt_repeat_count, opt_wait_time, opt_do_fork);
  } else {
    // Otherwise the program will continuously run in the background.
    printf("%d waiting for kill\n", getpid());
    printf("consuming memory\n");
    size_t page_idx = 0;
    printf("Will touch pages covering %zu KB of data per %u second%s\n",
           (size_t)opt_pages_per_chunk * (PAGE_SIZE / 1024), opt_wait_time,
           CHECK_PLURAL(opt_wait_time));
    fflush(stdout);
    // Access the specified page chunks then wait. Repeat forever.
    if (opt_pages_per_chunk) {
      while (1) {
        // Touch the next chunk of pages; walk_through_pages() then sleeps
        // for opt_wait_time seconds.
        page_idx = walk_through_pages(buf, page_idx, opt_pages_per_chunk,
                                      num_pages, opt_wait_time, NULL);
      }
    } else {
      // If opt_pages_per_chunk is 0, just hold on to the pages without
      // accessing them after allocation.
      pause();
    }
  }
  return 0;
}