Miguel Casas | 175526d | 2021-02-10 15:31:27 -0500 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2021 The Chromium OS Authors. All rights reserved. |
| 3 | * Use of this source code is governed by a BSD-style license that can be |
| 4 | * found in the LICENSE file. |
| 5 | */ |
| 6 | |
| 7 | /* |
 * This test evaluates the speed at which BOs of various USE flags can be
 * accessed when mmap()ped. To represent how Chrome uses graphics buffers, a
 * naive rotation operation is implemented here in plain C. This also factors
 * out the use or not of SIMD instructions and/or sophisticated access patterns
 * like those employed by libyuv: this is OK here since we're only interested
 * in relative measurements comparing one BO USE flag set with another.
| 14 | * See https://tinyurl.com/cros-video-capture-buffers and b/169302186 for more |
| 15 | * context. |
| 16 | */ |
| 17 | |
| 18 | #include <assert.h> |
| 19 | #include <getopt.h> |
Miguel Casas | 175526d | 2021-02-10 15:31:27 -0500 | [diff] [blame] | 20 | #include <linux/dma-buf.h> |
| 21 | #include <sys/ioctl.h> |
| 22 | #include <time.h> |
| 23 | |
| 24 | #include "bs_drm.h" |
| 25 | |
/*
 * Evaluates |x| -- an expression that yields -1 and sets errno on failure,
 * e.g. a system call -- and re-evaluates it for as long as it fails with EINTR
 * or EAGAIN. Yields the value of the last evaluation.
 *
 * |x| may be evaluated more than once, so it must be safe to repeat. errno is
 * only consulted after a failure, so a stale errno left behind by an earlier
 * call cannot cause a spurious retry of a successful one.
 *
 * Relies on GNU statement expressions.
 */
#define HANDLE_EINTR_AND_EAGAIN(x)                 \
  ({                                               \
    int eintr_eagain_result_;                      \
    do {                                           \
      eintr_eagain_result_ = (x);                  \
    } while (eintr_eagain_result_ == -1 &&         \
             (errno == EINTR || errno == EAGAIN)); \
    eintr_eagain_result_;                          \
  })
| 34 | |
// Issues DMA_BUF_IOCTL_SYNC on the dma-buf |fd| with the given |flags|,
// going through HANDLE_EINTR_AND_EAGAIN. Returns the ioctl() result.
int dma_sync(int fd, __u64 flags) {
  struct dma_buf_sync request = {.flags = flags};
  const int rc =
      HANDLE_EINTR_AND_EAGAIN(ioctl(fd, DMA_BUF_IOCTL_SYNC, &request));
  return rc;
}
| 40 | |
// Naive NV12 "rotation" used as a memory access workload.
//
// N.B. This function actually does a clockwise 90-degree rotation and then a
// horizontal flip, i.e. a transposition: the sample at (row, col) of the
// source ends up at (col, row) of the destination.
//
// |src_width| and |src_height| are the dimensions of the source luma plane in
// samples; strides are in bytes. The destination luma plane receives
// |src_width| rows of |src_height| samples. The chroma plane is treated as
// interleaved (U, V) byte pairs, one pair per 2x2 block of luma samples, and
// each pair is moved as a unit: the destination chroma plane receives
// (|src_width| + 1) / 2 rows of (|src_height| + 1) / 2 pairs, which is exactly
// the chroma plane size of an NV12 image of |src_height| x |src_width|.
void NV12Rotate90(const uint8_t* src_y, int src_stride_y,
                  const uint8_t* src_uv, int src_stride_uv,
                  uint8_t* dst_y, int dst_stride_y,
                  uint8_t* dst_uv, int dst_stride_uv,
                  int src_width, int src_height) {
  // This loop walks the |src_y| samples in scanout order, but writes them in
  // the rotated order, hence doing big jumps in the destination space.
  for (int row = 0; row < src_height; ++row) {
    const uint8_t* src_row = src_y + row * src_stride_y;
    for (int col = 0; col < src_width; ++col) {
      dst_y[col * dst_stride_y + row] = src_row[col];
    }
  }

  // Same idea for chroma, but both dimensions are halved (rounding up) and
  // every element is a 2-byte (U, V) pair. Transposing pairs rather than
  // single bytes keeps U and V together and keeps all writes inside the
  // destination chroma plane.
  const int uv_src_rows = (src_height + 1) / 2;
  const int uv_src_pairs = (src_width + 1) / 2;
  for (int row = 0; row < uv_src_rows; ++row) {
    const uint8_t* src_row = src_uv + row * src_stride_uv;
    for (int pair = 0; pair < uv_src_pairs; ++pair) {
      uint8_t* dst_pair = dst_uv + pair * dst_stride_uv + 2 * row;
      dst_pair[0] = src_row[2 * pair];
      dst_pair[1] = src_row[2 * pair + 1];
    }
  }
}
| 66 | |
| 67 | |
| 68 | struct test_case { |
| 69 | uint32_t format; /* format for allocating buffer object from GBM */ |
| 70 | enum gbm_bo_transfer_flags read_write; |
| 71 | enum gbm_bo_flags usage; |
| 72 | }; |
| 73 | |
| 74 | static void print_format_and_use_flags(FILE* out, |
| 75 | const struct test_case* tcase) { |
| 76 | fprintf(out, "format: "); |
| 77 | switch (tcase->format) { |
| 78 | case GBM_FORMAT_NV12: |
| 79 | fprintf(out, "GBM_FORMAT_NV12"); |
| 80 | break; |
| 81 | default: |
| 82 | fprintf(out, "GBM_FORMAT_????????"); |
| 83 | } |
| 84 | |
| 85 | fprintf(out, ", access: %s%s", |
| 86 | (tcase->read_write & GBM_BO_TRANSFER_READ ? "R" : ""), |
| 87 | (tcase->read_write & GBM_BO_TRANSFER_WRITE ? "W" : "")); |
| 88 | |
| 89 | fprintf(out, ", use flags: "); |
| 90 | bool first = true; |
| 91 | if (tcase->usage & GBM_BO_USE_SCANOUT) { |
| 92 | fprintf(out, "%sGBM_BO_USE_SCANOUT", first ? "" : " | "); |
| 93 | first = false; |
| 94 | } |
| 95 | if (tcase->usage & GBM_BO_USE_LINEAR) { |
| 96 | fprintf(out, "%sGBM_BO_USE_LINEAR", first ? "" : " | "); |
| 97 | first = false; |
| 98 | } |
| 99 | if (tcase->usage & GBM_BO_USE_TEXTURING) { |
| 100 | fprintf(out, "%sGBM_BO_USE_TEXTURING", first ? "" : " | "); |
| 101 | first = false; |
| 102 | } |
| 103 | if (tcase->usage & GBM_BO_USE_CAMERA_READ) { |
| 104 | fprintf(out, "%sGBM_BO_USE_CAMERA_READ", first ? "" : " | "); |
| 105 | first = false; |
| 106 | } |
| 107 | if (tcase->usage & GBM_BO_USE_CAMERA_WRITE) { |
| 108 | fprintf(out, "%sGBM_BO_USE_CAMERA_WRITE", first ? "" : " | "); |
| 109 | first = false; |
| 110 | } |
| 111 | if (tcase->usage & GBM_BO_USE_SW_READ_OFTEN) { |
| 112 | fprintf(out, "%sGBM_BO_USE_SW_READ_OFTEN", first ? "" : " | "); |
| 113 | first = false; |
| 114 | } |
| 115 | if (tcase->usage & GBM_BO_USE_SW_WRITE_OFTEN) { |
| 116 | fprintf(out, "%sGBM_BO_USE_SW_WRITE_OFTEN", first ? "" : " | "); |
| 117 | first = false; |
| 118 | } |
| 119 | } |
| 120 | |
// Long command line options accepted by getopt_long(); the all-zero entry
// terminates the table.
static const struct option longopts[] = {
    {.name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'},
    {.name = NULL, .has_arg = 0, .flag = NULL, .val = 0},
};
| 125 | |
// Prints the usage text to stdout; |argv0| is shown as the program name.
static void print_help(const char* argv0) {
  printf("Usage: %s [OPTIONS]\n", argv0);
  fputs(" -h, --help Print help.\n", stdout);
}
| 130 | |
| 131 | int main(int argc, char** argv) { |
| 132 | // TODO(mcasas): Consider adding other formats/other operations. |
| 133 | // TODO(mcasas): Transform this list into a cartesian product like GTest does. |
| 134 | // TODO(mcasas): add command line flags to run test cases individually/by |
| 135 | // groups, and to list them. |
| 136 | const struct test_case tcases[] = { |
| 137 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_SCANOUT}, |
| 138 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_LINEAR}, |
| 139 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_TEXTURING}, |
| 140 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_CAMERA_READ}, |
| 141 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_CAMERA_WRITE}, |
| 142 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_SW_READ_OFTEN}, |
| 143 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_SW_WRITE_OFTEN}, |
| 144 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, |
| 145 | GBM_BO_USE_LINEAR | GBM_BO_USE_SCANOUT}, |
| 146 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, |
| 147 | GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN}, |
| 148 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, |
| 149 | GBM_BO_USE_LINEAR | GBM_BO_USE_SW_WRITE_OFTEN}, |
| 150 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, |
| 151 | GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN | |
| 152 | GBM_BO_USE_SW_WRITE_OFTEN}, |
| 153 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SCANOUT}, |
| 154 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_LINEAR}, |
| 155 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_TEXTURING}, |
| 156 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_CAMERA_READ}, |
| 157 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_CAMERA_WRITE}, |
| 158 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SW_READ_OFTEN}, |
| 159 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SW_WRITE_OFTEN}, |
| 160 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, |
| 161 | GBM_BO_USE_LINEAR | GBM_BO_USE_SCANOUT}, |
| 162 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, |
| 163 | GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN}, |
| 164 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, |
| 165 | GBM_BO_USE_LINEAR | GBM_BO_USE_SW_WRITE_OFTEN}, |
| 166 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, |
| 167 | GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN | |
| 168 | GBM_BO_USE_SW_WRITE_OFTEN}, |
| 169 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SCANOUT}, |
| 170 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SCANOUT}, |
| 171 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_LINEAR}, |
| 172 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_TEXTURING}, |
| 173 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_CAMERA_READ}, |
| 174 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_CAMERA_WRITE}, |
| 175 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SW_READ_OFTEN}, |
| 176 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SW_WRITE_OFTEN}, |
| 177 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, |
| 178 | GBM_BO_USE_LINEAR | GBM_BO_USE_SCANOUT}, |
| 179 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, |
| 180 | GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN}, |
| 181 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, |
| 182 | GBM_BO_USE_LINEAR | GBM_BO_USE_SW_WRITE_OFTEN}, |
| 183 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, |
| 184 | GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN | |
| 185 | GBM_BO_USE_SW_WRITE_OFTEN}, |
| 186 | {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SCANOUT}, |
| 187 | }; |
| 188 | const size_t tcases_size = BS_ARRAY_LEN(tcases); |
| 189 | |
| 190 | // Make sure that the clock resolution is at least 1ms. |
| 191 | struct timespec clock_resolution; |
| 192 | clock_getres(CLOCK_MONOTONIC, &clock_resolution); |
| 193 | assert(clock_resolution.tv_sec == 0 && clock_resolution.tv_nsec <= 1000000); |
| 194 | |
| 195 | int c; |
| 196 | while ((c = getopt_long(argc, argv, "h", longopts, NULL)) != -1) { |
| 197 | switch (c) { |
| 198 | case 'h': |
| 199 | default: |
| 200 | print_help(argv[0]); |
| 201 | return EXIT_SUCCESS; |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | int display_fd = bs_drm_open_main_display(); |
| 206 | if (display_fd < 0) { |
| 207 | bs_debug_error("failed to open card for display"); |
| 208 | return EXIT_FAILURE; |
| 209 | } |
| 210 | |
| 211 | struct gbm_device* gbm = gbm_create_device(display_fd); |
| 212 | if (!gbm) { |
| 213 | bs_debug_error("failed to create gbm device"); |
| 214 | return EXIT_FAILURE; |
| 215 | } |
| 216 | |
| 217 | // bs_mapper_dma_buf_new() is expected to use mmap(). |
| 218 | struct bs_mapper* mapper = bs_mapper_dma_buf_new(); |
| 219 | if (mapper == NULL) { |
| 220 | bs_debug_error("failed to create mapper object"); |
| 221 | return EXIT_FAILURE; |
| 222 | } |
| 223 | |
| 224 | const uint32_t width = 1920; |
| 225 | const uint32_t height = 1080; |
| 226 | |
| 227 | // We allocate NUM_BOS to replicate a bit what is done in video capture. |
| 228 | #define NUM_BOS 5 |
| 229 | struct gbm_bo* bos[NUM_BOS]; |
| 230 | uint8_t* ptr_y[NUM_BOS]; |
| 231 | uint8_t* ptr_uv[NUM_BOS]; |
| 232 | |
| 233 | uint32_t stride_y[NUM_BOS]; |
| 234 | void* map_data_y[NUM_BOS]; |
| 235 | uint32_t stride_uv[NUM_BOS]; |
| 236 | void* map_data_uv[NUM_BOS]; |
| 237 | |
| 238 | #define NUM_PLANES 2 |
| 239 | int gbm_bo_fds[NUM_PLANES][NUM_BOS]; |
| 240 | #define NUM_ITERS 10 |
| 241 | printf("Running %d iterations. %d BOs allocated (%dx%d)\n", NUM_ITERS, |
| 242 | NUM_BOS, width, height); |
| 243 | |
| 244 | // |draft_canvas| is allocated as if to be an ARGB buffer, and can fit NV12 |
| 245 | // data of the same |width| and |height|. |
| 246 | uint8_t* draft_canvas = malloc(width * height * 4); |
| 247 | // This is not so much for clearing it as it is for accessing it once. |
| 248 | memset(draft_canvas, 0, width * height * 4); |
| 249 | |
| 250 | for (size_t i = 0; i < tcases_size; i++) { |
| 251 | const struct test_case* tcase = &tcases[i]; |
| 252 | print_format_and_use_flags(stdout, tcase); |
| 253 | printf(": "); |
| 254 | |
| 255 | for (size_t j = 0; j < NUM_BOS; j++) { |
| 256 | bos[j] = gbm_bo_create(gbm, width, height, tcase->format, tcase->usage); |
| 257 | if (!bos[j]) { |
| 258 | printf( |
| 259 | "gbm_bo_create() failed (probably format or usage is not " |
| 260 | "supported.\n"); |
| 261 | continue; |
| 262 | } |
| 263 | |
| 264 | const int expected_num_planes = NUM_PLANES; |
| 265 | const int num_planes = gbm_bo_get_plane_count(bos[j]); |
| 266 | if (expected_num_planes != num_planes) { |
| 267 | printf("Incorrect number of planes, expected %d, got %d\n", |
| 268 | expected_num_planes, num_planes); |
| 269 | return EXIT_FAILURE; |
| 270 | } |
| 271 | |
| 272 | ptr_y[j] = bs_mapper_map(mapper, bos[j], 0, &map_data_y[j], &stride_y[j]); |
| 273 | if (ptr_y[j] == MAP_FAILED) { |
| 274 | bs_debug_error("failed to mmap gbm bo plane 0 (Y)"); |
| 275 | return EXIT_FAILURE; |
| 276 | } |
| 277 | |
| 278 | ptr_uv[j] = |
| 279 | bs_mapper_map(mapper, bos[j], 1, &map_data_uv[j], &stride_uv[j]); |
| 280 | if (ptr_uv[j] == MAP_FAILED) { |
| 281 | bs_debug_error("failed to mmap gbm bo plane 1 (UV)"); |
| 282 | return EXIT_FAILURE; |
| 283 | } |
| 284 | |
| 285 | for (size_t plane = 0; plane < NUM_PLANES; plane++) { |
Miguel Casas | 6d4098e | 2021-04-08 12:39:51 -0400 | [diff] [blame] | 286 | gbm_bo_fds[plane][j] = gbm_bo_get_fd_for_plane(bos[j], plane); |
Miguel Casas | 175526d | 2021-02-10 15:31:27 -0500 | [diff] [blame] | 287 | if (gbm_bo_fds[plane][j] < 0) { |
| 288 | bs_debug_error("failed to get BO fd"); |
| 289 | return EXIT_FAILURE; |
| 290 | } |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | struct timespec start, stop; |
| 295 | clock_gettime(CLOCK_MONOTONIC, &start); |
| 296 | for (size_t j = 0; j < NUM_ITERS; j++) { |
| 297 | const uint32_t bo_index = j % NUM_BOS; |
| 298 | |
| 299 | if (tcase->read_write & GBM_BO_TRANSFER_READ) { |
| 300 | assert(dma_sync(gbm_bo_fds[0][bo_index], |
| 301 | DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ) == 0); |
| 302 | assert(dma_sync(gbm_bo_fds[1][bo_index], |
| 303 | DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ) == 0); |
| 304 | |
| 305 | // Typical Chrome access patterns like e.g. libyuv NV12ToARGB/NV12Scale |
| 306 | // are asymmetric in the sense that they create scattered read/writes |
| 307 | // (e.g. pixel packing/unpacking operations) or simply more of those on |
| 308 | // either source or destination. A rotation operation is chosen here to |
| 309 | // avoid part of that asymmetry. |
| 310 | // TODO(mcasas): investigate other functions which might cause other |
| 311 | // memory access patterns. |
| 312 | NV12Rotate90(ptr_y[bo_index], stride_y[bo_index], |
| 313 | ptr_uv[bo_index], stride_uv[bo_index], |
| 314 | draft_canvas, height, |
| 315 | draft_canvas + (height * width), height, |
| 316 | width, height); |
| 317 | |
| 318 | assert(dma_sync(gbm_bo_fds[0][bo_index], |
| 319 | DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ) == 0); |
| 320 | assert(dma_sync(gbm_bo_fds[1][bo_index], |
| 321 | DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ) == 0); |
| 322 | } |
| 323 | |
| 324 | // When writing, use the next BO index so that nobody will try to optimize |
| 325 | // the whole operation chain away when having READ-then-WRITE. |
| 326 | const uint32_t next_bo_index = (j + 1) % NUM_BOS; |
| 327 | if (tcase->read_write & GBM_BO_TRANSFER_WRITE) { |
| 328 | assert(dma_sync(gbm_bo_fds[0][next_bo_index], |
| 329 | DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE) == 0); |
| 330 | assert(dma_sync(gbm_bo_fds[1][next_bo_index], |
| 331 | DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE) == 0); |
| 332 | |
| 333 | // We pretend |draft_canvas| has portrait orientation, so the |
| 334 | // destination of the rotation fits into a landscape orientation BO. |
| 335 | NV12Rotate90(draft_canvas, height, |
| 336 | draft_canvas + (height * width), height, |
| 337 | ptr_y[bo_index], stride_y[bo_index], |
| 338 | ptr_uv[bo_index], stride_uv[bo_index], |
| 339 | height, width); |
| 340 | |
| 341 | assert(dma_sync(gbm_bo_fds[0][next_bo_index], |
| 342 | DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE) == 0); |
| 343 | assert(dma_sync(gbm_bo_fds[1][next_bo_index], |
| 344 | DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE) == 0); |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | clock_gettime(CLOCK_MONOTONIC, &stop); |
| 349 | const double elapsed_ns = |
| 350 | (stop.tv_sec - start.tv_sec) * 1e9 + (stop.tv_nsec - start.tv_nsec); |
| 351 | // TODO(mcasas): find a standardized way to produce results. |
| 352 | printf("%f ms\n", elapsed_ns / 1000000.0); |
| 353 | |
| 354 | for (size_t j = 0; j < NUM_BOS; j++) { |
| 355 | bs_mapper_unmap(mapper, bos[j], map_data_y[j]); |
| 356 | bs_mapper_unmap(mapper, bos[j], map_data_uv[j]); |
| 357 | |
| 358 | for (size_t plane = 0; plane < NUM_PLANES; plane++) |
| 359 | close(gbm_bo_fds[plane][j]); |
| 360 | gbm_bo_destroy(bos[j]); |
| 361 | } |
| 362 | } |
| 363 | |
| 364 | free(draft_canvas); |
| 365 | |
| 366 | // Not really needed, but good to destroy things properly. |
| 367 | bs_mapper_destroy(mapper); |
| 368 | gbm_device_destroy(gbm); |
| 369 | |
| 370 | return EXIT_SUCCESS; |
| 371 | } |