blob: 8f8ae43e4db5fd2ac8ca85e1502b9919c1885f17 [file] [log] [blame]
Miguel Casas175526d2021-02-10 15:31:27 -05001/*
2 * Copyright 2021 The Chromium OS Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
/*
 * This test evaluates the speed at which BOs of various USE flags can be
 * accessed when mmap()ped. To represent Chrome's graphics buffer uses, a naive
 * rotation operation is implemented here in plain C. This also factors out the
 * use (or not) of SIMD instructions and/or sophisticated access patterns like
 * those employed by libyuv: this is OK here since we're only interested in
 * relative measurements comparing one BO USE flag set with another.
 * See https://tinyurl.com/cros-video-capture-buffers and b/169302186 for more
 * context.
 */
17
18#include <assert.h>
19#include <getopt.h>
Miguel Casas175526d2021-02-10 15:31:27 -050020#include <linux/dma-buf.h>
21#include <sys/ioctl.h>
22#include <time.h>
23
24#include "bs_drm.h"
25
/*
 * Evaluates |x| (typically a syscall expression returning -1 on error) and
 * keeps re-evaluating it for as long as it fails with -1 and errno is EINTR
 * or EAGAIN. Yields the value of the last evaluation: either a non-error
 * result, or -1 with errno describing a non-transient failure.
 *
 * errno is only consulted after a failure, so a stale EINTR/EAGAIN left over
 * from an earlier call can never cause a successful call to be repeated.
 * |x| is re-evaluated on every retry and must therefore be safe to repeat.
 *
 * Uses the GCC/Clang statement-expression extension.
 */
#define HANDLE_EINTR_AND_EAGAIN(x)                                  \
  ({                                                                \
    int eintr_eagain_result_;                                       \
    do {                                                            \
      eintr_eagain_result_ = (x);                                   \
    } while (eintr_eagain_result_ == -1 &&                          \
             (errno == EINTR || errno == EAGAIN));                  \
    eintr_eagain_result_;                                           \
  })
34
// Issues DMA_BUF_IOCTL_SYNC on the dma-buf |fd|, passing |flags| (a
// combination of DMA_BUF_SYNC_* bits) in an otherwise zeroed request. The
// ioctl is wrapped in HANDLE_EINTR_AND_EAGAIN and the value that wrapper
// yields is returned unchanged.
int dma_sync(int fd, __u64 flags) {
  struct dma_buf_sync request = {.flags = flags};
  return HANDLE_EINTR_AND_EAGAIN(ioctl(fd, DMA_BUF_IOCTL_SYNC, &request));
}
40
// Transposes an NV12 image: the sample at (row, col) of the source ends up at
// (col, row) of the destination. N.B. despite the name this is not a pure
// rotation; it is a clockwise 90-degree rotation followed by a horizontal
// flip.
//
// |src_y| / |src_uv| point at the source luma and interleaved chroma planes,
// with row pitches |src_stride_y| / |src_stride_uv| in bytes. |dst_y| /
// |dst_uv| and their strides describe the destination planes likewise.
// |src_width| x |src_height| is the source size in luma samples; the
// destination is therefore |src_height| samples wide and |src_width| tall.
//
// The chroma plane is moved in whole (U, V) byte pairs. The destination
// chroma plane receives (src_width + 1) / 2 rows of (src_height + 1) / 2
// pairs, i.e. exactly the chroma geometry of the transposed image, so no
// bytes outside of it are written and U/V stay interleaved.
void NV12Rotate90(const uint8_t* src_y, int src_stride_y,
                  const uint8_t* src_uv, int src_stride_uv,
                  uint8_t* dst_y, int dst_stride_y,
                  uint8_t* dst_uv, int dst_stride_uv,
                  int src_width, int src_height) {
  // Walk the luma samples in scanout order and write them in transposed
  // order, hence doing big jumps in the destination space.
  for (int row = 0; row < src_height; ++row) {
    const uint8_t* src_row = src_y + row * src_stride_y;
    for (int col = 0; col < src_width; ++col) {
      dst_y[col * dst_stride_y + row] = src_row[col];
    }
  }

  // Same idea for chroma, at half resolution (rounded up) in both
  // directions. Each element is a two-byte UV pair, so the byte offset inside
  // a row is twice the pair index.
  const int uv_src_height = (src_height + 1) / 2;
  const int uv_src_pairs = (src_width + 1) / 2;
  for (int row = 0; row < uv_src_height; ++row) {
    const uint8_t* src_row = src_uv + row * src_stride_uv;
    for (int pair = 0; pair < uv_src_pairs; ++pair) {
      uint8_t* dst_pair = dst_uv + pair * dst_stride_uv + 2 * row;
      dst_pair[0] = src_row[2 * pair];
      dst_pair[1] = src_row[2 * pair + 1];
    }
  }
}
66
67
68struct test_case {
69 uint32_t format; /* format for allocating buffer object from GBM */
70 enum gbm_bo_transfer_flags read_write;
71 enum gbm_bo_flags usage;
72};
73
74static void print_format_and_use_flags(FILE* out,
75 const struct test_case* tcase) {
76 fprintf(out, "format: ");
77 switch (tcase->format) {
78 case GBM_FORMAT_NV12:
79 fprintf(out, "GBM_FORMAT_NV12");
80 break;
81 default:
82 fprintf(out, "GBM_FORMAT_????????");
83 }
84
85 fprintf(out, ", access: %s%s",
86 (tcase->read_write & GBM_BO_TRANSFER_READ ? "R" : ""),
87 (tcase->read_write & GBM_BO_TRANSFER_WRITE ? "W" : ""));
88
89 fprintf(out, ", use flags: ");
90 bool first = true;
91 if (tcase->usage & GBM_BO_USE_SCANOUT) {
92 fprintf(out, "%sGBM_BO_USE_SCANOUT", first ? "" : " | ");
93 first = false;
94 }
95 if (tcase->usage & GBM_BO_USE_LINEAR) {
96 fprintf(out, "%sGBM_BO_USE_LINEAR", first ? "" : " | ");
97 first = false;
98 }
99 if (tcase->usage & GBM_BO_USE_TEXTURING) {
100 fprintf(out, "%sGBM_BO_USE_TEXTURING", first ? "" : " | ");
101 first = false;
102 }
103 if (tcase->usage & GBM_BO_USE_CAMERA_READ) {
104 fprintf(out, "%sGBM_BO_USE_CAMERA_READ", first ? "" : " | ");
105 first = false;
106 }
107 if (tcase->usage & GBM_BO_USE_CAMERA_WRITE) {
108 fprintf(out, "%sGBM_BO_USE_CAMERA_WRITE", first ? "" : " | ");
109 first = false;
110 }
111 if (tcase->usage & GBM_BO_USE_SW_READ_OFTEN) {
112 fprintf(out, "%sGBM_BO_USE_SW_READ_OFTEN", first ? "" : " | ");
113 first = false;
114 }
115 if (tcase->usage & GBM_BO_USE_SW_WRITE_OFTEN) {
116 fprintf(out, "%sGBM_BO_USE_SW_WRITE_OFTEN", first ? "" : " | ");
117 first = false;
118 }
119}
120
// Long command line options understood by getopt_long() in main(). The
// all-zero entry terminates the table.
static const struct option longopts[] = {
    {.name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'},
    {.name = NULL, .has_arg = 0, .flag = NULL, .val = 0},
};
125
// Prints the usage text to stdout. |argv0| is the program name shown in the
// "Usage:" line.
static void print_help(const char* argv0) {
  printf("Usage: %s [OPTIONS]\n", argv0);
  fputs(" -h, --help Print help.\n", stdout);
}
130
131int main(int argc, char** argv) {
132 // TODO(mcasas): Consider adding other formats/other operations.
133 // TODO(mcasas): Transform this list into a cartesian product like GTest does.
134 // TODO(mcasas): add command line flags to run test cases individually/by
135 // groups, and to list them.
136 const struct test_case tcases[] = {
137 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_SCANOUT},
138 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_LINEAR},
139 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_TEXTURING},
140 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_CAMERA_READ},
141 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_CAMERA_WRITE},
142 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_SW_READ_OFTEN},
143 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_SW_WRITE_OFTEN},
144 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ,
145 GBM_BO_USE_LINEAR | GBM_BO_USE_SCANOUT},
146 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ,
147 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN},
148 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ,
149 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_WRITE_OFTEN},
150 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ,
151 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN |
152 GBM_BO_USE_SW_WRITE_OFTEN},
153 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SCANOUT},
154 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_LINEAR},
155 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_TEXTURING},
156 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_CAMERA_READ},
157 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_CAMERA_WRITE},
158 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SW_READ_OFTEN},
159 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SW_WRITE_OFTEN},
160 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE,
161 GBM_BO_USE_LINEAR | GBM_BO_USE_SCANOUT},
162 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE,
163 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN},
164 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE,
165 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_WRITE_OFTEN},
166 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE,
167 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN |
168 GBM_BO_USE_SW_WRITE_OFTEN},
169 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SCANOUT},
170 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SCANOUT},
171 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_LINEAR},
172 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_TEXTURING},
173 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_CAMERA_READ},
174 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_CAMERA_WRITE},
175 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SW_READ_OFTEN},
176 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SW_WRITE_OFTEN},
177 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE,
178 GBM_BO_USE_LINEAR | GBM_BO_USE_SCANOUT},
179 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE,
180 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN},
181 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE,
182 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_WRITE_OFTEN},
183 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE,
184 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN |
185 GBM_BO_USE_SW_WRITE_OFTEN},
186 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SCANOUT},
187 };
188 const size_t tcases_size = BS_ARRAY_LEN(tcases);
189
190 // Make sure that the clock resolution is at least 1ms.
191 struct timespec clock_resolution;
192 clock_getres(CLOCK_MONOTONIC, &clock_resolution);
193 assert(clock_resolution.tv_sec == 0 && clock_resolution.tv_nsec <= 1000000);
194
195 int c;
196 while ((c = getopt_long(argc, argv, "h", longopts, NULL)) != -1) {
197 switch (c) {
198 case 'h':
199 default:
200 print_help(argv[0]);
201 return EXIT_SUCCESS;
202 }
203 }
204
205 int display_fd = bs_drm_open_main_display();
206 if (display_fd < 0) {
207 bs_debug_error("failed to open card for display");
208 return EXIT_FAILURE;
209 }
210
211 struct gbm_device* gbm = gbm_create_device(display_fd);
212 if (!gbm) {
213 bs_debug_error("failed to create gbm device");
214 return EXIT_FAILURE;
215 }
216
217 // bs_mapper_dma_buf_new() is expected to use mmap().
218 struct bs_mapper* mapper = bs_mapper_dma_buf_new();
219 if (mapper == NULL) {
220 bs_debug_error("failed to create mapper object");
221 return EXIT_FAILURE;
222 }
223
224 const uint32_t width = 1920;
225 const uint32_t height = 1080;
226
227// We allocate NUM_BOS to replicate a bit what is done in video capture.
228#define NUM_BOS 5
229 struct gbm_bo* bos[NUM_BOS];
230 uint8_t* ptr_y[NUM_BOS];
231 uint8_t* ptr_uv[NUM_BOS];
232
233 uint32_t stride_y[NUM_BOS];
234 void* map_data_y[NUM_BOS];
235 uint32_t stride_uv[NUM_BOS];
236 void* map_data_uv[NUM_BOS];
237
238#define NUM_PLANES 2
239 int gbm_bo_fds[NUM_PLANES][NUM_BOS];
240#define NUM_ITERS 10
241 printf("Running %d iterations. %d BOs allocated (%dx%d)\n", NUM_ITERS,
242 NUM_BOS, width, height);
243
244 // |draft_canvas| is allocated as if to be an ARGB buffer, and can fit NV12
245 // data of the same |width| and |height|.
246 uint8_t* draft_canvas = malloc(width * height * 4);
247 // This is not so much for clearing it as it is for accessing it once.
248 memset(draft_canvas, 0, width * height * 4);
249
250 for (size_t i = 0; i < tcases_size; i++) {
251 const struct test_case* tcase = &tcases[i];
252 print_format_and_use_flags(stdout, tcase);
253 printf(": ");
254
255 for (size_t j = 0; j < NUM_BOS; j++) {
256 bos[j] = gbm_bo_create(gbm, width, height, tcase->format, tcase->usage);
257 if (!bos[j]) {
258 printf(
259 "gbm_bo_create() failed (probably format or usage is not "
260 "supported.\n");
261 continue;
262 }
263
264 const int expected_num_planes = NUM_PLANES;
265 const int num_planes = gbm_bo_get_plane_count(bos[j]);
266 if (expected_num_planes != num_planes) {
267 printf("Incorrect number of planes, expected %d, got %d\n",
268 expected_num_planes, num_planes);
269 return EXIT_FAILURE;
270 }
271
272 ptr_y[j] = bs_mapper_map(mapper, bos[j], 0, &map_data_y[j], &stride_y[j]);
273 if (ptr_y[j] == MAP_FAILED) {
274 bs_debug_error("failed to mmap gbm bo plane 0 (Y)");
275 return EXIT_FAILURE;
276 }
277
278 ptr_uv[j] =
279 bs_mapper_map(mapper, bos[j], 1, &map_data_uv[j], &stride_uv[j]);
280 if (ptr_uv[j] == MAP_FAILED) {
281 bs_debug_error("failed to mmap gbm bo plane 1 (UV)");
282 return EXIT_FAILURE;
283 }
284
285 for (size_t plane = 0; plane < NUM_PLANES; plane++) {
Miguel Casas6d4098e2021-04-08 12:39:51 -0400286 gbm_bo_fds[plane][j] = gbm_bo_get_fd_for_plane(bos[j], plane);
Miguel Casas175526d2021-02-10 15:31:27 -0500287 if (gbm_bo_fds[plane][j] < 0) {
288 bs_debug_error("failed to get BO fd");
289 return EXIT_FAILURE;
290 }
291 }
292 }
293
294 struct timespec start, stop;
295 clock_gettime(CLOCK_MONOTONIC, &start);
296 for (size_t j = 0; j < NUM_ITERS; j++) {
297 const uint32_t bo_index = j % NUM_BOS;
298
299 if (tcase->read_write & GBM_BO_TRANSFER_READ) {
300 assert(dma_sync(gbm_bo_fds[0][bo_index],
301 DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ) == 0);
302 assert(dma_sync(gbm_bo_fds[1][bo_index],
303 DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ) == 0);
304
305 // Typical Chrome access patterns like e.g. libyuv NV12ToARGB/NV12Scale
306 // are asymmetric in the sense that they create scattered read/writes
307 // (e.g. pixel packing/unpacking operations) or simply more of those on
308 // either source or destination. A rotation operation is chosen here to
309 // avoid part of that asymmetry.
310 // TODO(mcasas): investigate other functions which might cause other
311 // memory access patterns.
312 NV12Rotate90(ptr_y[bo_index], stride_y[bo_index],
313 ptr_uv[bo_index], stride_uv[bo_index],
314 draft_canvas, height,
315 draft_canvas + (height * width), height,
316 width, height);
317
318 assert(dma_sync(gbm_bo_fds[0][bo_index],
319 DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ) == 0);
320 assert(dma_sync(gbm_bo_fds[1][bo_index],
321 DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ) == 0);
322 }
323
324 // When writing, use the next BO index so that nobody will try to optimize
325 // the whole operation chain away when having READ-then-WRITE.
326 const uint32_t next_bo_index = (j + 1) % NUM_BOS;
327 if (tcase->read_write & GBM_BO_TRANSFER_WRITE) {
328 assert(dma_sync(gbm_bo_fds[0][next_bo_index],
329 DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE) == 0);
330 assert(dma_sync(gbm_bo_fds[1][next_bo_index],
331 DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE) == 0);
332
333 // We pretend |draft_canvas| has portrait orientation, so the
334 // destination of the rotation fits into a landscape orientation BO.
335 NV12Rotate90(draft_canvas, height,
336 draft_canvas + (height * width), height,
337 ptr_y[bo_index], stride_y[bo_index],
338 ptr_uv[bo_index], stride_uv[bo_index],
339 height, width);
340
341 assert(dma_sync(gbm_bo_fds[0][next_bo_index],
342 DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE) == 0);
343 assert(dma_sync(gbm_bo_fds[1][next_bo_index],
344 DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE) == 0);
345 }
346 }
347
348 clock_gettime(CLOCK_MONOTONIC, &stop);
349 const double elapsed_ns =
350 (stop.tv_sec - start.tv_sec) * 1e9 + (stop.tv_nsec - start.tv_nsec);
351 // TODO(mcasas): find a standardized way to produce results.
352 printf("%f ms\n", elapsed_ns / 1000000.0);
353
354 for (size_t j = 0; j < NUM_BOS; j++) {
355 bs_mapper_unmap(mapper, bos[j], map_data_y[j]);
356 bs_mapper_unmap(mapper, bos[j], map_data_uv[j]);
357
358 for (size_t plane = 0; plane < NUM_PLANES; plane++)
359 close(gbm_bo_fds[plane][j]);
360 gbm_bo_destroy(bos[j]);
361 }
362 }
363
364 free(draft_canvas);
365
366 // Not really needed, but good to destroy things properly.
367 bs_mapper_destroy(mapper);
368 gbm_device_destroy(gbm);
369
370 return EXIT_SUCCESS;
371}