blob: 255f5470ed611cc841b62dea505304508e6df4c1 [file] [log] [blame]
/*
 * Copyright 2021 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
6
/*
 * This test evaluates the speed at which BOs of various USE flags can be
 * accessed when mmap()ped. To represent Chrome graphics buffer uses, a naive
 * rotation operation is implemented here in plain C. This also factors out
 * the use or not of SIMD instructions and/or sophisticated access patterns
 * like those employed by libyuv: this is OK here since we're only interested
 * in relative measurements comparing one BO USE flag set with another.
 * See https://tinyurl.com/cros-video-capture-buffers and b/169302186 for more
 * context.
 */
17
18#include <assert.h>
19#include <getopt.h>
20#include <libyuv.h>
21#include <linux/dma-buf.h>
22#include <sys/ioctl.h>
23#include <time.h>
24
25#include "bs_drm.h"
26
/*
 * Evaluates the expression |x| (typically a system call returning int) and
 * re-evaluates it for as long as it fails, i.e. yields -1, with errno set to
 * EINTR or EAGAIN. The value of the whole macro is the last value of |x|.
 *
 * errno is only meaningful after a failure, so it is consulted only when |x|
 * returned -1; a successful call is never retried, even if a stale EINTR or
 * EAGAIN is still stored in errno from some earlier call.
 *
 * Relies on the GNU statement-expression extension. The local is given a
 * long name so that it cannot capture an identifier used inside |x|.
 */
#define HANDLE_EINTR_AND_EAGAIN(x)                              \
  ({                                                            \
    int eintr_eagain_result;                                    \
    do {                                                        \
      eintr_eagain_result = (x);                                \
    } while (eintr_eagain_result == -1 &&                       \
             (errno == EINTR || errno == EAGAIN));              \
    eintr_eagain_result;                                        \
  })
35
/*
 * Issues a DMA_BUF_IOCTL_SYNC request carrying |flags| on the file descriptor
 * |fd|, going through HANDLE_EINTR_AND_EAGAIN for the retry policy.
 *
 * Returns whatever the (possibly retried) ioctl() returned.
 */
int dma_sync(int fd, __u64 flags) {
  /* Every member other than |flags| is zero-initialized. */
  struct dma_buf_sync request = {.flags = flags};
  const int rc =
      HANDLE_EINTR_AND_EAGAIN(ioctl(fd, DMA_BUF_IOCTL_SYNC, &request));
  return rc;
}
41
// N.B. This function actually does a clockwise 90-degree rotation and then a
// horizontal flip, i.e. it transposes the image: the sample at source
// (row, col) lands at destination (col, row).
//
// |src_y|/|src_uv| are the source luma and interleaved chroma planes, with
// |src_stride_y|/|src_stride_uv| bytes between consecutive rows. The
// destination planes |dst_y|/|dst_uv| describe an image that is |src_height|
// samples wide and |src_width| samples tall, with |dst_stride_y| and
// |dst_stride_uv| bytes between rows. |src_width| and |src_height| are the
// source dimensions in luma samples.
//
// The chroma plane is moved in units of two bytes (one U/V pair), so pairs
// stay together and the destination chroma plane receives
// (src_width + 1) / 2 rows of 2 * ((src_height + 1) / 2) bytes each, which is
// exactly the size of the chroma plane of the transposed image.
void NV12Rotate90(const uint8_t* src_y, int src_stride_y,
                  const uint8_t* src_uv, int src_stride_uv,
                  uint8_t* dst_y, int dst_stride_y,
                  uint8_t* dst_uv, int dst_stride_uv,
                  int src_width, int src_height) {
  // This loop walks the |src_y| samples in scanout order, but writes them in
  // the rotated order, hence doing big jumps in the destination space.
  for (int row = 0; row < src_height; ++row) {
    const uint8_t* src_row = src_y + row * src_stride_y;
    for (int col = 0; col < src_width; ++col) {
      dst_y[col * dst_stride_y + row] = src_row[col];
    }
  }

  // Same idea for the chroma plane, which has half the rows and, per row, one
  // two-byte U/V pair for every two luma columns. Transposing whole pairs
  // (rather than single bytes) keeps every write inside the destination
  // chroma plane.
  const int uv_src_height = (src_height + 1) / 2;
  const int uv_src_pairs = (src_width + 1) / 2;
  for (int row = 0; row < uv_src_height; ++row) {
    const uint8_t* src_row = src_uv + row * src_stride_uv;
    for (int pair = 0; pair < uv_src_pairs; ++pair) {
      const uint8_t* src_pair = src_row + 2 * pair;
      uint8_t* dst_pair = dst_uv + pair * dst_stride_uv + 2 * row;
      dst_pair[0] = src_pair[0];
      dst_pair[1] = src_pair[1];
    }
  }
}
67
68
69struct test_case {
70 uint32_t format; /* format for allocating buffer object from GBM */
71 enum gbm_bo_transfer_flags read_write;
72 enum gbm_bo_flags usage;
73};
74
75static void print_format_and_use_flags(FILE* out,
76 const struct test_case* tcase) {
77 fprintf(out, "format: ");
78 switch (tcase->format) {
79 case GBM_FORMAT_NV12:
80 fprintf(out, "GBM_FORMAT_NV12");
81 break;
82 default:
83 fprintf(out, "GBM_FORMAT_????????");
84 }
85
86 fprintf(out, ", access: %s%s",
87 (tcase->read_write & GBM_BO_TRANSFER_READ ? "R" : ""),
88 (tcase->read_write & GBM_BO_TRANSFER_WRITE ? "W" : ""));
89
90 fprintf(out, ", use flags: ");
91 bool first = true;
92 if (tcase->usage & GBM_BO_USE_SCANOUT) {
93 fprintf(out, "%sGBM_BO_USE_SCANOUT", first ? "" : " | ");
94 first = false;
95 }
96 if (tcase->usage & GBM_BO_USE_LINEAR) {
97 fprintf(out, "%sGBM_BO_USE_LINEAR", first ? "" : " | ");
98 first = false;
99 }
100 if (tcase->usage & GBM_BO_USE_TEXTURING) {
101 fprintf(out, "%sGBM_BO_USE_TEXTURING", first ? "" : " | ");
102 first = false;
103 }
104 if (tcase->usage & GBM_BO_USE_CAMERA_READ) {
105 fprintf(out, "%sGBM_BO_USE_CAMERA_READ", first ? "" : " | ");
106 first = false;
107 }
108 if (tcase->usage & GBM_BO_USE_CAMERA_WRITE) {
109 fprintf(out, "%sGBM_BO_USE_CAMERA_WRITE", first ? "" : " | ");
110 first = false;
111 }
112 if (tcase->usage & GBM_BO_USE_SW_READ_OFTEN) {
113 fprintf(out, "%sGBM_BO_USE_SW_READ_OFTEN", first ? "" : " | ");
114 first = false;
115 }
116 if (tcase->usage & GBM_BO_USE_SW_WRITE_OFTEN) {
117 fprintf(out, "%sGBM_BO_USE_SW_WRITE_OFTEN", first ? "" : " | ");
118 first = false;
119 }
120}
121
/* Long command line options understood by getopt_long() in main(). */
static const struct option longopts[] = {
    {.name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'},
    /* All-zero entry terminating the table. */
    {.name = NULL, .has_arg = 0, .flag = NULL, .val = 0},
};
126
/*
 * Prints the usage text to stdout. |argv0| is shown as the program name.
 */
static void print_help(const char* argv0) {
  printf("Usage: %s [OPTIONS]\n", argv0);
  fputs(" -h, --help Print help.\n", stdout);
}
131
132int main(int argc, char** argv) {
133 // TODO(mcasas): Consider adding other formats/other operations.
134 // TODO(mcasas): Transform this list into a cartesian product like GTest does.
135 // TODO(mcasas): add command line flags to run test cases individually/by
136 // groups, and to list them.
137 const struct test_case tcases[] = {
138 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_SCANOUT},
139 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_LINEAR},
140 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_TEXTURING},
141 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_CAMERA_READ},
142 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_CAMERA_WRITE},
143 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_SW_READ_OFTEN},
144 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ, GBM_BO_USE_SW_WRITE_OFTEN},
145 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ,
146 GBM_BO_USE_LINEAR | GBM_BO_USE_SCANOUT},
147 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ,
148 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN},
149 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ,
150 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_WRITE_OFTEN},
151 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ,
152 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN |
153 GBM_BO_USE_SW_WRITE_OFTEN},
154 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SCANOUT},
155 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_LINEAR},
156 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_TEXTURING},
157 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_CAMERA_READ},
158 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_CAMERA_WRITE},
159 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SW_READ_OFTEN},
160 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SW_WRITE_OFTEN},
161 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE,
162 GBM_BO_USE_LINEAR | GBM_BO_USE_SCANOUT},
163 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE,
164 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN},
165 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE,
166 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_WRITE_OFTEN},
167 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE,
168 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN |
169 GBM_BO_USE_SW_WRITE_OFTEN},
170 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_WRITE, GBM_BO_USE_SCANOUT},
171 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SCANOUT},
172 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_LINEAR},
173 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_TEXTURING},
174 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_CAMERA_READ},
175 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_CAMERA_WRITE},
176 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SW_READ_OFTEN},
177 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SW_WRITE_OFTEN},
178 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE,
179 GBM_BO_USE_LINEAR | GBM_BO_USE_SCANOUT},
180 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE,
181 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN},
182 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE,
183 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_WRITE_OFTEN},
184 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE,
185 GBM_BO_USE_LINEAR | GBM_BO_USE_SW_READ_OFTEN |
186 GBM_BO_USE_SW_WRITE_OFTEN},
187 {GBM_FORMAT_NV12, GBM_BO_TRANSFER_READ_WRITE, GBM_BO_USE_SCANOUT},
188 };
189 const size_t tcases_size = BS_ARRAY_LEN(tcases);
190
191 // Make sure that the clock resolution is at least 1ms.
192 struct timespec clock_resolution;
193 clock_getres(CLOCK_MONOTONIC, &clock_resolution);
194 assert(clock_resolution.tv_sec == 0 && clock_resolution.tv_nsec <= 1000000);
195
196 int c;
197 while ((c = getopt_long(argc, argv, "h", longopts, NULL)) != -1) {
198 switch (c) {
199 case 'h':
200 default:
201 print_help(argv[0]);
202 return EXIT_SUCCESS;
203 }
204 }
205
206 int display_fd = bs_drm_open_main_display();
207 if (display_fd < 0) {
208 bs_debug_error("failed to open card for display");
209 return EXIT_FAILURE;
210 }
211
212 struct gbm_device* gbm = gbm_create_device(display_fd);
213 if (!gbm) {
214 bs_debug_error("failed to create gbm device");
215 return EXIT_FAILURE;
216 }
217
218 // bs_mapper_dma_buf_new() is expected to use mmap().
219 struct bs_mapper* mapper = bs_mapper_dma_buf_new();
220 if (mapper == NULL) {
221 bs_debug_error("failed to create mapper object");
222 return EXIT_FAILURE;
223 }
224
225 const uint32_t width = 1920;
226 const uint32_t height = 1080;
227
228// We allocate NUM_BOS to replicate a bit what is done in video capture.
229#define NUM_BOS 5
230 struct gbm_bo* bos[NUM_BOS];
231 uint8_t* ptr_y[NUM_BOS];
232 uint8_t* ptr_uv[NUM_BOS];
233
234 uint32_t stride_y[NUM_BOS];
235 void* map_data_y[NUM_BOS];
236 uint32_t stride_uv[NUM_BOS];
237 void* map_data_uv[NUM_BOS];
238
239#define NUM_PLANES 2
240 int gbm_bo_fds[NUM_PLANES][NUM_BOS];
241#define NUM_ITERS 10
242 printf("Running %d iterations. %d BOs allocated (%dx%d)\n", NUM_ITERS,
243 NUM_BOS, width, height);
244
245 // |draft_canvas| is allocated as if to be an ARGB buffer, and can fit NV12
246 // data of the same |width| and |height|.
247 uint8_t* draft_canvas = malloc(width * height * 4);
248 // This is not so much for clearing it as it is for accessing it once.
249 memset(draft_canvas, 0, width * height * 4);
250
251 for (size_t i = 0; i < tcases_size; i++) {
252 const struct test_case* tcase = &tcases[i];
253 print_format_and_use_flags(stdout, tcase);
254 printf(": ");
255
256 for (size_t j = 0; j < NUM_BOS; j++) {
257 bos[j] = gbm_bo_create(gbm, width, height, tcase->format, tcase->usage);
258 if (!bos[j]) {
259 printf(
260 "gbm_bo_create() failed (probably format or usage is not "
261 "supported.\n");
262 continue;
263 }
264
265 const int expected_num_planes = NUM_PLANES;
266 const int num_planes = gbm_bo_get_plane_count(bos[j]);
267 if (expected_num_planes != num_planes) {
268 printf("Incorrect number of planes, expected %d, got %d\n",
269 expected_num_planes, num_planes);
270 return EXIT_FAILURE;
271 }
272
273 ptr_y[j] = bs_mapper_map(mapper, bos[j], 0, &map_data_y[j], &stride_y[j]);
274 if (ptr_y[j] == MAP_FAILED) {
275 bs_debug_error("failed to mmap gbm bo plane 0 (Y)");
276 return EXIT_FAILURE;
277 }
278
279 ptr_uv[j] =
280 bs_mapper_map(mapper, bos[j], 1, &map_data_uv[j], &stride_uv[j]);
281 if (ptr_uv[j] == MAP_FAILED) {
282 bs_debug_error("failed to mmap gbm bo plane 1 (UV)");
283 return EXIT_FAILURE;
284 }
285
286 for (size_t plane = 0; plane < NUM_PLANES; plane++) {
287 gbm_bo_fds[plane][j] = gbm_bo_get_plane_fd(bos[j], plane);
288 if (gbm_bo_fds[plane][j] < 0) {
289 bs_debug_error("failed to get BO fd");
290 return EXIT_FAILURE;
291 }
292 }
293 }
294
295 struct timespec start, stop;
296 clock_gettime(CLOCK_MONOTONIC, &start);
297 for (size_t j = 0; j < NUM_ITERS; j++) {
298 const uint32_t bo_index = j % NUM_BOS;
299
300 if (tcase->read_write & GBM_BO_TRANSFER_READ) {
301 assert(dma_sync(gbm_bo_fds[0][bo_index],
302 DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ) == 0);
303 assert(dma_sync(gbm_bo_fds[1][bo_index],
304 DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ) == 0);
305
306 // Typical Chrome access patterns like e.g. libyuv NV12ToARGB/NV12Scale
307 // are asymmetric in the sense that they create scattered read/writes
308 // (e.g. pixel packing/unpacking operations) or simply more of those on
309 // either source or destination. A rotation operation is chosen here to
310 // avoid part of that asymmetry.
311 // TODO(mcasas): investigate other functions which might cause other
312 // memory access patterns.
313 NV12Rotate90(ptr_y[bo_index], stride_y[bo_index],
314 ptr_uv[bo_index], stride_uv[bo_index],
315 draft_canvas, height,
316 draft_canvas + (height * width), height,
317 width, height);
318
319 assert(dma_sync(gbm_bo_fds[0][bo_index],
320 DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ) == 0);
321 assert(dma_sync(gbm_bo_fds[1][bo_index],
322 DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ) == 0);
323 }
324
325 // When writing, use the next BO index so that nobody will try to optimize
326 // the whole operation chain away when having READ-then-WRITE.
327 const uint32_t next_bo_index = (j + 1) % NUM_BOS;
328 if (tcase->read_write & GBM_BO_TRANSFER_WRITE) {
329 assert(dma_sync(gbm_bo_fds[0][next_bo_index],
330 DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE) == 0);
331 assert(dma_sync(gbm_bo_fds[1][next_bo_index],
332 DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE) == 0);
333
334 // We pretend |draft_canvas| has portrait orientation, so the
335 // destination of the rotation fits into a landscape orientation BO.
336 NV12Rotate90(draft_canvas, height,
337 draft_canvas + (height * width), height,
338 ptr_y[bo_index], stride_y[bo_index],
339 ptr_uv[bo_index], stride_uv[bo_index],
340 height, width);
341
342 assert(dma_sync(gbm_bo_fds[0][next_bo_index],
343 DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE) == 0);
344 assert(dma_sync(gbm_bo_fds[1][next_bo_index],
345 DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE) == 0);
346 }
347 }
348
349 clock_gettime(CLOCK_MONOTONIC, &stop);
350 const double elapsed_ns =
351 (stop.tv_sec - start.tv_sec) * 1e9 + (stop.tv_nsec - start.tv_nsec);
352 // TODO(mcasas): find a standardized way to produce results.
353 printf("%f ms\n", elapsed_ns / 1000000.0);
354
355 for (size_t j = 0; j < NUM_BOS; j++) {
356 bs_mapper_unmap(mapper, bos[j], map_data_y[j]);
357 bs_mapper_unmap(mapper, bos[j], map_data_uv[j]);
358
359 for (size_t plane = 0; plane < NUM_PLANES; plane++)
360 close(gbm_bo_fds[plane][j]);
361 gbm_bo_destroy(bos[j]);
362 }
363 }
364
365 free(draft_canvas);
366
367 // Not really needed, but good to destroy things properly.
368 bs_mapper_destroy(mapper);
369 gbm_device_destroy(gbm);
370
371 return EXIT_SUCCESS;
372}