blob: b8bde01a532838931ba858fbbdff28568029c881 [file] [log] [blame]
Scott James Remnant56686d62009-11-09 18:38:51 +00001/* ureadahead
2 *
3 * trace.c - boot tracing
4 *
5 * Copyright © 2009 Canonical Ltd.
6 * Author: Scott James Remnant <scott@netsplit.com>.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2, as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif /* HAVE_CONFIG_H */
25
26#define _ATFILE_SOURCE
27
28
29#include <sys/select.h>
30#include <sys/mount.h>
Yunlian Jiangb2de86a2018-07-19 16:46:39 -040031#include <sys/sysmacros.h>
Scott James Remnant56686d62009-11-09 18:38:51 +000032#include <sys/types.h>
33#include <sys/mman.h>
34#include <sys/stat.h>
Yusuke Satoa2a925e2015-12-17 13:14:55 -080035#include <sys/param.h>
Scott James Remnant56686d62009-11-09 18:38:51 +000036
37#include <errno.h>
38#include <fcntl.h>
39#include <stdio.h>
40#include <signal.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44
45#include <blkid.h>
46#define NO_INLINE_FUNCS
47#include <ext2fs.h>
48
49#include <linux/fs.h>
50#include <linux/fiemap.h>
51
52#include <nih/macros.h>
53#include <nih/alloc.h>
54#include <nih/string.h>
55#include <nih/list.h>
56#include <nih/hash.h>
57#include <nih/main.h>
58#include <nih/logging.h>
59#include <nih/error.h>
60
61#include "trace.h"
62#include "pack.h"
63#include "values.h"
64#include "file.h"
65
66
67/**
68 * PATH_DEBUGFS:
69 *
70 * Path to the usual debugfs mountpoint.
71 **/
72#define PATH_DEBUGFS "/sys/kernel/debug"
73
74/**
75 * PATH_DEBUGFS_TMP:
76 *
77 * Path to the temporary debugfs mountpoint that we mount it on if it
78 * hasn't been mounted at the usual place yet.
79 **/
80#define PATH_DEBUGFS_TMP "/var/lib/ureadahead/debugfs"
81
82/**
83 * INODE_GROUP_PRELOAD_THRESHOLD:
84 *
 * Number of inodes a group must exceed before that group is preloaded.
86 **/
87#define INODE_GROUP_PRELOAD_THRESHOLD 8
88
89
90/* Prototypes for static functions */
91static int read_trace (const void *parent,
92 int dfd, const char *path,
Philippe Liarde1de5de2019-07-25 07:14:17 -040093 const char *path_prefix_filter,
Yusuke Satoa2a925e2015-12-17 13:14:55 -080094 const PathPrefixOption *path_prefix,
Scott James Remnant56686d62009-11-09 18:38:51 +000095 PackFile **files, size_t *num_files);
96static void fix_path (char *pathname);
97static int trace_add_path (const void *parent, const char *pathname,
98 PackFile **files, size_t *num_files);
99static int ignore_path (const char *pathname);
100static PackFile *trace_file (const void *parent, dev_t dev,
101 PackFile **files, size_t *num_files);
102static int trace_add_chunks (const void *parent,
103 PackFile *file, PackPath *path,
104 int fd, off_t size);
105static int trace_add_extents (const void *parent,
106 PackFile *file, PackPath *path,
107 int fd, off_t size,
108 off_t offset, off_t length);
109static int trace_add_groups (const void *parent, PackFile *file);
110static int trace_sort_blocks (const void *parent, PackFile *file);
111static int trace_sort_paths (const void *parent, PackFile *file);
112
113
/**
 * sig_interrupt:
 * @signum: number of the delivered signal (unused).
 *
 * Intentionally empty signal handler.  trace() installs it for SIGTERM
 * and SIGINT around its pause()/select() wait; the handler itself has
 * nothing to do.
 **/
static void
sig_interrupt (int signum)
{
	(void) signum;
}
118
119int
120trace (int daemonise,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800121 int timeout,
122 const char *filename_to_replace,
Philippe Liarde1de5de2019-07-25 07:14:17 -0400123 const char *pack_file,
124 const char *path_prefix_filter,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800125 const PathPrefixOption *path_prefix)
Scott James Remnant56686d62009-11-09 18:38:51 +0000126{
127 int dfd;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600128 FILE *fp;
Scott James Remnant56686d62009-11-09 18:38:51 +0000129 int unmount = FALSE;
130 int old_sys_open_enabled = 0;
131 int old_open_exec_enabled = 0;
132 int old_uselib_enabled = 0;
133 int old_tracing_enabled = 0;
Tim Gardner73aa2c52010-07-22 04:04:36 -0600134 int old_buffer_size_kb = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000135 struct sigaction act;
136 struct sigaction old_sigterm;
137 struct sigaction old_sigint;
138 struct timeval tv;
139 nih_local PackFile *files = NULL;
140 size_t num_files = 0;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600141 size_t num_cpus = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000142
143 /* Mount debugfs if not already mounted */
Hardik Goyal2c3a17c2019-07-08 19:16:35 -0400144 dfd = open (PATH_DEBUGFS "/tracing", O_NOFOLLOW | O_RDONLY | O_NOATIME);
Scott James Remnant56686d62009-11-09 18:38:51 +0000145 if (dfd < 0) {
146 if (errno != ENOENT)
147 nih_return_system_error (-1);
148
149 if (mount ("none", PATH_DEBUGFS_TMP, "debugfs", 0, NULL) < 0)
150 nih_return_system_error (-1);
151
Hardik Goyal2c3a17c2019-07-08 19:16:35 -0400152 dfd = open (PATH_DEBUGFS_TMP "/tracing", O_NOFOLLOW | O_RDONLY | O_NOATIME);
Scott James Remnant56686d62009-11-09 18:38:51 +0000153 if (dfd < 0) {
154 nih_error_raise_system ();
155 umount (PATH_DEBUGFS_TMP);
156 return -1;
157 }
158
159 unmount = TRUE;
160 }
161
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600162 /*
163 * Count the number of CPUs, default to 1 on error.
164 */
165 fp = fopen("/proc/cpuinfo", "r");
166 if (fp) {
167 int line_size=1024;
168 char *processor="processor";
169 char *line = malloc(line_size);
170 if (line) {
171 num_cpus = 0;
172 while (fgets(line,line_size,fp) != NULL) {
173 if (!strncmp(line,processor,strlen(processor)))
174 num_cpus++;
175 }
176 free(line);
177 nih_message("Counted %d CPUs\n",num_cpus);
178 }
179 fclose(fp);
180 }
181 if (!num_cpus)
182 num_cpus = 1;
183
Scott James Remnante30e2372010-09-20 18:34:31 +0100184 /* Enable tracing of open() syscalls */
Scott James Remnant56686d62009-11-09 18:38:51 +0000185 if (set_value (dfd, "events/fs/do_sys_open/enable",
186 TRUE, &old_sys_open_enabled) < 0)
187 goto error;
188 if (set_value (dfd, "events/fs/open_exec/enable",
189 TRUE, &old_open_exec_enabled) < 0)
190 goto error;
191 if (set_value (dfd, "events/fs/uselib/enable",
192 TRUE, &old_uselib_enabled) < 0) {
193 NihError *err;
194
195 err = nih_error_get ();
196 nih_debug ("Missing uselib tracing: %s", err->message);
197 nih_free (err);
198
199 old_uselib_enabled = -1;
200 }
Scott James Remnante30e2372010-09-20 18:34:31 +0100201 if (set_value (dfd, "buffer_size_kb", 8192/num_cpus, &old_buffer_size_kb) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000202 goto error;
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000203 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000204 TRUE, &old_tracing_enabled) < 0)
205 goto error;
206
207 if (daemonise) {
208 pid_t pid;
209
210 pid = fork ();
211 if (pid < 0) {
212 nih_error_raise_system ();
213 goto error;
214 } else if (pid > 0) {
215 _exit (0);
216 }
217 }
218
219 /* Sleep until we get signals */
220 act.sa_handler = sig_interrupt;
221 sigemptyset (&act.sa_mask);
222 act.sa_flags = 0;
223
224 sigaction (SIGTERM, &act, &old_sigterm);
225 sigaction (SIGINT, &act, &old_sigint);
226
227 if (timeout) {
228 tv.tv_sec = timeout;
229 tv.tv_usec = 0;
230
231 select (0, NULL, NULL, NULL, &tv);
232 } else {
233 pause ();
234 }
235
236 sigaction (SIGTERM, &old_sigterm, NULL);
237 sigaction (SIGINT, &old_sigint, NULL);
238
239 /* Restore previous tracing settings */
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000240 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000241 old_tracing_enabled, NULL) < 0)
242 goto error;
243 if (old_uselib_enabled >= 0)
244 if (set_value (dfd, "events/fs/uselib/enable",
245 old_uselib_enabled, NULL) < 0)
246 goto error;
247 if (set_value (dfd, "events/fs/open_exec/enable",
248 old_open_exec_enabled, NULL) < 0)
249 goto error;
250 if (set_value (dfd, "events/fs/do_sys_open/enable",
251 old_sys_open_enabled, NULL) < 0)
252 goto error;
253
254 /* Be nicer */
Scott James Remnantcc2943b2009-11-29 15:24:15 +0000255 if (nice (15))
256 ;
Scott James Remnant56686d62009-11-09 18:38:51 +0000257
258 /* Read trace log */
Philippe Liarde1de5de2019-07-25 07:14:17 -0400259 if (read_trace (NULL, dfd, "trace", path_prefix_filter, path_prefix,
260 &files, &num_files) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000261 goto error;
262
Tim Gardner73aa2c52010-07-22 04:04:36 -0600263 /*
264 * Restore the trace buffer size (which has just been read) and free
265 * a bunch of memory.
266 */
267 if (set_value (dfd, "buffer_size_kb", old_buffer_size_kb, NULL) < 0)
268 goto error;
269
Scott James Remnant56686d62009-11-09 18:38:51 +0000270 /* Unmount the temporary debugfs mount if we mounted it */
271 if (close (dfd)) {
272 nih_error_raise_system ();
273 goto error;
274 }
275 if (unmount
276 && (umount (PATH_DEBUGFS_TMP) < 0)) {
277 nih_error_raise_system ();
278 goto error;
279 }
280
281 /* Write out pack files */
282 for (size_t i = 0; i < num_files; i++) {
283 nih_local char *filename = NULL;
Philippe Liarde1de5de2019-07-25 07:14:17 -0400284 if (pack_file) {
285 filename = NIH_MUST (nih_strdup (NULL, pack_file));
286 } else {
287 filename = pack_file_name_for_device (NULL,
288 files[i].dev);
289 if (! filename) {
290 NihError *err;
Scott James Remnant56686d62009-11-09 18:38:51 +0000291
Philippe Liarde1de5de2019-07-25 07:14:17 -0400292 err = nih_error_get ();
293 nih_warn ("%s", err->message);
294 nih_free (err);
Scott James Remnant56686d62009-11-09 18:38:51 +0000295
Philippe Liarde1de5de2019-07-25 07:14:17 -0400296 continue;
297 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000298
Philippe Liarde1de5de2019-07-25 07:14:17 -0400299 /* If filename_to_replace is not NULL, only write out
300 * the file and skip others.
301 */
302 if (filename_to_replace &&
303 strcmp (filename_to_replace, filename)) {
304 nih_info ("Skipping %s", filename);
305 continue;
306 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000307 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000308 nih_info ("Writing %s", filename);
309
310 /* We only need to apply additional sorting to the
311 * HDD-optimised packs, the SSD ones can read in random
312 * order quite happily.
313 *
314 * Also for HDD, generate the inode group preloading
315 * array.
316 */
317 if (files[i].rotational) {
318 trace_add_groups (files, &files[i]);
319
320 trace_sort_blocks (files, &files[i]);
321 trace_sort_paths (files, &files[i]);
322 }
323
324 write_pack (filename, &files[i]);
325
326 if (nih_log_priority < NIH_LOG_MESSAGE)
327 pack_dump (&files[i], SORT_OPEN);
328 }
329
330 return 0;
331error:
332 close (dfd);
333 if (unmount)
334 umount (PATH_DEBUGFS_TMP);
335
336 return -1;
337}
338
339
340static int
341read_trace (const void *parent,
342 int dfd,
343 const char *path,
Philippe Liarde1de5de2019-07-25 07:14:17 -0400344 const char *path_prefix_filter, /* May be null */
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800345 const PathPrefixOption *path_prefix,
Scott James Remnant56686d62009-11-09 18:38:51 +0000346 PackFile ** files,
347 size_t * num_files)
348{
349 int fd;
350 FILE *fp;
351 char *line;
352
353 nih_assert (path != NULL);
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800354 nih_assert (path_prefix != NULL);
Scott James Remnant56686d62009-11-09 18:38:51 +0000355 nih_assert (files != NULL);
356 nih_assert (num_files != NULL);
357
358 fd = openat (dfd, path, O_RDONLY);
359 if (fd < 0)
360 nih_return_system_error (-1);
361
362 fp = fdopen (fd, "r");
363 if (! fp) {
364 nih_error_raise_system ();
365 close (fd);
366 return -1;
367 }
368
369 while ((line = fgets_alloc (NULL, fp)) != NULL) {
370 char *ptr;
371 char *end;
372
373 ptr = strstr (line, " do_sys_open:");
374 if (! ptr)
375 ptr = strstr (line, " open_exec:");
376 if (! ptr)
377 ptr = strstr (line, " uselib:");
378 if (! ptr) {
379 nih_free (line);
380 continue;
381 }
382
383 ptr = strchr (ptr, '"');
384 if (! ptr) {
385 nih_free (line);
386 continue;
387 }
388
389 ptr++;
390
391 end = strrchr (ptr, '"');
392 if (! end) {
393 nih_free (line);
394 continue;
395 }
396
397 *end = '\0';
398
399 fix_path (ptr);
Philippe Liarde1de5de2019-07-25 07:14:17 -0400400
401 if (path_prefix_filter &&
402 strncmp (ptr, path_prefix_filter,
403 strlen (path_prefix_filter))) {
404 nih_warn ("Skipping %s due to path prefix filter", ptr);
405 continue;
406 }
407
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800408 if (path_prefix->st_dev != NODEV && ptr[0] == '/') {
409 struct stat stbuf;
410 char *rewritten = nih_sprintf (
411 line, "%s%s", path_prefix->prefix, ptr);
412 if (! lstat (rewritten, &stbuf) &&
413 stbuf.st_dev == path_prefix->st_dev) {
414 /* If |rewritten| exists on the same device as
415 * path_prefix->st_dev, record the rewritten one
416 * instead of the original path.
417 */
418 ptr = rewritten;
419 }
420 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000421 trace_add_path (parent, ptr, files, num_files);
422
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800423 nih_free (line); /* also frees |rewritten| */
Scott James Remnant56686d62009-11-09 18:38:51 +0000424 }
425
426 if (fclose (fp) < 0)
427 nih_return_system_error (-1);
428
429 return 0;
430}
431
/**
 * fix_path:
 * @pathname: NUL-terminated path to clean up, modified in place.
 *
 * Collapses "//" and "/./" sequences and resolves "/../" by dropping
 * the preceding component; trailing "/", "/." and "/.." are handled the
 * same way.  The result is never longer than the input.
 *
 * The buffer is walked by index rather than by pointer so that stepping
 * "back one" from the first character never forms a pointer before the
 * start of @pathname.
 *
 * NOTE: a path that reduces to the root (for example "/" or "/foo/..")
 * ends up as the empty string, and a ".." that consumes the first
 * component of a relative path leaves a leading '/' ("foo/../bar"
 * becomes "/bar").
 **/
static void
fix_path (char *pathname)
{
	size_t pos = 0;

	nih_assert (pathname != NULL);

	while (pathname[pos] != '\0') {
		char  *slash = pathname + pos;
		size_t len;

		if (*slash != '/') {
			pos++;
			continue;
		}

		/* Length of the component following this slash */
		len = strcspn (slash + 1, "/");

		/* // and /./, we shorten the string and look again at the
		 * slash that now sits at the same position
		 */
		if ((len == 0) || ((len == 1) && (slash[1] == '.'))) {
			memmove (slash, slash + len + 1, strlen (slash) - len);
			continue;
		}

		/* /../, we shorten back to the previous / or the start
		 * of the string and look again from there
		 */
		if ((len == 2) && (slash[1] == '.') && (slash[2] == '.')) {
			size_t root = pos;

			while ((root > 0) && (pathname[root - 1] != '/'))
				root--;
			if (root > 0)
				root--;

			memmove (pathname + root, slash + len + 1,
				 strlen (slash) - len);
			pos = root;
			continue;
		}

		pos++;
	}

	/* Strip any slashes left at the end of the string */
	while ((pos > 0) && (pathname[pos - 1] == '/'))
		pathname[--pos] = '\0';
}
478
479
480static int
481trace_add_path (const void *parent,
482 const char *pathname,
483 PackFile ** files,
484 size_t * num_files)
485{
486 static NihHash *path_hash = NULL;
487 struct stat statbuf;
488 int fd;
489 PackFile * file;
490 PackPath * path;
491 static NihHash *inode_hash = NULL;
492 nih_local char *inode_key = NULL;
493
494 nih_assert (pathname != NULL);
495 nih_assert (files != NULL);
496 nih_assert (num_files != NULL);
497
498 /* We can't really deal with relative paths since we don't know
499 * the working directory that they were opened from.
500 */
501 if (pathname[0] != '/') {
502 nih_warn ("%s: %s", pathname, _("Ignored relative path"));
503 return 0;
504 }
505
506 /* Certain paths aren't worth caching, because they're virtual or
507 * temporary filesystems and would waste pack space.
508 */
509 if (ignore_path (pathname))
510 return 0;
511
512 /* Ignore paths that won't fit in the pack; we could use PATH_MAX,
513 * but with 1000 files that'd be 4M just for the
514 * pack.
515 */
516 if (strlen (pathname) > PACK_PATH_MAX) {
517 nih_warn ("%s: %s", pathname, _("Ignored far too long path"));
518 return 0;
519 }
520
521 /* Use a hash table of paths to eliminate duplicate path names from
522 * the table since that would waste pack space (and fds).
523 */
524 if (! path_hash)
525 path_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
526
527 if (nih_hash_lookup (path_hash, pathname)) {
528 return 0;
529 } else {
530 NihListEntry *entry;
531
532 entry = NIH_MUST (nih_list_entry_new (path_hash));
533 entry->str = NIH_MUST (nih_strdup (entry, pathname));
534
535 nih_hash_add (path_hash, &entry->entry);
536 }
537
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100538 /* Make sure that we have an ordinary file
539 * This avoids us opening a fifo or socket or symlink.
Scott James Remnant56686d62009-11-09 18:38:51 +0000540 */
541 if ((lstat (pathname, &statbuf) < 0)
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100542 || (S_ISLNK (statbuf.st_mode))
Scott James Remnant56686d62009-11-09 18:38:51 +0000543 || (! S_ISREG (statbuf.st_mode)))
544 return 0;
545
546 /* Open and stat again to get the genuine details, in case it
547 * changes under us.
548 */
549 fd = open (pathname, O_RDONLY | O_NOATIME);
550 if (fd < 0) {
551 nih_warn ("%s: %s: %s", pathname,
552 _("File vanished or error reading"),
553 strerror (errno));
554 return -1;
555 }
556
557 if (fstat (fd, &statbuf) < 0) {
558 nih_warn ("%s: %s: %s", pathname,
559 _("Error retrieving file stat"),
560 strerror (errno));
561 close (fd);
562 return -1;
563 }
564
565 /* Double-check that it's really still a file */
566 if (! S_ISREG (statbuf.st_mode)) {
567 close (fd);
568 return 0;
569 }
570
571 /* Some people think it's clever to split their filesystem across
572 * multiple devices, so we need to generate a different pack file
573 * for each device.
574 *
575 * Lookup file based on the dev_t, potentially creating a new
576 * pack file in the array.
577 */
578 file = trace_file (parent, statbuf.st_dev, files, num_files);
579
580 /* Grow the PackPath array and fill in the details for the new
581 * path.
582 */
583 file->paths = NIH_MUST (nih_realloc (file->paths, *files,
584 (sizeof (PackPath)
585 * (file->num_paths + 1))));
586
587 path = &file->paths[file->num_paths++];
588 memset (path, 0, sizeof (PackPath));
589
590 path->group = -1;
591 path->ino = statbuf.st_ino;
592
593 strncpy (path->path, pathname, PACK_PATH_MAX);
594 path->path[PACK_PATH_MAX] = '\0';
595
596 /* The paths array contains each unique path opened, but these
597 * might be symbolic or hard links to the same underlying files
598 * and we don't want to read the same block more than once.
599 *
600 * Use a hash table of dev_t/ino_t pairs to make sure we only
601 * read the blocks of an actual file the first time.
602 */
603 if (! inode_hash)
604 inode_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
605
606 inode_key = NIH_MUST (nih_sprintf (NULL, "%llu:%llu",
607 (unsigned long long)statbuf.st_dev,
608 (unsigned long long)statbuf.st_ino));
609
610 if (nih_hash_lookup (inode_hash, inode_key)) {
611 close (fd);
612 return 0;
613 } else {
614 NihListEntry *entry;
615
616 entry = NIH_MUST (nih_list_entry_new (inode_hash));
617 entry->str = inode_key;
618 nih_ref (entry->str, entry);
619
620 nih_hash_add (inode_hash, &entry->entry);
621 }
622
623 /* There's also no point reading zero byte files, since they
624 * won't have any blocks (and we can't mmap zero bytes anyway).
625 */
626 if (! statbuf.st_size) {
627 close (fd);
628 return 0;
629 }
630
631 /* Now read the in-memory chunks of this file and add those to
632 * the pack file too.
633 */
634 trace_add_chunks (*files, file, path, fd, statbuf.st_size);
635 close (fd);
636
637 return 0;
638}
639
640static int
641ignore_path (const char *pathname)
642{
643 nih_assert (pathname != NULL);
644
645 if (! strncmp (pathname, "/proc/", 6))
646 return TRUE;
647 if (! strncmp (pathname, "/sys/", 5))
648 return TRUE;
649 if (! strncmp (pathname, "/dev/", 5))
650 return TRUE;
651 if (! strncmp (pathname, "/tmp/", 5))
652 return TRUE;
Steve Langasek2c698a12012-02-03 15:27:29 -0800653 if (! strncmp (pathname, "/run/", 5))
654 return TRUE;
Scott James Remnant56686d62009-11-09 18:38:51 +0000655 if (! strncmp (pathname, "/var/run/", 9))
656 return TRUE;
Bryan Fullerton96c991b2013-03-12 15:06:41 +0000657 if (! strncmp (pathname, "/var/log/", 9))
658 return TRUE;
Scott James Remnant56686d62009-11-09 18:38:51 +0000659 if (! strncmp (pathname, "/var/lock/", 10))
660 return TRUE;
661
662 return FALSE;
663}
664
665
666static PackFile *
667trace_file (const void *parent,
668 dev_t dev,
669 PackFile ** files,
670 size_t * num_files)
671{
672 nih_local char *filename = NULL;
673 int rotational;
674 PackFile * file;
675
676 nih_assert (files != NULL);
677 nih_assert (num_files != NULL);
678
679 /* Return any existing file structure for this device */
680 for (size_t i = 0; i < *num_files; i++)
681 if ((*files)[i].dev == dev)
682 return &(*files)[i];
683
684 /* Query sysfs to see whether this disk is rotational; this
685 * obviously won't work for virtual devices and the like, so
686 * default to TRUE for now.
687 */
Yusuke Satofc571f12016-04-30 06:17:08 -0400688 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:%d/queue/rotational",
689 major (dev), minor (dev)));
690 if (access (filename, R_OK) < 0) {
691 /* For devices managed by the scsi stack, the minor device number has to be
692 * masked to find the queue/rotational file.
693 */
694 nih_free (filename);
695 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:%d/queue/rotational",
696 major (dev), minor (dev) & 0xffff0));
697 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000698
699 if (get_value (AT_FDCWD, filename, &rotational) < 0) {
700 NihError *err;
701
702 err = nih_error_get ();
703 nih_warn (_("Unable to obtain rotationalness for device %u:%u: %s"),
704 major (dev), minor (dev), err->message);
705 nih_free (err);
706
707 rotational = TRUE;
708 }
709
710 /* Grow the PackFile array and fill in the details for the new
711 * file.
712 */
713 *files = NIH_MUST (nih_realloc (*files, parent,
714 (sizeof (PackFile) * (*num_files + 1))));
715
716 file = &(*files)[(*num_files)++];
717 memset (file, 0, sizeof (PackFile));
718
719 file->dev = dev;
720 file->rotational = rotational;
721 file->num_paths = 0;
722 file->paths = NULL;
723 file->num_blocks = 0;
724 file->blocks = NULL;
725
726 return file;
727}
728
729
730static int
731trace_add_chunks (const void *parent,
732 PackFile * file,
733 PackPath * path,
734 int fd,
735 off_t size)
736{
737 static int page_size = -1;
738 void * buf;
739 off_t num_pages;
740 nih_local unsigned char *vec = NULL;
741
742 nih_assert (file != NULL);
743 nih_assert (path != NULL);
744 nih_assert (fd >= 0);
745 nih_assert (size > 0);
746
747 if (page_size < 0)
748 page_size = sysconf (_SC_PAGESIZE);
749
750 /* Map the file into memory */
751 buf = mmap (NULL, size, PROT_READ, MAP_SHARED, fd, 0);
752 if (buf == MAP_FAILED) {
753 nih_warn ("%s: %s: %s", path->path,
754 _("Error mapping into memory"),
755 strerror (errno));
756 return -1;
757 }
758
759 /* Grab the core memory map of the file */
760 num_pages = (size - 1) / page_size + 1;
761 vec = NIH_MUST (nih_alloc (NULL, num_pages));
762 memset (vec, 0, num_pages);
763
764 if (mincore (buf, size, vec) < 0) {
765 nih_warn ("%s: %s: %s", path->path,
766 _("Error retrieving page cache info"),
767 strerror (errno));
768 munmap (buf, size);
769 return -1;
770 }
771
772 /* Clean up */
773 if (munmap (buf, size) < 0) {
774 nih_warn ("%s: %s: %s", path->path,
775 _("Error unmapping from memory"),
776 strerror (errno));
777 return -1;
778 }
779
780
781 /* Now we can figure out which contiguous bits of the file are
782 * in core memory.
783 */
784 for (off_t i = 0; i < num_pages; i++) {
785 off_t offset;
786 off_t length;
787
788 if (! vec[i])
789 continue;
790
791 offset = i * page_size;
792 length = page_size;
793
794 while (((i + 1) < num_pages) && vec[i + 1]) {
795 length += page_size;
796 i++;
797 }
798
799 /* The rotational crowd need this split down further into
800 * on-disk extents, the non-rotational folks can just use
801 * the chunks data.
802 */
803 if (file->rotational) {
804 trace_add_extents (parent, file, path, fd, size,
805 offset, length);
806 } else {
807 PackBlock *block;
808
809 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
810 (sizeof (PackBlock)
811 * (file->num_blocks + 1))));
812
813 block = &file->blocks[file->num_blocks++];
814 memset (block, 0, sizeof (PackBlock));
815
816 block->pathidx = file->num_paths - 1;
817 block->offset = offset;
818 block->length = length;
819 block->physical = -1;
820 }
821 }
822
823 return 0;
824}
825
826struct fiemap *
827get_fiemap (const void *parent,
828 int fd,
829 off_t offset,
830 off_t length)
831{
832 struct fiemap *fiemap;
833
834 nih_assert (fd >= 0);
835
836 fiemap = NIH_MUST (nih_new (parent, struct fiemap));
837 memset (fiemap, 0, sizeof (struct fiemap));
838
839 fiemap->fm_start = offset;
840 fiemap->fm_length = length;
841 fiemap->fm_flags = 0;
842
843 do {
844 /* Query the current number of extents */
845 fiemap->fm_mapped_extents = 0;
846 fiemap->fm_extent_count = 0;
847
848 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
849 nih_error_raise_system ();
850 nih_free (fiemap);
851 return NULL;
852 }
853
854 /* Always allow room for one extra over what we were told,
855 * so we know if they changed under us.
856 */
857 fiemap = NIH_MUST (nih_realloc (fiemap, parent,
858 (sizeof (struct fiemap)
859 + (sizeof (struct fiemap_extent)
860 * (fiemap->fm_mapped_extents + 1)))));
861 fiemap->fm_extent_count = fiemap->fm_mapped_extents + 1;
862 fiemap->fm_mapped_extents = 0;
863
864 memset (fiemap->fm_extents, 0, (sizeof (struct fiemap_extent)
865 * fiemap->fm_extent_count));
866
867 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
868 nih_error_raise_system ();
869 nih_free (fiemap);
870 return NULL;
871 }
872 } while (fiemap->fm_mapped_extents
873 && (fiemap->fm_mapped_extents >= fiemap->fm_extent_count));
874
875 return fiemap;
876}
877
878static int
879trace_add_extents (const void *parent,
880 PackFile * file,
881 PackPath * path,
882 int fd,
883 off_t size,
884 off_t offset,
885 off_t length)
886{
887 nih_local struct fiemap *fiemap = NULL;
888
889 nih_assert (file != NULL);
890 nih_assert (path != NULL);
891 nih_assert (fd >= 0);
892 nih_assert (size > 0);
893
894 /* Get the extents map for this chunk, then iterate the extents
895 * and put those in the pack instead of the chunks.
896 */
897 fiemap = get_fiemap (NULL, fd, offset, length);
898 if (! fiemap) {
899 NihError *err;
900
901 err = nih_error_get ();
902 nih_warn ("%s: %s: %s", path->path,
903 _("Error retrieving chunk extents"),
904 err->message);
905 nih_free (err);
906
907 return -1;
908 }
909
910 for (__u32 j = 0; j < fiemap->fm_mapped_extents; j++) {
911 PackBlock *block;
912 off_t start;
913 off_t end;
914
915 if (fiemap->fm_extents[j].fe_flags & FIEMAP_EXTENT_UNKNOWN)
916 continue;
917
918 /* Work out the intersection of the chunk and extent */
919 start = nih_max (fiemap->fm_start,
920 fiemap->fm_extents[j].fe_logical);
921 end = nih_min ((fiemap->fm_start + fiemap->fm_length),
922 (fiemap->fm_extents[j].fe_logical
923 + fiemap->fm_extents[j].fe_length));
924
925 /* Grow the blocks array to add the extent */
926 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
927 (sizeof (PackBlock)
928 * (file->num_blocks + 1))));
929
930 block = &file->blocks[file->num_blocks++];
931 memset (block, 0, sizeof (PackBlock));
932
933 block->pathidx = file->num_paths - 1;
934 block->offset = start;
935 block->length = end - start;
936 block->physical = (fiemap->fm_extents[j].fe_physical
937 + (start - fiemap->fm_extents[j].fe_logical));
938 }
939
940 return 0;
941}
942
943static int
944trace_add_groups (const void *parent,
945 PackFile * file)
946{
947 const char *devname;
948 ext2_filsys fs = NULL;
949
950 nih_assert (file != NULL);
951
952 devname = blkid_devno_to_devname (file->dev);
953 if (devname
954 && (! ext2fs_open (devname, 0, 0, 0, unix_io_manager, &fs))) {
955 nih_assert (fs != NULL);
956 size_t num_groups = 0;
957 nih_local size_t *num_inodes = NULL;
958 size_t mean = 0;
959 size_t hits = 0;
960
961 nih_assert (fs != NULL);
962
963 /* Calculate the number of inode groups on this filesystem */
964 num_groups = ((fs->super->s_blocks_count - 1)
965 / fs->super->s_blocks_per_group) + 1;
966
967 /* Fill in the pack path's group member, and count the
968 * number of inodes in each group.
969 */
970 num_inodes = NIH_MUST (nih_alloc (NULL, (sizeof (size_t)
971 * num_groups)));
972 memset (num_inodes, 0, sizeof (size_t) * num_groups);
973
974 for (size_t i = 0; i < file->num_paths; i++) {
975 file->paths[i].group = ext2fs_group_of_ino (fs, file->paths[i].ino);
976 num_inodes[file->paths[i].group]++;
977 }
978
979 /* Iterate the groups and add any group that exceeds the
980 * inode preload threshold.
981 */
982 for (size_t i = 0; i < num_groups; i++) {
983 mean += num_inodes[i];
984 if (num_inodes[i] > INODE_GROUP_PRELOAD_THRESHOLD) {
985 file->groups = NIH_MUST (nih_realloc (file->groups, parent,
986 (sizeof (int)
987 * (file->num_groups + 1))));
988 file->groups[file->num_groups++] = i;
989 hits++;
990 }
991 }
992
993 mean /= num_groups;
994
995 nih_debug ("%zu inode groups, mean %zu inodes per group, %zu hits",
996 num_groups, mean, hits);
997
998 ext2fs_close (fs);
999 }
1000
1001 return 0;
1002}
1003
1004
1005static int
1006block_compar (const void *a,
1007 const void *b)
1008{
1009 const PackBlock *block_a = a;
1010 const PackBlock *block_b = b;
1011
1012 nih_assert (block_a != NULL);
1013 nih_assert (block_b != NULL);
1014
1015 if (block_a->physical < block_b->physical) {
1016 return -1;
1017 } else if (block_a->physical > block_b->physical) {
1018 return 1;
1019 } else {
1020 return 0;
1021 }
1022}
1023
1024static int
1025trace_sort_blocks (const void *parent,
1026 PackFile * file)
1027{
1028 nih_assert (file != NULL);
1029
1030 /* Sort the blocks array by physical location, since these are
1031 * read in a separate pass to opening files, there's no reason
1032 * to consider which path each block is in - and thus resulting
1033 * in a linear disk read.
1034 */
1035 qsort (file->blocks, file->num_blocks, sizeof (PackBlock),
1036 block_compar);
1037
1038 return 0;
1039}
1040
1041static int
1042path_compar (const void *a,
1043 const void *b)
1044{
1045 const PackPath * const *path_a = a;
1046 const PackPath * const *path_b = b;
1047
1048 nih_assert (path_a != NULL);
1049 nih_assert (path_b != NULL);
1050
1051 if ((*path_a)->group < (*path_b)->group) {
1052 return -1;
1053 } else if ((*path_a)->group > (*path_b)->group) {
1054 return 1;
1055 } else if ((*path_a)->ino < (*path_b)->ino) {
1056 return -1;
1057 } else if ((*path_b)->ino > (*path_b)->ino) {
1058 return 1;
1059 } else {
1060 return strcmp ((*path_a)->path, (*path_b)->path);
1061 }
1062}
1063
1064static int
1065trace_sort_paths (const void *parent,
1066 PackFile * file)
1067{
1068 nih_local PackPath **paths = NULL;
1069 nih_local size_t * new_idx = NULL;
1070 PackPath * new_paths;
1071
1072 nih_assert (file != NULL);
1073
1074 /* Sort the paths array by ext2fs inode group, ino_t then path.
1075 *
1076 * Mucking around with things like the physical locations of
1077 * first on-disk blocks of the dentry and stuff didn't work out
1078 * so well, sorting by path was better, but this seems the best.
1079 * (it looks good on blktrace too)
1080 */
1081 paths = NIH_MUST (nih_alloc (NULL, (sizeof (PackPath *)
1082 * file->num_paths)));
1083
1084 for (size_t i = 0; i < file->num_paths; i++)
1085 paths[i] = &file->paths[i];
1086
1087 qsort (paths, file->num_paths, sizeof (PackPath *),
1088 path_compar);
1089
1090 /* Calculate the new indexes of each path element in the old
1091 * array, and then update the block array's path indexes to
1092 * match.
1093 */
1094 new_idx = NIH_MUST (nih_alloc (NULL,
1095 (sizeof (size_t) * file->num_paths)));
1096 for (size_t i = 0; i < file->num_paths; i++)
1097 new_idx[paths[i] - file->paths] = i;
1098
1099 for (size_t i = 0; i < file->num_blocks; i++)
1100 file->blocks[i].pathidx = new_idx[file->blocks[i].pathidx];
1101
1102 /* Finally generate a new paths array with the new order and
1103 * attach it to the file.
1104 */
1105 new_paths = NIH_MUST (nih_alloc (parent,
1106 (sizeof (PackPath) * file->num_paths)));
1107 for (size_t i = 0; i < file->num_paths; i++)
1108 memcpy (&new_paths[new_idx[i]], &file->paths[i],
1109 sizeof (PackPath));
1110
1111 nih_unref (file->paths, parent);
1112 file->paths = new_paths;
1113
1114 return 0;
1115}