blob: d6fc9e0a05e4ef6c1f09aac78bd72d0e51b80125 [file] [log] [blame]
Scott James Remnant56686d62009-11-09 18:38:51 +00001/* ureadahead
2 *
3 * trace.c - boot tracing
4 *
5 * Copyright © 2009 Canonical Ltd.
6 * Author: Scott James Remnant <scott@netsplit.com>.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2, as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif /* HAVE_CONFIG_H */
25
26#define _ATFILE_SOURCE
27
28
29#include <sys/select.h>
30#include <sys/mount.h>
Yunlian Jiangb2de86a2018-07-19 16:46:39 -040031#include <sys/sysmacros.h>
Scott James Remnant56686d62009-11-09 18:38:51 +000032#include <sys/types.h>
33#include <sys/mman.h>
34#include <sys/stat.h>
Yusuke Satoa2a925e2015-12-17 13:14:55 -080035#include <sys/param.h>
Scott James Remnant56686d62009-11-09 18:38:51 +000036
37#include <errno.h>
38#include <fcntl.h>
39#include <stdio.h>
40#include <signal.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44
45#include <blkid.h>
46#define NO_INLINE_FUNCS
47#include <ext2fs.h>
48
49#include <linux/fs.h>
50#include <linux/fiemap.h>
51
52#include <nih/macros.h>
53#include <nih/alloc.h>
54#include <nih/string.h>
55#include <nih/list.h>
56#include <nih/hash.h>
57#include <nih/main.h>
58#include <nih/logging.h>
59#include <nih/error.h>
60
61#include "trace.h"
62#include "pack.h"
63#include "values.h"
64#include "file.h"
65
66
67/**
68 * PATH_DEBUGFS:
69 *
70 * Path to the usual debugfs mountpoint.
71 **/
72#define PATH_DEBUGFS "/sys/kernel/debug"
73
74/**
75 * PATH_DEBUGFS_TMP:
76 *
77 * Path to the temporary debugfs mountpoint that we mount it on if it
78 * hasn't been mounted at the usual place yet.
79 **/
80#define PATH_DEBUGFS_TMP "/var/lib/ureadahead/debugfs"
81
82/**
83 * INODE_GROUP_PRELOAD_THRESHOLD:
84 *
85 * Number of inodes in a group before we preload that inode's blocks.
86 **/
87#define INODE_GROUP_PRELOAD_THRESHOLD 8
88
89
90/* Prototypes for static functions */
91static int read_trace (const void *parent,
92 int dfd, const char *path,
Yusuke Satoa2a925e2015-12-17 13:14:55 -080093 const PathPrefixOption *path_prefix,
Scott James Remnant56686d62009-11-09 18:38:51 +000094 PackFile **files, size_t *num_files);
95static void fix_path (char *pathname);
96static int trace_add_path (const void *parent, const char *pathname,
97 PackFile **files, size_t *num_files);
98static int ignore_path (const char *pathname);
99static PackFile *trace_file (const void *parent, dev_t dev,
100 PackFile **files, size_t *num_files);
101static int trace_add_chunks (const void *parent,
102 PackFile *file, PackPath *path,
103 int fd, off_t size);
104static int trace_add_extents (const void *parent,
105 PackFile *file, PackPath *path,
106 int fd, off_t size,
107 off_t offset, off_t length);
108static int trace_add_groups (const void *parent, PackFile *file);
109static int trace_sort_blocks (const void *parent, PackFile *file);
110static int trace_sort_paths (const void *parent, PackFile *file);
111
112
/**
 * sig_interrupt:
 * @signum: number of the signal that was delivered (unused).
 *
 * Deliberately empty signal handler.  trace() installs it for SIGTERM and
 * SIGINT around its select()/pause() call, so the handler only needs to
 * exist; it performs no work of its own.
 **/
static void
sig_interrupt (int signum)
{
	(void) signum;
}
117
118int
119trace (int daemonise,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800120 int timeout,
121 const char *filename_to_replace,
122 const PathPrefixOption *path_prefix)
Scott James Remnant56686d62009-11-09 18:38:51 +0000123{
124 int dfd;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600125 FILE *fp;
Scott James Remnant56686d62009-11-09 18:38:51 +0000126 int unmount = FALSE;
127 int old_sys_open_enabled = 0;
128 int old_open_exec_enabled = 0;
129 int old_uselib_enabled = 0;
130 int old_tracing_enabled = 0;
Tim Gardner73aa2c52010-07-22 04:04:36 -0600131 int old_buffer_size_kb = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000132 struct sigaction act;
133 struct sigaction old_sigterm;
134 struct sigaction old_sigint;
135 struct timeval tv;
136 nih_local PackFile *files = NULL;
137 size_t num_files = 0;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600138 size_t num_cpus = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000139
140 /* Mount debugfs if not already mounted */
Hardik Goyal2c3a17c2019-07-08 19:16:35 -0400141 dfd = open (PATH_DEBUGFS "/tracing", O_NOFOLLOW | O_RDONLY | O_NOATIME);
Scott James Remnant56686d62009-11-09 18:38:51 +0000142 if (dfd < 0) {
143 if (errno != ENOENT)
144 nih_return_system_error (-1);
145
146 if (mount ("none", PATH_DEBUGFS_TMP, "debugfs", 0, NULL) < 0)
147 nih_return_system_error (-1);
148
Hardik Goyal2c3a17c2019-07-08 19:16:35 -0400149 dfd = open (PATH_DEBUGFS_TMP "/tracing", O_NOFOLLOW | O_RDONLY | O_NOATIME);
Scott James Remnant56686d62009-11-09 18:38:51 +0000150 if (dfd < 0) {
151 nih_error_raise_system ();
152 umount (PATH_DEBUGFS_TMP);
153 return -1;
154 }
155
156 unmount = TRUE;
157 }
158
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600159 /*
160 * Count the number of CPUs, default to 1 on error.
161 */
162 fp = fopen("/proc/cpuinfo", "r");
163 if (fp) {
164 int line_size=1024;
165 char *processor="processor";
166 char *line = malloc(line_size);
167 if (line) {
168 num_cpus = 0;
169 while (fgets(line,line_size,fp) != NULL) {
170 if (!strncmp(line,processor,strlen(processor)))
171 num_cpus++;
172 }
173 free(line);
174 nih_message("Counted %d CPUs\n",num_cpus);
175 }
176 fclose(fp);
177 }
178 if (!num_cpus)
179 num_cpus = 1;
180
Scott James Remnante30e2372010-09-20 18:34:31 +0100181 /* Enable tracing of open() syscalls */
Scott James Remnant56686d62009-11-09 18:38:51 +0000182 if (set_value (dfd, "events/fs/do_sys_open/enable",
183 TRUE, &old_sys_open_enabled) < 0)
184 goto error;
185 if (set_value (dfd, "events/fs/open_exec/enable",
186 TRUE, &old_open_exec_enabled) < 0)
187 goto error;
188 if (set_value (dfd, "events/fs/uselib/enable",
189 TRUE, &old_uselib_enabled) < 0) {
190 NihError *err;
191
192 err = nih_error_get ();
193 nih_debug ("Missing uselib tracing: %s", err->message);
194 nih_free (err);
195
196 old_uselib_enabled = -1;
197 }
Scott James Remnante30e2372010-09-20 18:34:31 +0100198 if (set_value (dfd, "buffer_size_kb", 8192/num_cpus, &old_buffer_size_kb) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000199 goto error;
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000200 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000201 TRUE, &old_tracing_enabled) < 0)
202 goto error;
203
204 if (daemonise) {
205 pid_t pid;
206
207 pid = fork ();
208 if (pid < 0) {
209 nih_error_raise_system ();
210 goto error;
211 } else if (pid > 0) {
212 _exit (0);
213 }
214 }
215
216 /* Sleep until we get signals */
217 act.sa_handler = sig_interrupt;
218 sigemptyset (&act.sa_mask);
219 act.sa_flags = 0;
220
221 sigaction (SIGTERM, &act, &old_sigterm);
222 sigaction (SIGINT, &act, &old_sigint);
223
224 if (timeout) {
225 tv.tv_sec = timeout;
226 tv.tv_usec = 0;
227
228 select (0, NULL, NULL, NULL, &tv);
229 } else {
230 pause ();
231 }
232
233 sigaction (SIGTERM, &old_sigterm, NULL);
234 sigaction (SIGINT, &old_sigint, NULL);
235
236 /* Restore previous tracing settings */
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000237 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000238 old_tracing_enabled, NULL) < 0)
239 goto error;
240 if (old_uselib_enabled >= 0)
241 if (set_value (dfd, "events/fs/uselib/enable",
242 old_uselib_enabled, NULL) < 0)
243 goto error;
244 if (set_value (dfd, "events/fs/open_exec/enable",
245 old_open_exec_enabled, NULL) < 0)
246 goto error;
247 if (set_value (dfd, "events/fs/do_sys_open/enable",
248 old_sys_open_enabled, NULL) < 0)
249 goto error;
250
251 /* Be nicer */
Scott James Remnantcc2943b2009-11-29 15:24:15 +0000252 if (nice (15))
253 ;
Scott James Remnant56686d62009-11-09 18:38:51 +0000254
255 /* Read trace log */
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800256 if (read_trace (NULL, dfd, "trace", path_prefix, &files, &num_files) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000257 goto error;
258
Tim Gardner73aa2c52010-07-22 04:04:36 -0600259 /*
260 * Restore the trace buffer size (which has just been read) and free
261 * a bunch of memory.
262 */
263 if (set_value (dfd, "buffer_size_kb", old_buffer_size_kb, NULL) < 0)
264 goto error;
265
Scott James Remnant56686d62009-11-09 18:38:51 +0000266 /* Unmount the temporary debugfs mount if we mounted it */
267 if (close (dfd)) {
268 nih_error_raise_system ();
269 goto error;
270 }
271 if (unmount
272 && (umount (PATH_DEBUGFS_TMP) < 0)) {
273 nih_error_raise_system ();
274 goto error;
275 }
276
277 /* Write out pack files */
278 for (size_t i = 0; i < num_files; i++) {
279 nih_local char *filename = NULL;
280
281 filename = pack_file_name_for_device (NULL, files[i].dev);
282 if (! filename) {
283 NihError *err;
284
285 err = nih_error_get ();
286 nih_warn ("%s", err->message);
287 nih_free (err);
288
289 continue;
290 }
291
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800292 /* If filename_to_replace is not NULL, only write out the
293 * file and skip others.
294 */
295 if (filename_to_replace &&
296 strcmp (filename_to_replace, filename)) {
297 nih_info ("Skipping %s", filename);
298 continue;
299 }
300
Scott James Remnant56686d62009-11-09 18:38:51 +0000301 nih_info ("Writing %s", filename);
302
303 /* We only need to apply additional sorting to the
304 * HDD-optimised packs, the SSD ones can read in random
305 * order quite happily.
306 *
307 * Also for HDD, generate the inode group preloading
308 * array.
309 */
310 if (files[i].rotational) {
311 trace_add_groups (files, &files[i]);
312
313 trace_sort_blocks (files, &files[i]);
314 trace_sort_paths (files, &files[i]);
315 }
316
317 write_pack (filename, &files[i]);
318
319 if (nih_log_priority < NIH_LOG_MESSAGE)
320 pack_dump (&files[i], SORT_OPEN);
321 }
322
323 return 0;
324error:
325 close (dfd);
326 if (unmount)
327 umount (PATH_DEBUGFS_TMP);
328
329 return -1;
330}
331
332
333static int
334read_trace (const void *parent,
335 int dfd,
336 const char *path,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800337 const PathPrefixOption *path_prefix,
Scott James Remnant56686d62009-11-09 18:38:51 +0000338 PackFile ** files,
339 size_t * num_files)
340{
341 int fd;
342 FILE *fp;
343 char *line;
344
345 nih_assert (path != NULL);
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800346 nih_assert (path_prefix != NULL);
Scott James Remnant56686d62009-11-09 18:38:51 +0000347 nih_assert (files != NULL);
348 nih_assert (num_files != NULL);
349
350 fd = openat (dfd, path, O_RDONLY);
351 if (fd < 0)
352 nih_return_system_error (-1);
353
354 fp = fdopen (fd, "r");
355 if (! fp) {
356 nih_error_raise_system ();
357 close (fd);
358 return -1;
359 }
360
361 while ((line = fgets_alloc (NULL, fp)) != NULL) {
362 char *ptr;
363 char *end;
364
365 ptr = strstr (line, " do_sys_open:");
366 if (! ptr)
367 ptr = strstr (line, " open_exec:");
368 if (! ptr)
369 ptr = strstr (line, " uselib:");
370 if (! ptr) {
371 nih_free (line);
372 continue;
373 }
374
375 ptr = strchr (ptr, '"');
376 if (! ptr) {
377 nih_free (line);
378 continue;
379 }
380
381 ptr++;
382
383 end = strrchr (ptr, '"');
384 if (! end) {
385 nih_free (line);
386 continue;
387 }
388
389 *end = '\0';
390
391 fix_path (ptr);
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800392 if (path_prefix->st_dev != NODEV && ptr[0] == '/') {
393 struct stat stbuf;
394 char *rewritten = nih_sprintf (
395 line, "%s%s", path_prefix->prefix, ptr);
396 if (! lstat (rewritten, &stbuf) &&
397 stbuf.st_dev == path_prefix->st_dev) {
398 /* If |rewritten| exists on the same device as
399 * path_prefix->st_dev, record the rewritten one
400 * instead of the original path.
401 */
402 ptr = rewritten;
403 }
404 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000405 trace_add_path (parent, ptr, files, num_files);
406
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800407 nih_free (line); /* also frees |rewritten| */
Scott James Remnant56686d62009-11-09 18:38:51 +0000408 }
409
410 if (fclose (fp) < 0)
411 nih_return_system_error (-1);
412
413 return 0;
414}
415
/**
 * fix_path:
 * @pathname: path to normalise, modified in place.
 *
 * Collapses empty ("//"), current-directory ("/./") and parent-directory
 * ("/../") components of @pathname.  The result is never longer than the
 * input.  A path made up only of such components, "/" included, collapses
 * to the empty string.
 *
 * The scan never moves a pointer before the start of @pathname: after a
 * component is removed the same position is simply examined again.
 **/
static void
fix_path (char *pathname)
{
	char *ptr;

	nih_assert (pathname != NULL);

	ptr = pathname;
	while (*ptr) {
		size_t len;

		if (ptr[0] != '/') {
			ptr++;
			continue;
		}

		/* Length of the component that follows this slash */
		len = strcspn (ptr + 1, "/");

		/* // and /./, we shorten the string and repeat the loop
		 * looking at the new /
		 */
		if ((len == 0) || ((len == 1) && (ptr[1] == '.'))) {
			/* Moves the tail including its terminating NUL */
			memmove (ptr, ptr + len + 1, strlen (ptr) - len);
			continue;
		}

		/* /../, we shorten back to the previous / or the start
		 * of the string and repeat the loop looking at the new /
		 */
		if ((len == 2) && (ptr[1] == '.') && (ptr[2] == '.')) {
			char *root = ptr;

			while (root > pathname) {
				root--;
				if (root[0] == '/')
					break;
			}

			memmove (root, ptr + len + 1, strlen (ptr) - len);
			ptr = root;
			continue;
		}

		ptr++;
	}

	/* ptr now rests on the terminating NUL; strip trailing slashes */
	while ((ptr != pathname) && (*(--ptr) == '/'))
		*ptr = '\0';
}
462
463
464static int
465trace_add_path (const void *parent,
466 const char *pathname,
467 PackFile ** files,
468 size_t * num_files)
469{
470 static NihHash *path_hash = NULL;
471 struct stat statbuf;
472 int fd;
473 PackFile * file;
474 PackPath * path;
475 static NihHash *inode_hash = NULL;
476 nih_local char *inode_key = NULL;
477
478 nih_assert (pathname != NULL);
479 nih_assert (files != NULL);
480 nih_assert (num_files != NULL);
481
482 /* We can't really deal with relative paths since we don't know
483 * the working directory that they were opened from.
484 */
485 if (pathname[0] != '/') {
486 nih_warn ("%s: %s", pathname, _("Ignored relative path"));
487 return 0;
488 }
489
490 /* Certain paths aren't worth caching, because they're virtual or
491 * temporary filesystems and would waste pack space.
492 */
493 if (ignore_path (pathname))
494 return 0;
495
496 /* Ignore paths that won't fit in the pack; we could use PATH_MAX,
497 * but with 1000 files that'd be 4M just for the
498 * pack.
499 */
500 if (strlen (pathname) > PACK_PATH_MAX) {
501 nih_warn ("%s: %s", pathname, _("Ignored far too long path"));
502 return 0;
503 }
504
505 /* Use a hash table of paths to eliminate duplicate path names from
506 * the table since that would waste pack space (and fds).
507 */
508 if (! path_hash)
509 path_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
510
511 if (nih_hash_lookup (path_hash, pathname)) {
512 return 0;
513 } else {
514 NihListEntry *entry;
515
516 entry = NIH_MUST (nih_list_entry_new (path_hash));
517 entry->str = NIH_MUST (nih_strdup (entry, pathname));
518
519 nih_hash_add (path_hash, &entry->entry);
520 }
521
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100522 /* Make sure that we have an ordinary file
523 * This avoids us opening a fifo or socket or symlink.
Scott James Remnant56686d62009-11-09 18:38:51 +0000524 */
525 if ((lstat (pathname, &statbuf) < 0)
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100526 || (S_ISLNK (statbuf.st_mode))
Scott James Remnant56686d62009-11-09 18:38:51 +0000527 || (! S_ISREG (statbuf.st_mode)))
528 return 0;
529
530 /* Open and stat again to get the genuine details, in case it
531 * changes under us.
532 */
533 fd = open (pathname, O_RDONLY | O_NOATIME);
534 if (fd < 0) {
535 nih_warn ("%s: %s: %s", pathname,
536 _("File vanished or error reading"),
537 strerror (errno));
538 return -1;
539 }
540
541 if (fstat (fd, &statbuf) < 0) {
542 nih_warn ("%s: %s: %s", pathname,
543 _("Error retrieving file stat"),
544 strerror (errno));
545 close (fd);
546 return -1;
547 }
548
549 /* Double-check that it's really still a file */
550 if (! S_ISREG (statbuf.st_mode)) {
551 close (fd);
552 return 0;
553 }
554
555 /* Some people think it's clever to split their filesystem across
556 * multiple devices, so we need to generate a different pack file
557 * for each device.
558 *
559 * Lookup file based on the dev_t, potentially creating a new
560 * pack file in the array.
561 */
562 file = trace_file (parent, statbuf.st_dev, files, num_files);
563
564 /* Grow the PackPath array and fill in the details for the new
565 * path.
566 */
567 file->paths = NIH_MUST (nih_realloc (file->paths, *files,
568 (sizeof (PackPath)
569 * (file->num_paths + 1))));
570
571 path = &file->paths[file->num_paths++];
572 memset (path, 0, sizeof (PackPath));
573
574 path->group = -1;
575 path->ino = statbuf.st_ino;
576
577 strncpy (path->path, pathname, PACK_PATH_MAX);
578 path->path[PACK_PATH_MAX] = '\0';
579
580 /* The paths array contains each unique path opened, but these
581 * might be symbolic or hard links to the same underlying files
582 * and we don't want to read the same block more than once.
583 *
584 * Use a hash table of dev_t/ino_t pairs to make sure we only
585 * read the blocks of an actual file the first time.
586 */
587 if (! inode_hash)
588 inode_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
589
590 inode_key = NIH_MUST (nih_sprintf (NULL, "%llu:%llu",
591 (unsigned long long)statbuf.st_dev,
592 (unsigned long long)statbuf.st_ino));
593
594 if (nih_hash_lookup (inode_hash, inode_key)) {
595 close (fd);
596 return 0;
597 } else {
598 NihListEntry *entry;
599
600 entry = NIH_MUST (nih_list_entry_new (inode_hash));
601 entry->str = inode_key;
602 nih_ref (entry->str, entry);
603
604 nih_hash_add (inode_hash, &entry->entry);
605 }
606
607 /* There's also no point reading zero byte files, since they
608 * won't have any blocks (and we can't mmap zero bytes anyway).
609 */
610 if (! statbuf.st_size) {
611 close (fd);
612 return 0;
613 }
614
615 /* Now read the in-memory chunks of this file and add those to
616 * the pack file too.
617 */
618 trace_add_chunks (*files, file, path, fd, statbuf.st_size);
619 close (fd);
620
621 return 0;
622}
623
624static int
625ignore_path (const char *pathname)
626{
627 nih_assert (pathname != NULL);
628
629 if (! strncmp (pathname, "/proc/", 6))
630 return TRUE;
631 if (! strncmp (pathname, "/sys/", 5))
632 return TRUE;
633 if (! strncmp (pathname, "/dev/", 5))
634 return TRUE;
635 if (! strncmp (pathname, "/tmp/", 5))
636 return TRUE;
Steve Langasek2c698a12012-02-03 15:27:29 -0800637 if (! strncmp (pathname, "/run/", 5))
638 return TRUE;
Scott James Remnant56686d62009-11-09 18:38:51 +0000639 if (! strncmp (pathname, "/var/run/", 9))
640 return TRUE;
Bryan Fullerton96c991b2013-03-12 15:06:41 +0000641 if (! strncmp (pathname, "/var/log/", 9))
642 return TRUE;
Scott James Remnant56686d62009-11-09 18:38:51 +0000643 if (! strncmp (pathname, "/var/lock/", 10))
644 return TRUE;
645
646 return FALSE;
647}
648
649
650static PackFile *
651trace_file (const void *parent,
652 dev_t dev,
653 PackFile ** files,
654 size_t * num_files)
655{
656 nih_local char *filename = NULL;
657 int rotational;
658 PackFile * file;
659
660 nih_assert (files != NULL);
661 nih_assert (num_files != NULL);
662
663 /* Return any existing file structure for this device */
664 for (size_t i = 0; i < *num_files; i++)
665 if ((*files)[i].dev == dev)
666 return &(*files)[i];
667
668 /* Query sysfs to see whether this disk is rotational; this
669 * obviously won't work for virtual devices and the like, so
670 * default to TRUE for now.
671 */
Yusuke Satofc571f12016-04-30 06:17:08 -0400672 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:%d/queue/rotational",
673 major (dev), minor (dev)));
674 if (access (filename, R_OK) < 0) {
675 /* For devices managed by the scsi stack, the minor device number has to be
676 * masked to find the queue/rotational file.
677 */
678 nih_free (filename);
679 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:%d/queue/rotational",
680 major (dev), minor (dev) & 0xffff0));
681 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000682
683 if (get_value (AT_FDCWD, filename, &rotational) < 0) {
684 NihError *err;
685
686 err = nih_error_get ();
687 nih_warn (_("Unable to obtain rotationalness for device %u:%u: %s"),
688 major (dev), minor (dev), err->message);
689 nih_free (err);
690
691 rotational = TRUE;
692 }
693
694 /* Grow the PackFile array and fill in the details for the new
695 * file.
696 */
697 *files = NIH_MUST (nih_realloc (*files, parent,
698 (sizeof (PackFile) * (*num_files + 1))));
699
700 file = &(*files)[(*num_files)++];
701 memset (file, 0, sizeof (PackFile));
702
703 file->dev = dev;
704 file->rotational = rotational;
705 file->num_paths = 0;
706 file->paths = NULL;
707 file->num_blocks = 0;
708 file->blocks = NULL;
709
710 return file;
711}
712
713
714static int
715trace_add_chunks (const void *parent,
716 PackFile * file,
717 PackPath * path,
718 int fd,
719 off_t size)
720{
721 static int page_size = -1;
722 void * buf;
723 off_t num_pages;
724 nih_local unsigned char *vec = NULL;
725
726 nih_assert (file != NULL);
727 nih_assert (path != NULL);
728 nih_assert (fd >= 0);
729 nih_assert (size > 0);
730
731 if (page_size < 0)
732 page_size = sysconf (_SC_PAGESIZE);
733
734 /* Map the file into memory */
735 buf = mmap (NULL, size, PROT_READ, MAP_SHARED, fd, 0);
736 if (buf == MAP_FAILED) {
737 nih_warn ("%s: %s: %s", path->path,
738 _("Error mapping into memory"),
739 strerror (errno));
740 return -1;
741 }
742
743 /* Grab the core memory map of the file */
744 num_pages = (size - 1) / page_size + 1;
745 vec = NIH_MUST (nih_alloc (NULL, num_pages));
746 memset (vec, 0, num_pages);
747
748 if (mincore (buf, size, vec) < 0) {
749 nih_warn ("%s: %s: %s", path->path,
750 _("Error retrieving page cache info"),
751 strerror (errno));
752 munmap (buf, size);
753 return -1;
754 }
755
756 /* Clean up */
757 if (munmap (buf, size) < 0) {
758 nih_warn ("%s: %s: %s", path->path,
759 _("Error unmapping from memory"),
760 strerror (errno));
761 return -1;
762 }
763
764
765 /* Now we can figure out which contiguous bits of the file are
766 * in core memory.
767 */
768 for (off_t i = 0; i < num_pages; i++) {
769 off_t offset;
770 off_t length;
771
772 if (! vec[i])
773 continue;
774
775 offset = i * page_size;
776 length = page_size;
777
778 while (((i + 1) < num_pages) && vec[i + 1]) {
779 length += page_size;
780 i++;
781 }
782
783 /* The rotational crowd need this split down further into
784 * on-disk extents, the non-rotational folks can just use
785 * the chunks data.
786 */
787 if (file->rotational) {
788 trace_add_extents (parent, file, path, fd, size,
789 offset, length);
790 } else {
791 PackBlock *block;
792
793 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
794 (sizeof (PackBlock)
795 * (file->num_blocks + 1))));
796
797 block = &file->blocks[file->num_blocks++];
798 memset (block, 0, sizeof (PackBlock));
799
800 block->pathidx = file->num_paths - 1;
801 block->offset = offset;
802 block->length = length;
803 block->physical = -1;
804 }
805 }
806
807 return 0;
808}
809
810struct fiemap *
811get_fiemap (const void *parent,
812 int fd,
813 off_t offset,
814 off_t length)
815{
816 struct fiemap *fiemap;
817
818 nih_assert (fd >= 0);
819
820 fiemap = NIH_MUST (nih_new (parent, struct fiemap));
821 memset (fiemap, 0, sizeof (struct fiemap));
822
823 fiemap->fm_start = offset;
824 fiemap->fm_length = length;
825 fiemap->fm_flags = 0;
826
827 do {
828 /* Query the current number of extents */
829 fiemap->fm_mapped_extents = 0;
830 fiemap->fm_extent_count = 0;
831
832 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
833 nih_error_raise_system ();
834 nih_free (fiemap);
835 return NULL;
836 }
837
838 /* Always allow room for one extra over what we were told,
839 * so we know if they changed under us.
840 */
841 fiemap = NIH_MUST (nih_realloc (fiemap, parent,
842 (sizeof (struct fiemap)
843 + (sizeof (struct fiemap_extent)
844 * (fiemap->fm_mapped_extents + 1)))));
845 fiemap->fm_extent_count = fiemap->fm_mapped_extents + 1;
846 fiemap->fm_mapped_extents = 0;
847
848 memset (fiemap->fm_extents, 0, (sizeof (struct fiemap_extent)
849 * fiemap->fm_extent_count));
850
851 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
852 nih_error_raise_system ();
853 nih_free (fiemap);
854 return NULL;
855 }
856 } while (fiemap->fm_mapped_extents
857 && (fiemap->fm_mapped_extents >= fiemap->fm_extent_count));
858
859 return fiemap;
860}
861
862static int
863trace_add_extents (const void *parent,
864 PackFile * file,
865 PackPath * path,
866 int fd,
867 off_t size,
868 off_t offset,
869 off_t length)
870{
871 nih_local struct fiemap *fiemap = NULL;
872
873 nih_assert (file != NULL);
874 nih_assert (path != NULL);
875 nih_assert (fd >= 0);
876 nih_assert (size > 0);
877
878 /* Get the extents map for this chunk, then iterate the extents
879 * and put those in the pack instead of the chunks.
880 */
881 fiemap = get_fiemap (NULL, fd, offset, length);
882 if (! fiemap) {
883 NihError *err;
884
885 err = nih_error_get ();
886 nih_warn ("%s: %s: %s", path->path,
887 _("Error retrieving chunk extents"),
888 err->message);
889 nih_free (err);
890
891 return -1;
892 }
893
894 for (__u32 j = 0; j < fiemap->fm_mapped_extents; j++) {
895 PackBlock *block;
896 off_t start;
897 off_t end;
898
899 if (fiemap->fm_extents[j].fe_flags & FIEMAP_EXTENT_UNKNOWN)
900 continue;
901
902 /* Work out the intersection of the chunk and extent */
903 start = nih_max (fiemap->fm_start,
904 fiemap->fm_extents[j].fe_logical);
905 end = nih_min ((fiemap->fm_start + fiemap->fm_length),
906 (fiemap->fm_extents[j].fe_logical
907 + fiemap->fm_extents[j].fe_length));
908
909 /* Grow the blocks array to add the extent */
910 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
911 (sizeof (PackBlock)
912 * (file->num_blocks + 1))));
913
914 block = &file->blocks[file->num_blocks++];
915 memset (block, 0, sizeof (PackBlock));
916
917 block->pathidx = file->num_paths - 1;
918 block->offset = start;
919 block->length = end - start;
920 block->physical = (fiemap->fm_extents[j].fe_physical
921 + (start - fiemap->fm_extents[j].fe_logical));
922 }
923
924 return 0;
925}
926
927static int
928trace_add_groups (const void *parent,
929 PackFile * file)
930{
931 const char *devname;
932 ext2_filsys fs = NULL;
933
934 nih_assert (file != NULL);
935
936 devname = blkid_devno_to_devname (file->dev);
937 if (devname
938 && (! ext2fs_open (devname, 0, 0, 0, unix_io_manager, &fs))) {
939 nih_assert (fs != NULL);
940 size_t num_groups = 0;
941 nih_local size_t *num_inodes = NULL;
942 size_t mean = 0;
943 size_t hits = 0;
944
945 nih_assert (fs != NULL);
946
947 /* Calculate the number of inode groups on this filesystem */
948 num_groups = ((fs->super->s_blocks_count - 1)
949 / fs->super->s_blocks_per_group) + 1;
950
951 /* Fill in the pack path's group member, and count the
952 * number of inodes in each group.
953 */
954 num_inodes = NIH_MUST (nih_alloc (NULL, (sizeof (size_t)
955 * num_groups)));
956 memset (num_inodes, 0, sizeof (size_t) * num_groups);
957
958 for (size_t i = 0; i < file->num_paths; i++) {
959 file->paths[i].group = ext2fs_group_of_ino (fs, file->paths[i].ino);
960 num_inodes[file->paths[i].group]++;
961 }
962
963 /* Iterate the groups and add any group that exceeds the
964 * inode preload threshold.
965 */
966 for (size_t i = 0; i < num_groups; i++) {
967 mean += num_inodes[i];
968 if (num_inodes[i] > INODE_GROUP_PRELOAD_THRESHOLD) {
969 file->groups = NIH_MUST (nih_realloc (file->groups, parent,
970 (sizeof (int)
971 * (file->num_groups + 1))));
972 file->groups[file->num_groups++] = i;
973 hits++;
974 }
975 }
976
977 mean /= num_groups;
978
979 nih_debug ("%zu inode groups, mean %zu inodes per group, %zu hits",
980 num_groups, mean, hits);
981
982 ext2fs_close (fs);
983 }
984
985 return 0;
986}
987
988
989static int
990block_compar (const void *a,
991 const void *b)
992{
993 const PackBlock *block_a = a;
994 const PackBlock *block_b = b;
995
996 nih_assert (block_a != NULL);
997 nih_assert (block_b != NULL);
998
999 if (block_a->physical < block_b->physical) {
1000 return -1;
1001 } else if (block_a->physical > block_b->physical) {
1002 return 1;
1003 } else {
1004 return 0;
1005 }
1006}
1007
1008static int
1009trace_sort_blocks (const void *parent,
1010 PackFile * file)
1011{
1012 nih_assert (file != NULL);
1013
1014 /* Sort the blocks array by physical location, since these are
1015 * read in a separate pass to opening files, there's no reason
1016 * to consider which path each block is in - and thus resulting
1017 * in a linear disk read.
1018 */
1019 qsort (file->blocks, file->num_blocks, sizeof (PackBlock),
1020 block_compar);
1021
1022 return 0;
1023}
1024
1025static int
1026path_compar (const void *a,
1027 const void *b)
1028{
1029 const PackPath * const *path_a = a;
1030 const PackPath * const *path_b = b;
1031
1032 nih_assert (path_a != NULL);
1033 nih_assert (path_b != NULL);
1034
1035 if ((*path_a)->group < (*path_b)->group) {
1036 return -1;
1037 } else if ((*path_a)->group > (*path_b)->group) {
1038 return 1;
1039 } else if ((*path_a)->ino < (*path_b)->ino) {
1040 return -1;
1041 } else if ((*path_b)->ino > (*path_b)->ino) {
1042 return 1;
1043 } else {
1044 return strcmp ((*path_a)->path, (*path_b)->path);
1045 }
1046}
1047
1048static int
1049trace_sort_paths (const void *parent,
1050 PackFile * file)
1051{
1052 nih_local PackPath **paths = NULL;
1053 nih_local size_t * new_idx = NULL;
1054 PackPath * new_paths;
1055
1056 nih_assert (file != NULL);
1057
1058 /* Sort the paths array by ext2fs inode group, ino_t then path.
1059 *
1060 * Mucking around with things like the physical locations of
1061 * first on-disk blocks of the dentry and stuff didn't work out
1062 * so well, sorting by path was better, but this seems the best.
1063 * (it looks good on blktrace too)
1064 */
1065 paths = NIH_MUST (nih_alloc (NULL, (sizeof (PackPath *)
1066 * file->num_paths)));
1067
1068 for (size_t i = 0; i < file->num_paths; i++)
1069 paths[i] = &file->paths[i];
1070
1071 qsort (paths, file->num_paths, sizeof (PackPath *),
1072 path_compar);
1073
1074 /* Calculate the new indexes of each path element in the old
1075 * array, and then update the block array's path indexes to
1076 * match.
1077 */
1078 new_idx = NIH_MUST (nih_alloc (NULL,
1079 (sizeof (size_t) * file->num_paths)));
1080 for (size_t i = 0; i < file->num_paths; i++)
1081 new_idx[paths[i] - file->paths] = i;
1082
1083 for (size_t i = 0; i < file->num_blocks; i++)
1084 file->blocks[i].pathidx = new_idx[file->blocks[i].pathidx];
1085
1086 /* Finally generate a new paths array with the new order and
1087 * attach it to the file.
1088 */
1089 new_paths = NIH_MUST (nih_alloc (parent,
1090 (sizeof (PackPath) * file->num_paths)));
1091 for (size_t i = 0; i < file->num_paths; i++)
1092 memcpy (&new_paths[new_idx[i]], &file->paths[i],
1093 sizeof (PackPath));
1094
1095 nih_unref (file->paths, parent);
1096 file->paths = new_paths;
1097
1098 return 0;
1099}