blob: 240a17f21bc69b347b36b7fb50521b1f05ea8718 [file] [log] [blame]
Scott James Remnant56686d62009-11-09 18:38:51 +00001/* ureadahead
2 *
3 * trace.c - boot tracing
4 *
5 * Copyright © 2009 Canonical Ltd.
6 * Author: Scott James Remnant <scott@netsplit.com>.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2, as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif /* HAVE_CONFIG_H */
25
26#define _ATFILE_SOURCE
27
28
29#include <sys/select.h>
30#include <sys/mount.h>
31#include <sys/types.h>
32#include <sys/mman.h>
33#include <sys/stat.h>
34
35#include <errno.h>
36#include <fcntl.h>
37#include <stdio.h>
38#include <signal.h>
39#include <stdlib.h>
40#include <string.h>
41#include <unistd.h>
42
43#include <blkid.h>
44#define NO_INLINE_FUNCS
45#include <ext2fs.h>
46
47#include <linux/fs.h>
48#include <linux/fiemap.h>
49
50#include <nih/macros.h>
51#include <nih/alloc.h>
52#include <nih/string.h>
53#include <nih/list.h>
54#include <nih/hash.h>
55#include <nih/main.h>
56#include <nih/logging.h>
57#include <nih/error.h>
58
59#include "trace.h"
60#include "pack.h"
61#include "values.h"
62#include "file.h"
63
64
/**
 * PATH_DEBUGFS:
 *
 * Where debugfs is normally mounted; the tracing directory is looked
 * for underneath this path first.
 **/
#define PATH_DEBUGFS     "/sys/kernel/debug"

/**
 * PATH_DEBUGFS_TMP:
 *
 * Fallback mountpoint: when the tracing directory cannot be found under
 * PATH_DEBUGFS, debugfs is mounted here for the duration of the trace
 * and unmounted again afterwards.
 **/
#define PATH_DEBUGFS_TMP "/var/lib/ureadahead/debugfs"

/**
 * INODE_GROUP_PRELOAD_THRESHOLD:
 *
 * An inode group is added to a pack's group preload list once more than
 * this many traced paths have their inode in that group.
 **/
#define INODE_GROUP_PRELOAD_THRESHOLD 8
86
87
88/* Prototypes for static functions */
89static int read_trace (const void *parent,
90 int dfd, const char *path,
91 PackFile **files, size_t *num_files);
92static void fix_path (char *pathname);
93static int trace_add_path (const void *parent, const char *pathname,
94 PackFile **files, size_t *num_files);
95static int ignore_path (const char *pathname);
96static PackFile *trace_file (const void *parent, dev_t dev,
97 PackFile **files, size_t *num_files);
98static int trace_add_chunks (const void *parent,
99 PackFile *file, PackPath *path,
100 int fd, off_t size);
101static int trace_add_extents (const void *parent,
102 PackFile *file, PackPath *path,
103 int fd, off_t size,
104 off_t offset, off_t length);
105static int trace_add_groups (const void *parent, PackFile *file);
106static int trace_sort_blocks (const void *parent, PackFile *file);
107static int trace_sort_paths (const void *parent, PackFile *file);
108
109
/**
 * sig_interrupt:
 * @signum: number of the signal being delivered (not used).
 *
 * Empty signal handler.  trace() installs it for SIGTERM and SIGINT
 * around its pause()/select() wait, so that delivery of either signal
 * runs this no-op instead of the previously configured action.
 **/
static void
sig_interrupt (int signum)
{
	/* Nothing to do; the parameter is intentionally ignored. */
	(void) signum;
}
114
115int
116trace (int daemonise,
117 int timeout)
118{
119 int dfd;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600120 FILE *fp;
Scott James Remnant56686d62009-11-09 18:38:51 +0000121 int unmount = FALSE;
122 int old_sys_open_enabled = 0;
123 int old_open_exec_enabled = 0;
124 int old_uselib_enabled = 0;
125 int old_tracing_enabled = 0;
Tim Gardner73aa2c52010-07-22 04:04:36 -0600126 int old_buffer_size_kb = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000127 struct sigaction act;
128 struct sigaction old_sigterm;
129 struct sigaction old_sigint;
130 struct timeval tv;
131 nih_local PackFile *files = NULL;
132 size_t num_files = 0;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600133 size_t num_cpus = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000134
135 /* Mount debugfs if not already mounted */
136 dfd = open (PATH_DEBUGFS "/tracing", O_RDONLY | O_NOATIME);
137 if (dfd < 0) {
138 if (errno != ENOENT)
139 nih_return_system_error (-1);
140
141 if (mount ("none", PATH_DEBUGFS_TMP, "debugfs", 0, NULL) < 0)
142 nih_return_system_error (-1);
143
144 dfd = open (PATH_DEBUGFS_TMP "/tracing", O_RDONLY | O_NOATIME);
145 if (dfd < 0) {
146 nih_error_raise_system ();
147 umount (PATH_DEBUGFS_TMP);
148 return -1;
149 }
150
151 unmount = TRUE;
152 }
153
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600154 /*
155 * Count the number of CPUs, default to 1 on error.
156 */
157 fp = fopen("/proc/cpuinfo", "r");
158 if (fp) {
159 int line_size=1024;
160 char *processor="processor";
161 char *line = malloc(line_size);
162 if (line) {
163 num_cpus = 0;
164 while (fgets(line,line_size,fp) != NULL) {
165 if (!strncmp(line,processor,strlen(processor)))
166 num_cpus++;
167 }
168 free(line);
169 nih_message("Counted %d CPUs\n",num_cpus);
170 }
171 fclose(fp);
172 }
173 if (!num_cpus)
174 num_cpus = 1;
175
Scott James Remnante30e2372010-09-20 18:34:31 +0100176 /* Enable tracing of open() syscalls */
Scott James Remnant56686d62009-11-09 18:38:51 +0000177 if (set_value (dfd, "events/fs/do_sys_open/enable",
178 TRUE, &old_sys_open_enabled) < 0)
179 goto error;
180 if (set_value (dfd, "events/fs/open_exec/enable",
181 TRUE, &old_open_exec_enabled) < 0)
182 goto error;
183 if (set_value (dfd, "events/fs/uselib/enable",
184 TRUE, &old_uselib_enabled) < 0) {
185 NihError *err;
186
187 err = nih_error_get ();
188 nih_debug ("Missing uselib tracing: %s", err->message);
189 nih_free (err);
190
191 old_uselib_enabled = -1;
192 }
Scott James Remnante30e2372010-09-20 18:34:31 +0100193 if (set_value (dfd, "buffer_size_kb", 8192/num_cpus, &old_buffer_size_kb) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000194 goto error;
195 if (set_value (dfd, "tracing_enabled",
196 TRUE, &old_tracing_enabled) < 0)
197 goto error;
198
199 if (daemonise) {
200 pid_t pid;
201
202 pid = fork ();
203 if (pid < 0) {
204 nih_error_raise_system ();
205 goto error;
206 } else if (pid > 0) {
207 _exit (0);
208 }
209 }
210
211 /* Sleep until we get signals */
212 act.sa_handler = sig_interrupt;
213 sigemptyset (&act.sa_mask);
214 act.sa_flags = 0;
215
216 sigaction (SIGTERM, &act, &old_sigterm);
217 sigaction (SIGINT, &act, &old_sigint);
218
219 if (timeout) {
220 tv.tv_sec = timeout;
221 tv.tv_usec = 0;
222
223 select (0, NULL, NULL, NULL, &tv);
224 } else {
225 pause ();
226 }
227
228 sigaction (SIGTERM, &old_sigterm, NULL);
229 sigaction (SIGINT, &old_sigint, NULL);
230
231 /* Restore previous tracing settings */
232 if (set_value (dfd, "tracing_enabled",
233 old_tracing_enabled, NULL) < 0)
234 goto error;
235 if (old_uselib_enabled >= 0)
236 if (set_value (dfd, "events/fs/uselib/enable",
237 old_uselib_enabled, NULL) < 0)
238 goto error;
239 if (set_value (dfd, "events/fs/open_exec/enable",
240 old_open_exec_enabled, NULL) < 0)
241 goto error;
242 if (set_value (dfd, "events/fs/do_sys_open/enable",
243 old_sys_open_enabled, NULL) < 0)
244 goto error;
245
246 /* Be nicer */
Scott James Remnantcc2943b2009-11-29 15:24:15 +0000247 if (nice (15))
248 ;
Scott James Remnant56686d62009-11-09 18:38:51 +0000249
250 /* Read trace log */
251 if (read_trace (NULL, dfd, "trace", &files, &num_files) < 0)
252 goto error;
253
Tim Gardner73aa2c52010-07-22 04:04:36 -0600254 /*
255 * Restore the trace buffer size (which has just been read) and free
256 * a bunch of memory.
257 */
258 if (set_value (dfd, "buffer_size_kb", old_buffer_size_kb, NULL) < 0)
259 goto error;
260
Scott James Remnant56686d62009-11-09 18:38:51 +0000261 /* Unmount the temporary debugfs mount if we mounted it */
262 if (close (dfd)) {
263 nih_error_raise_system ();
264 goto error;
265 }
266 if (unmount
267 && (umount (PATH_DEBUGFS_TMP) < 0)) {
268 nih_error_raise_system ();
269 goto error;
270 }
271
272 /* Write out pack files */
273 for (size_t i = 0; i < num_files; i++) {
274 nih_local char *filename = NULL;
275
276 filename = pack_file_name_for_device (NULL, files[i].dev);
277 if (! filename) {
278 NihError *err;
279
280 err = nih_error_get ();
281 nih_warn ("%s", err->message);
282 nih_free (err);
283
284 continue;
285 }
286
287 nih_info ("Writing %s", filename);
288
289 /* We only need to apply additional sorting to the
290 * HDD-optimised packs, the SSD ones can read in random
291 * order quite happily.
292 *
293 * Also for HDD, generate the inode group preloading
294 * array.
295 */
296 if (files[i].rotational) {
297 trace_add_groups (files, &files[i]);
298
299 trace_sort_blocks (files, &files[i]);
300 trace_sort_paths (files, &files[i]);
301 }
302
303 write_pack (filename, &files[i]);
304
305 if (nih_log_priority < NIH_LOG_MESSAGE)
306 pack_dump (&files[i], SORT_OPEN);
307 }
308
309 return 0;
310error:
311 close (dfd);
312 if (unmount)
313 umount (PATH_DEBUGFS_TMP);
314
315 return -1;
316}
317
318
319static int
320read_trace (const void *parent,
321 int dfd,
322 const char *path,
323 PackFile ** files,
324 size_t * num_files)
325{
326 int fd;
327 FILE *fp;
328 char *line;
329
330 nih_assert (path != NULL);
331 nih_assert (files != NULL);
332 nih_assert (num_files != NULL);
333
334 fd = openat (dfd, path, O_RDONLY);
335 if (fd < 0)
336 nih_return_system_error (-1);
337
338 fp = fdopen (fd, "r");
339 if (! fp) {
340 nih_error_raise_system ();
341 close (fd);
342 return -1;
343 }
344
345 while ((line = fgets_alloc (NULL, fp)) != NULL) {
346 char *ptr;
347 char *end;
348
349 ptr = strstr (line, " do_sys_open:");
350 if (! ptr)
351 ptr = strstr (line, " open_exec:");
352 if (! ptr)
353 ptr = strstr (line, " uselib:");
354 if (! ptr) {
355 nih_free (line);
356 continue;
357 }
358
359 ptr = strchr (ptr, '"');
360 if (! ptr) {
361 nih_free (line);
362 continue;
363 }
364
365 ptr++;
366
367 end = strrchr (ptr, '"');
368 if (! end) {
369 nih_free (line);
370 continue;
371 }
372
373 *end = '\0';
374
375 fix_path (ptr);
376 trace_add_path (parent, ptr, files, num_files);
377
378 nih_free (line);
379 }
380
381 if (fclose (fp) < 0)
382 nih_return_system_error (-1);
383
384 return 0;
385}
386
/**
 * fix_path:
 * @pathname: NUL-terminated path, modified in place.
 *
 * Collapses redundant components of @pathname without consulting the
 * filesystem:
 *
 *  - an empty component ("//" or a trailing "/") and a "." component
 *    are removed together with the slash in front of them;
 *  - a ".." component is removed together with the component in front
 *    of it, back to the previous slash or the start of the string.
 *
 * The result is never longer than the input.  Note that a path which
 * collapses completely (for example "/" or "/a/..") becomes the empty
 * string.
 *
 * Positions are tracked as indexes into @pathname rather than by
 * stepping a pointer backwards, so no pointer before the start of the
 * buffer is ever formed.
 **/
static void
fix_path (char *pathname)
{
	size_t pos = 0;

	nih_assert (pathname != NULL);

	while (pathname[pos] != '\0') {
		char * slash = pathname + pos;
		size_t len;

		if (*slash != '/') {
			pos++;
			continue;
		}

		/* Length of the component following this slash */
		len = strcspn (slash + 1, "/");

		/* // and /./, we shorten the string and look at the
		 * same position again, which now holds the next slash
		 * (or the terminator).  The count includes the NUL.
		 */
		if ((len == 0) || ((len == 1) && (slash[1] == '.'))) {
			memmove (slash, slash + len + 1, strlen (slash) - len);
			continue;
		}

		/* /../, we shorten back to the previous / or the start
		 * of the string and carry on from there.
		 */
		if ((len == 2) && (slash[1] == '.') && (slash[2] == '.')) {
			size_t root = pos;

			/* Walk back until the character before root is a slash */
			while ((root > 0) && (pathname[root - 1] != '/'))
				root--;
			if (root > 0)
				root--;

			memmove (pathname + root, slash + len + 1,
				 strlen (slash) - len);
			pos = root;
			continue;
		}

		/* Ordinary component, step over the slash */
		pos++;
	}

	/* Strip any slashes left at the end of the string */
	while ((pos > 0) && (pathname[pos - 1] == '/'))
		pathname[--pos] = '\0';
}
433
434
435static int
436trace_add_path (const void *parent,
437 const char *pathname,
438 PackFile ** files,
439 size_t * num_files)
440{
441 static NihHash *path_hash = NULL;
442 struct stat statbuf;
443 int fd;
444 PackFile * file;
445 PackPath * path;
446 static NihHash *inode_hash = NULL;
447 nih_local char *inode_key = NULL;
448
449 nih_assert (pathname != NULL);
450 nih_assert (files != NULL);
451 nih_assert (num_files != NULL);
452
453 /* We can't really deal with relative paths since we don't know
454 * the working directory that they were opened from.
455 */
456 if (pathname[0] != '/') {
457 nih_warn ("%s: %s", pathname, _("Ignored relative path"));
458 return 0;
459 }
460
461 /* Certain paths aren't worth caching, because they're virtual or
462 * temporary filesystems and would waste pack space.
463 */
464 if (ignore_path (pathname))
465 return 0;
466
467 /* Ignore paths that won't fit in the pack; we could use PATH_MAX,
468 * but with 1000 files that'd be 4M just for the
469 * pack.
470 */
471 if (strlen (pathname) > PACK_PATH_MAX) {
472 nih_warn ("%s: %s", pathname, _("Ignored far too long path"));
473 return 0;
474 }
475
476 /* Use a hash table of paths to eliminate duplicate path names from
477 * the table since that would waste pack space (and fds).
478 */
479 if (! path_hash)
480 path_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
481
482 if (nih_hash_lookup (path_hash, pathname)) {
483 return 0;
484 } else {
485 NihListEntry *entry;
486
487 entry = NIH_MUST (nih_list_entry_new (path_hash));
488 entry->str = NIH_MUST (nih_strdup (entry, pathname));
489
490 nih_hash_add (path_hash, &entry->entry);
491 }
492
493 /* Make sure that we have an ordinary file, or a symlink to an
494 * ordinary file. This avoids us opening a fifo or socket.
495 */
496 if ((lstat (pathname, &statbuf) < 0)
497 || (S_ISLNK (statbuf.st_mode)
498 && (stat (pathname, &statbuf) < 0))
499 || (! S_ISREG (statbuf.st_mode)))
500 return 0;
501
502 /* Open and stat again to get the genuine details, in case it
503 * changes under us.
504 */
505 fd = open (pathname, O_RDONLY | O_NOATIME);
506 if (fd < 0) {
507 nih_warn ("%s: %s: %s", pathname,
508 _("File vanished or error reading"),
509 strerror (errno));
510 return -1;
511 }
512
513 if (fstat (fd, &statbuf) < 0) {
514 nih_warn ("%s: %s: %s", pathname,
515 _("Error retrieving file stat"),
516 strerror (errno));
517 close (fd);
518 return -1;
519 }
520
521 /* Double-check that it's really still a file */
522 if (! S_ISREG (statbuf.st_mode)) {
523 close (fd);
524 return 0;
525 }
526
527 /* Some people think it's clever to split their filesystem across
528 * multiple devices, so we need to generate a different pack file
529 * for each device.
530 *
531 * Lookup file based on the dev_t, potentially creating a new
532 * pack file in the array.
533 */
534 file = trace_file (parent, statbuf.st_dev, files, num_files);
535
536 /* Grow the PackPath array and fill in the details for the new
537 * path.
538 */
539 file->paths = NIH_MUST (nih_realloc (file->paths, *files,
540 (sizeof (PackPath)
541 * (file->num_paths + 1))));
542
543 path = &file->paths[file->num_paths++];
544 memset (path, 0, sizeof (PackPath));
545
546 path->group = -1;
547 path->ino = statbuf.st_ino;
548
549 strncpy (path->path, pathname, PACK_PATH_MAX);
550 path->path[PACK_PATH_MAX] = '\0';
551
552 /* The paths array contains each unique path opened, but these
553 * might be symbolic or hard links to the same underlying files
554 * and we don't want to read the same block more than once.
555 *
556 * Use a hash table of dev_t/ino_t pairs to make sure we only
557 * read the blocks of an actual file the first time.
558 */
559 if (! inode_hash)
560 inode_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
561
562 inode_key = NIH_MUST (nih_sprintf (NULL, "%llu:%llu",
563 (unsigned long long)statbuf.st_dev,
564 (unsigned long long)statbuf.st_ino));
565
566 if (nih_hash_lookup (inode_hash, inode_key)) {
567 close (fd);
568 return 0;
569 } else {
570 NihListEntry *entry;
571
572 entry = NIH_MUST (nih_list_entry_new (inode_hash));
573 entry->str = inode_key;
574 nih_ref (entry->str, entry);
575
576 nih_hash_add (inode_hash, &entry->entry);
577 }
578
579 /* There's also no point reading zero byte files, since they
580 * won't have any blocks (and we can't mmap zero bytes anyway).
581 */
582 if (! statbuf.st_size) {
583 close (fd);
584 return 0;
585 }
586
587 /* Now read the in-memory chunks of this file and add those to
588 * the pack file too.
589 */
590 trace_add_chunks (*files, file, path, fd, statbuf.st_size);
591 close (fd);
592
593 return 0;
594}
595
596static int
597ignore_path (const char *pathname)
598{
599 nih_assert (pathname != NULL);
600
601 if (! strncmp (pathname, "/proc/", 6))
602 return TRUE;
603 if (! strncmp (pathname, "/sys/", 5))
604 return TRUE;
605 if (! strncmp (pathname, "/dev/", 5))
606 return TRUE;
607 if (! strncmp (pathname, "/tmp/", 5))
608 return TRUE;
609 if (! strncmp (pathname, "/var/run/", 9))
610 return TRUE;
611 if (! strncmp (pathname, "/var/lock/", 10))
612 return TRUE;
613
614 return FALSE;
615}
616
617
618static PackFile *
619trace_file (const void *parent,
620 dev_t dev,
621 PackFile ** files,
622 size_t * num_files)
623{
624 nih_local char *filename = NULL;
625 int rotational;
626 PackFile * file;
627
628 nih_assert (files != NULL);
629 nih_assert (num_files != NULL);
630
631 /* Return any existing file structure for this device */
632 for (size_t i = 0; i < *num_files; i++)
633 if ((*files)[i].dev == dev)
634 return &(*files)[i];
635
636 /* Query sysfs to see whether this disk is rotational; this
637 * obviously won't work for virtual devices and the like, so
638 * default to TRUE for now.
639 */
640 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:0/queue/rotational",
641 major (dev)));
642
643 if (get_value (AT_FDCWD, filename, &rotational) < 0) {
644 NihError *err;
645
646 err = nih_error_get ();
647 nih_warn (_("Unable to obtain rotationalness for device %u:%u: %s"),
648 major (dev), minor (dev), err->message);
649 nih_free (err);
650
651 rotational = TRUE;
652 }
653
654 /* Grow the PackFile array and fill in the details for the new
655 * file.
656 */
657 *files = NIH_MUST (nih_realloc (*files, parent,
658 (sizeof (PackFile) * (*num_files + 1))));
659
660 file = &(*files)[(*num_files)++];
661 memset (file, 0, sizeof (PackFile));
662
663 file->dev = dev;
664 file->rotational = rotational;
665 file->num_paths = 0;
666 file->paths = NULL;
667 file->num_blocks = 0;
668 file->blocks = NULL;
669
670 return file;
671}
672
673
674static int
675trace_add_chunks (const void *parent,
676 PackFile * file,
677 PackPath * path,
678 int fd,
679 off_t size)
680{
681 static int page_size = -1;
682 void * buf;
683 off_t num_pages;
684 nih_local unsigned char *vec = NULL;
685
686 nih_assert (file != NULL);
687 nih_assert (path != NULL);
688 nih_assert (fd >= 0);
689 nih_assert (size > 0);
690
691 if (page_size < 0)
692 page_size = sysconf (_SC_PAGESIZE);
693
694 /* Map the file into memory */
695 buf = mmap (NULL, size, PROT_READ, MAP_SHARED, fd, 0);
696 if (buf == MAP_FAILED) {
697 nih_warn ("%s: %s: %s", path->path,
698 _("Error mapping into memory"),
699 strerror (errno));
700 return -1;
701 }
702
703 /* Grab the core memory map of the file */
704 num_pages = (size - 1) / page_size + 1;
705 vec = NIH_MUST (nih_alloc (NULL, num_pages));
706 memset (vec, 0, num_pages);
707
708 if (mincore (buf, size, vec) < 0) {
709 nih_warn ("%s: %s: %s", path->path,
710 _("Error retrieving page cache info"),
711 strerror (errno));
712 munmap (buf, size);
713 return -1;
714 }
715
716 /* Clean up */
717 if (munmap (buf, size) < 0) {
718 nih_warn ("%s: %s: %s", path->path,
719 _("Error unmapping from memory"),
720 strerror (errno));
721 return -1;
722 }
723
724
725 /* Now we can figure out which contiguous bits of the file are
726 * in core memory.
727 */
728 for (off_t i = 0; i < num_pages; i++) {
729 off_t offset;
730 off_t length;
731
732 if (! vec[i])
733 continue;
734
735 offset = i * page_size;
736 length = page_size;
737
738 while (((i + 1) < num_pages) && vec[i + 1]) {
739 length += page_size;
740 i++;
741 }
742
743 /* The rotational crowd need this split down further into
744 * on-disk extents, the non-rotational folks can just use
745 * the chunks data.
746 */
747 if (file->rotational) {
748 trace_add_extents (parent, file, path, fd, size,
749 offset, length);
750 } else {
751 PackBlock *block;
752
753 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
754 (sizeof (PackBlock)
755 * (file->num_blocks + 1))));
756
757 block = &file->blocks[file->num_blocks++];
758 memset (block, 0, sizeof (PackBlock));
759
760 block->pathidx = file->num_paths - 1;
761 block->offset = offset;
762 block->length = length;
763 block->physical = -1;
764 }
765 }
766
767 return 0;
768}
769
770struct fiemap *
771get_fiemap (const void *parent,
772 int fd,
773 off_t offset,
774 off_t length)
775{
776 struct fiemap *fiemap;
777
778 nih_assert (fd >= 0);
779
780 fiemap = NIH_MUST (nih_new (parent, struct fiemap));
781 memset (fiemap, 0, sizeof (struct fiemap));
782
783 fiemap->fm_start = offset;
784 fiemap->fm_length = length;
785 fiemap->fm_flags = 0;
786
787 do {
788 /* Query the current number of extents */
789 fiemap->fm_mapped_extents = 0;
790 fiemap->fm_extent_count = 0;
791
792 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
793 nih_error_raise_system ();
794 nih_free (fiemap);
795 return NULL;
796 }
797
798 /* Always allow room for one extra over what we were told,
799 * so we know if they changed under us.
800 */
801 fiemap = NIH_MUST (nih_realloc (fiemap, parent,
802 (sizeof (struct fiemap)
803 + (sizeof (struct fiemap_extent)
804 * (fiemap->fm_mapped_extents + 1)))));
805 fiemap->fm_extent_count = fiemap->fm_mapped_extents + 1;
806 fiemap->fm_mapped_extents = 0;
807
808 memset (fiemap->fm_extents, 0, (sizeof (struct fiemap_extent)
809 * fiemap->fm_extent_count));
810
811 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
812 nih_error_raise_system ();
813 nih_free (fiemap);
814 return NULL;
815 }
816 } while (fiemap->fm_mapped_extents
817 && (fiemap->fm_mapped_extents >= fiemap->fm_extent_count));
818
819 return fiemap;
820}
821
822static int
823trace_add_extents (const void *parent,
824 PackFile * file,
825 PackPath * path,
826 int fd,
827 off_t size,
828 off_t offset,
829 off_t length)
830{
831 nih_local struct fiemap *fiemap = NULL;
832
833 nih_assert (file != NULL);
834 nih_assert (path != NULL);
835 nih_assert (fd >= 0);
836 nih_assert (size > 0);
837
838 /* Get the extents map for this chunk, then iterate the extents
839 * and put those in the pack instead of the chunks.
840 */
841 fiemap = get_fiemap (NULL, fd, offset, length);
842 if (! fiemap) {
843 NihError *err;
844
845 err = nih_error_get ();
846 nih_warn ("%s: %s: %s", path->path,
847 _("Error retrieving chunk extents"),
848 err->message);
849 nih_free (err);
850
851 return -1;
852 }
853
854 for (__u32 j = 0; j < fiemap->fm_mapped_extents; j++) {
855 PackBlock *block;
856 off_t start;
857 off_t end;
858
859 if (fiemap->fm_extents[j].fe_flags & FIEMAP_EXTENT_UNKNOWN)
860 continue;
861
862 /* Work out the intersection of the chunk and extent */
863 start = nih_max (fiemap->fm_start,
864 fiemap->fm_extents[j].fe_logical);
865 end = nih_min ((fiemap->fm_start + fiemap->fm_length),
866 (fiemap->fm_extents[j].fe_logical
867 + fiemap->fm_extents[j].fe_length));
868
869 /* Grow the blocks array to add the extent */
870 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
871 (sizeof (PackBlock)
872 * (file->num_blocks + 1))));
873
874 block = &file->blocks[file->num_blocks++];
875 memset (block, 0, sizeof (PackBlock));
876
877 block->pathidx = file->num_paths - 1;
878 block->offset = start;
879 block->length = end - start;
880 block->physical = (fiemap->fm_extents[j].fe_physical
881 + (start - fiemap->fm_extents[j].fe_logical));
882 }
883
884 return 0;
885}
886
887static int
888trace_add_groups (const void *parent,
889 PackFile * file)
890{
891 const char *devname;
892 ext2_filsys fs = NULL;
893
894 nih_assert (file != NULL);
895
896 devname = blkid_devno_to_devname (file->dev);
897 if (devname
898 && (! ext2fs_open (devname, 0, 0, 0, unix_io_manager, &fs))) {
899 nih_assert (fs != NULL);
900 size_t num_groups = 0;
901 nih_local size_t *num_inodes = NULL;
902 size_t mean = 0;
903 size_t hits = 0;
904
905 nih_assert (fs != NULL);
906
907 /* Calculate the number of inode groups on this filesystem */
908 num_groups = ((fs->super->s_blocks_count - 1)
909 / fs->super->s_blocks_per_group) + 1;
910
911 /* Fill in the pack path's group member, and count the
912 * number of inodes in each group.
913 */
914 num_inodes = NIH_MUST (nih_alloc (NULL, (sizeof (size_t)
915 * num_groups)));
916 memset (num_inodes, 0, sizeof (size_t) * num_groups);
917
918 for (size_t i = 0; i < file->num_paths; i++) {
919 file->paths[i].group = ext2fs_group_of_ino (fs, file->paths[i].ino);
920 num_inodes[file->paths[i].group]++;
921 }
922
923 /* Iterate the groups and add any group that exceeds the
924 * inode preload threshold.
925 */
926 for (size_t i = 0; i < num_groups; i++) {
927 mean += num_inodes[i];
928 if (num_inodes[i] > INODE_GROUP_PRELOAD_THRESHOLD) {
929 file->groups = NIH_MUST (nih_realloc (file->groups, parent,
930 (sizeof (int)
931 * (file->num_groups + 1))));
932 file->groups[file->num_groups++] = i;
933 hits++;
934 }
935 }
936
937 mean /= num_groups;
938
939 nih_debug ("%zu inode groups, mean %zu inodes per group, %zu hits",
940 num_groups, mean, hits);
941
942 ext2fs_close (fs);
943 }
944
945 return 0;
946}
947
948
949static int
950block_compar (const void *a,
951 const void *b)
952{
953 const PackBlock *block_a = a;
954 const PackBlock *block_b = b;
955
956 nih_assert (block_a != NULL);
957 nih_assert (block_b != NULL);
958
959 if (block_a->physical < block_b->physical) {
960 return -1;
961 } else if (block_a->physical > block_b->physical) {
962 return 1;
963 } else {
964 return 0;
965 }
966}
967
968static int
969trace_sort_blocks (const void *parent,
970 PackFile * file)
971{
972 nih_assert (file != NULL);
973
974 /* Sort the blocks array by physical location, since these are
975 * read in a separate pass to opening files, there's no reason
976 * to consider which path each block is in - and thus resulting
977 * in a linear disk read.
978 */
979 qsort (file->blocks, file->num_blocks, sizeof (PackBlock),
980 block_compar);
981
982 return 0;
983}
984
985static int
986path_compar (const void *a,
987 const void *b)
988{
989 const PackPath * const *path_a = a;
990 const PackPath * const *path_b = b;
991
992 nih_assert (path_a != NULL);
993 nih_assert (path_b != NULL);
994
995 if ((*path_a)->group < (*path_b)->group) {
996 return -1;
997 } else if ((*path_a)->group > (*path_b)->group) {
998 return 1;
999 } else if ((*path_a)->ino < (*path_b)->ino) {
1000 return -1;
1001 } else if ((*path_b)->ino > (*path_b)->ino) {
1002 return 1;
1003 } else {
1004 return strcmp ((*path_a)->path, (*path_b)->path);
1005 }
1006}
1007
1008static int
1009trace_sort_paths (const void *parent,
1010 PackFile * file)
1011{
1012 nih_local PackPath **paths = NULL;
1013 nih_local size_t * new_idx = NULL;
1014 PackPath * new_paths;
1015
1016 nih_assert (file != NULL);
1017
1018 /* Sort the paths array by ext2fs inode group, ino_t then path.
1019 *
1020 * Mucking around with things like the physical locations of
1021 * first on-disk blocks of the dentry and stuff didn't work out
1022 * so well, sorting by path was better, but this seems the best.
1023 * (it looks good on blktrace too)
1024 */
1025 paths = NIH_MUST (nih_alloc (NULL, (sizeof (PackPath *)
1026 * file->num_paths)));
1027
1028 for (size_t i = 0; i < file->num_paths; i++)
1029 paths[i] = &file->paths[i];
1030
1031 qsort (paths, file->num_paths, sizeof (PackPath *),
1032 path_compar);
1033
1034 /* Calculate the new indexes of each path element in the old
1035 * array, and then update the block array's path indexes to
1036 * match.
1037 */
1038 new_idx = NIH_MUST (nih_alloc (NULL,
1039 (sizeof (size_t) * file->num_paths)));
1040 for (size_t i = 0; i < file->num_paths; i++)
1041 new_idx[paths[i] - file->paths] = i;
1042
1043 for (size_t i = 0; i < file->num_blocks; i++)
1044 file->blocks[i].pathidx = new_idx[file->blocks[i].pathidx];
1045
1046 /* Finally generate a new paths array with the new order and
1047 * attach it to the file.
1048 */
1049 new_paths = NIH_MUST (nih_alloc (parent,
1050 (sizeof (PackPath) * file->num_paths)));
1051 for (size_t i = 0; i < file->num_paths; i++)
1052 memcpy (&new_paths[new_idx[i]], &file->paths[i],
1053 sizeof (PackPath));
1054
1055 nih_unref (file->paths, parent);
1056 file->paths = new_paths;
1057
1058 return 0;
1059}