blob: 359f9ed1b4bd8d73d4c40f87c904d39bde4ef49b [file] [log] [blame]
/* ureadahead
 *
 * trace.c - boot tracing
 *
 * Copyright © 2009 Canonical Ltd.
 * Author: Scott James Remnant <scott@netsplit.com>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif /* HAVE_CONFIG_H */
25
26#define _ATFILE_SOURCE
27
28
29#include <sys/select.h>
30#include <sys/mount.h>
31#include <sys/types.h>
32#include <sys/mman.h>
33#include <sys/stat.h>
34
35#include <errno.h>
36#include <fcntl.h>
37#include <stdio.h>
38#include <signal.h>
39#include <stdlib.h>
40#include <string.h>
41#include <unistd.h>
42
43#include <blkid.h>
44#define NO_INLINE_FUNCS
45#include <ext2fs.h>
46
47#include <linux/fs.h>
48#include <linux/fiemap.h>
49
50#include <nih/macros.h>
51#include <nih/alloc.h>
52#include <nih/string.h>
53#include <nih/list.h>
54#include <nih/hash.h>
55#include <nih/main.h>
56#include <nih/logging.h>
57#include <nih/error.h>
58
59#include "trace.h"
60#include "pack.h"
61#include "values.h"
62#include "file.h"
63
64
/**
 * PATH_DEBUGFS:
 *
 * Path to the usual debugfs mountpoint; trace() first looks for the
 * "tracing" directory underneath it.
 **/
#define PATH_DEBUGFS     "/sys/kernel/debug"

/**
 * PATH_DEBUGFS_TMP:
 *
 * Path to the temporary debugfs mountpoint that we mount debugfs on
 * ourselves when it has not been mounted at the usual place yet.
 **/
#define PATH_DEBUGFS_TMP "/var/lib/ureadahead/debugfs"

/**
 * INODE_GROUP_PRELOAD_THRESHOLD:
 *
 * Number of traced inodes an inode group must exceed before that group
 * is added to the pack's list of groups to preload.
 **/
#define INODE_GROUP_PRELOAD_THRESHOLD 8
86
87
88/* Prototypes for static functions */
89static int read_trace (const void *parent,
90 int dfd, const char *path,
91 PackFile **files, size_t *num_files);
92static void fix_path (char *pathname);
93static int trace_add_path (const void *parent, const char *pathname,
94 PackFile **files, size_t *num_files);
95static int ignore_path (const char *pathname);
96static PackFile *trace_file (const void *parent, dev_t dev,
97 PackFile **files, size_t *num_files);
98static int trace_add_chunks (const void *parent,
99 PackFile *file, PackPath *path,
100 int fd, off_t size);
101static int trace_add_extents (const void *parent,
102 PackFile *file, PackPath *path,
103 int fd, off_t size,
104 off_t offset, off_t length);
105static int trace_add_groups (const void *parent, PackFile *file);
106static int trace_sort_blocks (const void *parent, PackFile *file);
107static int trace_sort_paths (const void *parent, PackFile *file);
108
109
/**
 * sig_interrupt:
 * @signum: number of the signal that was delivered (unused).
 *
 * Intentionally empty signal handler.  trace() installs it for SIGTERM
 * and SIGINT so that those signals interrupt its pause()/select() wait
 * and let it carry on, instead of taking the default action.
 **/
static void
sig_interrupt (int signum)
{
	(void) signum;
}
114
115int
116trace (int daemonise,
117 int timeout)
118{
119 int dfd;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600120 FILE *fp;
Scott James Remnant56686d62009-11-09 18:38:51 +0000121 int unmount = FALSE;
122 int old_sys_open_enabled = 0;
123 int old_open_exec_enabled = 0;
124 int old_uselib_enabled = 0;
125 int old_tracing_enabled = 0;
Tim Gardner73aa2c52010-07-22 04:04:36 -0600126 int old_buffer_size_kb = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000127 struct sigaction act;
128 struct sigaction old_sigterm;
129 struct sigaction old_sigint;
130 struct timeval tv;
131 nih_local PackFile *files = NULL;
132 size_t num_files = 0;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600133 size_t num_cpus = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000134
135 /* Mount debugfs if not already mounted */
136 dfd = open (PATH_DEBUGFS "/tracing", O_RDONLY | O_NOATIME);
137 if (dfd < 0) {
138 if (errno != ENOENT)
139 nih_return_system_error (-1);
140
141 if (mount ("none", PATH_DEBUGFS_TMP, "debugfs", 0, NULL) < 0)
142 nih_return_system_error (-1);
143
144 dfd = open (PATH_DEBUGFS_TMP "/tracing", O_RDONLY | O_NOATIME);
145 if (dfd < 0) {
146 nih_error_raise_system ();
147 umount (PATH_DEBUGFS_TMP);
148 return -1;
149 }
150
151 unmount = TRUE;
152 }
153
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600154 /*
155 * Count the number of CPUs, default to 1 on error.
156 */
157 fp = fopen("/proc/cpuinfo", "r");
158 if (fp) {
159 int line_size=1024;
160 char *processor="processor";
161 char *line = malloc(line_size);
162 if (line) {
163 num_cpus = 0;
164 while (fgets(line,line_size,fp) != NULL) {
165 if (!strncmp(line,processor,strlen(processor)))
166 num_cpus++;
167 }
168 free(line);
169 nih_message("Counted %d CPUs\n",num_cpus);
170 }
171 fclose(fp);
172 }
173 if (!num_cpus)
174 num_cpus = 1;
175
Scott James Remnante30e2372010-09-20 18:34:31 +0100176 /* Enable tracing of open() syscalls */
Scott James Remnant56686d62009-11-09 18:38:51 +0000177 if (set_value (dfd, "events/fs/do_sys_open/enable",
178 TRUE, &old_sys_open_enabled) < 0)
179 goto error;
180 if (set_value (dfd, "events/fs/open_exec/enable",
181 TRUE, &old_open_exec_enabled) < 0)
182 goto error;
183 if (set_value (dfd, "events/fs/uselib/enable",
184 TRUE, &old_uselib_enabled) < 0) {
185 NihError *err;
186
187 err = nih_error_get ();
188 nih_debug ("Missing uselib tracing: %s", err->message);
189 nih_free (err);
190
191 old_uselib_enabled = -1;
192 }
Scott James Remnante30e2372010-09-20 18:34:31 +0100193 if (set_value (dfd, "buffer_size_kb", 8192/num_cpus, &old_buffer_size_kb) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000194 goto error;
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000195 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000196 TRUE, &old_tracing_enabled) < 0)
197 goto error;
198
199 if (daemonise) {
200 pid_t pid;
201
202 pid = fork ();
203 if (pid < 0) {
204 nih_error_raise_system ();
205 goto error;
206 } else if (pid > 0) {
207 _exit (0);
208 }
209 }
210
211 /* Sleep until we get signals */
212 act.sa_handler = sig_interrupt;
213 sigemptyset (&act.sa_mask);
214 act.sa_flags = 0;
215
216 sigaction (SIGTERM, &act, &old_sigterm);
217 sigaction (SIGINT, &act, &old_sigint);
218
219 if (timeout) {
220 tv.tv_sec = timeout;
221 tv.tv_usec = 0;
222
223 select (0, NULL, NULL, NULL, &tv);
224 } else {
225 pause ();
226 }
227
228 sigaction (SIGTERM, &old_sigterm, NULL);
229 sigaction (SIGINT, &old_sigint, NULL);
230
231 /* Restore previous tracing settings */
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000232 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000233 old_tracing_enabled, NULL) < 0)
234 goto error;
235 if (old_uselib_enabled >= 0)
236 if (set_value (dfd, "events/fs/uselib/enable",
237 old_uselib_enabled, NULL) < 0)
238 goto error;
239 if (set_value (dfd, "events/fs/open_exec/enable",
240 old_open_exec_enabled, NULL) < 0)
241 goto error;
242 if (set_value (dfd, "events/fs/do_sys_open/enable",
243 old_sys_open_enabled, NULL) < 0)
244 goto error;
245
246 /* Be nicer */
Scott James Remnantcc2943b2009-11-29 15:24:15 +0000247 if (nice (15))
248 ;
Scott James Remnant56686d62009-11-09 18:38:51 +0000249
250 /* Read trace log */
251 if (read_trace (NULL, dfd, "trace", &files, &num_files) < 0)
252 goto error;
253
Tim Gardner73aa2c52010-07-22 04:04:36 -0600254 /*
255 * Restore the trace buffer size (which has just been read) and free
256 * a bunch of memory.
257 */
258 if (set_value (dfd, "buffer_size_kb", old_buffer_size_kb, NULL) < 0)
259 goto error;
260
Scott James Remnant56686d62009-11-09 18:38:51 +0000261 /* Unmount the temporary debugfs mount if we mounted it */
262 if (close (dfd)) {
263 nih_error_raise_system ();
264 goto error;
265 }
266 if (unmount
267 && (umount (PATH_DEBUGFS_TMP) < 0)) {
268 nih_error_raise_system ();
269 goto error;
270 }
271
272 /* Write out pack files */
273 for (size_t i = 0; i < num_files; i++) {
274 nih_local char *filename = NULL;
275
276 filename = pack_file_name_for_device (NULL, files[i].dev);
277 if (! filename) {
278 NihError *err;
279
280 err = nih_error_get ();
281 nih_warn ("%s", err->message);
282 nih_free (err);
283
284 continue;
285 }
286
287 nih_info ("Writing %s", filename);
288
289 /* We only need to apply additional sorting to the
290 * HDD-optimised packs, the SSD ones can read in random
291 * order quite happily.
292 *
293 * Also for HDD, generate the inode group preloading
294 * array.
295 */
296 if (files[i].rotational) {
297 trace_add_groups (files, &files[i]);
298
299 trace_sort_blocks (files, &files[i]);
300 trace_sort_paths (files, &files[i]);
301 }
302
303 write_pack (filename, &files[i]);
304
305 if (nih_log_priority < NIH_LOG_MESSAGE)
306 pack_dump (&files[i], SORT_OPEN);
307 }
308
309 return 0;
310error:
311 close (dfd);
312 if (unmount)
313 umount (PATH_DEBUGFS_TMP);
314
315 return -1;
316}
317
318
319static int
320read_trace (const void *parent,
321 int dfd,
322 const char *path,
323 PackFile ** files,
324 size_t * num_files)
325{
326 int fd;
327 FILE *fp;
328 char *line;
329
330 nih_assert (path != NULL);
331 nih_assert (files != NULL);
332 nih_assert (num_files != NULL);
333
334 fd = openat (dfd, path, O_RDONLY);
335 if (fd < 0)
336 nih_return_system_error (-1);
337
338 fp = fdopen (fd, "r");
339 if (! fp) {
340 nih_error_raise_system ();
341 close (fd);
342 return -1;
343 }
344
345 while ((line = fgets_alloc (NULL, fp)) != NULL) {
346 char *ptr;
347 char *end;
348
349 ptr = strstr (line, " do_sys_open:");
350 if (! ptr)
351 ptr = strstr (line, " open_exec:");
352 if (! ptr)
353 ptr = strstr (line, " uselib:");
354 if (! ptr) {
355 nih_free (line);
356 continue;
357 }
358
359 ptr = strchr (ptr, '"');
360 if (! ptr) {
361 nih_free (line);
362 continue;
363 }
364
365 ptr++;
366
367 end = strrchr (ptr, '"');
368 if (! end) {
369 nih_free (line);
370 continue;
371 }
372
373 *end = '\0';
374
375 fix_path (ptr);
376 trace_add_path (parent, ptr, files, num_files);
377
378 nih_free (line);
379 }
380
381 if (fclose (fp) < 0)
382 nih_return_system_error (-1);
383
384 return 0;
385}
386
/**
 * fix_path:
 * @pathname: NUL-terminated path to normalise, modified in place.
 *
 * Collapses "//" and "/./" components, resolves "/../" components by
 * dropping the preceding component (or everything back to the start of
 * the string when there is none), and strips trailing slashes.  The
 * string only ever gets shorter.  Note that a path which reduces to the
 * root, such as "/" or "/a/..", ends up as the empty string.
 *
 * The scan never forms a pointer before the start of @pathname: after a
 * removal the same position is simply examined again.
 **/
static void
fix_path (char *pathname)
{
	char *ptr;

	nih_assert (pathname != NULL);

	ptr = pathname;
	while (*ptr) {
		size_t len;

		if (*ptr != '/') {
			ptr++;
			continue;
		}

		/* Length of the component following this slash */
		len = strcspn (ptr + 1, "/");

		/* // and /./, we shorten the string (the move includes the
		 * terminating NUL) and look again at the new /
		 */
		if ((len == 0) || ((len == 1) && (ptr[1] == '.'))) {
			memmove (ptr, ptr + len + 1, strlen (ptr) - len);
			continue;
		}

		/* /../, we shorten back to the previous / or the start
		 * of the string and look again at the new /
		 */
		if ((len == 2) && (ptr[1] == '.') && (ptr[2] == '.')) {
			char *root = ptr;

			while (root > pathname) {
				root--;
				if (*root == '/')
					break;
			}

			memmove (root, ptr + len + 1, strlen (ptr) - len);
			ptr = root;
			continue;
		}

		ptr++;
	}

	/* ptr is at the terminator; trim any slashes left before it */
	while ((ptr != pathname) && (*(--ptr) == '/'))
		*ptr = '\0';
}
433
434
435static int
436trace_add_path (const void *parent,
437 const char *pathname,
438 PackFile ** files,
439 size_t * num_files)
440{
441 static NihHash *path_hash = NULL;
442 struct stat statbuf;
443 int fd;
444 PackFile * file;
445 PackPath * path;
446 static NihHash *inode_hash = NULL;
447 nih_local char *inode_key = NULL;
448
449 nih_assert (pathname != NULL);
450 nih_assert (files != NULL);
451 nih_assert (num_files != NULL);
452
453 /* We can't really deal with relative paths since we don't know
454 * the working directory that they were opened from.
455 */
456 if (pathname[0] != '/') {
457 nih_warn ("%s: %s", pathname, _("Ignored relative path"));
458 return 0;
459 }
460
461 /* Certain paths aren't worth caching, because they're virtual or
462 * temporary filesystems and would waste pack space.
463 */
464 if (ignore_path (pathname))
465 return 0;
466
467 /* Ignore paths that won't fit in the pack; we could use PATH_MAX,
468 * but with 1000 files that'd be 4M just for the
469 * pack.
470 */
471 if (strlen (pathname) > PACK_PATH_MAX) {
472 nih_warn ("%s: %s", pathname, _("Ignored far too long path"));
473 return 0;
474 }
475
476 /* Use a hash table of paths to eliminate duplicate path names from
477 * the table since that would waste pack space (and fds).
478 */
479 if (! path_hash)
480 path_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
481
482 if (nih_hash_lookup (path_hash, pathname)) {
483 return 0;
484 } else {
485 NihListEntry *entry;
486
487 entry = NIH_MUST (nih_list_entry_new (path_hash));
488 entry->str = NIH_MUST (nih_strdup (entry, pathname));
489
490 nih_hash_add (path_hash, &entry->entry);
491 }
492
493 /* Make sure that we have an ordinary file, or a symlink to an
494 * ordinary file. This avoids us opening a fifo or socket.
495 */
496 if ((lstat (pathname, &statbuf) < 0)
497 || (S_ISLNK (statbuf.st_mode)
498 && (stat (pathname, &statbuf) < 0))
499 || (! S_ISREG (statbuf.st_mode)))
500 return 0;
501
502 /* Open and stat again to get the genuine details, in case it
503 * changes under us.
504 */
505 fd = open (pathname, O_RDONLY | O_NOATIME);
506 if (fd < 0) {
507 nih_warn ("%s: %s: %s", pathname,
508 _("File vanished or error reading"),
509 strerror (errno));
510 return -1;
511 }
512
513 if (fstat (fd, &statbuf) < 0) {
514 nih_warn ("%s: %s: %s", pathname,
515 _("Error retrieving file stat"),
516 strerror (errno));
517 close (fd);
518 return -1;
519 }
520
521 /* Double-check that it's really still a file */
522 if (! S_ISREG (statbuf.st_mode)) {
523 close (fd);
524 return 0;
525 }
526
527 /* Some people think it's clever to split their filesystem across
528 * multiple devices, so we need to generate a different pack file
529 * for each device.
530 *
531 * Lookup file based on the dev_t, potentially creating a new
532 * pack file in the array.
533 */
534 file = trace_file (parent, statbuf.st_dev, files, num_files);
535
536 /* Grow the PackPath array and fill in the details for the new
537 * path.
538 */
539 file->paths = NIH_MUST (nih_realloc (file->paths, *files,
540 (sizeof (PackPath)
541 * (file->num_paths + 1))));
542
543 path = &file->paths[file->num_paths++];
544 memset (path, 0, sizeof (PackPath));
545
546 path->group = -1;
547 path->ino = statbuf.st_ino;
548
549 strncpy (path->path, pathname, PACK_PATH_MAX);
550 path->path[PACK_PATH_MAX] = '\0';
551
552 /* The paths array contains each unique path opened, but these
553 * might be symbolic or hard links to the same underlying files
554 * and we don't want to read the same block more than once.
555 *
556 * Use a hash table of dev_t/ino_t pairs to make sure we only
557 * read the blocks of an actual file the first time.
558 */
559 if (! inode_hash)
560 inode_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
561
562 inode_key = NIH_MUST (nih_sprintf (NULL, "%llu:%llu",
563 (unsigned long long)statbuf.st_dev,
564 (unsigned long long)statbuf.st_ino));
565
566 if (nih_hash_lookup (inode_hash, inode_key)) {
567 close (fd);
568 return 0;
569 } else {
570 NihListEntry *entry;
571
572 entry = NIH_MUST (nih_list_entry_new (inode_hash));
573 entry->str = inode_key;
574 nih_ref (entry->str, entry);
575
576 nih_hash_add (inode_hash, &entry->entry);
577 }
578
579 /* There's also no point reading zero byte files, since they
580 * won't have any blocks (and we can't mmap zero bytes anyway).
581 */
582 if (! statbuf.st_size) {
583 close (fd);
584 return 0;
585 }
586
587 /* Now read the in-memory chunks of this file and add those to
588 * the pack file too.
589 */
590 trace_add_chunks (*files, file, path, fd, statbuf.st_size);
591 close (fd);
592
593 return 0;
594}
595
596static int
597ignore_path (const char *pathname)
598{
599 nih_assert (pathname != NULL);
600
601 if (! strncmp (pathname, "/proc/", 6))
602 return TRUE;
603 if (! strncmp (pathname, "/sys/", 5))
604 return TRUE;
605 if (! strncmp (pathname, "/dev/", 5))
606 return TRUE;
607 if (! strncmp (pathname, "/tmp/", 5))
608 return TRUE;
Steve Langasek2c698a12012-02-03 15:27:29 -0800609 if (! strncmp (pathname, "/run/", 5))
610 return TRUE;
Scott James Remnant56686d62009-11-09 18:38:51 +0000611 if (! strncmp (pathname, "/var/run/", 9))
612 return TRUE;
Bryan Fullerton96c991b2013-03-12 15:06:41 +0000613 if (! strncmp (pathname, "/var/log/", 9))
614 return TRUE;
Scott James Remnant56686d62009-11-09 18:38:51 +0000615 if (! strncmp (pathname, "/var/lock/", 10))
616 return TRUE;
617
618 return FALSE;
619}
620
621
622static PackFile *
623trace_file (const void *parent,
624 dev_t dev,
625 PackFile ** files,
626 size_t * num_files)
627{
628 nih_local char *filename = NULL;
629 int rotational;
630 PackFile * file;
631
632 nih_assert (files != NULL);
633 nih_assert (num_files != NULL);
634
635 /* Return any existing file structure for this device */
636 for (size_t i = 0; i < *num_files; i++)
637 if ((*files)[i].dev == dev)
638 return &(*files)[i];
639
640 /* Query sysfs to see whether this disk is rotational; this
641 * obviously won't work for virtual devices and the like, so
642 * default to TRUE for now.
643 */
644 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:0/queue/rotational",
645 major (dev)));
646
647 if (get_value (AT_FDCWD, filename, &rotational) < 0) {
648 NihError *err;
649
650 err = nih_error_get ();
651 nih_warn (_("Unable to obtain rotationalness for device %u:%u: %s"),
652 major (dev), minor (dev), err->message);
653 nih_free (err);
654
655 rotational = TRUE;
656 }
657
658 /* Grow the PackFile array and fill in the details for the new
659 * file.
660 */
661 *files = NIH_MUST (nih_realloc (*files, parent,
662 (sizeof (PackFile) * (*num_files + 1))));
663
664 file = &(*files)[(*num_files)++];
665 memset (file, 0, sizeof (PackFile));
666
667 file->dev = dev;
668 file->rotational = rotational;
669 file->num_paths = 0;
670 file->paths = NULL;
671 file->num_blocks = 0;
672 file->blocks = NULL;
673
674 return file;
675}
676
677
678static int
679trace_add_chunks (const void *parent,
680 PackFile * file,
681 PackPath * path,
682 int fd,
683 off_t size)
684{
685 static int page_size = -1;
686 void * buf;
687 off_t num_pages;
688 nih_local unsigned char *vec = NULL;
689
690 nih_assert (file != NULL);
691 nih_assert (path != NULL);
692 nih_assert (fd >= 0);
693 nih_assert (size > 0);
694
695 if (page_size < 0)
696 page_size = sysconf (_SC_PAGESIZE);
697
698 /* Map the file into memory */
699 buf = mmap (NULL, size, PROT_READ, MAP_SHARED, fd, 0);
700 if (buf == MAP_FAILED) {
701 nih_warn ("%s: %s: %s", path->path,
702 _("Error mapping into memory"),
703 strerror (errno));
704 return -1;
705 }
706
707 /* Grab the core memory map of the file */
708 num_pages = (size - 1) / page_size + 1;
709 vec = NIH_MUST (nih_alloc (NULL, num_pages));
710 memset (vec, 0, num_pages);
711
712 if (mincore (buf, size, vec) < 0) {
713 nih_warn ("%s: %s: %s", path->path,
714 _("Error retrieving page cache info"),
715 strerror (errno));
716 munmap (buf, size);
717 return -1;
718 }
719
720 /* Clean up */
721 if (munmap (buf, size) < 0) {
722 nih_warn ("%s: %s: %s", path->path,
723 _("Error unmapping from memory"),
724 strerror (errno));
725 return -1;
726 }
727
728
729 /* Now we can figure out which contiguous bits of the file are
730 * in core memory.
731 */
732 for (off_t i = 0; i < num_pages; i++) {
733 off_t offset;
734 off_t length;
735
736 if (! vec[i])
737 continue;
738
739 offset = i * page_size;
740 length = page_size;
741
742 while (((i + 1) < num_pages) && vec[i + 1]) {
743 length += page_size;
744 i++;
745 }
746
747 /* The rotational crowd need this split down further into
748 * on-disk extents, the non-rotational folks can just use
749 * the chunks data.
750 */
751 if (file->rotational) {
752 trace_add_extents (parent, file, path, fd, size,
753 offset, length);
754 } else {
755 PackBlock *block;
756
757 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
758 (sizeof (PackBlock)
759 * (file->num_blocks + 1))));
760
761 block = &file->blocks[file->num_blocks++];
762 memset (block, 0, sizeof (PackBlock));
763
764 block->pathidx = file->num_paths - 1;
765 block->offset = offset;
766 block->length = length;
767 block->physical = -1;
768 }
769 }
770
771 return 0;
772}
773
774struct fiemap *
775get_fiemap (const void *parent,
776 int fd,
777 off_t offset,
778 off_t length)
779{
780 struct fiemap *fiemap;
781
782 nih_assert (fd >= 0);
783
784 fiemap = NIH_MUST (nih_new (parent, struct fiemap));
785 memset (fiemap, 0, sizeof (struct fiemap));
786
787 fiemap->fm_start = offset;
788 fiemap->fm_length = length;
789 fiemap->fm_flags = 0;
790
791 do {
792 /* Query the current number of extents */
793 fiemap->fm_mapped_extents = 0;
794 fiemap->fm_extent_count = 0;
795
796 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
797 nih_error_raise_system ();
798 nih_free (fiemap);
799 return NULL;
800 }
801
802 /* Always allow room for one extra over what we were told,
803 * so we know if they changed under us.
804 */
805 fiemap = NIH_MUST (nih_realloc (fiemap, parent,
806 (sizeof (struct fiemap)
807 + (sizeof (struct fiemap_extent)
808 * (fiemap->fm_mapped_extents + 1)))));
809 fiemap->fm_extent_count = fiemap->fm_mapped_extents + 1;
810 fiemap->fm_mapped_extents = 0;
811
812 memset (fiemap->fm_extents, 0, (sizeof (struct fiemap_extent)
813 * fiemap->fm_extent_count));
814
815 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
816 nih_error_raise_system ();
817 nih_free (fiemap);
818 return NULL;
819 }
820 } while (fiemap->fm_mapped_extents
821 && (fiemap->fm_mapped_extents >= fiemap->fm_extent_count));
822
823 return fiemap;
824}
825
826static int
827trace_add_extents (const void *parent,
828 PackFile * file,
829 PackPath * path,
830 int fd,
831 off_t size,
832 off_t offset,
833 off_t length)
834{
835 nih_local struct fiemap *fiemap = NULL;
836
837 nih_assert (file != NULL);
838 nih_assert (path != NULL);
839 nih_assert (fd >= 0);
840 nih_assert (size > 0);
841
842 /* Get the extents map for this chunk, then iterate the extents
843 * and put those in the pack instead of the chunks.
844 */
845 fiemap = get_fiemap (NULL, fd, offset, length);
846 if (! fiemap) {
847 NihError *err;
848
849 err = nih_error_get ();
850 nih_warn ("%s: %s: %s", path->path,
851 _("Error retrieving chunk extents"),
852 err->message);
853 nih_free (err);
854
855 return -1;
856 }
857
858 for (__u32 j = 0; j < fiemap->fm_mapped_extents; j++) {
859 PackBlock *block;
860 off_t start;
861 off_t end;
862
863 if (fiemap->fm_extents[j].fe_flags & FIEMAP_EXTENT_UNKNOWN)
864 continue;
865
866 /* Work out the intersection of the chunk and extent */
867 start = nih_max (fiemap->fm_start,
868 fiemap->fm_extents[j].fe_logical);
869 end = nih_min ((fiemap->fm_start + fiemap->fm_length),
870 (fiemap->fm_extents[j].fe_logical
871 + fiemap->fm_extents[j].fe_length));
872
873 /* Grow the blocks array to add the extent */
874 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
875 (sizeof (PackBlock)
876 * (file->num_blocks + 1))));
877
878 block = &file->blocks[file->num_blocks++];
879 memset (block, 0, sizeof (PackBlock));
880
881 block->pathidx = file->num_paths - 1;
882 block->offset = start;
883 block->length = end - start;
884 block->physical = (fiemap->fm_extents[j].fe_physical
885 + (start - fiemap->fm_extents[j].fe_logical));
886 }
887
888 return 0;
889}
890
891static int
892trace_add_groups (const void *parent,
893 PackFile * file)
894{
895 const char *devname;
896 ext2_filsys fs = NULL;
897
898 nih_assert (file != NULL);
899
900 devname = blkid_devno_to_devname (file->dev);
901 if (devname
902 && (! ext2fs_open (devname, 0, 0, 0, unix_io_manager, &fs))) {
903 nih_assert (fs != NULL);
904 size_t num_groups = 0;
905 nih_local size_t *num_inodes = NULL;
906 size_t mean = 0;
907 size_t hits = 0;
908
909 nih_assert (fs != NULL);
910
911 /* Calculate the number of inode groups on this filesystem */
912 num_groups = ((fs->super->s_blocks_count - 1)
913 / fs->super->s_blocks_per_group) + 1;
914
915 /* Fill in the pack path's group member, and count the
916 * number of inodes in each group.
917 */
918 num_inodes = NIH_MUST (nih_alloc (NULL, (sizeof (size_t)
919 * num_groups)));
920 memset (num_inodes, 0, sizeof (size_t) * num_groups);
921
922 for (size_t i = 0; i < file->num_paths; i++) {
923 file->paths[i].group = ext2fs_group_of_ino (fs, file->paths[i].ino);
924 num_inodes[file->paths[i].group]++;
925 }
926
927 /* Iterate the groups and add any group that exceeds the
928 * inode preload threshold.
929 */
930 for (size_t i = 0; i < num_groups; i++) {
931 mean += num_inodes[i];
932 if (num_inodes[i] > INODE_GROUP_PRELOAD_THRESHOLD) {
933 file->groups = NIH_MUST (nih_realloc (file->groups, parent,
934 (sizeof (int)
935 * (file->num_groups + 1))));
936 file->groups[file->num_groups++] = i;
937 hits++;
938 }
939 }
940
941 mean /= num_groups;
942
943 nih_debug ("%zu inode groups, mean %zu inodes per group, %zu hits",
944 num_groups, mean, hits);
945
946 ext2fs_close (fs);
947 }
948
949 return 0;
950}
951
952
953static int
954block_compar (const void *a,
955 const void *b)
956{
957 const PackBlock *block_a = a;
958 const PackBlock *block_b = b;
959
960 nih_assert (block_a != NULL);
961 nih_assert (block_b != NULL);
962
963 if (block_a->physical < block_b->physical) {
964 return -1;
965 } else if (block_a->physical > block_b->physical) {
966 return 1;
967 } else {
968 return 0;
969 }
970}
971
972static int
973trace_sort_blocks (const void *parent,
974 PackFile * file)
975{
976 nih_assert (file != NULL);
977
978 /* Sort the blocks array by physical location, since these are
979 * read in a separate pass to opening files, there's no reason
980 * to consider which path each block is in - and thus resulting
981 * in a linear disk read.
982 */
983 qsort (file->blocks, file->num_blocks, sizeof (PackBlock),
984 block_compar);
985
986 return 0;
987}
988
989static int
990path_compar (const void *a,
991 const void *b)
992{
993 const PackPath * const *path_a = a;
994 const PackPath * const *path_b = b;
995
996 nih_assert (path_a != NULL);
997 nih_assert (path_b != NULL);
998
999 if ((*path_a)->group < (*path_b)->group) {
1000 return -1;
1001 } else if ((*path_a)->group > (*path_b)->group) {
1002 return 1;
1003 } else if ((*path_a)->ino < (*path_b)->ino) {
1004 return -1;
1005 } else if ((*path_b)->ino > (*path_b)->ino) {
1006 return 1;
1007 } else {
1008 return strcmp ((*path_a)->path, (*path_b)->path);
1009 }
1010}
1011
1012static int
1013trace_sort_paths (const void *parent,
1014 PackFile * file)
1015{
1016 nih_local PackPath **paths = NULL;
1017 nih_local size_t * new_idx = NULL;
1018 PackPath * new_paths;
1019
1020 nih_assert (file != NULL);
1021
1022 /* Sort the paths array by ext2fs inode group, ino_t then path.
1023 *
1024 * Mucking around with things like the physical locations of
1025 * first on-disk blocks of the dentry and stuff didn't work out
1026 * so well, sorting by path was better, but this seems the best.
1027 * (it looks good on blktrace too)
1028 */
1029 paths = NIH_MUST (nih_alloc (NULL, (sizeof (PackPath *)
1030 * file->num_paths)));
1031
1032 for (size_t i = 0; i < file->num_paths; i++)
1033 paths[i] = &file->paths[i];
1034
1035 qsort (paths, file->num_paths, sizeof (PackPath *),
1036 path_compar);
1037
1038 /* Calculate the new indexes of each path element in the old
1039 * array, and then update the block array's path indexes to
1040 * match.
1041 */
1042 new_idx = NIH_MUST (nih_alloc (NULL,
1043 (sizeof (size_t) * file->num_paths)));
1044 for (size_t i = 0; i < file->num_paths; i++)
1045 new_idx[paths[i] - file->paths] = i;
1046
1047 for (size_t i = 0; i < file->num_blocks; i++)
1048 file->blocks[i].pathidx = new_idx[file->blocks[i].pathidx];
1049
1050 /* Finally generate a new paths array with the new order and
1051 * attach it to the file.
1052 */
1053 new_paths = NIH_MUST (nih_alloc (parent,
1054 (sizeof (PackPath) * file->num_paths)));
1055 for (size_t i = 0; i < file->num_paths; i++)
1056 memcpy (&new_paths[new_idx[i]], &file->paths[i],
1057 sizeof (PackPath));
1058
1059 nih_unref (file->paths, parent);
1060 file->paths = new_paths;
1061
1062 return 0;
1063}