blob: 16f55168b552ed2c7de764477bb9bcfe2349a309 [file] [log] [blame]
Scott James Remnant56686d62009-11-09 18:38:51 +00001/* ureadahead
2 *
3 * trace.c - boot tracing
4 *
5 * Copyright © 2009 Canonical Ltd.
6 * Author: Scott James Remnant <scott@netsplit.com>.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2, as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif /* HAVE_CONFIG_H */
25
26#define _ATFILE_SOURCE
27
28
29#include <sys/select.h>
30#include <sys/mount.h>
31#include <sys/types.h>
32#include <sys/mman.h>
33#include <sys/stat.h>
34
35#include <errno.h>
36#include <fcntl.h>
37#include <stdio.h>
38#include <signal.h>
39#include <stdlib.h>
40#include <string.h>
41#include <unistd.h>
42
43#include <blkid.h>
44#define NO_INLINE_FUNCS
45#include <ext2fs.h>
46
47#include <linux/fs.h>
48#include <linux/fiemap.h>
49
50#include <nih/macros.h>
51#include <nih/alloc.h>
52#include <nih/string.h>
53#include <nih/list.h>
54#include <nih/hash.h>
55#include <nih/main.h>
56#include <nih/logging.h>
57#include <nih/error.h>
58
59#include "trace.h"
60#include "pack.h"
61#include "values.h"
62#include "file.h"
63
64
65/**
66 * PATH_DEBUGFS:
67 *
68 * Path to the usual debugfs mountpoint.
69 **/
70#define PATH_DEBUGFS "/sys/kernel/debug"
71
72/**
73 * PATH_DEBUGFS_TMP:
74 *
75 * Path to the temporary debugfs mountpoint that we mount it on if it
76 * hasn't been mounted at the usual place yet.
77 **/
78#define PATH_DEBUGFS_TMP "/var/lib/ureadahead/debugfs"
79
80/**
81 * INODE_GROUP_PRELOAD_THRESHOLD:
82 *
83 * Number of inodes in a group before we preload that inode's blocks.
84 **/
85#define INODE_GROUP_PRELOAD_THRESHOLD 8
86
87
88/* Prototypes for static functions */
89static int read_trace (const void *parent,
90 int dfd, const char *path,
91 PackFile **files, size_t *num_files);
92static void fix_path (char *pathname);
93static int trace_add_path (const void *parent, const char *pathname,
94 PackFile **files, size_t *num_files);
95static int ignore_path (const char *pathname);
96static PackFile *trace_file (const void *parent, dev_t dev,
97 PackFile **files, size_t *num_files);
98static int trace_add_chunks (const void *parent,
99 PackFile *file, PackPath *path,
100 int fd, off_t size);
101static int trace_add_extents (const void *parent,
102 PackFile *file, PackPath *path,
103 int fd, off_t size,
104 off_t offset, off_t length);
105static int trace_add_groups (const void *parent, PackFile *file);
106static int trace_sort_blocks (const void *parent, PackFile *file);
107static int trace_sort_paths (const void *parent, PackFile *file);
108
109
/**
 * sig_interrupt:
 * @signum: number of the signal that was delivered (unused).
 *
 * Deliberately empty signal handler.  trace() installs it for SIGTERM and
 * SIGINT while it waits in select()/pause(); the handler does nothing
 * itself, its only purpose is to be a handler so that the wait returns.
 **/
static void
sig_interrupt (int signum)
{
	(void) signum;
}
114
115int
116trace (int daemonise,
117 int timeout)
118{
119 int dfd;
120 int unmount = FALSE;
121 int old_sys_open_enabled = 0;
122 int old_open_exec_enabled = 0;
123 int old_uselib_enabled = 0;
124 int old_tracing_enabled = 0;
Tim Gardner73aa2c52010-07-22 04:04:36 -0600125 int old_buffer_size_kb = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000126 struct sigaction act;
127 struct sigaction old_sigterm;
128 struct sigaction old_sigint;
129 struct timeval tv;
130 nih_local PackFile *files = NULL;
131 size_t num_files = 0;
132
133 /* Mount debugfs if not already mounted */
134 dfd = open (PATH_DEBUGFS "/tracing", O_RDONLY | O_NOATIME);
135 if (dfd < 0) {
136 if (errno != ENOENT)
137 nih_return_system_error (-1);
138
139 if (mount ("none", PATH_DEBUGFS_TMP, "debugfs", 0, NULL) < 0)
140 nih_return_system_error (-1);
141
142 dfd = open (PATH_DEBUGFS_TMP "/tracing", O_RDONLY | O_NOATIME);
143 if (dfd < 0) {
144 nih_error_raise_system ();
145 umount (PATH_DEBUGFS_TMP);
146 return -1;
147 }
148
149 unmount = TRUE;
150 }
151
152 /* Enable tracing of open() syscalls */
153 if (set_value (dfd, "events/fs/do_sys_open/enable",
154 TRUE, &old_sys_open_enabled) < 0)
155 goto error;
156 if (set_value (dfd, "events/fs/open_exec/enable",
157 TRUE, &old_open_exec_enabled) < 0)
158 goto error;
159 if (set_value (dfd, "events/fs/uselib/enable",
160 TRUE, &old_uselib_enabled) < 0) {
161 NihError *err;
162
163 err = nih_error_get ();
164 nih_debug ("Missing uselib tracing: %s", err->message);
165 nih_free (err);
166
167 old_uselib_enabled = -1;
168 }
Tim Gardner73aa2c52010-07-22 04:04:36 -0600169 if (set_value (dfd, "buffer_size_kb", 128000, &old_buffer_size_kb) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000170 goto error;
171 if (set_value (dfd, "tracing_enabled",
172 TRUE, &old_tracing_enabled) < 0)
173 goto error;
174
175 if (daemonise) {
176 pid_t pid;
177
178 pid = fork ();
179 if (pid < 0) {
180 nih_error_raise_system ();
181 goto error;
182 } else if (pid > 0) {
183 _exit (0);
184 }
185 }
186
187 /* Sleep until we get signals */
188 act.sa_handler = sig_interrupt;
189 sigemptyset (&act.sa_mask);
190 act.sa_flags = 0;
191
192 sigaction (SIGTERM, &act, &old_sigterm);
193 sigaction (SIGINT, &act, &old_sigint);
194
195 if (timeout) {
196 tv.tv_sec = timeout;
197 tv.tv_usec = 0;
198
199 select (0, NULL, NULL, NULL, &tv);
200 } else {
201 pause ();
202 }
203
204 sigaction (SIGTERM, &old_sigterm, NULL);
205 sigaction (SIGINT, &old_sigint, NULL);
206
207 /* Restore previous tracing settings */
208 if (set_value (dfd, "tracing_enabled",
209 old_tracing_enabled, NULL) < 0)
210 goto error;
211 if (old_uselib_enabled >= 0)
212 if (set_value (dfd, "events/fs/uselib/enable",
213 old_uselib_enabled, NULL) < 0)
214 goto error;
215 if (set_value (dfd, "events/fs/open_exec/enable",
216 old_open_exec_enabled, NULL) < 0)
217 goto error;
218 if (set_value (dfd, "events/fs/do_sys_open/enable",
219 old_sys_open_enabled, NULL) < 0)
220 goto error;
221
222 /* Be nicer */
Scott James Remnantcc2943b2009-11-29 15:24:15 +0000223 if (nice (15))
224 ;
Scott James Remnant56686d62009-11-09 18:38:51 +0000225
226 /* Read trace log */
227 if (read_trace (NULL, dfd, "trace", &files, &num_files) < 0)
228 goto error;
229
Tim Gardner73aa2c52010-07-22 04:04:36 -0600230 /*
231 * Restore the trace buffer size (which has just been read) and free
232 * a bunch of memory.
233 */
234 if (set_value (dfd, "buffer_size_kb", old_buffer_size_kb, NULL) < 0)
235 goto error;
236
Scott James Remnant56686d62009-11-09 18:38:51 +0000237 /* Unmount the temporary debugfs mount if we mounted it */
238 if (close (dfd)) {
239 nih_error_raise_system ();
240 goto error;
241 }
242 if (unmount
243 && (umount (PATH_DEBUGFS_TMP) < 0)) {
244 nih_error_raise_system ();
245 goto error;
246 }
247
248 /* Write out pack files */
249 for (size_t i = 0; i < num_files; i++) {
250 nih_local char *filename = NULL;
251
252 filename = pack_file_name_for_device (NULL, files[i].dev);
253 if (! filename) {
254 NihError *err;
255
256 err = nih_error_get ();
257 nih_warn ("%s", err->message);
258 nih_free (err);
259
260 continue;
261 }
262
263 nih_info ("Writing %s", filename);
264
265 /* We only need to apply additional sorting to the
266 * HDD-optimised packs, the SSD ones can read in random
267 * order quite happily.
268 *
269 * Also for HDD, generate the inode group preloading
270 * array.
271 */
272 if (files[i].rotational) {
273 trace_add_groups (files, &files[i]);
274
275 trace_sort_blocks (files, &files[i]);
276 trace_sort_paths (files, &files[i]);
277 }
278
279 write_pack (filename, &files[i]);
280
281 if (nih_log_priority < NIH_LOG_MESSAGE)
282 pack_dump (&files[i], SORT_OPEN);
283 }
284
285 return 0;
286error:
287 close (dfd);
288 if (unmount)
289 umount (PATH_DEBUGFS_TMP);
290
291 return -1;
292}
293
294
295static int
296read_trace (const void *parent,
297 int dfd,
298 const char *path,
299 PackFile ** files,
300 size_t * num_files)
301{
302 int fd;
303 FILE *fp;
304 char *line;
305
306 nih_assert (path != NULL);
307 nih_assert (files != NULL);
308 nih_assert (num_files != NULL);
309
310 fd = openat (dfd, path, O_RDONLY);
311 if (fd < 0)
312 nih_return_system_error (-1);
313
314 fp = fdopen (fd, "r");
315 if (! fp) {
316 nih_error_raise_system ();
317 close (fd);
318 return -1;
319 }
320
321 while ((line = fgets_alloc (NULL, fp)) != NULL) {
322 char *ptr;
323 char *end;
324
325 ptr = strstr (line, " do_sys_open:");
326 if (! ptr)
327 ptr = strstr (line, " open_exec:");
328 if (! ptr)
329 ptr = strstr (line, " uselib:");
330 if (! ptr) {
331 nih_free (line);
332 continue;
333 }
334
335 ptr = strchr (ptr, '"');
336 if (! ptr) {
337 nih_free (line);
338 continue;
339 }
340
341 ptr++;
342
343 end = strrchr (ptr, '"');
344 if (! end) {
345 nih_free (line);
346 continue;
347 }
348
349 *end = '\0';
350
351 fix_path (ptr);
352 trace_add_path (parent, ptr, files, num_files);
353
354 nih_free (line);
355 }
356
357 if (fclose (fp) < 0)
358 nih_return_system_error (-1);
359
360 return 0;
361}
362
/**
 * fix_path:
 * @pathname: NUL-terminated path name to normalise in place.
 *
 * Lexically normalises @pathname without consulting the filesystem:
 * empty components ("//"), "." components and trailing slashes are
 * removed, and each ".." component is removed together with the component
 * before it.  For example "/usr//lib/./x/../y/" becomes "/usr/lib/y".
 *
 * An absolute path that normalises to the root directory ("/", "/." or
 * "/a/..") is left as "/" rather than as an empty string, so that it is
 * still recognisable as an absolute path by the caller.
 *
 * The result is never longer than the input, so no extra space is needed.
 **/
static void
fix_path (char *pathname)
{
	size_t pos = 0;
	int    absolute;

	nih_assert (pathname != NULL);

	absolute = (pathname[0] == '/');

	/* Indices are used rather than a moving pointer so that we never
	 * have to form a pointer before the start of the buffer.
	 */
	while (pathname[pos]) {
		char * slash = pathname + pos;
		size_t len;

		if (slash[0] != '/') {
			pos++;
			continue;
		}

		/* Length of the component that follows this slash */
		len = strcspn (slash + 1, "/");

		/* // and /./, we shorten the string and repeat the loop
		 * looking at the new / which is now at the same position
		 */
		if ((len == 0) || ((len == 1) && (slash[1] == '.'))) {
			memmove (slash, slash + len + 1, strlen (slash) - len);
			continue;
		}

		/* /../, we shorten back to the previous / or the start
		 * of the string and repeat the loop looking at the new /
		 */
		if ((len == 2) && (slash[1] == '.') && (slash[2] == '.')) {
			size_t root = pos;

			while ((root > 0) && (pathname[root - 1] != '/'))
				root--;
			if (root > 0)
				root--;

			memmove (pathname + root, slash + len + 1,
				 strlen (slash) - len);
			pos = root;
			continue;
		}

		pos++;
	}

	/* Every slash that ended up last in the string has been removed
	 * by the loop above, so no trailing slashes remain; the only thing
	 * left to repair is an absolute path that was reduced to nothing.
	 * The input held at least "/" plus its terminator, so two bytes
	 * are always available.
	 */
	if (absolute && (pathname[0] == '\0')) {
		pathname[0] = '/';
		pathname[1] = '\0';
	}
}
409
410
411static int
412trace_add_path (const void *parent,
413 const char *pathname,
414 PackFile ** files,
415 size_t * num_files)
416{
417 static NihHash *path_hash = NULL;
418 struct stat statbuf;
419 int fd;
420 PackFile * file;
421 PackPath * path;
422 static NihHash *inode_hash = NULL;
423 nih_local char *inode_key = NULL;
424
425 nih_assert (pathname != NULL);
426 nih_assert (files != NULL);
427 nih_assert (num_files != NULL);
428
429 /* We can't really deal with relative paths since we don't know
430 * the working directory that they were opened from.
431 */
432 if (pathname[0] != '/') {
433 nih_warn ("%s: %s", pathname, _("Ignored relative path"));
434 return 0;
435 }
436
437 /* Certain paths aren't worth caching, because they're virtual or
438 * temporary filesystems and would waste pack space.
439 */
440 if (ignore_path (pathname))
441 return 0;
442
443 /* Ignore paths that won't fit in the pack; we could use PATH_MAX,
444 * but with 1000 files that'd be 4M just for the
445 * pack.
446 */
447 if (strlen (pathname) > PACK_PATH_MAX) {
448 nih_warn ("%s: %s", pathname, _("Ignored far too long path"));
449 return 0;
450 }
451
452 /* Use a hash table of paths to eliminate duplicate path names from
453 * the table since that would waste pack space (and fds).
454 */
455 if (! path_hash)
456 path_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
457
458 if (nih_hash_lookup (path_hash, pathname)) {
459 return 0;
460 } else {
461 NihListEntry *entry;
462
463 entry = NIH_MUST (nih_list_entry_new (path_hash));
464 entry->str = NIH_MUST (nih_strdup (entry, pathname));
465
466 nih_hash_add (path_hash, &entry->entry);
467 }
468
469 /* Make sure that we have an ordinary file, or a symlink to an
470 * ordinary file. This avoids us opening a fifo or socket.
471 */
472 if ((lstat (pathname, &statbuf) < 0)
473 || (S_ISLNK (statbuf.st_mode)
474 && (stat (pathname, &statbuf) < 0))
475 || (! S_ISREG (statbuf.st_mode)))
476 return 0;
477
478 /* Open and stat again to get the genuine details, in case it
479 * changes under us.
480 */
481 fd = open (pathname, O_RDONLY | O_NOATIME);
482 if (fd < 0) {
483 nih_warn ("%s: %s: %s", pathname,
484 _("File vanished or error reading"),
485 strerror (errno));
486 return -1;
487 }
488
489 if (fstat (fd, &statbuf) < 0) {
490 nih_warn ("%s: %s: %s", pathname,
491 _("Error retrieving file stat"),
492 strerror (errno));
493 close (fd);
494 return -1;
495 }
496
497 /* Double-check that it's really still a file */
498 if (! S_ISREG (statbuf.st_mode)) {
499 close (fd);
500 return 0;
501 }
502
503 /* Some people think it's clever to split their filesystem across
504 * multiple devices, so we need to generate a different pack file
505 * for each device.
506 *
507 * Lookup file based on the dev_t, potentially creating a new
508 * pack file in the array.
509 */
510 file = trace_file (parent, statbuf.st_dev, files, num_files);
511
512 /* Grow the PackPath array and fill in the details for the new
513 * path.
514 */
515 file->paths = NIH_MUST (nih_realloc (file->paths, *files,
516 (sizeof (PackPath)
517 * (file->num_paths + 1))));
518
519 path = &file->paths[file->num_paths++];
520 memset (path, 0, sizeof (PackPath));
521
522 path->group = -1;
523 path->ino = statbuf.st_ino;
524
525 strncpy (path->path, pathname, PACK_PATH_MAX);
526 path->path[PACK_PATH_MAX] = '\0';
527
528 /* The paths array contains each unique path opened, but these
529 * might be symbolic or hard links to the same underlying files
530 * and we don't want to read the same block more than once.
531 *
532 * Use a hash table of dev_t/ino_t pairs to make sure we only
533 * read the blocks of an actual file the first time.
534 */
535 if (! inode_hash)
536 inode_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
537
538 inode_key = NIH_MUST (nih_sprintf (NULL, "%llu:%llu",
539 (unsigned long long)statbuf.st_dev,
540 (unsigned long long)statbuf.st_ino));
541
542 if (nih_hash_lookup (inode_hash, inode_key)) {
543 close (fd);
544 return 0;
545 } else {
546 NihListEntry *entry;
547
548 entry = NIH_MUST (nih_list_entry_new (inode_hash));
549 entry->str = inode_key;
550 nih_ref (entry->str, entry);
551
552 nih_hash_add (inode_hash, &entry->entry);
553 }
554
555 /* There's also no point reading zero byte files, since they
556 * won't have any blocks (and we can't mmap zero bytes anyway).
557 */
558 if (! statbuf.st_size) {
559 close (fd);
560 return 0;
561 }
562
563 /* Now read the in-memory chunks of this file and add those to
564 * the pack file too.
565 */
566 trace_add_chunks (*files, file, path, fd, statbuf.st_size);
567 close (fd);
568
569 return 0;
570}
571
572static int
573ignore_path (const char *pathname)
574{
575 nih_assert (pathname != NULL);
576
577 if (! strncmp (pathname, "/proc/", 6))
578 return TRUE;
579 if (! strncmp (pathname, "/sys/", 5))
580 return TRUE;
581 if (! strncmp (pathname, "/dev/", 5))
582 return TRUE;
583 if (! strncmp (pathname, "/tmp/", 5))
584 return TRUE;
585 if (! strncmp (pathname, "/var/run/", 9))
586 return TRUE;
587 if (! strncmp (pathname, "/var/lock/", 10))
588 return TRUE;
589
590 return FALSE;
591}
592
593
594static PackFile *
595trace_file (const void *parent,
596 dev_t dev,
597 PackFile ** files,
598 size_t * num_files)
599{
600 nih_local char *filename = NULL;
601 int rotational;
602 PackFile * file;
603
604 nih_assert (files != NULL);
605 nih_assert (num_files != NULL);
606
607 /* Return any existing file structure for this device */
608 for (size_t i = 0; i < *num_files; i++)
609 if ((*files)[i].dev == dev)
610 return &(*files)[i];
611
612 /* Query sysfs to see whether this disk is rotational; this
613 * obviously won't work for virtual devices and the like, so
614 * default to TRUE for now.
615 */
616 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:0/queue/rotational",
617 major (dev)));
618
619 if (get_value (AT_FDCWD, filename, &rotational) < 0) {
620 NihError *err;
621
622 err = nih_error_get ();
623 nih_warn (_("Unable to obtain rotationalness for device %u:%u: %s"),
624 major (dev), minor (dev), err->message);
625 nih_free (err);
626
627 rotational = TRUE;
628 }
629
630 /* Grow the PackFile array and fill in the details for the new
631 * file.
632 */
633 *files = NIH_MUST (nih_realloc (*files, parent,
634 (sizeof (PackFile) * (*num_files + 1))));
635
636 file = &(*files)[(*num_files)++];
637 memset (file, 0, sizeof (PackFile));
638
639 file->dev = dev;
640 file->rotational = rotational;
641 file->num_paths = 0;
642 file->paths = NULL;
643 file->num_blocks = 0;
644 file->blocks = NULL;
645
646 return file;
647}
648
649
650static int
651trace_add_chunks (const void *parent,
652 PackFile * file,
653 PackPath * path,
654 int fd,
655 off_t size)
656{
657 static int page_size = -1;
658 void * buf;
659 off_t num_pages;
660 nih_local unsigned char *vec = NULL;
661
662 nih_assert (file != NULL);
663 nih_assert (path != NULL);
664 nih_assert (fd >= 0);
665 nih_assert (size > 0);
666
667 if (page_size < 0)
668 page_size = sysconf (_SC_PAGESIZE);
669
670 /* Map the file into memory */
671 buf = mmap (NULL, size, PROT_READ, MAP_SHARED, fd, 0);
672 if (buf == MAP_FAILED) {
673 nih_warn ("%s: %s: %s", path->path,
674 _("Error mapping into memory"),
675 strerror (errno));
676 return -1;
677 }
678
679 /* Grab the core memory map of the file */
680 num_pages = (size - 1) / page_size + 1;
681 vec = NIH_MUST (nih_alloc (NULL, num_pages));
682 memset (vec, 0, num_pages);
683
684 if (mincore (buf, size, vec) < 0) {
685 nih_warn ("%s: %s: %s", path->path,
686 _("Error retrieving page cache info"),
687 strerror (errno));
688 munmap (buf, size);
689 return -1;
690 }
691
692 /* Clean up */
693 if (munmap (buf, size) < 0) {
694 nih_warn ("%s: %s: %s", path->path,
695 _("Error unmapping from memory"),
696 strerror (errno));
697 return -1;
698 }
699
700
701 /* Now we can figure out which contiguous bits of the file are
702 * in core memory.
703 */
704 for (off_t i = 0; i < num_pages; i++) {
705 off_t offset;
706 off_t length;
707
708 if (! vec[i])
709 continue;
710
711 offset = i * page_size;
712 length = page_size;
713
714 while (((i + 1) < num_pages) && vec[i + 1]) {
715 length += page_size;
716 i++;
717 }
718
719 /* The rotational crowd need this split down further into
720 * on-disk extents, the non-rotational folks can just use
721 * the chunks data.
722 */
723 if (file->rotational) {
724 trace_add_extents (parent, file, path, fd, size,
725 offset, length);
726 } else {
727 PackBlock *block;
728
729 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
730 (sizeof (PackBlock)
731 * (file->num_blocks + 1))));
732
733 block = &file->blocks[file->num_blocks++];
734 memset (block, 0, sizeof (PackBlock));
735
736 block->pathidx = file->num_paths - 1;
737 block->offset = offset;
738 block->length = length;
739 block->physical = -1;
740 }
741 }
742
743 return 0;
744}
745
746struct fiemap *
747get_fiemap (const void *parent,
748 int fd,
749 off_t offset,
750 off_t length)
751{
752 struct fiemap *fiemap;
753
754 nih_assert (fd >= 0);
755
756 fiemap = NIH_MUST (nih_new (parent, struct fiemap));
757 memset (fiemap, 0, sizeof (struct fiemap));
758
759 fiemap->fm_start = offset;
760 fiemap->fm_length = length;
761 fiemap->fm_flags = 0;
762
763 do {
764 /* Query the current number of extents */
765 fiemap->fm_mapped_extents = 0;
766 fiemap->fm_extent_count = 0;
767
768 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
769 nih_error_raise_system ();
770 nih_free (fiemap);
771 return NULL;
772 }
773
774 /* Always allow room for one extra over what we were told,
775 * so we know if they changed under us.
776 */
777 fiemap = NIH_MUST (nih_realloc (fiemap, parent,
778 (sizeof (struct fiemap)
779 + (sizeof (struct fiemap_extent)
780 * (fiemap->fm_mapped_extents + 1)))));
781 fiemap->fm_extent_count = fiemap->fm_mapped_extents + 1;
782 fiemap->fm_mapped_extents = 0;
783
784 memset (fiemap->fm_extents, 0, (sizeof (struct fiemap_extent)
785 * fiemap->fm_extent_count));
786
787 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
788 nih_error_raise_system ();
789 nih_free (fiemap);
790 return NULL;
791 }
792 } while (fiemap->fm_mapped_extents
793 && (fiemap->fm_mapped_extents >= fiemap->fm_extent_count));
794
795 return fiemap;
796}
797
798static int
799trace_add_extents (const void *parent,
800 PackFile * file,
801 PackPath * path,
802 int fd,
803 off_t size,
804 off_t offset,
805 off_t length)
806{
807 nih_local struct fiemap *fiemap = NULL;
808
809 nih_assert (file != NULL);
810 nih_assert (path != NULL);
811 nih_assert (fd >= 0);
812 nih_assert (size > 0);
813
814 /* Get the extents map for this chunk, then iterate the extents
815 * and put those in the pack instead of the chunks.
816 */
817 fiemap = get_fiemap (NULL, fd, offset, length);
818 if (! fiemap) {
819 NihError *err;
820
821 err = nih_error_get ();
822 nih_warn ("%s: %s: %s", path->path,
823 _("Error retrieving chunk extents"),
824 err->message);
825 nih_free (err);
826
827 return -1;
828 }
829
830 for (__u32 j = 0; j < fiemap->fm_mapped_extents; j++) {
831 PackBlock *block;
832 off_t start;
833 off_t end;
834
835 if (fiemap->fm_extents[j].fe_flags & FIEMAP_EXTENT_UNKNOWN)
836 continue;
837
838 /* Work out the intersection of the chunk and extent */
839 start = nih_max (fiemap->fm_start,
840 fiemap->fm_extents[j].fe_logical);
841 end = nih_min ((fiemap->fm_start + fiemap->fm_length),
842 (fiemap->fm_extents[j].fe_logical
843 + fiemap->fm_extents[j].fe_length));
844
845 /* Grow the blocks array to add the extent */
846 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
847 (sizeof (PackBlock)
848 * (file->num_blocks + 1))));
849
850 block = &file->blocks[file->num_blocks++];
851 memset (block, 0, sizeof (PackBlock));
852
853 block->pathidx = file->num_paths - 1;
854 block->offset = start;
855 block->length = end - start;
856 block->physical = (fiemap->fm_extents[j].fe_physical
857 + (start - fiemap->fm_extents[j].fe_logical));
858 }
859
860 return 0;
861}
862
863static int
864trace_add_groups (const void *parent,
865 PackFile * file)
866{
867 const char *devname;
868 ext2_filsys fs = NULL;
869
870 nih_assert (file != NULL);
871
872 devname = blkid_devno_to_devname (file->dev);
873 if (devname
874 && (! ext2fs_open (devname, 0, 0, 0, unix_io_manager, &fs))) {
875 nih_assert (fs != NULL);
876 size_t num_groups = 0;
877 nih_local size_t *num_inodes = NULL;
878 size_t mean = 0;
879 size_t hits = 0;
880
881 nih_assert (fs != NULL);
882
883 /* Calculate the number of inode groups on this filesystem */
884 num_groups = ((fs->super->s_blocks_count - 1)
885 / fs->super->s_blocks_per_group) + 1;
886
887 /* Fill in the pack path's group member, and count the
888 * number of inodes in each group.
889 */
890 num_inodes = NIH_MUST (nih_alloc (NULL, (sizeof (size_t)
891 * num_groups)));
892 memset (num_inodes, 0, sizeof (size_t) * num_groups);
893
894 for (size_t i = 0; i < file->num_paths; i++) {
895 file->paths[i].group = ext2fs_group_of_ino (fs, file->paths[i].ino);
896 num_inodes[file->paths[i].group]++;
897 }
898
899 /* Iterate the groups and add any group that exceeds the
900 * inode preload threshold.
901 */
902 for (size_t i = 0; i < num_groups; i++) {
903 mean += num_inodes[i];
904 if (num_inodes[i] > INODE_GROUP_PRELOAD_THRESHOLD) {
905 file->groups = NIH_MUST (nih_realloc (file->groups, parent,
906 (sizeof (int)
907 * (file->num_groups + 1))));
908 file->groups[file->num_groups++] = i;
909 hits++;
910 }
911 }
912
913 mean /= num_groups;
914
915 nih_debug ("%zu inode groups, mean %zu inodes per group, %zu hits",
916 num_groups, mean, hits);
917
918 ext2fs_close (fs);
919 }
920
921 return 0;
922}
923
924
925static int
926block_compar (const void *a,
927 const void *b)
928{
929 const PackBlock *block_a = a;
930 const PackBlock *block_b = b;
931
932 nih_assert (block_a != NULL);
933 nih_assert (block_b != NULL);
934
935 if (block_a->physical < block_b->physical) {
936 return -1;
937 } else if (block_a->physical > block_b->physical) {
938 return 1;
939 } else {
940 return 0;
941 }
942}
943
944static int
945trace_sort_blocks (const void *parent,
946 PackFile * file)
947{
948 nih_assert (file != NULL);
949
950 /* Sort the blocks array by physical location, since these are
951 * read in a separate pass to opening files, there's no reason
952 * to consider which path each block is in - and thus resulting
953 * in a linear disk read.
954 */
955 qsort (file->blocks, file->num_blocks, sizeof (PackBlock),
956 block_compar);
957
958 return 0;
959}
960
961static int
962path_compar (const void *a,
963 const void *b)
964{
965 const PackPath * const *path_a = a;
966 const PackPath * const *path_b = b;
967
968 nih_assert (path_a != NULL);
969 nih_assert (path_b != NULL);
970
971 if ((*path_a)->group < (*path_b)->group) {
972 return -1;
973 } else if ((*path_a)->group > (*path_b)->group) {
974 return 1;
975 } else if ((*path_a)->ino < (*path_b)->ino) {
976 return -1;
977 } else if ((*path_b)->ino > (*path_b)->ino) {
978 return 1;
979 } else {
980 return strcmp ((*path_a)->path, (*path_b)->path);
981 }
982}
983
984static int
985trace_sort_paths (const void *parent,
986 PackFile * file)
987{
988 nih_local PackPath **paths = NULL;
989 nih_local size_t * new_idx = NULL;
990 PackPath * new_paths;
991
992 nih_assert (file != NULL);
993
994 /* Sort the paths array by ext2fs inode group, ino_t then path.
995 *
996 * Mucking around with things like the physical locations of
997 * first on-disk blocks of the dentry and stuff didn't work out
998 * so well, sorting by path was better, but this seems the best.
999 * (it looks good on blktrace too)
1000 */
1001 paths = NIH_MUST (nih_alloc (NULL, (sizeof (PackPath *)
1002 * file->num_paths)));
1003
1004 for (size_t i = 0; i < file->num_paths; i++)
1005 paths[i] = &file->paths[i];
1006
1007 qsort (paths, file->num_paths, sizeof (PackPath *),
1008 path_compar);
1009
1010 /* Calculate the new indexes of each path element in the old
1011 * array, and then update the block array's path indexes to
1012 * match.
1013 */
1014 new_idx = NIH_MUST (nih_alloc (NULL,
1015 (sizeof (size_t) * file->num_paths)));
1016 for (size_t i = 0; i < file->num_paths; i++)
1017 new_idx[paths[i] - file->paths] = i;
1018
1019 for (size_t i = 0; i < file->num_blocks; i++)
1020 file->blocks[i].pathidx = new_idx[file->blocks[i].pathidx];
1021
1022 /* Finally generate a new paths array with the new order and
1023 * attach it to the file.
1024 */
1025 new_paths = NIH_MUST (nih_alloc (parent,
1026 (sizeof (PackPath) * file->num_paths)));
1027 for (size_t i = 0; i < file->num_paths; i++)
1028 memcpy (&new_paths[new_idx[i]], &file->paths[i],
1029 sizeof (PackPath));
1030
1031 nih_unref (file->paths, parent);
1032 file->paths = new_paths;
1033
1034 return 0;
1035}