/* ureadahead
 *
 * trace.c - boot tracing
 *
 * Copyright © 2009 Canonical Ltd.
 * Author: Scott James Remnant <scott@netsplit.com>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif /* HAVE_CONFIG_H */
25
26#define _ATFILE_SOURCE
27
28
29#include <sys/select.h>
30#include <sys/mount.h>
31#include <sys/types.h>
32#include <sys/mman.h>
33#include <sys/stat.h>
Yusuke Satoa2a925e2015-12-17 13:14:55 -080034#include <sys/param.h>
Scott James Remnant56686d62009-11-09 18:38:51 +000035
36#include <errno.h>
37#include <fcntl.h>
38#include <stdio.h>
39#include <signal.h>
40#include <stdlib.h>
41#include <string.h>
42#include <unistd.h>
43
44#include <blkid.h>
45#define NO_INLINE_FUNCS
46#include <ext2fs.h>
47
48#include <linux/fs.h>
49#include <linux/fiemap.h>
50
51#include <nih/macros.h>
52#include <nih/alloc.h>
53#include <nih/string.h>
54#include <nih/list.h>
55#include <nih/hash.h>
56#include <nih/main.h>
57#include <nih/logging.h>
58#include <nih/error.h>
59
60#include "trace.h"
61#include "pack.h"
62#include "values.h"
63#include "file.h"
64
65
/**
 * PATH_DEBUGFS:
 *
 * Location where debugfs is normally mounted; trace() first looks for
 * the "tracing" directory underneath it.
 **/
#define PATH_DEBUGFS     "/sys/kernel/debug"

/**
 * PATH_DEBUGFS_TMP:
 *
 * Fallback mountpoint: when the tracing directory cannot be found under
 * PATH_DEBUGFS, trace() mounts debugfs here for the duration of the
 * trace and unmounts it again afterwards.
 **/
#define PATH_DEBUGFS_TMP "/var/lib/ureadahead/debugfs"

/**
 * INODE_GROUP_PRELOAD_THRESHOLD:
 *
 * An inode group is added to the pack's preload list once more than
 * this many traced inodes fall inside it.
 **/
#define INODE_GROUP_PRELOAD_THRESHOLD 8
87
88
89/* Prototypes for static functions */
90static int read_trace (const void *parent,
91 int dfd, const char *path,
Yusuke Satoa2a925e2015-12-17 13:14:55 -080092 const PathPrefixOption *path_prefix,
Scott James Remnant56686d62009-11-09 18:38:51 +000093 PackFile **files, size_t *num_files);
94static void fix_path (char *pathname);
95static int trace_add_path (const void *parent, const char *pathname,
96 PackFile **files, size_t *num_files);
97static int ignore_path (const char *pathname);
98static PackFile *trace_file (const void *parent, dev_t dev,
99 PackFile **files, size_t *num_files);
100static int trace_add_chunks (const void *parent,
101 PackFile *file, PackPath *path,
102 int fd, off_t size);
103static int trace_add_extents (const void *parent,
104 PackFile *file, PackPath *path,
105 int fd, off_t size,
106 off_t offset, off_t length);
107static int trace_add_groups (const void *parent, PackFile *file);
108static int trace_sort_blocks (const void *parent, PackFile *file);
109static int trace_sort_paths (const void *parent, PackFile *file);
110
111
/**
 * sig_interrupt:
 * @signum: signal that was delivered (unused).
 *
 * Deliberately empty handler installed by trace() for SIGTERM and SIGINT.
 * Having a handler at all is what matters: delivery of the signal makes
 * the blocking pause()/select() call in trace() return instead of the
 * default action being taken.
 **/
static void
sig_interrupt (int signum)
{
	(void) signum;
}
116
117int
118trace (int daemonise,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800119 int timeout,
120 const char *filename_to_replace,
121 const PathPrefixOption *path_prefix)
Scott James Remnant56686d62009-11-09 18:38:51 +0000122{
123 int dfd;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600124 FILE *fp;
Scott James Remnant56686d62009-11-09 18:38:51 +0000125 int unmount = FALSE;
126 int old_sys_open_enabled = 0;
127 int old_open_exec_enabled = 0;
128 int old_uselib_enabled = 0;
129 int old_tracing_enabled = 0;
Tim Gardner73aa2c52010-07-22 04:04:36 -0600130 int old_buffer_size_kb = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000131 struct sigaction act;
132 struct sigaction old_sigterm;
133 struct sigaction old_sigint;
134 struct timeval tv;
135 nih_local PackFile *files = NULL;
136 size_t num_files = 0;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600137 size_t num_cpus = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000138
139 /* Mount debugfs if not already mounted */
140 dfd = open (PATH_DEBUGFS "/tracing", O_RDONLY | O_NOATIME);
141 if (dfd < 0) {
142 if (errno != ENOENT)
143 nih_return_system_error (-1);
144
145 if (mount ("none", PATH_DEBUGFS_TMP, "debugfs", 0, NULL) < 0)
146 nih_return_system_error (-1);
147
148 dfd = open (PATH_DEBUGFS_TMP "/tracing", O_RDONLY | O_NOATIME);
149 if (dfd < 0) {
150 nih_error_raise_system ();
151 umount (PATH_DEBUGFS_TMP);
152 return -1;
153 }
154
155 unmount = TRUE;
156 }
157
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600158 /*
159 * Count the number of CPUs, default to 1 on error.
160 */
161 fp = fopen("/proc/cpuinfo", "r");
162 if (fp) {
163 int line_size=1024;
164 char *processor="processor";
165 char *line = malloc(line_size);
166 if (line) {
167 num_cpus = 0;
168 while (fgets(line,line_size,fp) != NULL) {
169 if (!strncmp(line,processor,strlen(processor)))
170 num_cpus++;
171 }
172 free(line);
173 nih_message("Counted %d CPUs\n",num_cpus);
174 }
175 fclose(fp);
176 }
177 if (!num_cpus)
178 num_cpus = 1;
179
Scott James Remnante30e2372010-09-20 18:34:31 +0100180 /* Enable tracing of open() syscalls */
Scott James Remnant56686d62009-11-09 18:38:51 +0000181 if (set_value (dfd, "events/fs/do_sys_open/enable",
182 TRUE, &old_sys_open_enabled) < 0)
183 goto error;
184 if (set_value (dfd, "events/fs/open_exec/enable",
185 TRUE, &old_open_exec_enabled) < 0)
186 goto error;
187 if (set_value (dfd, "events/fs/uselib/enable",
188 TRUE, &old_uselib_enabled) < 0) {
189 NihError *err;
190
191 err = nih_error_get ();
192 nih_debug ("Missing uselib tracing: %s", err->message);
193 nih_free (err);
194
195 old_uselib_enabled = -1;
196 }
Scott James Remnante30e2372010-09-20 18:34:31 +0100197 if (set_value (dfd, "buffer_size_kb", 8192/num_cpus, &old_buffer_size_kb) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000198 goto error;
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000199 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000200 TRUE, &old_tracing_enabled) < 0)
201 goto error;
202
203 if (daemonise) {
204 pid_t pid;
205
206 pid = fork ();
207 if (pid < 0) {
208 nih_error_raise_system ();
209 goto error;
210 } else if (pid > 0) {
211 _exit (0);
212 }
213 }
214
215 /* Sleep until we get signals */
216 act.sa_handler = sig_interrupt;
217 sigemptyset (&act.sa_mask);
218 act.sa_flags = 0;
219
220 sigaction (SIGTERM, &act, &old_sigterm);
221 sigaction (SIGINT, &act, &old_sigint);
222
223 if (timeout) {
224 tv.tv_sec = timeout;
225 tv.tv_usec = 0;
226
227 select (0, NULL, NULL, NULL, &tv);
228 } else {
229 pause ();
230 }
231
232 sigaction (SIGTERM, &old_sigterm, NULL);
233 sigaction (SIGINT, &old_sigint, NULL);
234
235 /* Restore previous tracing settings */
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000236 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000237 old_tracing_enabled, NULL) < 0)
238 goto error;
239 if (old_uselib_enabled >= 0)
240 if (set_value (dfd, "events/fs/uselib/enable",
241 old_uselib_enabled, NULL) < 0)
242 goto error;
243 if (set_value (dfd, "events/fs/open_exec/enable",
244 old_open_exec_enabled, NULL) < 0)
245 goto error;
246 if (set_value (dfd, "events/fs/do_sys_open/enable",
247 old_sys_open_enabled, NULL) < 0)
248 goto error;
249
250 /* Be nicer */
Scott James Remnantcc2943b2009-11-29 15:24:15 +0000251 if (nice (15))
252 ;
Scott James Remnant56686d62009-11-09 18:38:51 +0000253
254 /* Read trace log */
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800255 if (read_trace (NULL, dfd, "trace", path_prefix, &files, &num_files) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000256 goto error;
257
Tim Gardner73aa2c52010-07-22 04:04:36 -0600258 /*
259 * Restore the trace buffer size (which has just been read) and free
260 * a bunch of memory.
261 */
262 if (set_value (dfd, "buffer_size_kb", old_buffer_size_kb, NULL) < 0)
263 goto error;
264
Scott James Remnant56686d62009-11-09 18:38:51 +0000265 /* Unmount the temporary debugfs mount if we mounted it */
266 if (close (dfd)) {
267 nih_error_raise_system ();
268 goto error;
269 }
270 if (unmount
271 && (umount (PATH_DEBUGFS_TMP) < 0)) {
272 nih_error_raise_system ();
273 goto error;
274 }
275
276 /* Write out pack files */
277 for (size_t i = 0; i < num_files; i++) {
278 nih_local char *filename = NULL;
279
280 filename = pack_file_name_for_device (NULL, files[i].dev);
281 if (! filename) {
282 NihError *err;
283
284 err = nih_error_get ();
285 nih_warn ("%s", err->message);
286 nih_free (err);
287
288 continue;
289 }
290
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800291 /* If filename_to_replace is not NULL, only write out the
292 * file and skip others.
293 */
294 if (filename_to_replace &&
295 strcmp (filename_to_replace, filename)) {
296 nih_info ("Skipping %s", filename);
297 continue;
298 }
299
Scott James Remnant56686d62009-11-09 18:38:51 +0000300 nih_info ("Writing %s", filename);
301
302 /* We only need to apply additional sorting to the
303 * HDD-optimised packs, the SSD ones can read in random
304 * order quite happily.
305 *
306 * Also for HDD, generate the inode group preloading
307 * array.
308 */
309 if (files[i].rotational) {
310 trace_add_groups (files, &files[i]);
311
312 trace_sort_blocks (files, &files[i]);
313 trace_sort_paths (files, &files[i]);
314 }
315
316 write_pack (filename, &files[i]);
317
318 if (nih_log_priority < NIH_LOG_MESSAGE)
319 pack_dump (&files[i], SORT_OPEN);
320 }
321
322 return 0;
323error:
324 close (dfd);
325 if (unmount)
326 umount (PATH_DEBUGFS_TMP);
327
328 return -1;
329}
330
331
332static int
333read_trace (const void *parent,
334 int dfd,
335 const char *path,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800336 const PathPrefixOption *path_prefix,
Scott James Remnant56686d62009-11-09 18:38:51 +0000337 PackFile ** files,
338 size_t * num_files)
339{
340 int fd;
341 FILE *fp;
342 char *line;
343
344 nih_assert (path != NULL);
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800345 nih_assert (path_prefix != NULL);
Scott James Remnant56686d62009-11-09 18:38:51 +0000346 nih_assert (files != NULL);
347 nih_assert (num_files != NULL);
348
349 fd = openat (dfd, path, O_RDONLY);
350 if (fd < 0)
351 nih_return_system_error (-1);
352
353 fp = fdopen (fd, "r");
354 if (! fp) {
355 nih_error_raise_system ();
356 close (fd);
357 return -1;
358 }
359
360 while ((line = fgets_alloc (NULL, fp)) != NULL) {
361 char *ptr;
362 char *end;
363
364 ptr = strstr (line, " do_sys_open:");
365 if (! ptr)
366 ptr = strstr (line, " open_exec:");
367 if (! ptr)
368 ptr = strstr (line, " uselib:");
369 if (! ptr) {
370 nih_free (line);
371 continue;
372 }
373
374 ptr = strchr (ptr, '"');
375 if (! ptr) {
376 nih_free (line);
377 continue;
378 }
379
380 ptr++;
381
382 end = strrchr (ptr, '"');
383 if (! end) {
384 nih_free (line);
385 continue;
386 }
387
388 *end = '\0';
389
390 fix_path (ptr);
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800391 if (path_prefix->st_dev != NODEV && ptr[0] == '/') {
392 struct stat stbuf;
393 char *rewritten = nih_sprintf (
394 line, "%s%s", path_prefix->prefix, ptr);
395 if (! lstat (rewritten, &stbuf) &&
396 stbuf.st_dev == path_prefix->st_dev) {
397 /* If |rewritten| exists on the same device as
398 * path_prefix->st_dev, record the rewritten one
399 * instead of the original path.
400 */
401 ptr = rewritten;
402 }
403 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000404 trace_add_path (parent, ptr, files, num_files);
405
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800406 nih_free (line); /* also frees |rewritten| */
Scott James Remnant56686d62009-11-09 18:38:51 +0000407 }
408
409 if (fclose (fp) < 0)
410 nih_return_system_error (-1);
411
412 return 0;
413}
414
/**
 * fix_path:
 * @pathname: path to normalise, modified in place.
 *
 * Purely textual clean-up of @pathname; the filesystem is never
 * consulted, so symbolic links are not taken into account:
 *
 *  - empty components ("//") and "." components are removed,
 *  - a ".." component is removed together with the component before it
 *    (or just removed when there is nothing before it),
 *  - trailing slashes are stripped.
 *
 * The result is never longer than the input.  Note that a path which
 * reduces to the root directory, such as "/", becomes the empty string.
 **/
static void
fix_path (char *pathname)
{
	size_t pos = 0;

	nih_assert (pathname != NULL);

	/* An index is used rather than a moving pointer so that stepping
	 * back to the start of the string never forms a pointer before the
	 * beginning of the buffer.
	 */
	while (pathname[pos] != '\0') {
		char * cur = pathname + pos;
		size_t len;

		if (cur[0] != '/') {
			pos++;
			continue;
		}

		/* Length of the component that follows this slash */
		len = strcspn (cur + 1, "/");

		/* // and /./, we shorten the string and look again at the
		 * same position, which now holds whatever followed.
		 */
		if ((len == 0) || ((len == 1) && (cur[1] == '.'))) {
			memmove (cur, cur + len + 1, strlen (cur) - len);
			continue;
		}

		/* /../, we shorten back to the previous / or the start
		 * of the string and look again from there.
		 */
		if ((len == 2) && (cur[1] == '.') && (cur[2] == '.')) {
			size_t root = pos;

			while (root > 0) {
				root--;
				if (pathname[root] == '/')
					break;
			}

			memmove (pathname + root, cur + len + 1,
				 strlen (cur) - len);
			pos = root;
			continue;
		}

		pos++;
	}

	/* pos now indexes the terminating NUL; strip trailing slashes */
	while ((pos > 0) && (pathname[pos - 1] == '/'))
		pathname[--pos] = '\0';
}
461
462
463static int
464trace_add_path (const void *parent,
465 const char *pathname,
466 PackFile ** files,
467 size_t * num_files)
468{
469 static NihHash *path_hash = NULL;
470 struct stat statbuf;
471 int fd;
472 PackFile * file;
473 PackPath * path;
474 static NihHash *inode_hash = NULL;
475 nih_local char *inode_key = NULL;
476
477 nih_assert (pathname != NULL);
478 nih_assert (files != NULL);
479 nih_assert (num_files != NULL);
480
481 /* We can't really deal with relative paths since we don't know
482 * the working directory that they were opened from.
483 */
484 if (pathname[0] != '/') {
485 nih_warn ("%s: %s", pathname, _("Ignored relative path"));
486 return 0;
487 }
488
489 /* Certain paths aren't worth caching, because they're virtual or
490 * temporary filesystems and would waste pack space.
491 */
492 if (ignore_path (pathname))
493 return 0;
494
495 /* Ignore paths that won't fit in the pack; we could use PATH_MAX,
496 * but with 1000 files that'd be 4M just for the
497 * pack.
498 */
499 if (strlen (pathname) > PACK_PATH_MAX) {
500 nih_warn ("%s: %s", pathname, _("Ignored far too long path"));
501 return 0;
502 }
503
504 /* Use a hash table of paths to eliminate duplicate path names from
505 * the table since that would waste pack space (and fds).
506 */
507 if (! path_hash)
508 path_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
509
510 if (nih_hash_lookup (path_hash, pathname)) {
511 return 0;
512 } else {
513 NihListEntry *entry;
514
515 entry = NIH_MUST (nih_list_entry_new (path_hash));
516 entry->str = NIH_MUST (nih_strdup (entry, pathname));
517
518 nih_hash_add (path_hash, &entry->entry);
519 }
520
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100521 /* Make sure that we have an ordinary file
522 * This avoids us opening a fifo or socket or symlink.
Scott James Remnant56686d62009-11-09 18:38:51 +0000523 */
524 if ((lstat (pathname, &statbuf) < 0)
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100525 || (S_ISLNK (statbuf.st_mode))
Scott James Remnant56686d62009-11-09 18:38:51 +0000526 || (! S_ISREG (statbuf.st_mode)))
527 return 0;
528
529 /* Open and stat again to get the genuine details, in case it
530 * changes under us.
531 */
532 fd = open (pathname, O_RDONLY | O_NOATIME);
533 if (fd < 0) {
534 nih_warn ("%s: %s: %s", pathname,
535 _("File vanished or error reading"),
536 strerror (errno));
537 return -1;
538 }
539
540 if (fstat (fd, &statbuf) < 0) {
541 nih_warn ("%s: %s: %s", pathname,
542 _("Error retrieving file stat"),
543 strerror (errno));
544 close (fd);
545 return -1;
546 }
547
548 /* Double-check that it's really still a file */
549 if (! S_ISREG (statbuf.st_mode)) {
550 close (fd);
551 return 0;
552 }
553
554 /* Some people think it's clever to split their filesystem across
555 * multiple devices, so we need to generate a different pack file
556 * for each device.
557 *
558 * Lookup file based on the dev_t, potentially creating a new
559 * pack file in the array.
560 */
561 file = trace_file (parent, statbuf.st_dev, files, num_files);
562
563 /* Grow the PackPath array and fill in the details for the new
564 * path.
565 */
566 file->paths = NIH_MUST (nih_realloc (file->paths, *files,
567 (sizeof (PackPath)
568 * (file->num_paths + 1))));
569
570 path = &file->paths[file->num_paths++];
571 memset (path, 0, sizeof (PackPath));
572
573 path->group = -1;
574 path->ino = statbuf.st_ino;
575
576 strncpy (path->path, pathname, PACK_PATH_MAX);
577 path->path[PACK_PATH_MAX] = '\0';
578
579 /* The paths array contains each unique path opened, but these
580 * might be symbolic or hard links to the same underlying files
581 * and we don't want to read the same block more than once.
582 *
583 * Use a hash table of dev_t/ino_t pairs to make sure we only
584 * read the blocks of an actual file the first time.
585 */
586 if (! inode_hash)
587 inode_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
588
589 inode_key = NIH_MUST (nih_sprintf (NULL, "%llu:%llu",
590 (unsigned long long)statbuf.st_dev,
591 (unsigned long long)statbuf.st_ino));
592
593 if (nih_hash_lookup (inode_hash, inode_key)) {
594 close (fd);
595 return 0;
596 } else {
597 NihListEntry *entry;
598
599 entry = NIH_MUST (nih_list_entry_new (inode_hash));
600 entry->str = inode_key;
601 nih_ref (entry->str, entry);
602
603 nih_hash_add (inode_hash, &entry->entry);
604 }
605
606 /* There's also no point reading zero byte files, since they
607 * won't have any blocks (and we can't mmap zero bytes anyway).
608 */
609 if (! statbuf.st_size) {
610 close (fd);
611 return 0;
612 }
613
614 /* Now read the in-memory chunks of this file and add those to
615 * the pack file too.
616 */
617 trace_add_chunks (*files, file, path, fd, statbuf.st_size);
618 close (fd);
619
620 return 0;
621}
622
/**
 * ignore_path:
 * @pathname: path to check.
 *
 * Decides whether @pathname lies underneath one of the directories whose
 * contents are never added to a pack.  The comparison is a plain prefix
 * match including the trailing slash, so the directories themselves
 * (e.g. "/proc") are not matched.
 *
 * Returns: TRUE if the path should be ignored, FALSE otherwise.
 **/
static int
ignore_path (const char *pathname)
{
	static const char * const ignored[] = {
		"/proc/",
		"/sys/",
		"/dev/",
		"/tmp/",
		"/run/",
		"/var/run/",
		"/var/log/",
		"/var/lock/",
	};

	nih_assert (pathname != NULL);

	for (size_t i = 0; i < sizeof ignored / sizeof ignored[0]; i++) {
		if (strncmp (pathname, ignored[i], strlen (ignored[i])) == 0)
			return TRUE;
	}

	return FALSE;
}
647
648
649static PackFile *
650trace_file (const void *parent,
651 dev_t dev,
652 PackFile ** files,
653 size_t * num_files)
654{
655 nih_local char *filename = NULL;
656 int rotational;
657 PackFile * file;
658
659 nih_assert (files != NULL);
660 nih_assert (num_files != NULL);
661
662 /* Return any existing file structure for this device */
663 for (size_t i = 0; i < *num_files; i++)
664 if ((*files)[i].dev == dev)
665 return &(*files)[i];
666
667 /* Query sysfs to see whether this disk is rotational; this
668 * obviously won't work for virtual devices and the like, so
669 * default to TRUE for now.
670 */
671 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:0/queue/rotational",
672 major (dev)));
673
674 if (get_value (AT_FDCWD, filename, &rotational) < 0) {
675 NihError *err;
676
677 err = nih_error_get ();
678 nih_warn (_("Unable to obtain rotationalness for device %u:%u: %s"),
679 major (dev), minor (dev), err->message);
680 nih_free (err);
681
682 rotational = TRUE;
683 }
684
685 /* Grow the PackFile array and fill in the details for the new
686 * file.
687 */
688 *files = NIH_MUST (nih_realloc (*files, parent,
689 (sizeof (PackFile) * (*num_files + 1))));
690
691 file = &(*files)[(*num_files)++];
692 memset (file, 0, sizeof (PackFile));
693
694 file->dev = dev;
695 file->rotational = rotational;
696 file->num_paths = 0;
697 file->paths = NULL;
698 file->num_blocks = 0;
699 file->blocks = NULL;
700
701 return file;
702}
703
704
705static int
706trace_add_chunks (const void *parent,
707 PackFile * file,
708 PackPath * path,
709 int fd,
710 off_t size)
711{
712 static int page_size = -1;
713 void * buf;
714 off_t num_pages;
715 nih_local unsigned char *vec = NULL;
716
717 nih_assert (file != NULL);
718 nih_assert (path != NULL);
719 nih_assert (fd >= 0);
720 nih_assert (size > 0);
721
722 if (page_size < 0)
723 page_size = sysconf (_SC_PAGESIZE);
724
725 /* Map the file into memory */
726 buf = mmap (NULL, size, PROT_READ, MAP_SHARED, fd, 0);
727 if (buf == MAP_FAILED) {
728 nih_warn ("%s: %s: %s", path->path,
729 _("Error mapping into memory"),
730 strerror (errno));
731 return -1;
732 }
733
734 /* Grab the core memory map of the file */
735 num_pages = (size - 1) / page_size + 1;
736 vec = NIH_MUST (nih_alloc (NULL, num_pages));
737 memset (vec, 0, num_pages);
738
739 if (mincore (buf, size, vec) < 0) {
740 nih_warn ("%s: %s: %s", path->path,
741 _("Error retrieving page cache info"),
742 strerror (errno));
743 munmap (buf, size);
744 return -1;
745 }
746
747 /* Clean up */
748 if (munmap (buf, size) < 0) {
749 nih_warn ("%s: %s: %s", path->path,
750 _("Error unmapping from memory"),
751 strerror (errno));
752 return -1;
753 }
754
755
756 /* Now we can figure out which contiguous bits of the file are
757 * in core memory.
758 */
759 for (off_t i = 0; i < num_pages; i++) {
760 off_t offset;
761 off_t length;
762
763 if (! vec[i])
764 continue;
765
766 offset = i * page_size;
767 length = page_size;
768
769 while (((i + 1) < num_pages) && vec[i + 1]) {
770 length += page_size;
771 i++;
772 }
773
774 /* The rotational crowd need this split down further into
775 * on-disk extents, the non-rotational folks can just use
776 * the chunks data.
777 */
778 if (file->rotational) {
779 trace_add_extents (parent, file, path, fd, size,
780 offset, length);
781 } else {
782 PackBlock *block;
783
784 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
785 (sizeof (PackBlock)
786 * (file->num_blocks + 1))));
787
788 block = &file->blocks[file->num_blocks++];
789 memset (block, 0, sizeof (PackBlock));
790
791 block->pathidx = file->num_paths - 1;
792 block->offset = offset;
793 block->length = length;
794 block->physical = -1;
795 }
796 }
797
798 return 0;
799}
800
801struct fiemap *
802get_fiemap (const void *parent,
803 int fd,
804 off_t offset,
805 off_t length)
806{
807 struct fiemap *fiemap;
808
809 nih_assert (fd >= 0);
810
811 fiemap = NIH_MUST (nih_new (parent, struct fiemap));
812 memset (fiemap, 0, sizeof (struct fiemap));
813
814 fiemap->fm_start = offset;
815 fiemap->fm_length = length;
816 fiemap->fm_flags = 0;
817
818 do {
819 /* Query the current number of extents */
820 fiemap->fm_mapped_extents = 0;
821 fiemap->fm_extent_count = 0;
822
823 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
824 nih_error_raise_system ();
825 nih_free (fiemap);
826 return NULL;
827 }
828
829 /* Always allow room for one extra over what we were told,
830 * so we know if they changed under us.
831 */
832 fiemap = NIH_MUST (nih_realloc (fiemap, parent,
833 (sizeof (struct fiemap)
834 + (sizeof (struct fiemap_extent)
835 * (fiemap->fm_mapped_extents + 1)))));
836 fiemap->fm_extent_count = fiemap->fm_mapped_extents + 1;
837 fiemap->fm_mapped_extents = 0;
838
839 memset (fiemap->fm_extents, 0, (sizeof (struct fiemap_extent)
840 * fiemap->fm_extent_count));
841
842 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
843 nih_error_raise_system ();
844 nih_free (fiemap);
845 return NULL;
846 }
847 } while (fiemap->fm_mapped_extents
848 && (fiemap->fm_mapped_extents >= fiemap->fm_extent_count));
849
850 return fiemap;
851}
852
853static int
854trace_add_extents (const void *parent,
855 PackFile * file,
856 PackPath * path,
857 int fd,
858 off_t size,
859 off_t offset,
860 off_t length)
861{
862 nih_local struct fiemap *fiemap = NULL;
863
864 nih_assert (file != NULL);
865 nih_assert (path != NULL);
866 nih_assert (fd >= 0);
867 nih_assert (size > 0);
868
869 /* Get the extents map for this chunk, then iterate the extents
870 * and put those in the pack instead of the chunks.
871 */
872 fiemap = get_fiemap (NULL, fd, offset, length);
873 if (! fiemap) {
874 NihError *err;
875
876 err = nih_error_get ();
877 nih_warn ("%s: %s: %s", path->path,
878 _("Error retrieving chunk extents"),
879 err->message);
880 nih_free (err);
881
882 return -1;
883 }
884
885 for (__u32 j = 0; j < fiemap->fm_mapped_extents; j++) {
886 PackBlock *block;
887 off_t start;
888 off_t end;
889
890 if (fiemap->fm_extents[j].fe_flags & FIEMAP_EXTENT_UNKNOWN)
891 continue;
892
893 /* Work out the intersection of the chunk and extent */
894 start = nih_max (fiemap->fm_start,
895 fiemap->fm_extents[j].fe_logical);
896 end = nih_min ((fiemap->fm_start + fiemap->fm_length),
897 (fiemap->fm_extents[j].fe_logical
898 + fiemap->fm_extents[j].fe_length));
899
900 /* Grow the blocks array to add the extent */
901 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
902 (sizeof (PackBlock)
903 * (file->num_blocks + 1))));
904
905 block = &file->blocks[file->num_blocks++];
906 memset (block, 0, sizeof (PackBlock));
907
908 block->pathidx = file->num_paths - 1;
909 block->offset = start;
910 block->length = end - start;
911 block->physical = (fiemap->fm_extents[j].fe_physical
912 + (start - fiemap->fm_extents[j].fe_logical));
913 }
914
915 return 0;
916}
917
918static int
919trace_add_groups (const void *parent,
920 PackFile * file)
921{
922 const char *devname;
923 ext2_filsys fs = NULL;
924
925 nih_assert (file != NULL);
926
927 devname = blkid_devno_to_devname (file->dev);
928 if (devname
929 && (! ext2fs_open (devname, 0, 0, 0, unix_io_manager, &fs))) {
930 nih_assert (fs != NULL);
931 size_t num_groups = 0;
932 nih_local size_t *num_inodes = NULL;
933 size_t mean = 0;
934 size_t hits = 0;
935
936 nih_assert (fs != NULL);
937
938 /* Calculate the number of inode groups on this filesystem */
939 num_groups = ((fs->super->s_blocks_count - 1)
940 / fs->super->s_blocks_per_group) + 1;
941
942 /* Fill in the pack path's group member, and count the
943 * number of inodes in each group.
944 */
945 num_inodes = NIH_MUST (nih_alloc (NULL, (sizeof (size_t)
946 * num_groups)));
947 memset (num_inodes, 0, sizeof (size_t) * num_groups);
948
949 for (size_t i = 0; i < file->num_paths; i++) {
950 file->paths[i].group = ext2fs_group_of_ino (fs, file->paths[i].ino);
951 num_inodes[file->paths[i].group]++;
952 }
953
954 /* Iterate the groups and add any group that exceeds the
955 * inode preload threshold.
956 */
957 for (size_t i = 0; i < num_groups; i++) {
958 mean += num_inodes[i];
959 if (num_inodes[i] > INODE_GROUP_PRELOAD_THRESHOLD) {
960 file->groups = NIH_MUST (nih_realloc (file->groups, parent,
961 (sizeof (int)
962 * (file->num_groups + 1))));
963 file->groups[file->num_groups++] = i;
964 hits++;
965 }
966 }
967
968 mean /= num_groups;
969
970 nih_debug ("%zu inode groups, mean %zu inodes per group, %zu hits",
971 num_groups, mean, hits);
972
973 ext2fs_close (fs);
974 }
975
976 return 0;
977}
978
979
980static int
981block_compar (const void *a,
982 const void *b)
983{
984 const PackBlock *block_a = a;
985 const PackBlock *block_b = b;
986
987 nih_assert (block_a != NULL);
988 nih_assert (block_b != NULL);
989
990 if (block_a->physical < block_b->physical) {
991 return -1;
992 } else if (block_a->physical > block_b->physical) {
993 return 1;
994 } else {
995 return 0;
996 }
997}
998
999static int
1000trace_sort_blocks (const void *parent,
1001 PackFile * file)
1002{
1003 nih_assert (file != NULL);
1004
1005 /* Sort the blocks array by physical location, since these are
1006 * read in a separate pass to opening files, there's no reason
1007 * to consider which path each block is in - and thus resulting
1008 * in a linear disk read.
1009 */
1010 qsort (file->blocks, file->num_blocks, sizeof (PackBlock),
1011 block_compar);
1012
1013 return 0;
1014}
1015
1016static int
1017path_compar (const void *a,
1018 const void *b)
1019{
1020 const PackPath * const *path_a = a;
1021 const PackPath * const *path_b = b;
1022
1023 nih_assert (path_a != NULL);
1024 nih_assert (path_b != NULL);
1025
1026 if ((*path_a)->group < (*path_b)->group) {
1027 return -1;
1028 } else if ((*path_a)->group > (*path_b)->group) {
1029 return 1;
1030 } else if ((*path_a)->ino < (*path_b)->ino) {
1031 return -1;
1032 } else if ((*path_b)->ino > (*path_b)->ino) {
1033 return 1;
1034 } else {
1035 return strcmp ((*path_a)->path, (*path_b)->path);
1036 }
1037}
1038
1039static int
1040trace_sort_paths (const void *parent,
1041 PackFile * file)
1042{
1043 nih_local PackPath **paths = NULL;
1044 nih_local size_t * new_idx = NULL;
1045 PackPath * new_paths;
1046
1047 nih_assert (file != NULL);
1048
1049 /* Sort the paths array by ext2fs inode group, ino_t then path.
1050 *
1051 * Mucking around with things like the physical locations of
1052 * first on-disk blocks of the dentry and stuff didn't work out
1053 * so well, sorting by path was better, but this seems the best.
1054 * (it looks good on blktrace too)
1055 */
1056 paths = NIH_MUST (nih_alloc (NULL, (sizeof (PackPath *)
1057 * file->num_paths)));
1058
1059 for (size_t i = 0; i < file->num_paths; i++)
1060 paths[i] = &file->paths[i];
1061
1062 qsort (paths, file->num_paths, sizeof (PackPath *),
1063 path_compar);
1064
1065 /* Calculate the new indexes of each path element in the old
1066 * array, and then update the block array's path indexes to
1067 * match.
1068 */
1069 new_idx = NIH_MUST (nih_alloc (NULL,
1070 (sizeof (size_t) * file->num_paths)));
1071 for (size_t i = 0; i < file->num_paths; i++)
1072 new_idx[paths[i] - file->paths] = i;
1073
1074 for (size_t i = 0; i < file->num_blocks; i++)
1075 file->blocks[i].pathidx = new_idx[file->blocks[i].pathidx];
1076
1077 /* Finally generate a new paths array with the new order and
1078 * attach it to the file.
1079 */
1080 new_paths = NIH_MUST (nih_alloc (parent,
1081 (sizeof (PackPath) * file->num_paths)));
1082 for (size_t i = 0; i < file->num_paths; i++)
1083 memcpy (&new_paths[new_idx[i]], &file->paths[i],
1084 sizeof (PackPath));
1085
1086 nih_unref (file->paths, parent);
1087 file->paths = new_paths;
1088
1089 return 0;
1090}