blob: 897fbc2ab5026dee882a47ebcd689670f3f746b2 [file] [log] [blame]
Scott James Remnant56686d62009-11-09 18:38:51 +00001/* ureadahead
2 *
3 * trace.c - boot tracing
4 *
5 * Copyright © 2009 Canonical Ltd.
6 * Author: Scott James Remnant <scott@netsplit.com>.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2, as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif /* HAVE_CONFIG_H */
25
26#define _ATFILE_SOURCE
27
28
29#include <sys/select.h>
30#include <sys/mount.h>
31#include <sys/types.h>
32#include <sys/mman.h>
33#include <sys/stat.h>
Yusuke Satoa2a925e2015-12-17 13:14:55 -080034#include <sys/param.h>
Scott James Remnant56686d62009-11-09 18:38:51 +000035
36#include <errno.h>
37#include <fcntl.h>
38#include <stdio.h>
39#include <signal.h>
40#include <stdlib.h>
41#include <string.h>
42#include <unistd.h>
43
44#include <blkid.h>
45#define NO_INLINE_FUNCS
46#include <ext2fs.h>
47
48#include <linux/fs.h>
49#include <linux/fiemap.h>
50
51#include <nih/macros.h>
52#include <nih/alloc.h>
53#include <nih/string.h>
54#include <nih/list.h>
55#include <nih/hash.h>
56#include <nih/main.h>
57#include <nih/logging.h>
58#include <nih/error.h>
59
60#include "trace.h"
61#include "pack.h"
62#include "values.h"
63#include "file.h"
64
65
66/**
67 * PATH_DEBUGFS:
68 *
69 * Path to the usual debugfs mountpoint.
70 **/
71#define PATH_DEBUGFS "/sys/kernel/debug"
72
73/**
74 * PATH_DEBUGFS_TMP:
75 *
76 * Path to the temporary debugfs mountpoint that we mount it on if it
77 * hasn't been mounted at the usual place yet.
78 **/
79#define PATH_DEBUGFS_TMP "/var/lib/ureadahead/debugfs"
80
81/**
82 * INODE_GROUP_PRELOAD_THRESHOLD:
83 *
84 * Number of inodes in a group before we preload that inode's blocks.
85 **/
86#define INODE_GROUP_PRELOAD_THRESHOLD 8
87
88
89/* Prototypes for static functions */
90static int read_trace (const void *parent,
91 int dfd, const char *path,
Yusuke Satoa2a925e2015-12-17 13:14:55 -080092 const PathPrefixOption *path_prefix,
Scott James Remnant56686d62009-11-09 18:38:51 +000093 PackFile **files, size_t *num_files);
94static void fix_path (char *pathname);
95static int trace_add_path (const void *parent, const char *pathname,
96 PackFile **files, size_t *num_files);
97static int ignore_path (const char *pathname);
98static PackFile *trace_file (const void *parent, dev_t dev,
99 PackFile **files, size_t *num_files);
100static int trace_add_chunks (const void *parent,
101 PackFile *file, PackPath *path,
102 int fd, off_t size);
103static int trace_add_extents (const void *parent,
104 PackFile *file, PackPath *path,
105 int fd, off_t size,
106 off_t offset, off_t length);
107static int trace_add_groups (const void *parent, PackFile *file);
108static int trace_sort_blocks (const void *parent, PackFile *file);
109static int trace_sort_paths (const void *parent, PackFile *file);
110
111
/**
 * sig_interrupt:
 * @signum: number of the signal that was delivered (unused).
 *
 * Intentionally empty signal handler.  trace() installs it for SIGTERM
 * and SIGINT for the duration of its wait, so those signals are routed
 * here rather than to whatever handler was installed before.
 **/
static void
sig_interrupt (int signum)
{
	(void) signum;
}
116
117int
118trace (int daemonise,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800119 int timeout,
120 const char *filename_to_replace,
121 const PathPrefixOption *path_prefix)
Scott James Remnant56686d62009-11-09 18:38:51 +0000122{
123 int dfd;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600124 FILE *fp;
Scott James Remnant56686d62009-11-09 18:38:51 +0000125 int unmount = FALSE;
126 int old_sys_open_enabled = 0;
127 int old_open_exec_enabled = 0;
128 int old_uselib_enabled = 0;
129 int old_tracing_enabled = 0;
Tim Gardner73aa2c52010-07-22 04:04:36 -0600130 int old_buffer_size_kb = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000131 struct sigaction act;
132 struct sigaction old_sigterm;
133 struct sigaction old_sigint;
134 struct timeval tv;
135 nih_local PackFile *files = NULL;
136 size_t num_files = 0;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600137 size_t num_cpus = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000138
139 /* Mount debugfs if not already mounted */
140 dfd = open (PATH_DEBUGFS "/tracing", O_RDONLY | O_NOATIME);
141 if (dfd < 0) {
142 if (errno != ENOENT)
143 nih_return_system_error (-1);
144
145 if (mount ("none", PATH_DEBUGFS_TMP, "debugfs", 0, NULL) < 0)
146 nih_return_system_error (-1);
147
148 dfd = open (PATH_DEBUGFS_TMP "/tracing", O_RDONLY | O_NOATIME);
149 if (dfd < 0) {
150 nih_error_raise_system ();
151 umount (PATH_DEBUGFS_TMP);
152 return -1;
153 }
154
155 unmount = TRUE;
156 }
157
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600158 /*
159 * Count the number of CPUs, default to 1 on error.
160 */
161 fp = fopen("/proc/cpuinfo", "r");
162 if (fp) {
163 int line_size=1024;
164 char *processor="processor";
165 char *line = malloc(line_size);
166 if (line) {
167 num_cpus = 0;
168 while (fgets(line,line_size,fp) != NULL) {
169 if (!strncmp(line,processor,strlen(processor)))
170 num_cpus++;
171 }
172 free(line);
173 nih_message("Counted %d CPUs\n",num_cpus);
174 }
175 fclose(fp);
176 }
177 if (!num_cpus)
178 num_cpus = 1;
179
Scott James Remnante30e2372010-09-20 18:34:31 +0100180 /* Enable tracing of open() syscalls */
Scott James Remnant56686d62009-11-09 18:38:51 +0000181 if (set_value (dfd, "events/fs/do_sys_open/enable",
182 TRUE, &old_sys_open_enabled) < 0)
183 goto error;
184 if (set_value (dfd, "events/fs/open_exec/enable",
185 TRUE, &old_open_exec_enabled) < 0)
186 goto error;
187 if (set_value (dfd, "events/fs/uselib/enable",
188 TRUE, &old_uselib_enabled) < 0) {
189 NihError *err;
190
191 err = nih_error_get ();
192 nih_debug ("Missing uselib tracing: %s", err->message);
193 nih_free (err);
194
195 old_uselib_enabled = -1;
196 }
Scott James Remnante30e2372010-09-20 18:34:31 +0100197 if (set_value (dfd, "buffer_size_kb", 8192/num_cpus, &old_buffer_size_kb) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000198 goto error;
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000199 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000200 TRUE, &old_tracing_enabled) < 0)
201 goto error;
202
203 if (daemonise) {
204 pid_t pid;
205
206 pid = fork ();
207 if (pid < 0) {
208 nih_error_raise_system ();
209 goto error;
210 } else if (pid > 0) {
211 _exit (0);
212 }
213 }
214
215 /* Sleep until we get signals */
216 act.sa_handler = sig_interrupt;
217 sigemptyset (&act.sa_mask);
218 act.sa_flags = 0;
219
220 sigaction (SIGTERM, &act, &old_sigterm);
221 sigaction (SIGINT, &act, &old_sigint);
222
223 if (timeout) {
224 tv.tv_sec = timeout;
225 tv.tv_usec = 0;
226
227 select (0, NULL, NULL, NULL, &tv);
228 } else {
229 pause ();
230 }
231
232 sigaction (SIGTERM, &old_sigterm, NULL);
233 sigaction (SIGINT, &old_sigint, NULL);
234
235 /* Restore previous tracing settings */
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000236 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000237 old_tracing_enabled, NULL) < 0)
238 goto error;
239 if (old_uselib_enabled >= 0)
240 if (set_value (dfd, "events/fs/uselib/enable",
241 old_uselib_enabled, NULL) < 0)
242 goto error;
243 if (set_value (dfd, "events/fs/open_exec/enable",
244 old_open_exec_enabled, NULL) < 0)
245 goto error;
246 if (set_value (dfd, "events/fs/do_sys_open/enable",
247 old_sys_open_enabled, NULL) < 0)
248 goto error;
249
250 /* Be nicer */
Scott James Remnantcc2943b2009-11-29 15:24:15 +0000251 if (nice (15))
252 ;
Scott James Remnant56686d62009-11-09 18:38:51 +0000253
254 /* Read trace log */
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800255 if (read_trace (NULL, dfd, "trace", path_prefix, &files, &num_files) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000256 goto error;
257
Tim Gardner73aa2c52010-07-22 04:04:36 -0600258 /*
259 * Restore the trace buffer size (which has just been read) and free
260 * a bunch of memory.
261 */
262 if (set_value (dfd, "buffer_size_kb", old_buffer_size_kb, NULL) < 0)
263 goto error;
264
Scott James Remnant56686d62009-11-09 18:38:51 +0000265 /* Unmount the temporary debugfs mount if we mounted it */
266 if (close (dfd)) {
267 nih_error_raise_system ();
268 goto error;
269 }
270 if (unmount
271 && (umount (PATH_DEBUGFS_TMP) < 0)) {
272 nih_error_raise_system ();
273 goto error;
274 }
275
276 /* Write out pack files */
277 for (size_t i = 0; i < num_files; i++) {
278 nih_local char *filename = NULL;
279
280 filename = pack_file_name_for_device (NULL, files[i].dev);
281 if (! filename) {
282 NihError *err;
283
284 err = nih_error_get ();
285 nih_warn ("%s", err->message);
286 nih_free (err);
287
288 continue;
289 }
290
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800291 /* If filename_to_replace is not NULL, only write out the
292 * file and skip others.
293 */
294 if (filename_to_replace &&
295 strcmp (filename_to_replace, filename)) {
296 nih_info ("Skipping %s", filename);
297 continue;
298 }
299
Scott James Remnant56686d62009-11-09 18:38:51 +0000300 nih_info ("Writing %s", filename);
301
302 /* We only need to apply additional sorting to the
303 * HDD-optimised packs, the SSD ones can read in random
304 * order quite happily.
305 *
306 * Also for HDD, generate the inode group preloading
307 * array.
308 */
309 if (files[i].rotational) {
310 trace_add_groups (files, &files[i]);
311
312 trace_sort_blocks (files, &files[i]);
313 trace_sort_paths (files, &files[i]);
314 }
315
316 write_pack (filename, &files[i]);
317
318 if (nih_log_priority < NIH_LOG_MESSAGE)
319 pack_dump (&files[i], SORT_OPEN);
320 }
321
322 return 0;
323error:
324 close (dfd);
325 if (unmount)
326 umount (PATH_DEBUGFS_TMP);
327
328 return -1;
329}
330
331
332static int
333read_trace (const void *parent,
334 int dfd,
335 const char *path,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800336 const PathPrefixOption *path_prefix,
Scott James Remnant56686d62009-11-09 18:38:51 +0000337 PackFile ** files,
338 size_t * num_files)
339{
340 int fd;
341 FILE *fp;
342 char *line;
343
344 nih_assert (path != NULL);
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800345 nih_assert (path_prefix != NULL);
Scott James Remnant56686d62009-11-09 18:38:51 +0000346 nih_assert (files != NULL);
347 nih_assert (num_files != NULL);
348
349 fd = openat (dfd, path, O_RDONLY);
350 if (fd < 0)
351 nih_return_system_error (-1);
352
353 fp = fdopen (fd, "r");
354 if (! fp) {
355 nih_error_raise_system ();
356 close (fd);
357 return -1;
358 }
359
360 while ((line = fgets_alloc (NULL, fp)) != NULL) {
361 char *ptr;
362 char *end;
363
364 ptr = strstr (line, " do_sys_open:");
365 if (! ptr)
366 ptr = strstr (line, " open_exec:");
367 if (! ptr)
368 ptr = strstr (line, " uselib:");
369 if (! ptr) {
370 nih_free (line);
371 continue;
372 }
373
374 ptr = strchr (ptr, '"');
375 if (! ptr) {
376 nih_free (line);
377 continue;
378 }
379
380 ptr++;
381
382 end = strrchr (ptr, '"');
383 if (! end) {
384 nih_free (line);
385 continue;
386 }
387
388 *end = '\0';
389
390 fix_path (ptr);
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800391 if (path_prefix->st_dev != NODEV && ptr[0] == '/') {
392 struct stat stbuf;
393 char *rewritten = nih_sprintf (
394 line, "%s%s", path_prefix->prefix, ptr);
395 if (! lstat (rewritten, &stbuf) &&
396 stbuf.st_dev == path_prefix->st_dev) {
397 /* If |rewritten| exists on the same device as
398 * path_prefix->st_dev, record the rewritten one
399 * instead of the original path.
400 */
401 ptr = rewritten;
402 }
403 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000404 trace_add_path (parent, ptr, files, num_files);
405
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800406 nih_free (line); /* also frees |rewritten| */
Scott James Remnant56686d62009-11-09 18:38:51 +0000407 }
408
409 if (fclose (fp) < 0)
410 nih_return_system_error (-1);
411
412 return 0;
413}
414
/**
 * fix_path:
 * @pathname: NUL-terminated path to normalise, modified in place.
 *
 * Collapses "//" and "/./" sequences, resolves "/../" by dropping the
 * preceding path component, and removes trailing slashes.  The string
 * only ever gets shorter.
 *
 * The scan is index-based so that no pointer before the start of
 * @pathname is ever formed.  When ".." consumes the first component of a
 * relative path (for example "foo/../bar") the result stays relative
 * ("bar") and is not turned into an absolute path.
 **/
static void
fix_path (char *pathname)
{
	size_t pos = 0;

	nih_assert (pathname != NULL);

	while (pathname[pos] != '\0') {
		char  *slash = pathname + pos;
		size_t len;

		if (*slash != '/') {
			pos++;
			continue;
		}

		/* Length of the component that follows this slash */
		len = strcspn (slash + 1, "/");

		/* // and /./, we shorten the string and repeat the loop
		 * looking at the new / (the move includes the NUL).
		 */
		if ((len == 0) || ((len == 1) && (slash[1] == '.'))) {
			memmove (slash, slash + len + 1, strlen (slash) - len);
			continue;
		}

		/* /../, we shorten back to the previous / or the start
		 * of the string and repeat the loop looking at the new /
		 */
		if ((len == 2) && (slash[1] == '.') && (slash[2] == '.')) {
			const char *rest = slash + len + 1;
			size_t      root = pos;
			int         have_parent = 0;

			while (root > 0) {
				root--;
				if (pathname[root] == '/') {
					have_parent = 1;
					break;
				}
			}

			/* No earlier slash and we are not at the start: the
			 * component being dropped was the head of a relative
			 * path, so the slash after ".." goes as well.
			 */
			if ((! have_parent) && (pos > 0) && (*rest == '/'))
				rest++;

			memmove (pathname + root, rest, strlen (rest) + 1);
			pos = root;
			continue;
		}

		pos++;
	}

	/* Strip anything that still looks like a trailing slash */
	while ((pos > 0) && (pathname[pos - 1] == '/'))
		pathname[--pos] = '\0';
}
461
462
463static int
464trace_add_path (const void *parent,
465 const char *pathname,
466 PackFile ** files,
467 size_t * num_files)
468{
469 static NihHash *path_hash = NULL;
470 struct stat statbuf;
471 int fd;
472 PackFile * file;
473 PackPath * path;
474 static NihHash *inode_hash = NULL;
475 nih_local char *inode_key = NULL;
476
477 nih_assert (pathname != NULL);
478 nih_assert (files != NULL);
479 nih_assert (num_files != NULL);
480
481 /* We can't really deal with relative paths since we don't know
482 * the working directory that they were opened from.
483 */
484 if (pathname[0] != '/') {
485 nih_warn ("%s: %s", pathname, _("Ignored relative path"));
486 return 0;
487 }
488
489 /* Certain paths aren't worth caching, because they're virtual or
490 * temporary filesystems and would waste pack space.
491 */
492 if (ignore_path (pathname))
493 return 0;
494
495 /* Ignore paths that won't fit in the pack; we could use PATH_MAX,
496 * but with 1000 files that'd be 4M just for the
497 * pack.
498 */
499 if (strlen (pathname) > PACK_PATH_MAX) {
500 nih_warn ("%s: %s", pathname, _("Ignored far too long path"));
501 return 0;
502 }
503
504 /* Use a hash table of paths to eliminate duplicate path names from
505 * the table since that would waste pack space (and fds).
506 */
507 if (! path_hash)
508 path_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
509
510 if (nih_hash_lookup (path_hash, pathname)) {
511 return 0;
512 } else {
513 NihListEntry *entry;
514
515 entry = NIH_MUST (nih_list_entry_new (path_hash));
516 entry->str = NIH_MUST (nih_strdup (entry, pathname));
517
518 nih_hash_add (path_hash, &entry->entry);
519 }
520
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100521 /* Make sure that we have an ordinary file
522 * This avoids us opening a fifo or socket or symlink.
Scott James Remnant56686d62009-11-09 18:38:51 +0000523 */
524 if ((lstat (pathname, &statbuf) < 0)
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100525 || (S_ISLNK (statbuf.st_mode))
Scott James Remnant56686d62009-11-09 18:38:51 +0000526 || (! S_ISREG (statbuf.st_mode)))
527 return 0;
528
529 /* Open and stat again to get the genuine details, in case it
530 * changes under us.
531 */
532 fd = open (pathname, O_RDONLY | O_NOATIME);
533 if (fd < 0) {
534 nih_warn ("%s: %s: %s", pathname,
535 _("File vanished or error reading"),
536 strerror (errno));
537 return -1;
538 }
539
540 if (fstat (fd, &statbuf) < 0) {
541 nih_warn ("%s: %s: %s", pathname,
542 _("Error retrieving file stat"),
543 strerror (errno));
544 close (fd);
545 return -1;
546 }
547
548 /* Double-check that it's really still a file */
549 if (! S_ISREG (statbuf.st_mode)) {
550 close (fd);
551 return 0;
552 }
553
554 /* Some people think it's clever to split their filesystem across
555 * multiple devices, so we need to generate a different pack file
556 * for each device.
557 *
558 * Lookup file based on the dev_t, potentially creating a new
559 * pack file in the array.
560 */
561 file = trace_file (parent, statbuf.st_dev, files, num_files);
562
563 /* Grow the PackPath array and fill in the details for the new
564 * path.
565 */
566 file->paths = NIH_MUST (nih_realloc (file->paths, *files,
567 (sizeof (PackPath)
568 * (file->num_paths + 1))));
569
570 path = &file->paths[file->num_paths++];
571 memset (path, 0, sizeof (PackPath));
572
573 path->group = -1;
574 path->ino = statbuf.st_ino;
575
576 strncpy (path->path, pathname, PACK_PATH_MAX);
577 path->path[PACK_PATH_MAX] = '\0';
578
579 /* The paths array contains each unique path opened, but these
580 * might be symbolic or hard links to the same underlying files
581 * and we don't want to read the same block more than once.
582 *
583 * Use a hash table of dev_t/ino_t pairs to make sure we only
584 * read the blocks of an actual file the first time.
585 */
586 if (! inode_hash)
587 inode_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
588
589 inode_key = NIH_MUST (nih_sprintf (NULL, "%llu:%llu",
590 (unsigned long long)statbuf.st_dev,
591 (unsigned long long)statbuf.st_ino));
592
593 if (nih_hash_lookup (inode_hash, inode_key)) {
594 close (fd);
595 return 0;
596 } else {
597 NihListEntry *entry;
598
599 entry = NIH_MUST (nih_list_entry_new (inode_hash));
600 entry->str = inode_key;
601 nih_ref (entry->str, entry);
602
603 nih_hash_add (inode_hash, &entry->entry);
604 }
605
606 /* There's also no point reading zero byte files, since they
607 * won't have any blocks (and we can't mmap zero bytes anyway).
608 */
609 if (! statbuf.st_size) {
610 close (fd);
611 return 0;
612 }
613
614 /* Now read the in-memory chunks of this file and add those to
615 * the pack file too.
616 */
617 trace_add_chunks (*files, file, path, fd, statbuf.st_size);
618 close (fd);
619
620 return 0;
621}
622
/**
 * ignore_path:
 * @pathname: path to check.
 *
 * Decides whether @pathname lies below one of the directories that are
 * never recorded in a pack.  The match is a plain prefix comparison that
 * includes the trailing slash, so "/proc/x" matches while "/proc" itself
 * does not.
 *
 * Returns: TRUE if the path should be ignored, FALSE otherwise.
 **/
static int
ignore_path (const char *pathname)
{
	static const char * const skipped[] = {
		"/proc/",
		"/sys/",
		"/dev/",
		"/tmp/",
		"/run/",
		"/var/run/",
		"/var/log/",
		"/var/lock/",
	};

	nih_assert (pathname != NULL);

	for (size_t i = 0; i < sizeof (skipped) / sizeof (skipped[0]); i++) {
		if (strncmp (pathname, skipped[i], strlen (skipped[i])) == 0)
			return TRUE;
	}

	return FALSE;
}
647
648
649static PackFile *
650trace_file (const void *parent,
651 dev_t dev,
652 PackFile ** files,
653 size_t * num_files)
654{
655 nih_local char *filename = NULL;
656 int rotational;
657 PackFile * file;
658
659 nih_assert (files != NULL);
660 nih_assert (num_files != NULL);
661
662 /* Return any existing file structure for this device */
663 for (size_t i = 0; i < *num_files; i++)
664 if ((*files)[i].dev == dev)
665 return &(*files)[i];
666
667 /* Query sysfs to see whether this disk is rotational; this
668 * obviously won't work for virtual devices and the like, so
669 * default to TRUE for now.
670 */
Yusuke Satofc571f12016-04-30 06:17:08 -0400671 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:%d/queue/rotational",
672 major (dev), minor (dev)));
673 if (access (filename, R_OK) < 0) {
674 /* For devices managed by the scsi stack, the minor device number has to be
675 * masked to find the queue/rotational file.
676 */
677 nih_free (filename);
678 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:%d/queue/rotational",
679 major (dev), minor (dev) & 0xffff0));
680 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000681
682 if (get_value (AT_FDCWD, filename, &rotational) < 0) {
683 NihError *err;
684
685 err = nih_error_get ();
686 nih_warn (_("Unable to obtain rotationalness for device %u:%u: %s"),
687 major (dev), minor (dev), err->message);
688 nih_free (err);
689
690 rotational = TRUE;
691 }
692
693 /* Grow the PackFile array and fill in the details for the new
694 * file.
695 */
696 *files = NIH_MUST (nih_realloc (*files, parent,
697 (sizeof (PackFile) * (*num_files + 1))));
698
699 file = &(*files)[(*num_files)++];
700 memset (file, 0, sizeof (PackFile));
701
702 file->dev = dev;
703 file->rotational = rotational;
704 file->num_paths = 0;
705 file->paths = NULL;
706 file->num_blocks = 0;
707 file->blocks = NULL;
708
709 return file;
710}
711
712
713static int
714trace_add_chunks (const void *parent,
715 PackFile * file,
716 PackPath * path,
717 int fd,
718 off_t size)
719{
720 static int page_size = -1;
721 void * buf;
722 off_t num_pages;
723 nih_local unsigned char *vec = NULL;
724
725 nih_assert (file != NULL);
726 nih_assert (path != NULL);
727 nih_assert (fd >= 0);
728 nih_assert (size > 0);
729
730 if (page_size < 0)
731 page_size = sysconf (_SC_PAGESIZE);
732
733 /* Map the file into memory */
734 buf = mmap (NULL, size, PROT_READ, MAP_SHARED, fd, 0);
735 if (buf == MAP_FAILED) {
736 nih_warn ("%s: %s: %s", path->path,
737 _("Error mapping into memory"),
738 strerror (errno));
739 return -1;
740 }
741
742 /* Grab the core memory map of the file */
743 num_pages = (size - 1) / page_size + 1;
744 vec = NIH_MUST (nih_alloc (NULL, num_pages));
745 memset (vec, 0, num_pages);
746
747 if (mincore (buf, size, vec) < 0) {
748 nih_warn ("%s: %s: %s", path->path,
749 _("Error retrieving page cache info"),
750 strerror (errno));
751 munmap (buf, size);
752 return -1;
753 }
754
755 /* Clean up */
756 if (munmap (buf, size) < 0) {
757 nih_warn ("%s: %s: %s", path->path,
758 _("Error unmapping from memory"),
759 strerror (errno));
760 return -1;
761 }
762
763
764 /* Now we can figure out which contiguous bits of the file are
765 * in core memory.
766 */
767 for (off_t i = 0; i < num_pages; i++) {
768 off_t offset;
769 off_t length;
770
771 if (! vec[i])
772 continue;
773
774 offset = i * page_size;
775 length = page_size;
776
777 while (((i + 1) < num_pages) && vec[i + 1]) {
778 length += page_size;
779 i++;
780 }
781
782 /* The rotational crowd need this split down further into
783 * on-disk extents, the non-rotational folks can just use
784 * the chunks data.
785 */
786 if (file->rotational) {
787 trace_add_extents (parent, file, path, fd, size,
788 offset, length);
789 } else {
790 PackBlock *block;
791
792 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
793 (sizeof (PackBlock)
794 * (file->num_blocks + 1))));
795
796 block = &file->blocks[file->num_blocks++];
797 memset (block, 0, sizeof (PackBlock));
798
799 block->pathidx = file->num_paths - 1;
800 block->offset = offset;
801 block->length = length;
802 block->physical = -1;
803 }
804 }
805
806 return 0;
807}
808
809struct fiemap *
810get_fiemap (const void *parent,
811 int fd,
812 off_t offset,
813 off_t length)
814{
815 struct fiemap *fiemap;
816
817 nih_assert (fd >= 0);
818
819 fiemap = NIH_MUST (nih_new (parent, struct fiemap));
820 memset (fiemap, 0, sizeof (struct fiemap));
821
822 fiemap->fm_start = offset;
823 fiemap->fm_length = length;
824 fiemap->fm_flags = 0;
825
826 do {
827 /* Query the current number of extents */
828 fiemap->fm_mapped_extents = 0;
829 fiemap->fm_extent_count = 0;
830
831 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
832 nih_error_raise_system ();
833 nih_free (fiemap);
834 return NULL;
835 }
836
837 /* Always allow room for one extra over what we were told,
838 * so we know if they changed under us.
839 */
840 fiemap = NIH_MUST (nih_realloc (fiemap, parent,
841 (sizeof (struct fiemap)
842 + (sizeof (struct fiemap_extent)
843 * (fiemap->fm_mapped_extents + 1)))));
844 fiemap->fm_extent_count = fiemap->fm_mapped_extents + 1;
845 fiemap->fm_mapped_extents = 0;
846
847 memset (fiemap->fm_extents, 0, (sizeof (struct fiemap_extent)
848 * fiemap->fm_extent_count));
849
850 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
851 nih_error_raise_system ();
852 nih_free (fiemap);
853 return NULL;
854 }
855 } while (fiemap->fm_mapped_extents
856 && (fiemap->fm_mapped_extents >= fiemap->fm_extent_count));
857
858 return fiemap;
859}
860
861static int
862trace_add_extents (const void *parent,
863 PackFile * file,
864 PackPath * path,
865 int fd,
866 off_t size,
867 off_t offset,
868 off_t length)
869{
870 nih_local struct fiemap *fiemap = NULL;
871
872 nih_assert (file != NULL);
873 nih_assert (path != NULL);
874 nih_assert (fd >= 0);
875 nih_assert (size > 0);
876
877 /* Get the extents map for this chunk, then iterate the extents
878 * and put those in the pack instead of the chunks.
879 */
880 fiemap = get_fiemap (NULL, fd, offset, length);
881 if (! fiemap) {
882 NihError *err;
883
884 err = nih_error_get ();
885 nih_warn ("%s: %s: %s", path->path,
886 _("Error retrieving chunk extents"),
887 err->message);
888 nih_free (err);
889
890 return -1;
891 }
892
893 for (__u32 j = 0; j < fiemap->fm_mapped_extents; j++) {
894 PackBlock *block;
895 off_t start;
896 off_t end;
897
898 if (fiemap->fm_extents[j].fe_flags & FIEMAP_EXTENT_UNKNOWN)
899 continue;
900
901 /* Work out the intersection of the chunk and extent */
902 start = nih_max (fiemap->fm_start,
903 fiemap->fm_extents[j].fe_logical);
904 end = nih_min ((fiemap->fm_start + fiemap->fm_length),
905 (fiemap->fm_extents[j].fe_logical
906 + fiemap->fm_extents[j].fe_length));
907
908 /* Grow the blocks array to add the extent */
909 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
910 (sizeof (PackBlock)
911 * (file->num_blocks + 1))));
912
913 block = &file->blocks[file->num_blocks++];
914 memset (block, 0, sizeof (PackBlock));
915
916 block->pathidx = file->num_paths - 1;
917 block->offset = start;
918 block->length = end - start;
919 block->physical = (fiemap->fm_extents[j].fe_physical
920 + (start - fiemap->fm_extents[j].fe_logical));
921 }
922
923 return 0;
924}
925
926static int
927trace_add_groups (const void *parent,
928 PackFile * file)
929{
930 const char *devname;
931 ext2_filsys fs = NULL;
932
933 nih_assert (file != NULL);
934
935 devname = blkid_devno_to_devname (file->dev);
936 if (devname
937 && (! ext2fs_open (devname, 0, 0, 0, unix_io_manager, &fs))) {
938 nih_assert (fs != NULL);
939 size_t num_groups = 0;
940 nih_local size_t *num_inodes = NULL;
941 size_t mean = 0;
942 size_t hits = 0;
943
944 nih_assert (fs != NULL);
945
946 /* Calculate the number of inode groups on this filesystem */
947 num_groups = ((fs->super->s_blocks_count - 1)
948 / fs->super->s_blocks_per_group) + 1;
949
950 /* Fill in the pack path's group member, and count the
951 * number of inodes in each group.
952 */
953 num_inodes = NIH_MUST (nih_alloc (NULL, (sizeof (size_t)
954 * num_groups)));
955 memset (num_inodes, 0, sizeof (size_t) * num_groups);
956
957 for (size_t i = 0; i < file->num_paths; i++) {
958 file->paths[i].group = ext2fs_group_of_ino (fs, file->paths[i].ino);
959 num_inodes[file->paths[i].group]++;
960 }
961
962 /* Iterate the groups and add any group that exceeds the
963 * inode preload threshold.
964 */
965 for (size_t i = 0; i < num_groups; i++) {
966 mean += num_inodes[i];
967 if (num_inodes[i] > INODE_GROUP_PRELOAD_THRESHOLD) {
968 file->groups = NIH_MUST (nih_realloc (file->groups, parent,
969 (sizeof (int)
970 * (file->num_groups + 1))));
971 file->groups[file->num_groups++] = i;
972 hits++;
973 }
974 }
975
976 mean /= num_groups;
977
978 nih_debug ("%zu inode groups, mean %zu inodes per group, %zu hits",
979 num_groups, mean, hits);
980
981 ext2fs_close (fs);
982 }
983
984 return 0;
985}
986
987
988static int
989block_compar (const void *a,
990 const void *b)
991{
992 const PackBlock *block_a = a;
993 const PackBlock *block_b = b;
994
995 nih_assert (block_a != NULL);
996 nih_assert (block_b != NULL);
997
998 if (block_a->physical < block_b->physical) {
999 return -1;
1000 } else if (block_a->physical > block_b->physical) {
1001 return 1;
1002 } else {
1003 return 0;
1004 }
1005}
1006
1007static int
1008trace_sort_blocks (const void *parent,
1009 PackFile * file)
1010{
1011 nih_assert (file != NULL);
1012
1013 /* Sort the blocks array by physical location, since these are
1014 * read in a separate pass to opening files, there's no reason
1015 * to consider which path each block is in - and thus resulting
1016 * in a linear disk read.
1017 */
1018 qsort (file->blocks, file->num_blocks, sizeof (PackBlock),
1019 block_compar);
1020
1021 return 0;
1022}
1023
1024static int
1025path_compar (const void *a,
1026 const void *b)
1027{
1028 const PackPath * const *path_a = a;
1029 const PackPath * const *path_b = b;
1030
1031 nih_assert (path_a != NULL);
1032 nih_assert (path_b != NULL);
1033
1034 if ((*path_a)->group < (*path_b)->group) {
1035 return -1;
1036 } else if ((*path_a)->group > (*path_b)->group) {
1037 return 1;
1038 } else if ((*path_a)->ino < (*path_b)->ino) {
1039 return -1;
1040 } else if ((*path_b)->ino > (*path_b)->ino) {
1041 return 1;
1042 } else {
1043 return strcmp ((*path_a)->path, (*path_b)->path);
1044 }
1045}
1046
1047static int
1048trace_sort_paths (const void *parent,
1049 PackFile * file)
1050{
1051 nih_local PackPath **paths = NULL;
1052 nih_local size_t * new_idx = NULL;
1053 PackPath * new_paths;
1054
1055 nih_assert (file != NULL);
1056
1057 /* Sort the paths array by ext2fs inode group, ino_t then path.
1058 *
1059 * Mucking around with things like the physical locations of
1060 * first on-disk blocks of the dentry and stuff didn't work out
1061 * so well, sorting by path was better, but this seems the best.
1062 * (it looks good on blktrace too)
1063 */
1064 paths = NIH_MUST (nih_alloc (NULL, (sizeof (PackPath *)
1065 * file->num_paths)));
1066
1067 for (size_t i = 0; i < file->num_paths; i++)
1068 paths[i] = &file->paths[i];
1069
1070 qsort (paths, file->num_paths, sizeof (PackPath *),
1071 path_compar);
1072
1073 /* Calculate the new indexes of each path element in the old
1074 * array, and then update the block array's path indexes to
1075 * match.
1076 */
1077 new_idx = NIH_MUST (nih_alloc (NULL,
1078 (sizeof (size_t) * file->num_paths)));
1079 for (size_t i = 0; i < file->num_paths; i++)
1080 new_idx[paths[i] - file->paths] = i;
1081
1082 for (size_t i = 0; i < file->num_blocks; i++)
1083 file->blocks[i].pathidx = new_idx[file->blocks[i].pathidx];
1084
1085 /* Finally generate a new paths array with the new order and
1086 * attach it to the file.
1087 */
1088 new_paths = NIH_MUST (nih_alloc (parent,
1089 (sizeof (PackPath) * file->num_paths)));
1090 for (size_t i = 0; i < file->num_paths; i++)
1091 memcpy (&new_paths[new_idx[i]], &file->paths[i],
1092 sizeof (PackPath));
1093
1094 nih_unref (file->paths, parent);
1095 file->paths = new_paths;
1096
1097 return 0;
1098}