blob: 80f82eed65e8b8ce79ff7567803b8b06c7d80e1d [file] [log] [blame]
/* ureadahead
 *
 * trace.c - boot tracing
 *
 * Copyright © 2009 Canonical Ltd.
 * Author: Scott James Remnant <scott@netsplit.com>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif /* HAVE_CONFIG_H */
25
26#define _ATFILE_SOURCE
27
28
29#include <sys/select.h>
30#include <sys/mount.h>
Yunlian Jiangb2de86a2018-07-19 16:46:39 -040031#include <sys/sysmacros.h>
Scott James Remnant56686d62009-11-09 18:38:51 +000032#include <sys/types.h>
33#include <sys/mman.h>
34#include <sys/stat.h>
Yusuke Satoa2a925e2015-12-17 13:14:55 -080035#include <sys/param.h>
Scott James Remnant56686d62009-11-09 18:38:51 +000036
37#include <errno.h>
38#include <fcntl.h>
39#include <stdio.h>
40#include <signal.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44
45#include <blkid.h>
46#define NO_INLINE_FUNCS
47#include <ext2fs.h>
48
49#include <linux/fs.h>
50#include <linux/fiemap.h>
51
52#include <nih/macros.h>
53#include <nih/alloc.h>
54#include <nih/string.h>
55#include <nih/list.h>
56#include <nih/hash.h>
57#include <nih/main.h>
58#include <nih/logging.h>
59#include <nih/error.h>
60
61#include "trace.h"
62#include "pack.h"
63#include "values.h"
64#include "file.h"
65
66
/**
 * PATH_DEBUGFS:
 *
 * Conventional location at which debugfs is mounted; the tracing
 * directory is looked for underneath it first.
 **/
#define PATH_DEBUGFS "/sys/kernel/debug"

/**
 * PATH_DEBUGFS_TMP:
 *
 * Fallback mountpoint: when the tracing directory cannot be found under
 * PATH_DEBUGFS, debugfs is mounted here for the duration of the trace.
 **/
#define PATH_DEBUGFS_TMP "/var/lib/ureadahead/debugfs"

/**
 * INODE_GROUP_PRELOAD_THRESHOLD:
 *
 * An inode group is added to the pack's preload list once more than this
 * many traced paths have their inode in that group.
 **/
#define INODE_GROUP_PRELOAD_THRESHOLD 8
88
89
90/* Prototypes for static functions */
91static int read_trace (const void *parent,
92 int dfd, const char *path,
Philippe Liarde1de5de2019-07-25 07:14:17 -040093 const char *path_prefix_filter,
Yusuke Satoa2a925e2015-12-17 13:14:55 -080094 const PathPrefixOption *path_prefix,
Alan Ding14b131a2021-05-18 15:21:14 -070095 PackFile **files, size_t *num_files, int force_ssd_mode);
Scott James Remnant56686d62009-11-09 18:38:51 +000096static void fix_path (char *pathname);
97static int trace_add_path (const void *parent, const char *pathname,
Alan Ding14b131a2021-05-18 15:21:14 -070098 PackFile **files, size_t *num_files, int force_ssd_mode);
Scott James Remnant56686d62009-11-09 18:38:51 +000099static int ignore_path (const char *pathname);
100static PackFile *trace_file (const void *parent, dev_t dev,
Alan Ding14b131a2021-05-18 15:21:14 -0700101 PackFile **files, size_t *num_files, int force_ssd_mode);
Scott James Remnant56686d62009-11-09 18:38:51 +0000102static int trace_add_chunks (const void *parent,
103 PackFile *file, PackPath *path,
104 int fd, off_t size);
105static int trace_add_extents (const void *parent,
106 PackFile *file, PackPath *path,
107 int fd, off_t size,
108 off_t offset, off_t length);
109static int trace_add_groups (const void *parent, PackFile *file);
110static int trace_sort_blocks (const void *parent, PackFile *file);
111static int trace_sort_paths (const void *parent, PackFile *file);
112
113
/**
 * sig_interrupt:
 * @signum: number of the signal being delivered (unused).
 *
 * Intentionally empty signal handler.  trace() installs it for SIGTERM
 * and SIGINT around its pause()/select() sleep; the handler itself has
 * nothing to do.
 **/
static void
sig_interrupt (int signum)
{
	(void) signum;
}
118
119int
120trace (int daemonise,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800121 int timeout,
122 const char *filename_to_replace,
Philippe Liarde1de5de2019-07-25 07:14:17 -0400123 const char *pack_file,
124 const char *path_prefix_filter,
Alan Ding14b131a2021-05-18 15:21:14 -0700125 const PathPrefixOption *path_prefix,
126 int force_ssd_mode)
Scott James Remnant56686d62009-11-09 18:38:51 +0000127{
128 int dfd;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600129 FILE *fp;
Scott James Remnant56686d62009-11-09 18:38:51 +0000130 int unmount = FALSE;
131 int old_sys_open_enabled = 0;
132 int old_open_exec_enabled = 0;
133 int old_uselib_enabled = 0;
134 int old_tracing_enabled = 0;
Tim Gardner73aa2c52010-07-22 04:04:36 -0600135 int old_buffer_size_kb = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000136 struct sigaction act;
137 struct sigaction old_sigterm;
138 struct sigaction old_sigint;
139 struct timeval tv;
140 nih_local PackFile *files = NULL;
141 size_t num_files = 0;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600142 size_t num_cpus = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000143
144 /* Mount debugfs if not already mounted */
Hardik Goyal2c3a17c2019-07-08 19:16:35 -0400145 dfd = open (PATH_DEBUGFS "/tracing", O_NOFOLLOW | O_RDONLY | O_NOATIME);
Scott James Remnant56686d62009-11-09 18:38:51 +0000146 if (dfd < 0) {
147 if (errno != ENOENT)
148 nih_return_system_error (-1);
149
150 if (mount ("none", PATH_DEBUGFS_TMP, "debugfs", 0, NULL) < 0)
151 nih_return_system_error (-1);
152
Hardik Goyal2c3a17c2019-07-08 19:16:35 -0400153 dfd = open (PATH_DEBUGFS_TMP "/tracing", O_NOFOLLOW | O_RDONLY | O_NOATIME);
Scott James Remnant56686d62009-11-09 18:38:51 +0000154 if (dfd < 0) {
155 nih_error_raise_system ();
156 umount (PATH_DEBUGFS_TMP);
157 return -1;
158 }
159
160 unmount = TRUE;
161 }
162
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600163 /*
164 * Count the number of CPUs, default to 1 on error.
165 */
166 fp = fopen("/proc/cpuinfo", "r");
167 if (fp) {
168 int line_size=1024;
169 char *processor="processor";
170 char *line = malloc(line_size);
171 if (line) {
172 num_cpus = 0;
173 while (fgets(line,line_size,fp) != NULL) {
174 if (!strncmp(line,processor,strlen(processor)))
175 num_cpus++;
176 }
177 free(line);
178 nih_message("Counted %d CPUs\n",num_cpus);
179 }
180 fclose(fp);
181 }
182 if (!num_cpus)
183 num_cpus = 1;
184
Scott James Remnante30e2372010-09-20 18:34:31 +0100185 /* Enable tracing of open() syscalls */
Scott James Remnant56686d62009-11-09 18:38:51 +0000186 if (set_value (dfd, "events/fs/do_sys_open/enable",
187 TRUE, &old_sys_open_enabled) < 0)
188 goto error;
189 if (set_value (dfd, "events/fs/open_exec/enable",
190 TRUE, &old_open_exec_enabled) < 0)
191 goto error;
192 if (set_value (dfd, "events/fs/uselib/enable",
193 TRUE, &old_uselib_enabled) < 0) {
194 NihError *err;
195
196 err = nih_error_get ();
197 nih_debug ("Missing uselib tracing: %s", err->message);
198 nih_free (err);
199
200 old_uselib_enabled = -1;
201 }
Scott James Remnante30e2372010-09-20 18:34:31 +0100202 if (set_value (dfd, "buffer_size_kb", 8192/num_cpus, &old_buffer_size_kb) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000203 goto error;
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000204 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000205 TRUE, &old_tracing_enabled) < 0)
206 goto error;
207
208 if (daemonise) {
209 pid_t pid;
210
211 pid = fork ();
212 if (pid < 0) {
213 nih_error_raise_system ();
214 goto error;
215 } else if (pid > 0) {
216 _exit (0);
217 }
218 }
219
220 /* Sleep until we get signals */
221 act.sa_handler = sig_interrupt;
222 sigemptyset (&act.sa_mask);
223 act.sa_flags = 0;
224
225 sigaction (SIGTERM, &act, &old_sigterm);
226 sigaction (SIGINT, &act, &old_sigint);
227
228 if (timeout) {
229 tv.tv_sec = timeout;
230 tv.tv_usec = 0;
231
232 select (0, NULL, NULL, NULL, &tv);
233 } else {
234 pause ();
235 }
236
237 sigaction (SIGTERM, &old_sigterm, NULL);
238 sigaction (SIGINT, &old_sigint, NULL);
239
240 /* Restore previous tracing settings */
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000241 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000242 old_tracing_enabled, NULL) < 0)
243 goto error;
244 if (old_uselib_enabled >= 0)
245 if (set_value (dfd, "events/fs/uselib/enable",
246 old_uselib_enabled, NULL) < 0)
247 goto error;
248 if (set_value (dfd, "events/fs/open_exec/enable",
249 old_open_exec_enabled, NULL) < 0)
250 goto error;
251 if (set_value (dfd, "events/fs/do_sys_open/enable",
252 old_sys_open_enabled, NULL) < 0)
253 goto error;
254
255 /* Be nicer */
Scott James Remnantcc2943b2009-11-29 15:24:15 +0000256 if (nice (15))
257 ;
Scott James Remnant56686d62009-11-09 18:38:51 +0000258
259 /* Read trace log */
Philippe Liarde1de5de2019-07-25 07:14:17 -0400260 if (read_trace (NULL, dfd, "trace", path_prefix_filter, path_prefix,
Alan Ding14b131a2021-05-18 15:21:14 -0700261 &files, &num_files, force_ssd_mode) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000262 goto error;
263
Tim Gardner73aa2c52010-07-22 04:04:36 -0600264 /*
265 * Restore the trace buffer size (which has just been read) and free
266 * a bunch of memory.
267 */
268 if (set_value (dfd, "buffer_size_kb", old_buffer_size_kb, NULL) < 0)
269 goto error;
270
Scott James Remnant56686d62009-11-09 18:38:51 +0000271 /* Unmount the temporary debugfs mount if we mounted it */
272 if (close (dfd)) {
273 nih_error_raise_system ();
274 goto error;
275 }
276 if (unmount
277 && (umount (PATH_DEBUGFS_TMP) < 0)) {
278 nih_error_raise_system ();
279 goto error;
280 }
281
282 /* Write out pack files */
283 for (size_t i = 0; i < num_files; i++) {
284 nih_local char *filename = NULL;
Philippe Liarde1de5de2019-07-25 07:14:17 -0400285 if (pack_file) {
286 filename = NIH_MUST (nih_strdup (NULL, pack_file));
287 } else {
288 filename = pack_file_name_for_device (NULL,
289 files[i].dev);
290 if (! filename) {
291 NihError *err;
Scott James Remnant56686d62009-11-09 18:38:51 +0000292
Philippe Liarde1de5de2019-07-25 07:14:17 -0400293 err = nih_error_get ();
294 nih_warn ("%s", err->message);
295 nih_free (err);
Scott James Remnant56686d62009-11-09 18:38:51 +0000296
Philippe Liarde1de5de2019-07-25 07:14:17 -0400297 continue;
298 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000299
Philippe Liarde1de5de2019-07-25 07:14:17 -0400300 /* If filename_to_replace is not NULL, only write out
301 * the file and skip others.
302 */
303 if (filename_to_replace &&
304 strcmp (filename_to_replace, filename)) {
305 nih_info ("Skipping %s", filename);
306 continue;
307 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000308 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000309 nih_info ("Writing %s", filename);
310
311 /* We only need to apply additional sorting to the
312 * HDD-optimised packs, the SSD ones can read in random
313 * order quite happily.
314 *
315 * Also for HDD, generate the inode group preloading
316 * array.
317 */
318 if (files[i].rotational) {
319 trace_add_groups (files, &files[i]);
320
321 trace_sort_blocks (files, &files[i]);
322 trace_sort_paths (files, &files[i]);
323 }
324
325 write_pack (filename, &files[i]);
326
327 if (nih_log_priority < NIH_LOG_MESSAGE)
328 pack_dump (&files[i], SORT_OPEN);
329 }
330
331 return 0;
332error:
333 close (dfd);
334 if (unmount)
335 umount (PATH_DEBUGFS_TMP);
336
337 return -1;
338}
339
340
341static int
342read_trace (const void *parent,
343 int dfd,
344 const char *path,
Philippe Liarde1de5de2019-07-25 07:14:17 -0400345 const char *path_prefix_filter, /* May be null */
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800346 const PathPrefixOption *path_prefix,
Scott James Remnant56686d62009-11-09 18:38:51 +0000347 PackFile ** files,
Alan Ding14b131a2021-05-18 15:21:14 -0700348 size_t * num_files,
349 int force_ssd_mode)
Scott James Remnant56686d62009-11-09 18:38:51 +0000350{
351 int fd;
352 FILE *fp;
353 char *line;
354
355 nih_assert (path != NULL);
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800356 nih_assert (path_prefix != NULL);
Scott James Remnant56686d62009-11-09 18:38:51 +0000357 nih_assert (files != NULL);
358 nih_assert (num_files != NULL);
359
360 fd = openat (dfd, path, O_RDONLY);
361 if (fd < 0)
362 nih_return_system_error (-1);
363
364 fp = fdopen (fd, "r");
365 if (! fp) {
366 nih_error_raise_system ();
367 close (fd);
368 return -1;
369 }
370
371 while ((line = fgets_alloc (NULL, fp)) != NULL) {
372 char *ptr;
373 char *end;
374
375 ptr = strstr (line, " do_sys_open:");
376 if (! ptr)
377 ptr = strstr (line, " open_exec:");
378 if (! ptr)
379 ptr = strstr (line, " uselib:");
380 if (! ptr) {
381 nih_free (line);
382 continue;
383 }
384
385 ptr = strchr (ptr, '"');
386 if (! ptr) {
387 nih_free (line);
388 continue;
389 }
390
391 ptr++;
392
393 end = strrchr (ptr, '"');
394 if (! end) {
395 nih_free (line);
396 continue;
397 }
398
399 *end = '\0';
400
401 fix_path (ptr);
Philippe Liarde1de5de2019-07-25 07:14:17 -0400402
403 if (path_prefix_filter &&
404 strncmp (ptr, path_prefix_filter,
405 strlen (path_prefix_filter))) {
406 nih_warn ("Skipping %s due to path prefix filter", ptr);
407 continue;
408 }
409
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800410 if (path_prefix->st_dev != NODEV && ptr[0] == '/') {
411 struct stat stbuf;
412 char *rewritten = nih_sprintf (
413 line, "%s%s", path_prefix->prefix, ptr);
414 if (! lstat (rewritten, &stbuf) &&
415 stbuf.st_dev == path_prefix->st_dev) {
416 /* If |rewritten| exists on the same device as
417 * path_prefix->st_dev, record the rewritten one
418 * instead of the original path.
419 */
420 ptr = rewritten;
421 }
422 }
Alan Ding14b131a2021-05-18 15:21:14 -0700423 trace_add_path (parent, ptr, files, num_files, force_ssd_mode);
Scott James Remnant56686d62009-11-09 18:38:51 +0000424
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800425 nih_free (line); /* also frees |rewritten| */
Scott James Remnant56686d62009-11-09 18:38:51 +0000426 }
427
428 if (fclose (fp) < 0)
429 nih_return_system_error (-1);
430
431 return 0;
432}
433
/**
 * fix_path:
 * @pathname: NUL-terminated path to normalise, modified in place.
 *
 * Collapses "//" and "/./" sequences, resolves "/../" by removing the
 * preceding component (stopping at the start of the string), and strips
 * trailing slashes.  The string only ever shrinks, so no extra space is
 * needed.  A path consisting solely of slashes becomes the empty
 * string.
 *
 * Positions are tracked as offsets from @pathname rather than as
 * pointers, so that stepping "back before" the first character never
 * forms a pointer outside the buffer.
 **/
static void
fix_path (char *pathname)
{
	size_t pos = 0;

	nih_assert (pathname != NULL);

	while (pathname[pos] != '\0') {
		size_t len;

		if (pathname[pos] != '/') {
			pos++;
			continue;
		}

		/* Length of the component following this slash */
		len = strcspn (pathname + pos + 1, "/");

		/* // and /./, we shorten the string and repeat the loop
		 * looking at the new /
		 */
		if ((len == 0)
		    || ((len == 1) && (pathname[pos + 1] == '.'))) {
			/* The count includes the terminating NUL */
			memmove (pathname + pos, pathname + pos + len + 1,
				 strlen (pathname + pos) - len);
			continue;
		}

		/* /../, we shorten back to the previous / or the start
		 * of the string and repeat the loop looking at the new /
		 */
		if ((len == 2) && (pathname[pos + 1] == '.')
		    && (pathname[pos + 2] == '.')) {
			size_t root = pos;

			while (root > 0) {
				root--;
				if (pathname[root] == '/')
					break;
			}

			memmove (pathname + root, pathname + pos + len + 1,
				 strlen (pathname + pos) - len);
			pos = root;
			continue;
		}

		pos++;
	}

	/* pos is now the string length; drop any trailing slashes */
	while ((pos > 0) && (pathname[pos - 1] == '/'))
		pathname[--pos] = '\0';
}
480
481
482static int
483trace_add_path (const void *parent,
484 const char *pathname,
485 PackFile ** files,
Alan Ding14b131a2021-05-18 15:21:14 -0700486 size_t * num_files,
487 int force_ssd_mode)
Scott James Remnant56686d62009-11-09 18:38:51 +0000488{
489 static NihHash *path_hash = NULL;
490 struct stat statbuf;
491 int fd;
492 PackFile * file;
493 PackPath * path;
494 static NihHash *inode_hash = NULL;
495 nih_local char *inode_key = NULL;
496
497 nih_assert (pathname != NULL);
498 nih_assert (files != NULL);
499 nih_assert (num_files != NULL);
500
501 /* We can't really deal with relative paths since we don't know
502 * the working directory that they were opened from.
503 */
504 if (pathname[0] != '/') {
505 nih_warn ("%s: %s", pathname, _("Ignored relative path"));
506 return 0;
507 }
508
509 /* Certain paths aren't worth caching, because they're virtual or
510 * temporary filesystems and would waste pack space.
511 */
512 if (ignore_path (pathname))
513 return 0;
514
515 /* Ignore paths that won't fit in the pack; we could use PATH_MAX,
516 * but with 1000 files that'd be 4M just for the
517 * pack.
518 */
519 if (strlen (pathname) > PACK_PATH_MAX) {
520 nih_warn ("%s: %s", pathname, _("Ignored far too long path"));
521 return 0;
522 }
523
524 /* Use a hash table of paths to eliminate duplicate path names from
525 * the table since that would waste pack space (and fds).
526 */
527 if (! path_hash)
528 path_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
529
530 if (nih_hash_lookup (path_hash, pathname)) {
531 return 0;
532 } else {
533 NihListEntry *entry;
534
535 entry = NIH_MUST (nih_list_entry_new (path_hash));
536 entry->str = NIH_MUST (nih_strdup (entry, pathname));
537
538 nih_hash_add (path_hash, &entry->entry);
539 }
540
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100541 /* Make sure that we have an ordinary file
542 * This avoids us opening a fifo or socket or symlink.
Scott James Remnant56686d62009-11-09 18:38:51 +0000543 */
544 if ((lstat (pathname, &statbuf) < 0)
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100545 || (S_ISLNK (statbuf.st_mode))
Scott James Remnant56686d62009-11-09 18:38:51 +0000546 || (! S_ISREG (statbuf.st_mode)))
547 return 0;
548
549 /* Open and stat again to get the genuine details, in case it
550 * changes under us.
551 */
552 fd = open (pathname, O_RDONLY | O_NOATIME);
553 if (fd < 0) {
554 nih_warn ("%s: %s: %s", pathname,
555 _("File vanished or error reading"),
556 strerror (errno));
557 return -1;
558 }
559
560 if (fstat (fd, &statbuf) < 0) {
561 nih_warn ("%s: %s: %s", pathname,
562 _("Error retrieving file stat"),
563 strerror (errno));
564 close (fd);
565 return -1;
566 }
567
568 /* Double-check that it's really still a file */
569 if (! S_ISREG (statbuf.st_mode)) {
570 close (fd);
571 return 0;
572 }
573
574 /* Some people think it's clever to split their filesystem across
575 * multiple devices, so we need to generate a different pack file
576 * for each device.
577 *
578 * Lookup file based on the dev_t, potentially creating a new
579 * pack file in the array.
580 */
Alan Ding14b131a2021-05-18 15:21:14 -0700581 file = trace_file (parent, statbuf.st_dev, files, num_files, force_ssd_mode);
Scott James Remnant56686d62009-11-09 18:38:51 +0000582
583 /* Grow the PackPath array and fill in the details for the new
584 * path.
585 */
586 file->paths = NIH_MUST (nih_realloc (file->paths, *files,
587 (sizeof (PackPath)
588 * (file->num_paths + 1))));
589
590 path = &file->paths[file->num_paths++];
591 memset (path, 0, sizeof (PackPath));
592
593 path->group = -1;
594 path->ino = statbuf.st_ino;
595
596 strncpy (path->path, pathname, PACK_PATH_MAX);
597 path->path[PACK_PATH_MAX] = '\0';
598
599 /* The paths array contains each unique path opened, but these
600 * might be symbolic or hard links to the same underlying files
601 * and we don't want to read the same block more than once.
602 *
603 * Use a hash table of dev_t/ino_t pairs to make sure we only
604 * read the blocks of an actual file the first time.
605 */
606 if (! inode_hash)
607 inode_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
608
609 inode_key = NIH_MUST (nih_sprintf (NULL, "%llu:%llu",
610 (unsigned long long)statbuf.st_dev,
611 (unsigned long long)statbuf.st_ino));
612
613 if (nih_hash_lookup (inode_hash, inode_key)) {
614 close (fd);
615 return 0;
616 } else {
617 NihListEntry *entry;
618
619 entry = NIH_MUST (nih_list_entry_new (inode_hash));
620 entry->str = inode_key;
621 nih_ref (entry->str, entry);
622
623 nih_hash_add (inode_hash, &entry->entry);
624 }
625
626 /* There's also no point reading zero byte files, since they
627 * won't have any blocks (and we can't mmap zero bytes anyway).
628 */
629 if (! statbuf.st_size) {
630 close (fd);
631 return 0;
632 }
633
634 /* Now read the in-memory chunks of this file and add those to
635 * the pack file too.
636 */
637 trace_add_chunks (*files, file, path, fd, statbuf.st_size);
638 close (fd);
639
640 return 0;
641}
642
643static int
644ignore_path (const char *pathname)
645{
646 nih_assert (pathname != NULL);
647
648 if (! strncmp (pathname, "/proc/", 6))
649 return TRUE;
650 if (! strncmp (pathname, "/sys/", 5))
651 return TRUE;
652 if (! strncmp (pathname, "/dev/", 5))
653 return TRUE;
654 if (! strncmp (pathname, "/tmp/", 5))
655 return TRUE;
Steve Langasek2c698a12012-02-03 15:27:29 -0800656 if (! strncmp (pathname, "/run/", 5))
657 return TRUE;
Scott James Remnant56686d62009-11-09 18:38:51 +0000658 if (! strncmp (pathname, "/var/run/", 9))
659 return TRUE;
Bryan Fullerton96c991b2013-03-12 15:06:41 +0000660 if (! strncmp (pathname, "/var/log/", 9))
661 return TRUE;
Scott James Remnant56686d62009-11-09 18:38:51 +0000662 if (! strncmp (pathname, "/var/lock/", 10))
663 return TRUE;
664
665 return FALSE;
666}
667
668
669static PackFile *
670trace_file (const void *parent,
671 dev_t dev,
672 PackFile ** files,
Alan Ding14b131a2021-05-18 15:21:14 -0700673 size_t * num_files,
674 int force_ssd_mode)
Scott James Remnant56686d62009-11-09 18:38:51 +0000675{
676 nih_local char *filename = NULL;
677 int rotational;
678 PackFile * file;
679
680 nih_assert (files != NULL);
681 nih_assert (num_files != NULL);
682
683 /* Return any existing file structure for this device */
684 for (size_t i = 0; i < *num_files; i++)
685 if ((*files)[i].dev == dev)
686 return &(*files)[i];
687
Alan Ding14b131a2021-05-18 15:21:14 -0700688 if (force_ssd_mode) {
689 rotational = FALSE;
690 } else {
691 /* Query sysfs to see whether this disk is rotational; this
692 * obviously won't work for virtual devices and the like, so
693 * default to TRUE for now.
Yusuke Satofc571f12016-04-30 06:17:08 -0400694 */
Yusuke Satofc571f12016-04-30 06:17:08 -0400695 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:%d/queue/rotational",
Alan Ding14b131a2021-05-18 15:21:14 -0700696 major (dev), minor (dev)));
697 if (access (filename, R_OK) < 0) {
698 /* For devices managed by the scsi stack, the minor device number has to be
699 * masked to find the queue/rotational file.
700 */
701 nih_free (filename);
702 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:%d/queue/rotational",
703 major (dev), minor (dev) & 0xffff0));
704 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000705
Alan Ding14b131a2021-05-18 15:21:14 -0700706 if (get_value (AT_FDCWD, filename, &rotational) < 0) {
707 NihError *err;
Scott James Remnant56686d62009-11-09 18:38:51 +0000708
Alan Ding14b131a2021-05-18 15:21:14 -0700709 err = nih_error_get ();
710 nih_warn (_("Unable to obtain rotationalness for device %u:%u: %s"),
711 major (dev), minor (dev), err->message);
712 nih_free (err);
Scott James Remnant56686d62009-11-09 18:38:51 +0000713
Alan Ding14b131a2021-05-18 15:21:14 -0700714 rotational = TRUE;
715 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000716 }
717
718 /* Grow the PackFile array and fill in the details for the new
719 * file.
720 */
721 *files = NIH_MUST (nih_realloc (*files, parent,
722 (sizeof (PackFile) * (*num_files + 1))));
723
724 file = &(*files)[(*num_files)++];
725 memset (file, 0, sizeof (PackFile));
726
727 file->dev = dev;
728 file->rotational = rotational;
729 file->num_paths = 0;
730 file->paths = NULL;
731 file->num_blocks = 0;
732 file->blocks = NULL;
733
734 return file;
735}
736
737
738static int
739trace_add_chunks (const void *parent,
740 PackFile * file,
741 PackPath * path,
742 int fd,
743 off_t size)
744{
745 static int page_size = -1;
746 void * buf;
747 off_t num_pages;
748 nih_local unsigned char *vec = NULL;
749
750 nih_assert (file != NULL);
751 nih_assert (path != NULL);
752 nih_assert (fd >= 0);
753 nih_assert (size > 0);
754
755 if (page_size < 0)
756 page_size = sysconf (_SC_PAGESIZE);
757
758 /* Map the file into memory */
759 buf = mmap (NULL, size, PROT_READ, MAP_SHARED, fd, 0);
760 if (buf == MAP_FAILED) {
761 nih_warn ("%s: %s: %s", path->path,
762 _("Error mapping into memory"),
763 strerror (errno));
764 return -1;
765 }
766
767 /* Grab the core memory map of the file */
768 num_pages = (size - 1) / page_size + 1;
769 vec = NIH_MUST (nih_alloc (NULL, num_pages));
770 memset (vec, 0, num_pages);
771
772 if (mincore (buf, size, vec) < 0) {
773 nih_warn ("%s: %s: %s", path->path,
774 _("Error retrieving page cache info"),
775 strerror (errno));
776 munmap (buf, size);
777 return -1;
778 }
779
780 /* Clean up */
781 if (munmap (buf, size) < 0) {
782 nih_warn ("%s: %s: %s", path->path,
783 _("Error unmapping from memory"),
784 strerror (errno));
785 return -1;
786 }
787
788
789 /* Now we can figure out which contiguous bits of the file are
790 * in core memory.
791 */
792 for (off_t i = 0; i < num_pages; i++) {
793 off_t offset;
794 off_t length;
795
796 if (! vec[i])
797 continue;
798
799 offset = i * page_size;
800 length = page_size;
801
802 while (((i + 1) < num_pages) && vec[i + 1]) {
803 length += page_size;
804 i++;
805 }
806
807 /* The rotational crowd need this split down further into
808 * on-disk extents, the non-rotational folks can just use
809 * the chunks data.
810 */
811 if (file->rotational) {
812 trace_add_extents (parent, file, path, fd, size,
813 offset, length);
814 } else {
815 PackBlock *block;
816
817 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
818 (sizeof (PackBlock)
819 * (file->num_blocks + 1))));
820
821 block = &file->blocks[file->num_blocks++];
822 memset (block, 0, sizeof (PackBlock));
823
824 block->pathidx = file->num_paths - 1;
825 block->offset = offset;
826 block->length = length;
827 block->physical = -1;
828 }
829 }
830
831 return 0;
832}
833
834struct fiemap *
835get_fiemap (const void *parent,
836 int fd,
837 off_t offset,
838 off_t length)
839{
840 struct fiemap *fiemap;
841
842 nih_assert (fd >= 0);
843
844 fiemap = NIH_MUST (nih_new (parent, struct fiemap));
845 memset (fiemap, 0, sizeof (struct fiemap));
846
847 fiemap->fm_start = offset;
848 fiemap->fm_length = length;
849 fiemap->fm_flags = 0;
850
851 do {
852 /* Query the current number of extents */
853 fiemap->fm_mapped_extents = 0;
854 fiemap->fm_extent_count = 0;
855
856 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
857 nih_error_raise_system ();
858 nih_free (fiemap);
859 return NULL;
860 }
861
862 /* Always allow room for one extra over what we were told,
863 * so we know if they changed under us.
864 */
865 fiemap = NIH_MUST (nih_realloc (fiemap, parent,
866 (sizeof (struct fiemap)
867 + (sizeof (struct fiemap_extent)
868 * (fiemap->fm_mapped_extents + 1)))));
869 fiemap->fm_extent_count = fiemap->fm_mapped_extents + 1;
870 fiemap->fm_mapped_extents = 0;
871
872 memset (fiemap->fm_extents, 0, (sizeof (struct fiemap_extent)
873 * fiemap->fm_extent_count));
874
875 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
876 nih_error_raise_system ();
877 nih_free (fiemap);
878 return NULL;
879 }
880 } while (fiemap->fm_mapped_extents
881 && (fiemap->fm_mapped_extents >= fiemap->fm_extent_count));
882
883 return fiemap;
884}
885
886static int
887trace_add_extents (const void *parent,
888 PackFile * file,
889 PackPath * path,
890 int fd,
891 off_t size,
892 off_t offset,
893 off_t length)
894{
895 nih_local struct fiemap *fiemap = NULL;
896
897 nih_assert (file != NULL);
898 nih_assert (path != NULL);
899 nih_assert (fd >= 0);
900 nih_assert (size > 0);
901
902 /* Get the extents map for this chunk, then iterate the extents
903 * and put those in the pack instead of the chunks.
904 */
905 fiemap = get_fiemap (NULL, fd, offset, length);
906 if (! fiemap) {
907 NihError *err;
908
909 err = nih_error_get ();
910 nih_warn ("%s: %s: %s", path->path,
911 _("Error retrieving chunk extents"),
912 err->message);
913 nih_free (err);
914
915 return -1;
916 }
917
918 for (__u32 j = 0; j < fiemap->fm_mapped_extents; j++) {
919 PackBlock *block;
920 off_t start;
921 off_t end;
922
923 if (fiemap->fm_extents[j].fe_flags & FIEMAP_EXTENT_UNKNOWN)
924 continue;
925
926 /* Work out the intersection of the chunk and extent */
927 start = nih_max (fiemap->fm_start,
928 fiemap->fm_extents[j].fe_logical);
929 end = nih_min ((fiemap->fm_start + fiemap->fm_length),
930 (fiemap->fm_extents[j].fe_logical
931 + fiemap->fm_extents[j].fe_length));
932
933 /* Grow the blocks array to add the extent */
934 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
935 (sizeof (PackBlock)
936 * (file->num_blocks + 1))));
937
938 block = &file->blocks[file->num_blocks++];
939 memset (block, 0, sizeof (PackBlock));
940
941 block->pathidx = file->num_paths - 1;
942 block->offset = start;
943 block->length = end - start;
944 block->physical = (fiemap->fm_extents[j].fe_physical
945 + (start - fiemap->fm_extents[j].fe_logical));
946 }
947
948 return 0;
949}
950
951static int
952trace_add_groups (const void *parent,
953 PackFile * file)
954{
955 const char *devname;
956 ext2_filsys fs = NULL;
957
958 nih_assert (file != NULL);
959
960 devname = blkid_devno_to_devname (file->dev);
961 if (devname
962 && (! ext2fs_open (devname, 0, 0, 0, unix_io_manager, &fs))) {
963 nih_assert (fs != NULL);
964 size_t num_groups = 0;
965 nih_local size_t *num_inodes = NULL;
966 size_t mean = 0;
967 size_t hits = 0;
968
969 nih_assert (fs != NULL);
970
971 /* Calculate the number of inode groups on this filesystem */
972 num_groups = ((fs->super->s_blocks_count - 1)
973 / fs->super->s_blocks_per_group) + 1;
974
975 /* Fill in the pack path's group member, and count the
976 * number of inodes in each group.
977 */
978 num_inodes = NIH_MUST (nih_alloc (NULL, (sizeof (size_t)
979 * num_groups)));
980 memset (num_inodes, 0, sizeof (size_t) * num_groups);
981
982 for (size_t i = 0; i < file->num_paths; i++) {
983 file->paths[i].group = ext2fs_group_of_ino (fs, file->paths[i].ino);
984 num_inodes[file->paths[i].group]++;
985 }
986
987 /* Iterate the groups and add any group that exceeds the
988 * inode preload threshold.
989 */
990 for (size_t i = 0; i < num_groups; i++) {
991 mean += num_inodes[i];
992 if (num_inodes[i] > INODE_GROUP_PRELOAD_THRESHOLD) {
993 file->groups = NIH_MUST (nih_realloc (file->groups, parent,
994 (sizeof (int)
995 * (file->num_groups + 1))));
996 file->groups[file->num_groups++] = i;
997 hits++;
998 }
999 }
1000
1001 mean /= num_groups;
1002
1003 nih_debug ("%zu inode groups, mean %zu inodes per group, %zu hits",
1004 num_groups, mean, hits);
1005
1006 ext2fs_close (fs);
1007 }
1008
1009 return 0;
1010}
1011
1012
1013static int
1014block_compar (const void *a,
1015 const void *b)
1016{
1017 const PackBlock *block_a = a;
1018 const PackBlock *block_b = b;
1019
1020 nih_assert (block_a != NULL);
1021 nih_assert (block_b != NULL);
1022
1023 if (block_a->physical < block_b->physical) {
1024 return -1;
1025 } else if (block_a->physical > block_b->physical) {
1026 return 1;
1027 } else {
1028 return 0;
1029 }
1030}
1031
1032static int
1033trace_sort_blocks (const void *parent,
1034 PackFile * file)
1035{
1036 nih_assert (file != NULL);
1037
1038 /* Sort the blocks array by physical location, since these are
1039 * read in a separate pass to opening files, there's no reason
1040 * to consider which path each block is in - and thus resulting
1041 * in a linear disk read.
1042 */
1043 qsort (file->blocks, file->num_blocks, sizeof (PackBlock),
1044 block_compar);
1045
1046 return 0;
1047}
1048
1049static int
1050path_compar (const void *a,
1051 const void *b)
1052{
1053 const PackPath * const *path_a = a;
1054 const PackPath * const *path_b = b;
1055
1056 nih_assert (path_a != NULL);
1057 nih_assert (path_b != NULL);
1058
1059 if ((*path_a)->group < (*path_b)->group) {
1060 return -1;
1061 } else if ((*path_a)->group > (*path_b)->group) {
1062 return 1;
1063 } else if ((*path_a)->ino < (*path_b)->ino) {
1064 return -1;
1065 } else if ((*path_b)->ino > (*path_b)->ino) {
1066 return 1;
1067 } else {
1068 return strcmp ((*path_a)->path, (*path_b)->path);
1069 }
1070}
1071
1072static int
1073trace_sort_paths (const void *parent,
1074 PackFile * file)
1075{
1076 nih_local PackPath **paths = NULL;
1077 nih_local size_t * new_idx = NULL;
1078 PackPath * new_paths;
1079
1080 nih_assert (file != NULL);
1081
1082 /* Sort the paths array by ext2fs inode group, ino_t then path.
1083 *
1084 * Mucking around with things like the physical locations of
1085 * first on-disk blocks of the dentry and stuff didn't work out
1086 * so well, sorting by path was better, but this seems the best.
1087 * (it looks good on blktrace too)
1088 */
1089 paths = NIH_MUST (nih_alloc (NULL, (sizeof (PackPath *)
1090 * file->num_paths)));
1091
1092 for (size_t i = 0; i < file->num_paths; i++)
1093 paths[i] = &file->paths[i];
1094
1095 qsort (paths, file->num_paths, sizeof (PackPath *),
1096 path_compar);
1097
1098 /* Calculate the new indexes of each path element in the old
1099 * array, and then update the block array's path indexes to
1100 * match.
1101 */
1102 new_idx = NIH_MUST (nih_alloc (NULL,
1103 (sizeof (size_t) * file->num_paths)));
1104 for (size_t i = 0; i < file->num_paths; i++)
1105 new_idx[paths[i] - file->paths] = i;
1106
1107 for (size_t i = 0; i < file->num_blocks; i++)
1108 file->blocks[i].pathidx = new_idx[file->blocks[i].pathidx];
1109
1110 /* Finally generate a new paths array with the new order and
1111 * attach it to the file.
1112 */
1113 new_paths = NIH_MUST (nih_alloc (parent,
1114 (sizeof (PackPath) * file->num_paths)));
1115 for (size_t i = 0; i < file->num_paths; i++)
1116 memcpy (&new_paths[new_idx[i]], &file->paths[i],
1117 sizeof (PackPath));
1118
1119 nih_unref (file->paths, parent);
1120 file->paths = new_paths;
1121
1122 return 0;
1123}