blob: 2918da2ff26fe2a3fc3642836e82f262760e0098 [file] [log] [blame]
Scott James Remnant56686d62009-11-09 18:38:51 +00001/* ureadahead
2 *
3 * trace.c - boot tracing
4 *
5 * Copyright © 2009 Canonical Ltd.
6 * Author: Scott James Remnant <scott@netsplit.com>.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2, as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif /* HAVE_CONFIG_H */
25
26#define _ATFILE_SOURCE
27
28
29#include <sys/select.h>
30#include <sys/mount.h>
Yunlian Jiangb2de86a2018-07-19 16:46:39 -040031#include <sys/sysmacros.h>
Scott James Remnant56686d62009-11-09 18:38:51 +000032#include <sys/types.h>
33#include <sys/mman.h>
34#include <sys/stat.h>
Yusuke Satoa2a925e2015-12-17 13:14:55 -080035#include <sys/param.h>
Scott James Remnant56686d62009-11-09 18:38:51 +000036
37#include <errno.h>
38#include <fcntl.h>
39#include <stdio.h>
40#include <signal.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44
45#include <blkid.h>
46#define NO_INLINE_FUNCS
47#include <ext2fs.h>
48
49#include <linux/fs.h>
50#include <linux/fiemap.h>
51
52#include <nih/macros.h>
53#include <nih/alloc.h>
54#include <nih/string.h>
55#include <nih/list.h>
56#include <nih/hash.h>
57#include <nih/main.h>
58#include <nih/logging.h>
59#include <nih/error.h>
60
61#include "trace.h"
62#include "pack.h"
63#include "values.h"
64#include "file.h"
65
66
/**
 * PATH_DEBUGFS:
 *
 * Usual mountpoint of debugfs; trace() looks for the "tracing" directory
 * beneath it when PATH_TRACEFS cannot be opened.
 **/
#define PATH_DEBUGFS "/sys/kernel/debug"

/**
 * PATH_DEBUGFS_TMP:
 *
 * Temporary mountpoint on which trace() mounts debugfs itself when the
 * tracing directory could not be found at either of the usual places.
 **/
#define PATH_DEBUGFS_TMP "/var/lib/ureadahead/debugfs"

/**
 * PATH_TRACEFS:
 *
 * Usual mountpoint of tracefs (available since kernel 4.1); this is the
 * first location trace() tries.
 **/
#define PATH_TRACEFS "/sys/kernel/tracing"

/**
 * INODE_GROUP_PRELOAD_THRESHOLD:
 *
 * An inode group is recorded for preloading once more than this many of
 * the traced paths have their inode in that group.
 **/
#define INODE_GROUP_PRELOAD_THRESHOLD 8
95
96
97/* Prototypes for static functions */
98static int read_trace (const void *parent,
99 int dfd, const char *path,
Philippe Liarde1de5de2019-07-25 07:14:17 -0400100 const char *path_prefix_filter,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800101 const PathPrefixOption *path_prefix,
Alan Ding14b131a2021-05-18 15:21:14 -0700102 PackFile **files, size_t *num_files, int force_ssd_mode);
Scott James Remnant56686d62009-11-09 18:38:51 +0000103static void fix_path (char *pathname);
104static int trace_add_path (const void *parent, const char *pathname,
Alan Ding14b131a2021-05-18 15:21:14 -0700105 PackFile **files, size_t *num_files, int force_ssd_mode);
Scott James Remnant56686d62009-11-09 18:38:51 +0000106static int ignore_path (const char *pathname);
107static PackFile *trace_file (const void *parent, dev_t dev,
Alan Ding14b131a2021-05-18 15:21:14 -0700108 PackFile **files, size_t *num_files, int force_ssd_mode);
Scott James Remnant56686d62009-11-09 18:38:51 +0000109static int trace_add_chunks (const void *parent,
110 PackFile *file, PackPath *path,
111 int fd, off_t size);
112static int trace_add_extents (const void *parent,
113 PackFile *file, PackPath *path,
114 int fd, off_t size,
115 off_t offset, off_t length);
116static int trace_add_groups (const void *parent, PackFile *file);
117static int trace_sort_blocks (const void *parent, PackFile *file);
118static int trace_sort_paths (const void *parent, PackFile *file);
119
120
/**
 * sig_interrupt:
 * @signum: number of the signal that was delivered (ignored).
 *
 * Intentionally empty signal handler.  trace() installs it for SIGTERM
 * and SIGINT while it waits, so that delivery of either signal merely
 * interrupts the wait.
 **/
static void
sig_interrupt (int signum)
{
	(void) signum;
}
125
126int
127trace (int daemonise,
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800128 int timeout,
129 const char *filename_to_replace,
Philippe Liarde1de5de2019-07-25 07:14:17 -0400130 const char *pack_file,
131 const char *path_prefix_filter,
Alan Ding14b131a2021-05-18 15:21:14 -0700132 const PathPrefixOption *path_prefix,
133 int force_ssd_mode)
Scott James Remnant56686d62009-11-09 18:38:51 +0000134{
135 int dfd;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600136 FILE *fp;
Scott James Remnant56686d62009-11-09 18:38:51 +0000137 int unmount = FALSE;
138 int old_sys_open_enabled = 0;
139 int old_open_exec_enabled = 0;
140 int old_uselib_enabled = 0;
141 int old_tracing_enabled = 0;
Tim Gardner73aa2c52010-07-22 04:04:36 -0600142 int old_buffer_size_kb = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000143 struct sigaction act;
144 struct sigaction old_sigterm;
145 struct sigaction old_sigint;
146 struct timeval tv;
147 nih_local PackFile *files = NULL;
148 size_t num_files = 0;
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600149 size_t num_cpus = 0;
Scott James Remnant56686d62009-11-09 18:38:51 +0000150
Brian Norrise004b812022-07-28 12:06:12 -0700151 dfd = open (PATH_TRACEFS, O_NOFOLLOW | O_RDONLY | O_NOATIME);
152 if (dfd < 0) {
153 if (errno != ENOENT)
154 nih_return_system_error (-1);
155
156 dfd = open (PATH_DEBUGFS "/tracing", O_NOFOLLOW | O_RDONLY | O_NOATIME);
157 }
158 /* Mount debugfs (and implicitly tracefs) if not already mounted */
Scott James Remnant56686d62009-11-09 18:38:51 +0000159 if (dfd < 0) {
160 if (errno != ENOENT)
161 nih_return_system_error (-1);
162
163 if (mount ("none", PATH_DEBUGFS_TMP, "debugfs", 0, NULL) < 0)
164 nih_return_system_error (-1);
165
Hardik Goyal2c3a17c2019-07-08 19:16:35 -0400166 dfd = open (PATH_DEBUGFS_TMP "/tracing", O_NOFOLLOW | O_RDONLY | O_NOATIME);
Scott James Remnant56686d62009-11-09 18:38:51 +0000167 if (dfd < 0) {
168 nih_error_raise_system ();
169 umount (PATH_DEBUGFS_TMP);
170 return -1;
171 }
172
173 unmount = TRUE;
174 }
175
Tim Gardner6fd9eef2010-08-20 12:19:31 -0600176 /*
177 * Count the number of CPUs, default to 1 on error.
178 */
179 fp = fopen("/proc/cpuinfo", "r");
180 if (fp) {
181 int line_size=1024;
182 char *processor="processor";
183 char *line = malloc(line_size);
184 if (line) {
185 num_cpus = 0;
186 while (fgets(line,line_size,fp) != NULL) {
187 if (!strncmp(line,processor,strlen(processor)))
188 num_cpus++;
189 }
190 free(line);
191 nih_message("Counted %d CPUs\n",num_cpus);
192 }
193 fclose(fp);
194 }
195 if (!num_cpus)
196 num_cpus = 1;
197
Scott James Remnante30e2372010-09-20 18:34:31 +0100198 /* Enable tracing of open() syscalls */
Scott James Remnant56686d62009-11-09 18:38:51 +0000199 if (set_value (dfd, "events/fs/do_sys_open/enable",
200 TRUE, &old_sys_open_enabled) < 0)
201 goto error;
202 if (set_value (dfd, "events/fs/open_exec/enable",
203 TRUE, &old_open_exec_enabled) < 0)
204 goto error;
205 if (set_value (dfd, "events/fs/uselib/enable",
206 TRUE, &old_uselib_enabled) < 0) {
207 NihError *err;
208
209 err = nih_error_get ();
210 nih_debug ("Missing uselib tracing: %s", err->message);
211 nih_free (err);
212
213 old_uselib_enabled = -1;
214 }
Scott James Remnante30e2372010-09-20 18:34:31 +0100215 if (set_value (dfd, "buffer_size_kb", 8192/num_cpus, &old_buffer_size_kb) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000216 goto error;
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000217 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000218 TRUE, &old_tracing_enabled) < 0)
219 goto error;
220
221 if (daemonise) {
222 pid_t pid;
223
224 pid = fork ();
225 if (pid < 0) {
226 nih_error_raise_system ();
227 goto error;
228 } else if (pid > 0) {
229 _exit (0);
230 }
231 }
232
233 /* Sleep until we get signals */
234 act.sa_handler = sig_interrupt;
235 sigemptyset (&act.sa_mask);
236 act.sa_flags = 0;
237
238 sigaction (SIGTERM, &act, &old_sigterm);
239 sigaction (SIGINT, &act, &old_sigint);
240
241 if (timeout) {
242 tv.tv_sec = timeout;
243 tv.tv_usec = 0;
244
245 select (0, NULL, NULL, NULL, &tv);
246 } else {
247 pause ();
248 }
249
250 sigaction (SIGTERM, &old_sigterm, NULL);
251 sigaction (SIGINT, &old_sigint, NULL);
252
253 /* Restore previous tracing settings */
Andy Whitcroftd0e28e82013-01-11 12:05:17 +0000254 if (set_value (dfd, "tracing_on",
Scott James Remnant56686d62009-11-09 18:38:51 +0000255 old_tracing_enabled, NULL) < 0)
256 goto error;
257 if (old_uselib_enabled >= 0)
258 if (set_value (dfd, "events/fs/uselib/enable",
259 old_uselib_enabled, NULL) < 0)
260 goto error;
261 if (set_value (dfd, "events/fs/open_exec/enable",
262 old_open_exec_enabled, NULL) < 0)
263 goto error;
264 if (set_value (dfd, "events/fs/do_sys_open/enable",
265 old_sys_open_enabled, NULL) < 0)
266 goto error;
267
268 /* Be nicer */
Scott James Remnantcc2943b2009-11-29 15:24:15 +0000269 if (nice (15))
270 ;
Scott James Remnant56686d62009-11-09 18:38:51 +0000271
272 /* Read trace log */
Philippe Liarde1de5de2019-07-25 07:14:17 -0400273 if (read_trace (NULL, dfd, "trace", path_prefix_filter, path_prefix,
Alan Ding14b131a2021-05-18 15:21:14 -0700274 &files, &num_files, force_ssd_mode) < 0)
Scott James Remnant56686d62009-11-09 18:38:51 +0000275 goto error;
276
Tim Gardner73aa2c52010-07-22 04:04:36 -0600277 /*
278 * Restore the trace buffer size (which has just been read) and free
279 * a bunch of memory.
280 */
281 if (set_value (dfd, "buffer_size_kb", old_buffer_size_kb, NULL) < 0)
282 goto error;
283
Scott James Remnant56686d62009-11-09 18:38:51 +0000284 /* Unmount the temporary debugfs mount if we mounted it */
285 if (close (dfd)) {
286 nih_error_raise_system ();
287 goto error;
288 }
289 if (unmount
290 && (umount (PATH_DEBUGFS_TMP) < 0)) {
291 nih_error_raise_system ();
292 goto error;
293 }
294
295 /* Write out pack files */
296 for (size_t i = 0; i < num_files; i++) {
297 nih_local char *filename = NULL;
Philippe Liarde1de5de2019-07-25 07:14:17 -0400298 if (pack_file) {
299 filename = NIH_MUST (nih_strdup (NULL, pack_file));
300 } else {
301 filename = pack_file_name_for_device (NULL,
302 files[i].dev);
303 if (! filename) {
304 NihError *err;
Scott James Remnant56686d62009-11-09 18:38:51 +0000305
Philippe Liarde1de5de2019-07-25 07:14:17 -0400306 err = nih_error_get ();
307 nih_warn ("%s", err->message);
308 nih_free (err);
Scott James Remnant56686d62009-11-09 18:38:51 +0000309
Philippe Liarde1de5de2019-07-25 07:14:17 -0400310 continue;
311 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000312
Philippe Liarde1de5de2019-07-25 07:14:17 -0400313 /* If filename_to_replace is not NULL, only write out
314 * the file and skip others.
315 */
316 if (filename_to_replace &&
317 strcmp (filename_to_replace, filename)) {
318 nih_info ("Skipping %s", filename);
319 continue;
320 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000321 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000322 nih_info ("Writing %s", filename);
323
324 /* We only need to apply additional sorting to the
325 * HDD-optimised packs, the SSD ones can read in random
326 * order quite happily.
327 *
328 * Also for HDD, generate the inode group preloading
329 * array.
330 */
331 if (files[i].rotational) {
332 trace_add_groups (files, &files[i]);
333
334 trace_sort_blocks (files, &files[i]);
335 trace_sort_paths (files, &files[i]);
336 }
337
338 write_pack (filename, &files[i]);
339
340 if (nih_log_priority < NIH_LOG_MESSAGE)
341 pack_dump (&files[i], SORT_OPEN);
342 }
343
344 return 0;
345error:
346 close (dfd);
347 if (unmount)
348 umount (PATH_DEBUGFS_TMP);
349
350 return -1;
351}
352
353
354static int
355read_trace (const void *parent,
356 int dfd,
357 const char *path,
Philippe Liarde1de5de2019-07-25 07:14:17 -0400358 const char *path_prefix_filter, /* May be null */
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800359 const PathPrefixOption *path_prefix,
Scott James Remnant56686d62009-11-09 18:38:51 +0000360 PackFile ** files,
Alan Ding14b131a2021-05-18 15:21:14 -0700361 size_t * num_files,
362 int force_ssd_mode)
Scott James Remnant56686d62009-11-09 18:38:51 +0000363{
364 int fd;
365 FILE *fp;
366 char *line;
367
368 nih_assert (path != NULL);
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800369 nih_assert (path_prefix != NULL);
Scott James Remnant56686d62009-11-09 18:38:51 +0000370 nih_assert (files != NULL);
371 nih_assert (num_files != NULL);
372
373 fd = openat (dfd, path, O_RDONLY);
374 if (fd < 0)
375 nih_return_system_error (-1);
376
377 fp = fdopen (fd, "r");
378 if (! fp) {
379 nih_error_raise_system ();
380 close (fd);
381 return -1;
382 }
383
384 while ((line = fgets_alloc (NULL, fp)) != NULL) {
385 char *ptr;
386 char *end;
387
388 ptr = strstr (line, " do_sys_open:");
389 if (! ptr)
390 ptr = strstr (line, " open_exec:");
391 if (! ptr)
392 ptr = strstr (line, " uselib:");
393 if (! ptr) {
394 nih_free (line);
395 continue;
396 }
397
398 ptr = strchr (ptr, '"');
399 if (! ptr) {
400 nih_free (line);
401 continue;
402 }
403
404 ptr++;
405
406 end = strrchr (ptr, '"');
407 if (! end) {
408 nih_free (line);
409 continue;
410 }
411
412 *end = '\0';
413
414 fix_path (ptr);
Philippe Liarde1de5de2019-07-25 07:14:17 -0400415
416 if (path_prefix_filter &&
417 strncmp (ptr, path_prefix_filter,
418 strlen (path_prefix_filter))) {
419 nih_warn ("Skipping %s due to path prefix filter", ptr);
420 continue;
421 }
422
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800423 if (path_prefix->st_dev != NODEV && ptr[0] == '/') {
424 struct stat stbuf;
425 char *rewritten = nih_sprintf (
426 line, "%s%s", path_prefix->prefix, ptr);
427 if (! lstat (rewritten, &stbuf) &&
428 stbuf.st_dev == path_prefix->st_dev) {
429 /* If |rewritten| exists on the same device as
430 * path_prefix->st_dev, record the rewritten one
431 * instead of the original path.
432 */
433 ptr = rewritten;
434 }
435 }
Alan Ding14b131a2021-05-18 15:21:14 -0700436 trace_add_path (parent, ptr, files, num_files, force_ssd_mode);
Scott James Remnant56686d62009-11-09 18:38:51 +0000437
Yusuke Satoa2a925e2015-12-17 13:14:55 -0800438 nih_free (line); /* also frees |rewritten| */
Scott James Remnant56686d62009-11-09 18:38:51 +0000439 }
440
441 if (fclose (fp) < 0)
442 nih_return_system_error (-1);
443
444 return 0;
445}
446
/**
 * fix_path:
 * @pathname: NUL-terminated path, modified in place.
 *
 * Normalises @pathname without consulting the filesystem: empty ("//")
 * and "." components are removed, a ".." component is removed together
 * with the component before it (or everything back to the start of the
 * string if there is none), and trailing slashes are stripped.  The
 * result is never longer than the input.
 *
 * Positions are tracked as indexes rather than pointers so that no
 * pointer before the start of @pathname is ever formed.
 **/
static void
fix_path (char *pathname)
{
	size_t pos = 0;

	nih_assert (pathname != NULL);

	while (pathname[pos]) {
		char  *cur = pathname + pos;
		size_t seg;

		if (*cur != '/') {
			pos++;
			continue;
		}

		/* Length of the component following this slash */
		seg = strcspn (cur + 1, "/");

		/* // and /./, we shorten the string and look again at
		 * whatever now sits at this position
		 */
		if ((seg == 0) || ((seg == 1) && (cur[1] == '.'))) {
			memmove (cur, cur + seg + 1, strlen (cur) - seg);
			continue;
		}

		/* /../, we shorten back to the previous / or the start
		 * of the string and look again from there
		 */
		if ((seg == 2) && (cur[1] == '.') && (cur[2] == '.')) {
			size_t root = pos;

			while ((root > 0) && (pathname[root - 1] != '/'))
				root--;
			if (root > 0)
				root--;

			memmove (pathname + root, cur + seg + 1,
				 strlen (cur) - seg);
			pos = root;
			continue;
		}

		pos++;
	}

	/* pos now indexes the terminator; strip any trailing slashes */
	while ((pos > 0) && (pathname[--pos] == '/'))
		pathname[pos] = '\0';
}
493
494
495static int
496trace_add_path (const void *parent,
497 const char *pathname,
498 PackFile ** files,
Alan Ding14b131a2021-05-18 15:21:14 -0700499 size_t * num_files,
500 int force_ssd_mode)
Scott James Remnant56686d62009-11-09 18:38:51 +0000501{
502 static NihHash *path_hash = NULL;
503 struct stat statbuf;
504 int fd;
505 PackFile * file;
506 PackPath * path;
507 static NihHash *inode_hash = NULL;
508 nih_local char *inode_key = NULL;
509
510 nih_assert (pathname != NULL);
511 nih_assert (files != NULL);
512 nih_assert (num_files != NULL);
513
514 /* We can't really deal with relative paths since we don't know
515 * the working directory that they were opened from.
516 */
517 if (pathname[0] != '/') {
518 nih_warn ("%s: %s", pathname, _("Ignored relative path"));
519 return 0;
520 }
521
522 /* Certain paths aren't worth caching, because they're virtual or
523 * temporary filesystems and would waste pack space.
524 */
525 if (ignore_path (pathname))
526 return 0;
527
528 /* Ignore paths that won't fit in the pack; we could use PATH_MAX,
529 * but with 1000 files that'd be 4M just for the
530 * pack.
531 */
532 if (strlen (pathname) > PACK_PATH_MAX) {
533 nih_warn ("%s: %s", pathname, _("Ignored far too long path"));
534 return 0;
535 }
536
537 /* Use a hash table of paths to eliminate duplicate path names from
538 * the table since that would waste pack space (and fds).
539 */
540 if (! path_hash)
541 path_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
542
543 if (nih_hash_lookup (path_hash, pathname)) {
544 return 0;
545 } else {
546 NihListEntry *entry;
547
548 entry = NIH_MUST (nih_list_entry_new (path_hash));
549 entry->str = NIH_MUST (nih_strdup (entry, pathname));
550
551 nih_hash_add (path_hash, &entry->entry);
552 }
553
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100554 /* Make sure that we have an ordinary file
555 * This avoids us opening a fifo or socket or symlink.
Scott James Remnant56686d62009-11-09 18:38:51 +0000556 */
557 if ((lstat (pathname, &statbuf) < 0)
Bryan Fullerton883c12c2013-03-25 10:09:18 +0100558 || (S_ISLNK (statbuf.st_mode))
Scott James Remnant56686d62009-11-09 18:38:51 +0000559 || (! S_ISREG (statbuf.st_mode)))
560 return 0;
561
562 /* Open and stat again to get the genuine details, in case it
563 * changes under us.
564 */
565 fd = open (pathname, O_RDONLY | O_NOATIME);
566 if (fd < 0) {
567 nih_warn ("%s: %s: %s", pathname,
568 _("File vanished or error reading"),
569 strerror (errno));
570 return -1;
571 }
572
573 if (fstat (fd, &statbuf) < 0) {
574 nih_warn ("%s: %s: %s", pathname,
575 _("Error retrieving file stat"),
576 strerror (errno));
577 close (fd);
578 return -1;
579 }
580
581 /* Double-check that it's really still a file */
582 if (! S_ISREG (statbuf.st_mode)) {
583 close (fd);
584 return 0;
585 }
586
587 /* Some people think it's clever to split their filesystem across
588 * multiple devices, so we need to generate a different pack file
589 * for each device.
590 *
591 * Lookup file based on the dev_t, potentially creating a new
592 * pack file in the array.
593 */
Alan Ding14b131a2021-05-18 15:21:14 -0700594 file = trace_file (parent, statbuf.st_dev, files, num_files, force_ssd_mode);
Scott James Remnant56686d62009-11-09 18:38:51 +0000595
596 /* Grow the PackPath array and fill in the details for the new
597 * path.
598 */
599 file->paths = NIH_MUST (nih_realloc (file->paths, *files,
600 (sizeof (PackPath)
601 * (file->num_paths + 1))));
602
603 path = &file->paths[file->num_paths++];
604 memset (path, 0, sizeof (PackPath));
605
606 path->group = -1;
607 path->ino = statbuf.st_ino;
608
609 strncpy (path->path, pathname, PACK_PATH_MAX);
610 path->path[PACK_PATH_MAX] = '\0';
611
612 /* The paths array contains each unique path opened, but these
613 * might be symbolic or hard links to the same underlying files
614 * and we don't want to read the same block more than once.
615 *
616 * Use a hash table of dev_t/ino_t pairs to make sure we only
617 * read the blocks of an actual file the first time.
618 */
619 if (! inode_hash)
620 inode_hash = NIH_MUST (nih_hash_string_new (NULL, 2500));
621
622 inode_key = NIH_MUST (nih_sprintf (NULL, "%llu:%llu",
623 (unsigned long long)statbuf.st_dev,
624 (unsigned long long)statbuf.st_ino));
625
626 if (nih_hash_lookup (inode_hash, inode_key)) {
627 close (fd);
628 return 0;
629 } else {
630 NihListEntry *entry;
631
632 entry = NIH_MUST (nih_list_entry_new (inode_hash));
633 entry->str = inode_key;
634 nih_ref (entry->str, entry);
635
636 nih_hash_add (inode_hash, &entry->entry);
637 }
638
639 /* There's also no point reading zero byte files, since they
640 * won't have any blocks (and we can't mmap zero bytes anyway).
641 */
642 if (! statbuf.st_size) {
643 close (fd);
644 return 0;
645 }
646
647 /* Now read the in-memory chunks of this file and add those to
648 * the pack file too.
649 */
650 trace_add_chunks (*files, file, path, fd, statbuf.st_size);
651 close (fd);
652
653 return 0;
654}
655
656static int
657ignore_path (const char *pathname)
658{
659 nih_assert (pathname != NULL);
660
661 if (! strncmp (pathname, "/proc/", 6))
662 return TRUE;
663 if (! strncmp (pathname, "/sys/", 5))
664 return TRUE;
665 if (! strncmp (pathname, "/dev/", 5))
666 return TRUE;
667 if (! strncmp (pathname, "/tmp/", 5))
668 return TRUE;
Steve Langasek2c698a12012-02-03 15:27:29 -0800669 if (! strncmp (pathname, "/run/", 5))
670 return TRUE;
Scott James Remnant56686d62009-11-09 18:38:51 +0000671 if (! strncmp (pathname, "/var/run/", 9))
672 return TRUE;
Bryan Fullerton96c991b2013-03-12 15:06:41 +0000673 if (! strncmp (pathname, "/var/log/", 9))
674 return TRUE;
Scott James Remnant56686d62009-11-09 18:38:51 +0000675 if (! strncmp (pathname, "/var/lock/", 10))
676 return TRUE;
677
678 return FALSE;
679}
680
681
682static PackFile *
683trace_file (const void *parent,
684 dev_t dev,
685 PackFile ** files,
Alan Ding14b131a2021-05-18 15:21:14 -0700686 size_t * num_files,
687 int force_ssd_mode)
Scott James Remnant56686d62009-11-09 18:38:51 +0000688{
689 nih_local char *filename = NULL;
690 int rotational;
691 PackFile * file;
692
693 nih_assert (files != NULL);
694 nih_assert (num_files != NULL);
695
696 /* Return any existing file structure for this device */
697 for (size_t i = 0; i < *num_files; i++)
698 if ((*files)[i].dev == dev)
699 return &(*files)[i];
700
Alan Ding14b131a2021-05-18 15:21:14 -0700701 if (force_ssd_mode) {
702 rotational = FALSE;
703 } else {
704 /* Query sysfs to see whether this disk is rotational; this
705 * obviously won't work for virtual devices and the like, so
706 * default to TRUE for now.
Yusuke Satofc571f12016-04-30 06:17:08 -0400707 */
Yusuke Satofc571f12016-04-30 06:17:08 -0400708 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:%d/queue/rotational",
Alan Ding14b131a2021-05-18 15:21:14 -0700709 major (dev), minor (dev)));
710 if (access (filename, R_OK) < 0) {
711 /* For devices managed by the scsi stack, the minor device number has to be
712 * masked to find the queue/rotational file.
713 */
714 nih_free (filename);
715 filename = NIH_MUST (nih_sprintf (NULL, "/sys/dev/block/%d:%d/queue/rotational",
716 major (dev), minor (dev) & 0xffff0));
717 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000718
Alan Ding14b131a2021-05-18 15:21:14 -0700719 if (get_value (AT_FDCWD, filename, &rotational) < 0) {
720 NihError *err;
Scott James Remnant56686d62009-11-09 18:38:51 +0000721
Alan Ding14b131a2021-05-18 15:21:14 -0700722 err = nih_error_get ();
723 nih_warn (_("Unable to obtain rotationalness for device %u:%u: %s"),
724 major (dev), minor (dev), err->message);
725 nih_free (err);
Scott James Remnant56686d62009-11-09 18:38:51 +0000726
Alan Ding14b131a2021-05-18 15:21:14 -0700727 rotational = TRUE;
728 }
Scott James Remnant56686d62009-11-09 18:38:51 +0000729 }
730
731 /* Grow the PackFile array and fill in the details for the new
732 * file.
733 */
734 *files = NIH_MUST (nih_realloc (*files, parent,
735 (sizeof (PackFile) * (*num_files + 1))));
736
737 file = &(*files)[(*num_files)++];
738 memset (file, 0, sizeof (PackFile));
739
740 file->dev = dev;
741 file->rotational = rotational;
742 file->num_paths = 0;
743 file->paths = NULL;
744 file->num_blocks = 0;
745 file->blocks = NULL;
746
747 return file;
748}
749
750
751static int
752trace_add_chunks (const void *parent,
753 PackFile * file,
754 PackPath * path,
755 int fd,
756 off_t size)
757{
758 static int page_size = -1;
759 void * buf;
760 off_t num_pages;
761 nih_local unsigned char *vec = NULL;
762
763 nih_assert (file != NULL);
764 nih_assert (path != NULL);
765 nih_assert (fd >= 0);
766 nih_assert (size > 0);
767
768 if (page_size < 0)
769 page_size = sysconf (_SC_PAGESIZE);
770
771 /* Map the file into memory */
772 buf = mmap (NULL, size, PROT_READ, MAP_SHARED, fd, 0);
773 if (buf == MAP_FAILED) {
774 nih_warn ("%s: %s: %s", path->path,
775 _("Error mapping into memory"),
776 strerror (errno));
777 return -1;
778 }
779
780 /* Grab the core memory map of the file */
781 num_pages = (size - 1) / page_size + 1;
782 vec = NIH_MUST (nih_alloc (NULL, num_pages));
783 memset (vec, 0, num_pages);
784
785 if (mincore (buf, size, vec) < 0) {
786 nih_warn ("%s: %s: %s", path->path,
787 _("Error retrieving page cache info"),
788 strerror (errno));
789 munmap (buf, size);
790 return -1;
791 }
792
793 /* Clean up */
794 if (munmap (buf, size) < 0) {
795 nih_warn ("%s: %s: %s", path->path,
796 _("Error unmapping from memory"),
797 strerror (errno));
798 return -1;
799 }
800
801
802 /* Now we can figure out which contiguous bits of the file are
803 * in core memory.
804 */
805 for (off_t i = 0; i < num_pages; i++) {
806 off_t offset;
807 off_t length;
808
809 if (! vec[i])
810 continue;
811
812 offset = i * page_size;
813 length = page_size;
814
815 while (((i + 1) < num_pages) && vec[i + 1]) {
816 length += page_size;
817 i++;
818 }
819
820 /* The rotational crowd need this split down further into
821 * on-disk extents, the non-rotational folks can just use
822 * the chunks data.
823 */
824 if (file->rotational) {
825 trace_add_extents (parent, file, path, fd, size,
826 offset, length);
827 } else {
828 PackBlock *block;
829
830 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
831 (sizeof (PackBlock)
832 * (file->num_blocks + 1))));
833
834 block = &file->blocks[file->num_blocks++];
835 memset (block, 0, sizeof (PackBlock));
836
837 block->pathidx = file->num_paths - 1;
838 block->offset = offset;
839 block->length = length;
840 block->physical = -1;
841 }
842 }
843
844 return 0;
845}
846
847struct fiemap *
848get_fiemap (const void *parent,
849 int fd,
850 off_t offset,
851 off_t length)
852{
853 struct fiemap *fiemap;
854
855 nih_assert (fd >= 0);
856
857 fiemap = NIH_MUST (nih_new (parent, struct fiemap));
858 memset (fiemap, 0, sizeof (struct fiemap));
859
860 fiemap->fm_start = offset;
861 fiemap->fm_length = length;
862 fiemap->fm_flags = 0;
863
864 do {
865 /* Query the current number of extents */
866 fiemap->fm_mapped_extents = 0;
867 fiemap->fm_extent_count = 0;
868
869 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
870 nih_error_raise_system ();
871 nih_free (fiemap);
872 return NULL;
873 }
874
875 /* Always allow room for one extra over what we were told,
876 * so we know if they changed under us.
877 */
878 fiemap = NIH_MUST (nih_realloc (fiemap, parent,
879 (sizeof (struct fiemap)
880 + (sizeof (struct fiemap_extent)
881 * (fiemap->fm_mapped_extents + 1)))));
882 fiemap->fm_extent_count = fiemap->fm_mapped_extents + 1;
883 fiemap->fm_mapped_extents = 0;
884
885 memset (fiemap->fm_extents, 0, (sizeof (struct fiemap_extent)
886 * fiemap->fm_extent_count));
887
888 if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0) {
889 nih_error_raise_system ();
890 nih_free (fiemap);
891 return NULL;
892 }
893 } while (fiemap->fm_mapped_extents
894 && (fiemap->fm_mapped_extents >= fiemap->fm_extent_count));
895
896 return fiemap;
897}
898
899static int
900trace_add_extents (const void *parent,
901 PackFile * file,
902 PackPath * path,
903 int fd,
904 off_t size,
905 off_t offset,
906 off_t length)
907{
908 nih_local struct fiemap *fiemap = NULL;
909
910 nih_assert (file != NULL);
911 nih_assert (path != NULL);
912 nih_assert (fd >= 0);
913 nih_assert (size > 0);
914
915 /* Get the extents map for this chunk, then iterate the extents
916 * and put those in the pack instead of the chunks.
917 */
918 fiemap = get_fiemap (NULL, fd, offset, length);
919 if (! fiemap) {
920 NihError *err;
921
922 err = nih_error_get ();
923 nih_warn ("%s: %s: %s", path->path,
924 _("Error retrieving chunk extents"),
925 err->message);
926 nih_free (err);
927
928 return -1;
929 }
930
931 for (__u32 j = 0; j < fiemap->fm_mapped_extents; j++) {
932 PackBlock *block;
933 off_t start;
934 off_t end;
935
936 if (fiemap->fm_extents[j].fe_flags & FIEMAP_EXTENT_UNKNOWN)
937 continue;
938
939 /* Work out the intersection of the chunk and extent */
940 start = nih_max (fiemap->fm_start,
941 fiemap->fm_extents[j].fe_logical);
942 end = nih_min ((fiemap->fm_start + fiemap->fm_length),
943 (fiemap->fm_extents[j].fe_logical
944 + fiemap->fm_extents[j].fe_length));
945
946 /* Grow the blocks array to add the extent */
947 file->blocks = NIH_MUST (nih_realloc (file->blocks, parent,
948 (sizeof (PackBlock)
949 * (file->num_blocks + 1))));
950
951 block = &file->blocks[file->num_blocks++];
952 memset (block, 0, sizeof (PackBlock));
953
954 block->pathidx = file->num_paths - 1;
955 block->offset = start;
956 block->length = end - start;
957 block->physical = (fiemap->fm_extents[j].fe_physical
958 + (start - fiemap->fm_extents[j].fe_logical));
959 }
960
961 return 0;
962}
963
964static int
965trace_add_groups (const void *parent,
966 PackFile * file)
967{
968 const char *devname;
969 ext2_filsys fs = NULL;
970
971 nih_assert (file != NULL);
972
973 devname = blkid_devno_to_devname (file->dev);
974 if (devname
975 && (! ext2fs_open (devname, 0, 0, 0, unix_io_manager, &fs))) {
976 nih_assert (fs != NULL);
977 size_t num_groups = 0;
978 nih_local size_t *num_inodes = NULL;
979 size_t mean = 0;
980 size_t hits = 0;
981
982 nih_assert (fs != NULL);
983
984 /* Calculate the number of inode groups on this filesystem */
985 num_groups = ((fs->super->s_blocks_count - 1)
986 / fs->super->s_blocks_per_group) + 1;
987
988 /* Fill in the pack path's group member, and count the
989 * number of inodes in each group.
990 */
991 num_inodes = NIH_MUST (nih_alloc (NULL, (sizeof (size_t)
992 * num_groups)));
993 memset (num_inodes, 0, sizeof (size_t) * num_groups);
994
995 for (size_t i = 0; i < file->num_paths; i++) {
996 file->paths[i].group = ext2fs_group_of_ino (fs, file->paths[i].ino);
997 num_inodes[file->paths[i].group]++;
998 }
999
1000 /* Iterate the groups and add any group that exceeds the
1001 * inode preload threshold.
1002 */
1003 for (size_t i = 0; i < num_groups; i++) {
1004 mean += num_inodes[i];
1005 if (num_inodes[i] > INODE_GROUP_PRELOAD_THRESHOLD) {
1006 file->groups = NIH_MUST (nih_realloc (file->groups, parent,
1007 (sizeof (int)
1008 * (file->num_groups + 1))));
1009 file->groups[file->num_groups++] = i;
1010 hits++;
1011 }
1012 }
1013
1014 mean /= num_groups;
1015
1016 nih_debug ("%zu inode groups, mean %zu inodes per group, %zu hits",
1017 num_groups, mean, hits);
1018
1019 ext2fs_close (fs);
1020 }
1021
1022 return 0;
1023}
1024
1025
1026static int
1027block_compar (const void *a,
1028 const void *b)
1029{
1030 const PackBlock *block_a = a;
1031 const PackBlock *block_b = b;
1032
1033 nih_assert (block_a != NULL);
1034 nih_assert (block_b != NULL);
1035
1036 if (block_a->physical < block_b->physical) {
1037 return -1;
1038 } else if (block_a->physical > block_b->physical) {
1039 return 1;
1040 } else {
1041 return 0;
1042 }
1043}
1044
1045static int
1046trace_sort_blocks (const void *parent,
1047 PackFile * file)
1048{
1049 nih_assert (file != NULL);
1050
1051 /* Sort the blocks array by physical location, since these are
1052 * read in a separate pass to opening files, there's no reason
1053 * to consider which path each block is in - and thus resulting
1054 * in a linear disk read.
1055 */
1056 qsort (file->blocks, file->num_blocks, sizeof (PackBlock),
1057 block_compar);
1058
1059 return 0;
1060}
1061
1062static int
1063path_compar (const void *a,
1064 const void *b)
1065{
1066 const PackPath * const *path_a = a;
1067 const PackPath * const *path_b = b;
1068
1069 nih_assert (path_a != NULL);
1070 nih_assert (path_b != NULL);
1071
1072 if ((*path_a)->group < (*path_b)->group) {
1073 return -1;
1074 } else if ((*path_a)->group > (*path_b)->group) {
1075 return 1;
1076 } else if ((*path_a)->ino < (*path_b)->ino) {
1077 return -1;
1078 } else if ((*path_b)->ino > (*path_b)->ino) {
1079 return 1;
1080 } else {
1081 return strcmp ((*path_a)->path, (*path_b)->path);
1082 }
1083}
1084
1085static int
1086trace_sort_paths (const void *parent,
1087 PackFile * file)
1088{
1089 nih_local PackPath **paths = NULL;
1090 nih_local size_t * new_idx = NULL;
1091 PackPath * new_paths;
1092
1093 nih_assert (file != NULL);
1094
1095 /* Sort the paths array by ext2fs inode group, ino_t then path.
1096 *
1097 * Mucking around with things like the physical locations of
1098 * first on-disk blocks of the dentry and stuff didn't work out
1099 * so well, sorting by path was better, but this seems the best.
1100 * (it looks good on blktrace too)
1101 */
1102 paths = NIH_MUST (nih_alloc (NULL, (sizeof (PackPath *)
1103 * file->num_paths)));
1104
1105 for (size_t i = 0; i < file->num_paths; i++)
1106 paths[i] = &file->paths[i];
1107
1108 qsort (paths, file->num_paths, sizeof (PackPath *),
1109 path_compar);
1110
1111 /* Calculate the new indexes of each path element in the old
1112 * array, and then update the block array's path indexes to
1113 * match.
1114 */
1115 new_idx = NIH_MUST (nih_alloc (NULL,
1116 (sizeof (size_t) * file->num_paths)));
1117 for (size_t i = 0; i < file->num_paths; i++)
1118 new_idx[paths[i] - file->paths] = i;
1119
1120 for (size_t i = 0; i < file->num_blocks; i++)
1121 file->blocks[i].pathidx = new_idx[file->blocks[i].pathidx];
1122
1123 /* Finally generate a new paths array with the new order and
1124 * attach it to the file.
1125 */
1126 new_paths = NIH_MUST (nih_alloc (parent,
1127 (sizeof (PackPath) * file->num_paths)));
1128 for (size_t i = 0; i < file->num_paths; i++)
1129 memcpy (&new_paths[new_idx[i]], &file->paths[i],
1130 sizeof (PackPath));
1131
1132 nih_unref (file->paths, parent);
1133 file->paths = new_paths;
1134
1135 return 0;
1136}