From de49951491631bab884ac387ebb745a511817d6b Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 31 Jan 2003 10:39:33 +0000 Subject: Rewrite to perform directory traversal using nftw. Include "dirname.h", "ftw.h", and "quotearg.h". (AUTHORS): Add self. (opt_one_file_system): Move global into `main'. (path, xstat, exit_status): Remove declarations. (arg_length, suffix_length): New globals. (G_fail): New global, sort of like the old `exit_status'. (IS_FTW_DIR_TYPE): Define. (print_only_size): New function. (process_file): New function. (str_init, ensure_space, str_copyc, str_concatc): Remove functions. (str_trunc, pop_dir, count_entry): Likewise. (du_files): Rewrite to use nftw. --- src/du.c | 493 +++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 239 insertions(+), 254 deletions(-) (limited to 'src/du.c') diff --git a/src/du.c b/src/du.c index d66afb443..56b0e1668 100644 --- a/src/du.c +++ b/src/du.c @@ -1,5 +1,5 @@ /* du -- summarize disk usage - Copyright (C) 88, 89, 90, 91, 1995-2003 Free Software Foundation, Inc. + Copyright (C) 1988-1991, 1995-2003 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,29 +18,11 @@ /* Differences from the Unix du: * Doesn't simply ignore the names of regular files given as arguments when -a is given. - * Additional options: - -l Count the size of all files, even if they have appeared - already in another hard link. - -x Do not cross file-system boundaries during the recursion. - -c Write a grand total of all of the arguments after all - arguments have been processed. This can be used to find - out the disk usage of a directory, with some files excluded. - -h Print sizes in human readable format (1k 234M 2G, etc). - -H Similar, but use powers of 1000 not 1024. - -k Print sizes in kilobytes. - -m Print sizes in megabytes. - -b Print sizes in bytes. - -S Count the size of each directory separately, not including - the sizes of subdirectories. - -D Dereference only symbolic links given on the command line. - -L Dereference all symbolic links. - --exclude=PATTERN Exclude files that match PATTERN. - -X FILE Exclude files that match patterns taken from FILE. By tege@sics.se, Torbjorn Granlund, and djm@ai.mit.edu, David MacKenzie. Variable blocks added by lm@sgi.com and eggert@twinsun.com. -*/ + Rewritten to use nftw by Jim Meyering. */ #include #include @@ -49,21 +31,22 @@ #include #include "system.h" +#include "dirname.h" /* for strip_trailing_slashes */ #include "error.h" #include "exclude.h" +#include "ftw.h" #include "hash.h" #include "human.h" #include "quote.h" +#include "quotearg.h" #include "same.h" -#include "save-cwd.h" -#include "savedir.h" #include "xstrtol.h" /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME "du" #define AUTHORS \ - N_ ("Torbjorn Granlund, David MacKenzie, Larry McVoy, and Paul Eggert") + N_ ("Torbjorn Granlund, David MacKenzie, Larry McVoy, Paul Eggert, and Jim Meyering") /* Initial size of the hash table. */ #define INITIAL_TABLE_SIZE 103 @@ -83,28 +66,6 @@ struct entry /* A set of dev/ino pairs. */ static Hash_table *htab; -/* Structure for dynamically resizable strings. */ - -struct String -{ - unsigned alloc; /* Size of allocation for the text. */ - unsigned length; /* Length of the text currently. */ - char *text; /* Pointer to the text. */ -}; -typedef struct String String; - -int stat (); -int lstat (); - -/* Arrange to make lstat calls go through the wrapper function - on systems with an lstat function that does not dereference symlinks - that are specified with a trailing slash. */ -#if ! LSTAT_FOLLOWS_SLASHED_SYMLINK -int rpl_lstat (char const *, struct stat *); -# undef lstat -# define lstat rpl_lstat -#endif - /* Name under which this program was invoked. */ char *program_name; @@ -114,16 +75,17 @@ static int opt_all = 0; /* If nonzero, count each hard link of files with multiple links. */ static int opt_count_all = 0; -/* If nonzero, do not cross file-system boundaries. */ -static int opt_one_file_system = 0; - /* If nonzero, print a grand total at the end. */ static int print_totals = 0; /* If nonzero, do not add sizes of subdirectories. */ static int opt_separate_dirs = 0; -/* If nonzero, dereference symlinks that are command line arguments. */ +/* If nonzero, dereference symlinks that are command line arguments. + Implementing this while still using nftw is a little tricky. + For each command line argument that is a symlink-to-directory, + call nftw with "command_line_arg/." and remember to omit the + added `/.' when printing. */ static int opt_dereference_arguments = 0; /* Show the total for each directory (and file if --all) that is at @@ -137,22 +99,31 @@ static int human_output_opts; /* The units to use when printing sizes. */ static uintmax_t output_block_size; -/* Accumulated path for file or directory being processed. */ -static String *path; - -/* A pointer to either lstat or stat, depending on whether - dereferencing of all symbolic links is to be done. */ -static int (*xstat) (); - -/* The exit status to use if we don't get any fatal errors. */ -static int exit_status; - /* File name patterns to exclude. */ static struct exclude *exclude; /* Grand total size of all args, in units of ST_NBLOCKSIZE-byte blocks. */ static uintmax_t tot_size = 0; +/* In some cases, we have to append `/.' or just `.' to an argument + (to dereference a symlink). When we do that, we don't want to + expose this artifact when printing file/directory names, so these + variables keep track of the length of the original command line + argument and the length of the suffix we've added, respectively. + ARG_LENGTH == 0 indicates that we haven't added a suffix. + This information is used to omit any such added characters when + printing names. */ +size_t arg_length; +size_t suffix_length; + +/* Nonzero indicates that du should exit with EXIT_FAILURE upon completion. */ +int G_fail; + +#define IS_FTW_DIR_TYPE(Type) \ + ((Type) == FTW_D \ + || (Type) == FTW_DP \ + || (Type) == FTW_DNR) + /* For long options that have no equivalent short option, use a non-character as a pseudo short option, starting with CHAR_MAX + 1. */ enum @@ -302,63 +273,16 @@ hash_init (void) xalloc_die (); } -/* Initialize string S1 to hold SIZE characters. */ - -static void -str_init (String **s1, unsigned int size) -{ - String *s; - - s = (String *) xmalloc (sizeof (struct String)); - s->text = xmalloc (size + 1); - - s->alloc = size; - *s1 = s; -} - -static void -ensure_space (String *s, unsigned int size) -{ - if (s->alloc < size) - { - s->text = xrealloc (s->text, size + 1); - s->alloc = size; - } -} - -/* Assign the null-terminated C-string CSTR to S1. */ - -static void -str_copyc (String *s1, const char *cstr) -{ - unsigned l = strlen (cstr); - ensure_space (s1, l); - strcpy (s1->text, cstr); - s1->length = l; -} - -static void -str_concatc (String *s1, const char *cstr) -{ - unsigned l1 = s1->length; - unsigned l2 = strlen (cstr); - unsigned l = l1 + l2; - - ensure_space (s1, l); - strcpy (s1->text + l1, cstr); - s1->length = l; -} - -/* Truncate the string S1 to have length LENGTH. */ +/* Print N_BLOCKS. NBLOCKS is the number of + ST_NBLOCKSIZE-byte blocks; convert it to a readable value before + printing. */ static void -str_trunc (String *s1, unsigned int length) +print_only_size (uintmax_t n_blocks) { - if (s1->length > length) - { - s1->text[length] = 0; - s1->length = length; - } + char buf[LONGEST_HUMAN_READABLE + 1]; + fputs (human_readable (n_blocks, buf, human_output_opts, + ST_NBLOCKSIZE, output_block_size), stdout); } /* Print N_BLOCKS followed by STRING on a line. NBLOCKS is the number of @@ -376,181 +300,235 @@ print_size (uintmax_t n_blocks, const char *string) fflush (stdout); } -/* Restore the previous working directory or exit. - If CWD is null, simply call `chdir ("..")'. Otherwise, - use CWD and free it. CURR_DIR_NAME is the name of the current directory - and is used solely in failure diagnostics. */ +/* This function is called once for every file system object that nftw + encounters. nftw does a depth-first traversal. This function knows + that and accumulates per-directory totals based on changes in + the depth of the current entry. */ -static void -pop_dir (struct saved_cwd *cwd, const char *curr_dir_name) +static int +process_file (const char *file, const struct stat *sb, int file_type, + struct FTW *info) { - if (cwd) + size_t size; + size_t s; + static int first_call = 1; + static size_t prev_level; + static size_t n_alloc; + /* The sum of the st_size values of all entries in the single directory + at the corresponding level. Although this does include the st_size + corresponding to each subdirectory, it does not include the size of + any file in a subdirectory. */ + static uintmax_t *sum_ent; + + /* The sum of the sizes of all entries in the hierarchy at or below the + directory at the specified level. */ + static uintmax_t *sum_subdir; + + switch (file_type) { - if (restore_cwd (cwd, "..", curr_dir_name)) - exit (EXIT_FAILURE); - free_cwd (cwd); - } - else if (chdir ("..") < 0) - { - error (EXIT_FAILURE, errno, _("cannot change to `..' from directory %s"), - quote (curr_dir_name)); - } -} - -/* Print (if appropriate) the size (in units determined by `output_block_size') - of file or directory ENT. Return the size of ENT in units of 512-byte - blocks. TOP is one for external calls, zero for recursive calls. - LAST_DEV is the device that the parent directory of ENT is on. - DEPTH is the number of levels (in hierarchy) down from a command - line argument. Don't print if DEPTH > max_depth. - An important invariant is that when this function returns, the current - working directory is the same as when it was called. */ - -static uintmax_t -count_entry (const char *ent, int top, dev_t last_dev, int depth) -{ - uintmax_t size; - struct stat stat_buf; - int (*tmp_stat) (); - - if (top && opt_dereference_arguments) - tmp_stat = stat; - else - tmp_stat = xstat; + case FTW_NS: + error (0, errno, _("cannot access %s"), quote (file)); + G_fail = 1; + return 0; - if ((*tmp_stat) (ent, &stat_buf) < 0) - { - error (0, errno, "%s", quote (path->text)); - exit_status = 1; + case FTW_DCHP: + error (0, errno, _("cannot change to parent of directory %s"), + quote (file)); + G_fail = 1; return 0; + + case FTW_DCH: + /* Don't return just yet, since although nftw couldn't chdir into the + directory, it was able to stat it, so we do have a size. */ + error (0, errno, _("cannot change to directory %s"), quote (file)); + G_fail = 1; + break; + + case FTW_DNR: + /* Don't return just yet, since although nftw couldn't read the + directory, it was able to stat it, so we do have a size. */ + error (0, errno, _("cannot read directory %s"), quote (file)); + G_fail = 1; + break; + + default: + break; } if (!opt_count_all - && stat_buf.st_nlink > 1 - && hash_ins (stat_buf.st_ino, stat_buf.st_dev)) + && 1 < sb->st_nlink + && hash_ins (sb->st_ino, sb->st_dev)) return 0; /* Have counted this already. */ - size = ST_NBLOCKS (stat_buf); - tot_size += size; + if (excluded_filename (exclude, file + info->base)) + return 0; + + s = size = ST_NBLOCKS (*sb); - if (S_ISDIR (stat_buf.st_mode)) + if (first_call) + { + n_alloc = info->level + 10; + sum_ent = XCALLOC (uintmax_t, n_alloc); + sum_subdir = XCALLOC (uintmax_t, n_alloc); + } + else { - unsigned pathlen; - unsigned prev_len; - dev_t dir_dev; - char *name_space; - char *namep; - struct saved_cwd *cwd; - struct saved_cwd cwd_buf; - struct stat e_buf; - - dir_dev = stat_buf.st_dev; - - /* Return `0' here, not SIZE, since the SIZE bytes - would reside in the new filesystem. */ - if (opt_one_file_system && !top && last_dev != dir_dev) - return 0; /* Don't enter a new file system. */ - - /* If we're traversing more than one level, or if we're - dereferencing symlinks and we're about to chdir through a - symlink, remember the current directory so we can return to - it later. In other cases, chdir ("..") works fine. - Treat `.' and `..' like multi-level paths, since `chdir ("..")' - wont't restore the current working directory after a `chdir' - to one of those. */ - if (strchr (ent, '/') - || DOT_OR_DOTDOT (ent) - || (tmp_stat == stat - && lstat (ent, &e_buf) == 0 - && S_ISLNK (e_buf.st_mode))) + /* FIXME: it's a shame that we need these `size_t' casts to avoid + warnings from gcc about `comparison between signed and unsigned'. + Probably unavoidable, assuming that the members of struct FTW + are of type `int' (historical), since I want variables like + n_alloc and prev_level to have types that make sense. */ + if (n_alloc <= (size_t) info->level) { - if (save_cwd (&cwd_buf)) - exit (EXIT_FAILURE); - cwd = &cwd_buf; + n_alloc = info->level * 2; + sum_ent = XREALLOC (sum_ent, uintmax_t, n_alloc); + sum_subdir = XREALLOC (sum_subdir, uintmax_t, n_alloc); } - else - cwd = NULL; + } - if (chdir (ent) < 0) + if (! first_call) + { + if ((size_t) info->level == prev_level) { - error (0, errno, _("cannot change to directory %s"), - quote (path->text)); - if (cwd) - free_cwd (cwd); - exit_status = 1; - /* Do return SIZE, here, since even though we can't chdir into ENT, - we *can* count the blocks used by its directory entry. */ - return opt_separate_dirs ? 0 : size; + /* This is usually the most common case. Do nothing. */ } - - name_space = savedir ("."); - if (name_space == NULL) + else if ((size_t) info->level < prev_level) { - error (0, errno, "%s", quote (path->text)); - pop_dir (cwd, path->text); - exit_status = 1; - /* Do count the SIZE bytes. */ - return opt_separate_dirs ? 0 : size; + /* Ascending the hierarchy. + nftw processes a directory only after all entries in that + directory have been processed. When the depth decreases, + propagate sums from the children (prev_level) to the parent. + Here, the current level is always one smaller than the + previous one. */ + assert ((size_t) info->level == prev_level - 1); + s += sum_ent[prev_level]; + if (!opt_separate_dirs) + s += sum_subdir[prev_level]; + sum_subdir[info->level] += (sum_ent[prev_level] + + sum_subdir[prev_level]); } - - /* Remember the current path. */ - - prev_len = path->length; - if (prev_len && path->text[prev_len - 1] != '/') - str_concatc (path, "/"); - pathlen = path->length; - - for (namep = name_space; *namep; namep += strlen (namep) + 1) + else /* info->level > prev_level */ { - if (!excluded_filename (exclude, namep)) + /* Descending the hierarchy. + Clear the accumulators for *all* levels between prev_level + and the current one. The depth may change dramatically, + e.g., from 1 to 10. */ + int i; + for (i = prev_level + 1; i <= info->level; i++) { - str_concatc (path, namep); - size += count_entry (namep, 0, dir_dev, depth + 1); - str_trunc (path, pathlen); + sum_ent[i] = 0; + sum_subdir[i] = 0; } } + } - free (name_space); + prev_level = info->level; + first_call = 0; - str_trunc (path, prev_len); /* Remove any "/" we added. */ - pop_dir (cwd, path->text); + sum_ent[info->level] += size; - if (depth <= max_depth || top) - print_size (size, path->text); - return opt_separate_dirs ? 0 : size; - } - else if ((opt_all && depth <= max_depth) || top) + /* Even if this directory was unreadable or we couldn't chdir into it, + do let its size contribute to the total, ... */ + tot_size += size; + + /* ... but don't print out a total for it, since without the size(s) + of any potential entries, it could be very misleading. */ + if (file_type == FTW_DNR || file_type == FTW_DCH) + return 0; + + /* FIXME: This looks suspiciously like it could be simplified. */ + if ((IS_FTW_DIR_TYPE (file_type) && + (info->level <= max_depth || info->level == 0)) + || ((opt_all && info->level <= max_depth) || info->level == 0)) { - /* FIXME: make this an option. */ - int print_only_dir_size = 0; - if (!print_only_dir_size) - print_size (size, path->text); + print_only_size (s); + fputc ('\t', stdout); + if (arg_length) + { + /* Print the file name, but without the `.' or `/.' + directory suffix that we may have added in main. */ + /* Print everything before the part we appended. */ + fwrite (file, arg_length, 1, stdout); + /* Print everything after what we appended. */ + fputs (file + arg_length + suffix_length + + (file[arg_length + suffix_length] == '/'), stdout); + } + else + { + fputs (file, stdout); + } + fputc ('\n', stdout); + fflush (stdout); } - return size; + return 0; +} + +static int +is_symlink_to_dir (char const *file) +{ + char *f; + struct stat sb; + + ASSIGN_STRDUPA (f, file); + strip_trailing_slashes (f); + return (lstat (f, &sb) == 0 && S_ISLNK (sb.st_mode) + && stat (f, &sb) == 0 && S_ISDIR (sb.st_mode)); } /* Recursively print the sizes of the directories (and, if selected, files) - named in FILES, the last entry of which is NULL. */ + named in FILES, the last entry of which is NULL. + FTW_FLAGS controls how nftw works. + Return nonzero upon error. */ -static void -du_files (char **files) +static int +du_files (char **files, int ftw_flags) { - int i; /* Index in FILES. */ - + int fail = 0; + int i; for (i = 0; files[i]; i++) { - char const *arg = files[i]; - str_copyc (path, arg); + char *file = files[i]; + char *orig = file; + int err; + arg_length = 0; if (!print_totals) hash_clear (htab); - count_entry (arg, 1, 0, 0); + /* When dereferencing only command line arguments, we're using + nftw's FTW_PHYS flag, so a symlink-to-directory specified on + the command line wouldn't normally be dereferenced. To work + around that, we incur the overhead of appending `/.' (or `.') + now, and later removing it each time we output the name of + a derived file or directory name. */ + if (opt_dereference_arguments && is_symlink_to_dir (file)) + { + size_t len = strlen (file); + /* Append `/.', but if there's already a trailing slash, + append only the `.'. */ + char const *suffix = (file[len - 1] == '/' ? "." : "/."); + suffix_length = strlen (suffix); + char *new_file = xmalloc (len + suffix_length + 1); + memcpy (mempcpy (new_file, file, len), suffix, suffix_length + 1); + arg_length = len; + file = new_file; + } + +#define MAX_N_DESCRIPTORS (1000 - 1) /* FIXME-compute at configure time */ + err = nftw (file, process_file, MAX_N_DESCRIPTORS, ftw_flags); + if (err) + error (0, errno, "%s", quote (orig)); + fail |= err; + + if (arg_length) + free (file); } if (print_totals) print_size (tot_size, _("total")); + + return fail; } int @@ -559,6 +537,10 @@ main (int argc, char **argv) int c; char *cwd_only[2]; int max_depth_specified = 0; + char **files; + + /* Bit flags that control how nftw works. */ + int ftw_flags = FTW_DEPTH | FTW_PHYS | FTW_CHDIR; /* If nonzero, display only a total for each argument. */ int opt_summarize_only = 0; @@ -574,7 +556,6 @@ main (int argc, char **argv) atexit (close_stdout); exclude = new_exclude (); - xstat = lstat; human_output_opts = human_options (getenv ("DU_BLOCK_SIZE"), false, &output_block_size); @@ -617,9 +598,12 @@ main (int argc, char **argv) break; case MAX_DEPTH_OPTION: /* --max-depth=N */ + /* FIXME: merely set `fail' here, in case there are + additional invalid options */ if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) != LONGINT_OK || tmp_long < 0 || tmp_long > INT_MAX) - error (EXIT_FAILURE, 0, _("invalid maximum depth %s"), quote (optarg)); + error (EXIT_FAILURE, 0, _("invalid maximum depth %s"), + quote (optarg)); max_depth_specified = 1; max_depth = (int) tmp_long; @@ -639,7 +623,7 @@ main (int argc, char **argv) break; case 'x': - opt_one_file_system = 1; + ftw_flags |= FTW_MOUNT; break; case 'B': @@ -651,7 +635,7 @@ main (int argc, char **argv) break; case 'L': - xstat = stat; + ftw_flags &= ~FTW_PHYS; break; case 'S': @@ -659,9 +643,11 @@ main (int argc, char **argv) break; case 'X': + /* FIXME: merely set `fail' here, in case there are + additional invalid options */ if (add_exclude_file (add_exclude, exclude, optarg, EXCLUDE_WILDCARDS, '\n')) - error (EXIT_FAILURE, errno, "%s", quote (optarg)); + error (EXIT_FAILURE, errno, "%s", quotearg_colon (optarg)); break; case EXCLUDE_OPTION: @@ -700,12 +686,11 @@ main (int argc, char **argv) if (opt_summarize_only) max_depth = 0; + files = (optind == argc ? cwd_only : argv + optind); + /* Initialize the hash structure for inode numbers. */ hash_init (); - str_init (&path, INITIAL_PATH_SIZE); - - du_files (optind == argc ? cwd_only : argv + optind); - - exit (exit_status); + exit (du_files (files, ftw_flags) || G_fail + ? EXIT_FAILURE : EXIT_SUCCESS); } -- cgit v1.2.3-54-g00ecf