summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Jim Meyering <jim@meyering.net> 2003-01-31 10:39:33 +0000
committer: Jim Meyering <jim@meyering.net> 2003-01-31 10:39:33 +0000
commit: de49951491631bab884ac387ebb745a511817d6b (patch)
tree: 02580e73a4717f15d34b0cf97047b3a52fece55c /src
parent: 129c735cda46fbe1c1d4a86879f87aa52fb59779 (diff)
download: coreutils-de49951491631bab884ac387ebb745a511817d6b.tar.xz
Rewrite to perform directory traversal using nftw.
Include "dirname.h", "ftw.h", and "quotearg.h". (AUTHORS): Add self. (opt_one_file_system): Move global into `main'. (path, xstat, exit_status): Remove declarations. (arg_length, suffix_length): New globals. (G_fail): New global, sort of like the old `exit_status'. (IS_FTW_DIR_TYPE): Define. (print_only_size): New function. (process_file): New function. (str_init, ensure_space, str_copyc, str_concatc): Remove functions. (str_trunc, pop_dir, count_entry): Likewise. (du_files): Rewrite to use nftw.
Diffstat (limited to 'src')
-rw-r--r-- src/du.c 493
1 files changed, 239 insertions, 254 deletions
diff --git a/src/du.c b/src/du.c
index d66afb443..56b0e1668 100644
--- a/src/du.c
+++ b/src/du.c
@@ -1,5 +1,5 @@
/* du -- summarize disk usage
- Copyright (C) 88, 89, 90, 91, 1995-2003 Free Software Foundation, Inc.
+ Copyright (C) 1988-1991, 1995-2003 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,29 +18,11 @@
/* Differences from the Unix du:
* Doesn't simply ignore the names of regular files given as arguments
when -a is given.
- * Additional options:
- -l Count the size of all files, even if they have appeared
- already in another hard link.
- -x Do not cross file-system boundaries during the recursion.
- -c Write a grand total of all of the arguments after all
- arguments have been processed. This can be used to find
- out the disk usage of a directory, with some files excluded.
- -h Print sizes in human readable format (1k 234M 2G, etc).
- -H Similar, but use powers of 1000 not 1024.
- -k Print sizes in kilobytes.
- -m Print sizes in megabytes.
- -b Print sizes in bytes.
- -S Count the size of each directory separately, not including
- the sizes of subdirectories.
- -D Dereference only symbolic links given on the command line.
- -L Dereference all symbolic links.
- --exclude=PATTERN Exclude files that match PATTERN.
- -X FILE Exclude files that match patterns taken from FILE.
By tege@sics.se, Torbjorn Granlund,
and djm@ai.mit.edu, David MacKenzie.
Variable blocks added by lm@sgi.com and eggert@twinsun.com.
-*/
+ Rewritten to use nftw by Jim Meyering. */
#include <config.h>
#include <stdio.h>
@@ -49,21 +31,22 @@
#include <assert.h>
#include "system.h"
+#include "dirname.h" /* for strip_trailing_slashes */
#include "error.h"
#include "exclude.h"
+#include "ftw.h"
#include "hash.h"
#include "human.h"
#include "quote.h"
+#include "quotearg.h"
#include "same.h"
-#include "save-cwd.h"
-#include "savedir.h"
#include "xstrtol.h"
/* The official name of this program (e.g., no `g' prefix). */
#define PROGRAM_NAME "du"
#define AUTHORS \
- N_ ("Torbjorn Granlund, David MacKenzie, Larry McVoy, and Paul Eggert")
+ N_ ("Torbjorn Granlund, David MacKenzie, Larry McVoy, Paul Eggert, and Jim Meyering")
/* Initial size of the hash table. */
#define INITIAL_TABLE_SIZE 103
@@ -83,28 +66,6 @@ struct entry
/* A set of dev/ino pairs. */
static Hash_table *htab;
-/* Structure for dynamically resizable strings. */
-
-struct String
-{
- unsigned alloc; /* Size of allocation for the text. */
- unsigned length; /* Length of the text currently. */
- char *text; /* Pointer to the text. */
-};
-typedef struct String String;
-
-int stat ();
-int lstat ();
-
-/* Arrange to make lstat calls go through the wrapper function
- on systems with an lstat function that does not dereference symlinks
- that are specified with a trailing slash. */
-#if ! LSTAT_FOLLOWS_SLASHED_SYMLINK
-int rpl_lstat (char const *, struct stat *);
-# undef lstat
-# define lstat rpl_lstat
-#endif
-
/* Name under which this program was invoked. */
char *program_name;
@@ -114,16 +75,17 @@ static int opt_all = 0;
/* If nonzero, count each hard link of files with multiple links. */
static int opt_count_all = 0;
-/* If nonzero, do not cross file-system boundaries. */
-static int opt_one_file_system = 0;
-
/* If nonzero, print a grand total at the end. */
static int print_totals = 0;
/* If nonzero, do not add sizes of subdirectories. */
static int opt_separate_dirs = 0;
-/* If nonzero, dereference symlinks that are command line arguments. */
+/* If nonzero, dereference symlinks that are command line arguments.
+ Implementing this while still using nftw is a little tricky.
+ For each command line argument that is a symlink-to-directory,
+ call nftw with "command_line_arg/." and remember to omit the
+ added `/.' when printing. */
static int opt_dereference_arguments = 0;
/* Show the total for each directory (and file if --all) that is at
@@ -137,22 +99,31 @@ static int human_output_opts;
/* The units to use when printing sizes. */
static uintmax_t output_block_size;
-/* Accumulated path for file or directory being processed. */
-static String *path;
-
-/* A pointer to either lstat or stat, depending on whether
- dereferencing of all symbolic links is to be done. */
-static int (*xstat) ();
-
-/* The exit status to use if we don't get any fatal errors. */
-static int exit_status;
-
/* File name patterns to exclude. */
static struct exclude *exclude;
/* Grand total size of all args, in units of ST_NBLOCKSIZE-byte blocks. */
static uintmax_t tot_size = 0;
+/* In some cases, we have to append `/.' or just `.' to an argument
+ (to dereference a symlink). When we do that, we don't want to
+ expose this artifact when printing file/directory names, so these
+ variables keep track of the length of the original command line
+ argument and the length of the suffix we've added, respectively.
+ ARG_LENGTH == 0 indicates that we haven't added a suffix.
+ This information is used to omit any such added characters when
+ printing names. */
+size_t arg_length;
+size_t suffix_length;
+
+/* Nonzero indicates that du should exit with EXIT_FAILURE upon completion. */
+int G_fail;
+
+#define IS_FTW_DIR_TYPE(Type) \
+ ((Type) == FTW_D \
+ || (Type) == FTW_DP \
+ || (Type) == FTW_DNR)
+
/* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
enum
@@ -302,63 +273,16 @@ hash_init (void)
xalloc_die ();
}
-/* Initialize string S1 to hold SIZE characters. */
-
-static void
-str_init (String **s1, unsigned int size)
-{
- String *s;
-
- s = (String *) xmalloc (sizeof (struct String));
- s->text = xmalloc (size + 1);
-
- s->alloc = size;
- *s1 = s;
-}
-
-static void
-ensure_space (String *s, unsigned int size)
-{
- if (s->alloc < size)
- {
- s->text = xrealloc (s->text, size + 1);
- s->alloc = size;
- }
-}
-
-/* Assign the null-terminated C-string CSTR to S1. */
-
-static void
-str_copyc (String *s1, const char *cstr)
-{
- unsigned l = strlen (cstr);
- ensure_space (s1, l);
- strcpy (s1->text, cstr);
- s1->length = l;
-}
-
-static void
-str_concatc (String *s1, const char *cstr)
-{
- unsigned l1 = s1->length;
- unsigned l2 = strlen (cstr);
- unsigned l = l1 + l2;
-
- ensure_space (s1, l);
- strcpy (s1->text + l1, cstr);
- s1->length = l;
-}
-
-/* Truncate the string S1 to have length LENGTH. */
+/* Print N_BLOCKS. N_BLOCKS is the number of
+ ST_NBLOCKSIZE-byte blocks; convert it to a readable value before
+ printing. */
static void
-str_trunc (String *s1, unsigned int length)
+print_only_size (uintmax_t n_blocks)
{
- if (s1->length > length)
- {
- s1->text[length] = 0;
- s1->length = length;
- }
+ char buf[LONGEST_HUMAN_READABLE + 1];
+ fputs (human_readable (n_blocks, buf, human_output_opts,
+ ST_NBLOCKSIZE, output_block_size), stdout);
}
/* Print N_BLOCKS followed by STRING on a line. NBLOCKS is the number of
@@ -376,181 +300,235 @@ print_size (uintmax_t n_blocks, const char *string)
fflush (stdout);
}
-/* Restore the previous working directory or exit.
- If CWD is null, simply call `chdir ("..")'. Otherwise,
- use CWD and free it. CURR_DIR_NAME is the name of the current directory
- and is used solely in failure diagnostics. */
+/* This function is called once for every file system object that nftw
+ encounters. nftw does a depth-first traversal. This function knows
+ that and accumulates per-directory totals based on changes in
+ the depth of the current entry. */
-static void
-pop_dir (struct saved_cwd *cwd, const char *curr_dir_name)
+static int
+process_file (const char *file, const struct stat *sb, int file_type,
+ struct FTW *info)
{
- if (cwd)
+ size_t size;
+ size_t s;
+ static int first_call = 1;
+ static size_t prev_level;
+ static size_t n_alloc;
+ /* The sum of the ST_NBLOCKS values of all entries in the single directory
+ at the corresponding level. Although this does include the block count
+ corresponding to each subdirectory, it does not include the size of
+ any file in a subdirectory. */
+ static uintmax_t *sum_ent;
+
+ /* The sum of the sizes of all entries in the hierarchy at or below the
+ directory at the specified level. */
+ static uintmax_t *sum_subdir;
+
+ switch (file_type)
{
- if (restore_cwd (cwd, "..", curr_dir_name))
- exit (EXIT_FAILURE);
- free_cwd (cwd);
- }
- else if (chdir ("..") < 0)
- {
- error (EXIT_FAILURE, errno, _("cannot change to `..' from directory %s"),
- quote (curr_dir_name));
- }
-}
-
-/* Print (if appropriate) the size (in units determined by `output_block_size')
- of file or directory ENT. Return the size of ENT in units of 512-byte
- blocks. TOP is one for external calls, zero for recursive calls.
- LAST_DEV is the device that the parent directory of ENT is on.
- DEPTH is the number of levels (in hierarchy) down from a command
- line argument. Don't print if DEPTH > max_depth.
- An important invariant is that when this function returns, the current
- working directory is the same as when it was called. */
-
-static uintmax_t
-count_entry (const char *ent, int top, dev_t last_dev, int depth)
-{
- uintmax_t size;
- struct stat stat_buf;
- int (*tmp_stat) ();
-
- if (top && opt_dereference_arguments)
- tmp_stat = stat;
- else
- tmp_stat = xstat;
+ case FTW_NS:
+ error (0, errno, _("cannot access %s"), quote (file));
+ G_fail = 1;
+ return 0;
- if ((*tmp_stat) (ent, &stat_buf) < 0)
- {
- error (0, errno, "%s", quote (path->text));
- exit_status = 1;
+ case FTW_DCHP:
+ error (0, errno, _("cannot change to parent of directory %s"),
+ quote (file));
+ G_fail = 1;
return 0;
+
+ case FTW_DCH:
+ /* Don't return just yet, since although nftw couldn't chdir into the
+ directory, it was able to stat it, so we do have a size. */
+ error (0, errno, _("cannot change to directory %s"), quote (file));
+ G_fail = 1;
+ break;
+
+ case FTW_DNR:
+ /* Don't return just yet, since although nftw couldn't read the
+ directory, it was able to stat it, so we do have a size. */
+ error (0, errno, _("cannot read directory %s"), quote (file));
+ G_fail = 1;
+ break;
+
+ default:
+ break;
}
if (!opt_count_all
- && stat_buf.st_nlink > 1
- && hash_ins (stat_buf.st_ino, stat_buf.st_dev))
+ && 1 < sb->st_nlink
+ && hash_ins (sb->st_ino, sb->st_dev))
return 0; /* Have counted this already. */
- size = ST_NBLOCKS (stat_buf);
- tot_size += size;
+ if (excluded_filename (exclude, file + info->base))
+ return 0;
+
+ s = size = ST_NBLOCKS (*sb);
- if (S_ISDIR (stat_buf.st_mode))
+ if (first_call)
+ {
+ n_alloc = info->level + 10;
+ sum_ent = XCALLOC (uintmax_t, n_alloc);
+ sum_subdir = XCALLOC (uintmax_t, n_alloc);
+ }
+ else
{
- unsigned pathlen;
- unsigned prev_len;
- dev_t dir_dev;
- char *name_space;
- char *namep;
- struct saved_cwd *cwd;
- struct saved_cwd cwd_buf;
- struct stat e_buf;
-
- dir_dev = stat_buf.st_dev;
-
- /* Return `0' here, not SIZE, since the SIZE bytes
- would reside in the new filesystem. */
- if (opt_one_file_system && !top && last_dev != dir_dev)
- return 0; /* Don't enter a new file system. */
-
- /* If we're traversing more than one level, or if we're
- dereferencing symlinks and we're about to chdir through a
- symlink, remember the current directory so we can return to
- it later. In other cases, chdir ("..") works fine.
- Treat `.' and `..' like multi-level paths, since `chdir ("..")'
- wont't restore the current working directory after a `chdir'
- to one of those. */
- if (strchr (ent, '/')
- || DOT_OR_DOTDOT (ent)
- || (tmp_stat == stat
- && lstat (ent, &e_buf) == 0
- && S_ISLNK (e_buf.st_mode)))
+ /* FIXME: it's a shame that we need these `size_t' casts to avoid
+ warnings from gcc about `comparison between signed and unsigned'.
+ Probably unavoidable, assuming that the members of struct FTW
+ are of type `int' (historical), since I want variables like
+ n_alloc and prev_level to have types that make sense. */
+ if (n_alloc <= (size_t) info->level)
{
- if (save_cwd (&cwd_buf))
- exit (EXIT_FAILURE);
- cwd = &cwd_buf;
+ n_alloc = info->level * 2;
+ sum_ent = XREALLOC (sum_ent, uintmax_t, n_alloc);
+ sum_subdir = XREALLOC (sum_subdir, uintmax_t, n_alloc);
}
- else
- cwd = NULL;
+ }
- if (chdir (ent) < 0)
+ if (! first_call)
+ {
+ if ((size_t) info->level == prev_level)
{
- error (0, errno, _("cannot change to directory %s"),
- quote (path->text));
- if (cwd)
- free_cwd (cwd);
- exit_status = 1;
- /* Do return SIZE, here, since even though we can't chdir into ENT,
- we *can* count the blocks used by its directory entry. */
- return opt_separate_dirs ? 0 : size;
+ /* This is usually the most common case. Do nothing. */
}
-
- name_space = savedir (".");
- if (name_space == NULL)
+ else if ((size_t) info->level < prev_level)
{
- error (0, errno, "%s", quote (path->text));
- pop_dir (cwd, path->text);
- exit_status = 1;
- /* Do count the SIZE bytes. */
- return opt_separate_dirs ? 0 : size;
+ /* Ascending the hierarchy.
+ nftw processes a directory only after all entries in that
+ directory have been processed. When the depth decreases,
+ propagate sums from the children (prev_level) to the parent.
+ Here, the current level is always one smaller than the
+ previous one. */
+ assert ((size_t) info->level == prev_level - 1);
+ s += sum_ent[prev_level];
+ if (!opt_separate_dirs)
+ s += sum_subdir[prev_level];
+ sum_subdir[info->level] += (sum_ent[prev_level]
+ + sum_subdir[prev_level]);
}
-
- /* Remember the current path. */
-
- prev_len = path->length;
- if (prev_len && path->text[prev_len - 1] != '/')
- str_concatc (path, "/");
- pathlen = path->length;
-
- for (namep = name_space; *namep; namep += strlen (namep) + 1)
+ else /* info->level > prev_level */
{
- if (!excluded_filename (exclude, namep))
+ /* Descending the hierarchy.
+ Clear the accumulators for *all* levels between prev_level
+ and the current one. The depth may change dramatically,
+ e.g., from 1 to 10. */
+ int i;
+ for (i = prev_level + 1; i <= info->level; i++)
{
- str_concatc (path, namep);
- size += count_entry (namep, 0, dir_dev, depth + 1);
- str_trunc (path, pathlen);
+ sum_ent[i] = 0;
+ sum_subdir[i] = 0;
}
}
+ }
- free (name_space);
+ prev_level = info->level;
+ first_call = 0;
- str_trunc (path, prev_len); /* Remove any "/" we added. */
- pop_dir (cwd, path->text);
+ sum_ent[info->level] += size;
- if (depth <= max_depth || top)
- print_size (size, path->text);
- return opt_separate_dirs ? 0 : size;
- }
- else if ((opt_all && depth <= max_depth) || top)
+ /* Even if this directory was unreadable or we couldn't chdir into it,
+ do let its size contribute to the total, ... */
+ tot_size += size;
+
+ /* ... but don't print out a total for it, since without the size(s)
+ of any potential entries, it could be very misleading. */
+ if (file_type == FTW_DNR || file_type == FTW_DCH)
+ return 0;
+
+ /* FIXME: This looks suspiciously like it could be simplified. */
+ if ((IS_FTW_DIR_TYPE (file_type) &&
+ (info->level <= max_depth || info->level == 0))
+ || ((opt_all && info->level <= max_depth) || info->level == 0))
{
- /* FIXME: make this an option. */
- int print_only_dir_size = 0;
- if (!print_only_dir_size)
- print_size (size, path->text);
+ print_only_size (s);
+ fputc ('\t', stdout);
+ if (arg_length)
+ {
+ /* Print the file name, but without the `.' or `/.'
+ directory suffix that we may have added in main. */
+ /* Print everything before the part we appended. */
+ fwrite (file, arg_length, 1, stdout);
+ /* Print everything after what we appended. */
+ fputs (file + arg_length + suffix_length
+ + (file[arg_length + suffix_length] == '/'), stdout);
+ }
+ else
+ {
+ fputs (file, stdout);
+ }
+ fputc ('\n', stdout);
+ fflush (stdout);
}
- return size;
+ return 0;
+}
+
+static int
+is_symlink_to_dir (char const *file)
+{
+ char *f;
+ struct stat sb;
+
+ ASSIGN_STRDUPA (f, file);
+ strip_trailing_slashes (f);
+ return (lstat (f, &sb) == 0 && S_ISLNK (sb.st_mode)
+ && stat (f, &sb) == 0 && S_ISDIR (sb.st_mode));
}
/* Recursively print the sizes of the directories (and, if selected, files)
- named in FILES, the last entry of which is NULL. */
+ named in FILES, the last entry of which is NULL.
+ FTW_FLAGS controls how nftw works.
+ Return nonzero upon error. */
-static void
-du_files (char **files)
+static int
+du_files (char **files, int ftw_flags)
{
- int i; /* Index in FILES. */
-
+ int fail = 0;
+ int i;
for (i = 0; files[i]; i++)
{
- char const *arg = files[i];
- str_copyc (path, arg);
+ char *file = files[i];
+ char *orig = file;
+ int err;
+ arg_length = 0;
if (!print_totals)
hash_clear (htab);
- count_entry (arg, 1, 0, 0);
+ /* When dereferencing only command line arguments, we're using
+ nftw's FTW_PHYS flag, so a symlink-to-directory specified on
+ the command line wouldn't normally be dereferenced. To work
+ around that, we incur the overhead of appending `/.' (or `.')
+ now, and later removing it each time we output the name of
+ a derived file or directory. */
+ if (opt_dereference_arguments && is_symlink_to_dir (file))
+ {
+ size_t len = strlen (file);
+ /* Append `/.', but if there's already a trailing slash,
+ append only the `.'. */
+ char const *suffix = (file[len - 1] == '/' ? "." : "/.");
+ suffix_length = strlen (suffix);
+ char *new_file = xmalloc (len + suffix_length + 1);
+ memcpy (mempcpy (new_file, file, len), suffix, suffix_length + 1);
+ arg_length = len;
+ file = new_file;
+ }
+
+#define MAX_N_DESCRIPTORS (1000 - 1) /* FIXME-compute at configure time */
+ err = nftw (file, process_file, MAX_N_DESCRIPTORS, ftw_flags);
+ if (err)
+ error (0, errno, "%s", quote (orig));
+ fail |= err;
+
+ if (arg_length)
+ free (file);
}
if (print_totals)
print_size (tot_size, _("total"));
+
+ return fail;
}
int
@@ -559,6 +537,10 @@ main (int argc, char **argv)
int c;
char *cwd_only[2];
int max_depth_specified = 0;
+ char **files;
+
+ /* Bit flags that control how nftw works. */
+ int ftw_flags = FTW_DEPTH | FTW_PHYS | FTW_CHDIR;
/* If nonzero, display only a total for each argument. */
int opt_summarize_only = 0;
@@ -574,7 +556,6 @@ main (int argc, char **argv)
atexit (close_stdout);
exclude = new_exclude ();
- xstat = lstat;
human_output_opts = human_options (getenv ("DU_BLOCK_SIZE"), false,
&output_block_size);
@@ -617,9 +598,12 @@ main (int argc, char **argv)
break;
case MAX_DEPTH_OPTION: /* --max-depth=N */
+ /* FIXME: merely set `fail' here, in case there are
+ additional invalid options */
if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) != LONGINT_OK
|| tmp_long < 0 || tmp_long > INT_MAX)
- error (EXIT_FAILURE, 0, _("invalid maximum depth %s"), quote (optarg));
+ error (EXIT_FAILURE, 0, _("invalid maximum depth %s"),
+ quote (optarg));
max_depth_specified = 1;
max_depth = (int) tmp_long;
@@ -639,7 +623,7 @@ main (int argc, char **argv)
break;
case 'x':
- opt_one_file_system = 1;
+ ftw_flags |= FTW_MOUNT;
break;
case 'B':
@@ -651,7 +635,7 @@ main (int argc, char **argv)
break;
case 'L':
- xstat = stat;
+ ftw_flags &= ~FTW_PHYS;
break;
case 'S':
@@ -659,9 +643,11 @@ main (int argc, char **argv)
break;
case 'X':
+ /* FIXME: merely set `fail' here, in case there are
+ additional invalid options */
if (add_exclude_file (add_exclude, exclude, optarg,
EXCLUDE_WILDCARDS, '\n'))
- error (EXIT_FAILURE, errno, "%s", quote (optarg));
+ error (EXIT_FAILURE, errno, "%s", quotearg_colon (optarg));
break;
case EXCLUDE_OPTION:
@@ -700,12 +686,11 @@ main (int argc, char **argv)
if (opt_summarize_only)
max_depth = 0;
+ files = (optind == argc ? cwd_only : argv + optind);
+
/* Initialize the hash structure for inode numbers. */
hash_init ();
- str_init (&path, INITIAL_PATH_SIZE);
-
- du_files (optind == argc ? cwd_only : argv + optind);
-
- exit (exit_status);
+ exit (du_files (files, ftw_flags) || G_fail
+ ? EXIT_FAILURE : EXIT_SUCCESS);
}