/* du -- summarize disk usage Copyright (C) 1988-1991, 1995-2005 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* Differences from the Unix du: * Doesn't simply ignore the names of regular files given as arguments when -a is given. By tege@sics.se, Torbjorn Granlund, and djm@ai.mit.edu, David MacKenzie. Variable blocks added by lm@sgi.com and eggert@twinsun.com. Rewritten to use nftw, then to use fts by Jim Meyering. */ #include #include #include #include #include #include "system.h" #include "dirname.h" /* for strip_trailing_slashes */ #include "error.h" #include "exclude.h" #include "hash.h" #include "human.h" #include "quote.h" #include "quotearg.h" #include "readtokens0.h" #include "same.h" #include "xfts.h" #include "xstrtol.h" extern bool fts_debug; /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME "du" #define AUTHORS \ "Torbjorn Granlund", "David MacKenzie, Paul Eggert", "Jim Meyering" #if DU_DEBUG # define FTS_CROSS_CHECK(Fts) fts_cross_check (Fts) # define DEBUG_OPT "d" #else # define FTS_CROSS_CHECK(Fts) # define DEBUG_OPT #endif /* Initial size of the hash table. */ #define INITIAL_TABLE_SIZE 103 /* Hash structure for inode and device numbers. The separate entry structure makes it easier to rehash "in place". */ struct entry { ino_t st_ino; dev_t st_dev; }; /* A set of dev/ino pairs. */ static Hash_table *htab; /* Name under which this program was invoked. */ char *program_name; /* If true, display counts for all files, not just directories. */ static bool opt_all = false; /* If true, rather than using the disk usage of each file, use the apparent size (a la stat.st_size). */ static bool apparent_size = false; /* If true, count each hard link of files with multiple links. */ static bool opt_count_all = false; /* If true, output the NUL byte instead of a newline at the end of each line. */ static bool opt_nul_terminate_output = false; /* If true, print a grand total at the end. */ static bool print_grand_total = false; /* If nonzero, do not add sizes of subdirectories. */ static bool opt_separate_dirs = false; /* Show the total for each directory (and file if --all) that is at most MAX_DEPTH levels down from the root of the hierarchy. The root is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */ static size_t max_depth = SIZE_MAX; /* Human-readable options for output. */ static int human_output_opts; /* The units to use when printing sizes. */ static uintmax_t output_block_size; /* File name patterns to exclude. */ static struct exclude *exclude; /* Grand total size of all args, in bytes. */ static uintmax_t tot_size = 0; #define IS_DIR_TYPE(Type) \ ((Type) == FTS_DP \ || (Type) == FTS_DNR) /* For long options that have no equivalent short option, use a non-character as a pseudo short option, starting with CHAR_MAX + 1. */ enum { APPARENT_SIZE_OPTION = CHAR_MAX + 1, EXCLUDE_OPTION, FILES0_FROM_OPTION, HUMAN_SI_OPTION, MAX_DEPTH_OPTION }; static struct option const long_options[] = { {"all", no_argument, NULL, 'a'}, {"apparent-size", no_argument, NULL, APPARENT_SIZE_OPTION}, {"block-size", required_argument, NULL, 'B'}, {"bytes", no_argument, NULL, 'b'}, {"count-links", no_argument, NULL, 'l'}, {"dereference", no_argument, NULL, 'L'}, {"dereference-args", no_argument, NULL, 'D'}, {"exclude", required_argument, NULL, EXCLUDE_OPTION}, {"exclude-from", required_argument, NULL, 'X'}, {"files0-from", required_argument, NULL, FILES0_FROM_OPTION}, {"human-readable", no_argument, NULL, 'h'}, {"si", no_argument, NULL, HUMAN_SI_OPTION}, {"kilobytes", no_argument, NULL, 'k'}, /* long form is obsolescent */ {"max-depth", required_argument, NULL, MAX_DEPTH_OPTION}, {"null", no_argument, NULL, '0'}, {"megabytes", no_argument, NULL, 'm'}, /* obsolescent */ {"no-dereference", no_argument, NULL, 'P'}, {"one-file-system", no_argument, NULL, 'x'}, {"separate-dirs", no_argument, NULL, 'S'}, {"summarize", no_argument, NULL, 's'}, {"total", no_argument, NULL, 'c'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} }; void usage (int status) { if (status != EXIT_SUCCESS) fprintf (stderr, _("Try `%s --help' for more information.\n"), program_name); else { printf (_("\ Usage: %s [OPTION]... [FILE]...\n\ or: %s [OPTION]... --files0-from=F\n\ "), program_name, program_name); fputs (_("\ Summarize disk usage of each FILE, recursively for directories.\n\ \n\ "), stdout); fputs (_("\ Mandatory arguments to long options are mandatory for short options too.\n\ "), stdout); fputs (_("\ -a, --all write counts for all files, not just directories\n\ --apparent-size print apparent sizes, rather than disk usage; although\n\ the apparent size is usually smaller, it may be\n\ larger due to holes in (`sparse') files, internal\n\ fragmentation, indirect blocks, and the like\n\ -B, --block-size=SIZE use SIZE-byte blocks\n\ -b, --bytes equivalent to `--apparent-size --block-size=1'\n\ -c, --total produce a grand total\n\ -D, --dereference-args dereference FILEs that are symbolic links\n\ "), stdout); fputs (_("\ --files0-from=F summarize disk usage of the NUL-terminated file\n\ names specified in file F\n\ -H like --si, but also evokes a warning; will soon\n\ change to be equivalent to --dereference-args (-D)\n\ -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\ --si like -h, but use powers of 1000 not 1024\n\ -k like --block-size=1K\n\ -l, --count-links count sizes many times if hard linked\n\ "), stdout); fputs (_("\ -L, --dereference dereference all symbolic links\n\ -P, --no-dereference don't follow any symbolic links (this is the default)\n\ -0, --null end each output line with 0 byte rather than newline\n\ -S, --separate-dirs do not include size of subdirectories\n\ -s, --summarize display only a total for each argument\n\ "), stdout); fputs (_("\ -x, --one-file-system skip directories on different file systems\n\ -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\ --exclude=PATTERN Exclude files that match PATTERN.\n\ --max-depth=N print the total for a directory (or file, with --all)\n\ only if it is N or fewer levels below the command\n\ line argument; --max-depth=0 is the same as\n\ --summarize\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); fputs (_("\n\ SIZE may be (or may be an integer optionally followed by) one of following:\n\ kB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n\ "), stdout); printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); } exit (status); } static size_t entry_hash (void const *x, size_t table_size) { struct entry const *p = x; /* Ignoring the device number here should be fine. */ /* The cast to uintmax_t prevents negative remainders if st_ino is negative. */ return (uintmax_t) p->st_ino % table_size; } /* Compare two dev/ino pairs. Return true if they are the same. */ static bool entry_compare (void const *x, void const *y) { struct entry const *a = x; struct entry const *b = y; return SAME_INODE (*a, *b) ? true : false; } /* Try to insert the INO/DEV pair into the global table, HTAB. Return true if the pair is successfully inserted, false if the pair is already in the table. */ static bool hash_ins (ino_t ino, dev_t dev) { struct entry *ent; struct entry *ent_from_table; ent = xmalloc (sizeof *ent); ent->st_ino = ino; ent->st_dev = dev; ent_from_table = hash_insert (htab, ent); if (ent_from_table == NULL) { /* Insertion failed due to lack of memory. */ xalloc_die (); } if (ent_from_table == ent) { /* Insertion succeeded. */ return true; } /* That pair is already in the table, so ENT was not inserted. Free it. */ free (ent); return false; } /* Initialize the hash table. */ static void hash_init (void) { htab = hash_initialize (INITIAL_TABLE_SIZE, NULL, entry_hash, entry_compare, free); if (htab == NULL) xalloc_die (); } /* Print N_BYTES. Convert it to a readable value before printing. */ static void print_only_size (uintmax_t n_bytes) { char buf[LONGEST_HUMAN_READABLE + 1]; fputs (human_readable (n_bytes, buf, human_output_opts, 1, output_block_size), stdout); } /* Print N_BYTES followed by STRING on a line. Convert N_BYTES to a readable value before printing. */ static void print_size (uintmax_t n_bytes, const char *string) { print_only_size (n_bytes); printf ("\t%s%c", string, opt_nul_terminate_output ? '\0' : '\n'); fflush (stdout); } /* This function is called once for every file system object that fts encounters. fts does a depth-first traversal. This function knows that and accumulates per-directory totals based on changes in the depth of the current entry. It returns true on success. */ static bool process_file (FTS *fts, FTSENT *ent) { bool ok; uintmax_t size; uintmax_t size_to_print; size_t level; static size_t prev_level; static size_t n_alloc; /* The sum of the st_size values of all entries in the single directory at the corresponding level. Although this does include the st_size corresponding to each subdirectory, it does not include the size of any file in a subdirectory. */ static uintmax_t *sum_ent; /* The sum of the sizes of all entries in the hierarchy at or below the directory at the specified level. */ static uintmax_t *sum_subdir; bool print = true; const char *file = ent->fts_path; const struct stat *sb = ent->fts_statp; bool skip; /* If necessary, set FTS_SKIP before returning. */ skip = excluded_filename (exclude, ent->fts_path); if (skip) fts_set (fts, ent, FTS_SKIP); switch (ent->fts_info) { case FTS_NS: error (0, ent->fts_errno, _("cannot access %s"), quote (file)); return false; case FTS_ERR: /* if (S_ISDIR (ent->fts_statp->st_mode) && FIXME */ error (0, ent->fts_errno, _("%s"), quote (file)); return false; case FTS_DNR: /* Don't return just yet, since although the directory is not readable, we were able to stat it, so we do have a size. */ error (0, ent->fts_errno, _("cannot read directory %s"), quote (file)); ok = false; break; default: ok = true; break; } /* If this is the first (pre-order) encounter with a directory, or if it's the second encounter for a skipped directory, then return right away. */ if (ent->fts_info == FTS_D || skip) return ok; /* If the file is being excluded or if it has already been counted via a hard link, then don't let it contribute to the sums. */ if (skip || (!opt_count_all && ! S_ISDIR (sb->st_mode) && 1 < sb->st_nlink && ! hash_ins (sb->st_ino, sb->st_dev))) { /* Note that we must not simply return here. We still have to update prev_level and maybe propagate some sums up the hierarchy. */ size = 0; print = false; } else { size = (apparent_size ? sb->st_size : ST_NBLOCKS (*sb) * ST_NBLOCKSIZE); } level = ent->fts_level; size_to_print = size; if (n_alloc == 0) { n_alloc = level + 10; sum_ent = xcalloc (n_alloc, sizeof *sum_ent); sum_subdir = xcalloc (n_alloc, sizeof *sum_subdir); } else { if (level == prev_level) { /* This is usually the most common case. Do nothing. */ } else if (level > prev_level) { /* Descending the hierarchy. Clear the accumulators for *all* levels between prev_level and the current one. The depth may change dramatically, e.g., from 1 to 10. */ size_t i; if (n_alloc <= level) { sum_ent = xnrealloc (sum_ent, level, 2 * sizeof *sum_ent); sum_subdir = xnrealloc (sum_subdir, level, 2 * sizeof *sum_subdir); n_alloc = level * 2; } for (i = prev_level + 1; i <= level; i++) { sum_ent[i] = 0; sum_subdir[i] = 0; } } else /* level < prev_level */ { /* Ascending the hierarchy. Process a directory only after all entries in that directory have been processed. When the depth decreases, propagate sums from the children (prev_level) to the parent. Here, the current level is always one smaller than the previous one. */ assert (level == prev_level - 1); size_to_print += sum_ent[prev_level]; if (!opt_separate_dirs) size_to_print += sum_subdir[prev_level]; sum_subdir[level] += sum_ent[prev_level] + sum_subdir[prev_level]; } } prev_level = level; /* Let the size of a directory entry contribute to the total for the containing directory, unless --separate-dirs (-S) is specified. */ if ( ! (opt_separate_dirs && IS_DIR_TYPE (ent->fts_info))) sum_ent[level] += size; /* Even if this directory is unreadable or we can't chdir into it, do let its size contribute to the total, ... */ tot_size += size; /* ... but don't print out a total for it, since without the size(s) of any potential entries, it could be very misleading. */ if (ent->fts_info == FTS_DNR) return ok; /* If we're not counting an entry, e.g., because it's a hard link to a file we've already counted (and --count-links), then don't print a line for it. */ if (!print) return ok; if ((IS_DIR_TYPE (ent->fts_info) && level <= max_depth) || ((opt_all && level <= max_depth) || level == 0)) { print_only_size (size_to_print); fputc ('\t', stdout); fputs (file, stdout); fputc (opt_nul_terminate_output ? '\0' : '\n', stdout); fflush (stdout); } return ok; } /* Recursively print the sizes of the directories (and, if selected, files) named in FILES, the last entry of which is NULL. BIT_FLAGS controls how fts works. Return true if successful. */ static bool du_files (char **files, int bit_flags) { bool ok = true; if (*files) { FTS *fts = xfts_open (files, bit_flags, NULL); while (1) { FTSENT *ent; ent = fts_read (fts); if (ent == NULL) { if (errno != 0) { /* FIXME: try to give a better message */ error (0, errno, _("fts_read failed")); ok = false; } break; } FTS_CROSS_CHECK (fts); ok &= process_file (fts, ent); } /* Ignore failure, since the only way it can do so is in failing to return to the original directory, and since we're about to exit, that doesn't matter. */ fts_close (fts); } if (print_grand_total) print_size (tot_size, _("total")); return ok; } int main (int argc, char **argv) { int c; char *cwd_only[2]; bool max_depth_specified = false; char **files; bool ok = true; char *files_from = NULL; struct Tokens tok; /* Bit flags that control how fts works. */ int bit_flags = FTS_PHYSICAL | FTS_TIGHT_CYCLE_CHECK; /* If true, display only a total for each argument. */ bool opt_summarize_only = false; cwd_only[0] = "."; cwd_only[1] = NULL; initialize_main (&argc, &argv); program_name = argv[0]; setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); atexit (close_stdout); exclude = new_exclude (); human_output_opts = human_options (getenv ("DU_BLOCK_SIZE"), false, &output_block_size); while ((c = getopt_long (argc, argv, DEBUG_OPT "0abchHklmsxB:DLPSX:", long_options, NULL)) != -1) { switch (c) { #if DU_DEBUG case 'd': fts_debug = true; break; #endif case '0': opt_nul_terminate_output = true; break; case 'a': opt_all = true; break; case APPARENT_SIZE_OPTION: apparent_size = true; break; case 'b': apparent_size = true; human_output_opts = 0; output_block_size = 1; break; case 'c': print_grand_total = true; break; case 'h': human_output_opts = human_autoscale | human_SI | human_base_1024; output_block_size = 1; break; case 'H': error (0, 0, _("WARNING: use --si, not -H; the meaning of the -H\ option will soon\nchange to be the same as that of --dereference-args (-D)")); /* fall through */ case HUMAN_SI_OPTION: human_output_opts = human_autoscale | human_SI; output_block_size = 1; break; case 'k': human_output_opts = 0; output_block_size = 1024; break; case MAX_DEPTH_OPTION: /* --max-depth=N */ { unsigned long int tmp_ulong; if (xstrtoul (optarg, NULL, 0, &tmp_ulong, NULL) == LONGINT_OK && tmp_ulong <= SIZE_MAX) { max_depth_specified = true; max_depth = tmp_ulong; } else { error (0, 0, _("invalid maximum depth %s"), quote (optarg)); ok = false; } } break; case 'm': /* obsolescent: FIXME: remove in 2005. */ human_output_opts = 0; output_block_size = 1024 * 1024; break; case 'l': opt_count_all = true; break; case 's': opt_summarize_only = true; break; case 'x': bit_flags |= FTS_XDEV; break; case 'B': human_output_opts = human_options (optarg, true, &output_block_size); break; case 'D': /* This will eventually be 'H' (-H), too. */ bit_flags = FTS_COMFOLLOW; break; case 'L': /* --dereference */ bit_flags = FTS_LOGICAL; break; case 'P': /* --no-dereference */ bit_flags = FTS_PHYSICAL; break; case 'S': opt_separate_dirs = true; break; case 'X': if (add_exclude_file (add_exclude, exclude, optarg, EXCLUDE_WILDCARDS, '\n')) { error (0, errno, "%s", quotearg_colon (optarg)); ok = false; } break; case FILES0_FROM_OPTION: files_from = optarg; break; case EXCLUDE_OPTION: add_exclude (exclude, optarg, EXCLUDE_WILDCARDS); break; case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); default: ok = false; } } if (!ok) usage (EXIT_FAILURE); if (opt_all & opt_summarize_only) { error (0, 0, _("cannot both summarize and show all entries")); usage (EXIT_FAILURE); } if (opt_summarize_only && max_depth_specified && max_depth == 0) { error (0, 0, _("warning: summarizing is the same as using --max-depth=0")); } if (opt_summarize_only && max_depth_specified && max_depth != 0) { unsigned long int d = max_depth; error (0, 0, _("warning: summarizing conflicts with --max-depth=%lu"), d); usage (EXIT_FAILURE); } if (opt_summarize_only) max_depth = 0; if (files_from) { FILE *istream; /* When using --files0-from=F, you may not specify any files on the command-line. */ if (optind < argc) { error (0, 0, _("extra operand %s"), quote (argv[optind])); fprintf (stderr, "%s\n", _("File operands cannot be combined with --files0-from.")); usage (EXIT_FAILURE); } istream = (STREQ (files_from, "-") ? stdin : fopen (files_from, "r")); if (istream == NULL) error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (files_from)); readtokens0_init (&tok); if (! readtokens0 (istream, &tok) || fclose (istream) != 0) error (EXIT_FAILURE, 0, _("cannot read file names from %s"), quote (files_from)); files = tok.tok; } else { files = (optind < argc ? argv + optind : cwd_only); } /* Initialize the hash structure for inode numbers. */ hash_init (); /* Report and filter out any empty file names before invoking fts. This works around a glitch in fts, which fails immediately (without looking at the other file names) when given an empty file name. */ { size_t i = 0; size_t j; for (j = 0; ; j++) { if (i != j) files[i] = files[j]; if ( ! files[i]) break; if (files[i][0]) i++; else { if (files_from) { /* Using the standard `filename:line-number:' prefix here is not totally appropriate, since NUL is the separator, not NL, but it might be better than nothing. */ unsigned long int file_number = j + 1; error (0, 0, "%s:%lu: %s", quotearg_colon (files_from), file_number, _("invalid zero-length file name")); } else error (0, 0, "%s", _("invalid zero-length file name")); } } ok = (i == j); } ok &= du_files (files, bit_flags); /* This isn't really necessary, but it does ensure we exercise this function. */ if (files_from) readtokens0_free (&tok); hash_free (htab); exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); }