/* du -- summarize disk usage Copyright (C) 88, 89, 90, 91, 95, 96, 97, 1998 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* Differences from the Unix du: * Doesn't simply ignore the names of regular files given as arguments when -a is given. * Additional options: -l Count the size of all files, even if they have appeared already in another hard link. -x Do not cross file-system boundaries during the recursion. -c Write a grand total of all of the arguments after all arguments have been processed. This can be used to find out the disk usage of a directory, with some files excluded. -h Print sizes in human readable format (1k 234M 2G, etc). -H Similar, but use powers of 1000 not 1024. -k Print sizes in kilobytes. -m Print sizes in megabytes. -b Print sizes in bytes. -S Count the size of each directory separately, not including the sizes of subdirectories. -D Dereference only symbolic links given on the command line. -L Dereference all symbolic links. --exclude=PAT Exclude files that match PAT. -X FILE Exclude files that match patterns taken from FILE. By tege@sics.se, Torbjorn Granlund, and djm@ai.mit.edu, David MacKenzie. Variable blocks added by lm@sgi.com and eggert@twinsun.com. */ #include #if HAVE_INTTYPES_H # include #endif #include #include #include #include #include "exclude.h" #include "system.h" #include "save-cwd.h" #include "closeout.h" #include "error.h" #include "human.h" #include "xstrtol.h" #include "savedir.h" /* Initial number of entries in each hash table entry's table of inodes. */ #define INITIAL_HASH_MODULE 100 /* Initial number of entries in the inode hash table. */ #define INITIAL_ENTRY_TAB_SIZE 70 /* Initial size to allocate for `path'. */ #define INITIAL_PATH_SIZE 100 /* Hash structure for inode and device numbers. The separate entry structure makes it easier to rehash "in place". */ struct entry { ino_t ino; dev_t dev; struct entry *coll_link; }; /* Structure for a hash table for inode numbers. */ struct htab { unsigned modulus; /* Size of the `hash' pointer vector. */ struct entry *entry_tab; /* Pointer to dynamically growing vector. */ unsigned entry_tab_size; /* Size of current `entry_tab' allocation. */ unsigned first_free_entry; /* Index in `entry_tab'. */ struct entry *hash[1]; /* Vector of pointers in `entry_tab'. */ }; /* Structure for dynamically resizable strings. */ struct String { unsigned alloc; /* Size of allocation for the text. */ unsigned length; /* Length of the text currently. */ char *text; /* Pointer to the text. */ }; typedef struct String String; int stat (); int lstat (); static int hash_insert PARAMS ((ino_t ino, dev_t dev)); static int hash_insert2 PARAMS ((struct htab *_htab, ino_t ino, dev_t dev)); static uintmax_t count_entry PARAMS ((const char *ent, int top, dev_t last_dev, int depth)); static void du_files PARAMS ((char **files)); static void hash_init PARAMS ((unsigned int modulus, unsigned int entry_tab_size)); static void hash_reset PARAMS ((void)); static void str_concatc PARAMS ((String *s1, char *cstr)); static void str_copyc PARAMS ((String *s1, char *cstr)); static void str_init PARAMS ((String **s1, unsigned int size)); static void str_trunc PARAMS ((String *s1, unsigned int length)); /* Name under which this program was invoked. */ char *program_name; /* If nonzero, display counts for all files, not just directories. */ static int opt_all = 0; /* If nonzero, count each hard link of files with multiple links. */ static int opt_count_all = 0; /* If nonzero, do not cross file-system boundaries. */ static int opt_one_file_system = 0; /* If nonzero, print a grand total at the end. */ static int opt_combined_arguments = 0; /* If nonzero, do not add sizes of subdirectories. */ static int opt_separate_dirs = 0; /* If nonzero, dereference symlinks that are command line arguments. */ static int opt_dereference_arguments = 0; /* Show the total for each directory (and file if --all) that is at most MAX_DEPTH levels down from the root of the hierarchy. The root is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */ static int max_depth = INT_MAX; /* If positive, the units to use when printing sizes; if negative, the human-readable base. */ static int output_block_size; /* Accumulated path for file or directory being processed. */ static String *path; /* Pointer to hash structure, used by the hash routines. */ static struct htab *htab; /* Globally used stat buffer. */ static struct stat stat_buf; /* A pointer to either lstat or stat, depending on whether dereferencing of all symbolic links is to be done. */ static int (*xstat) (); /* The exit status to use if we don't get any fatal errors. */ static int exit_status; /* If nonzero, display usage information and exit. */ static int show_help; /* If nonzero, print the version on standard output and exit. */ static int show_version; /* File name patterns to exclude. */ static struct exclude *exclude; /* Grand total size of all args, in units of ST_NBLOCKSIZE-byte blocks. */ static uintmax_t tot_size = 0; static struct option const long_options[] = { {"all", no_argument, NULL, 'a'}, {"block-size", required_argument, 0, CHAR_MAX + 2}, {"bytes", no_argument, NULL, 'b'}, {"count-links", no_argument, NULL, 'l'}, {"dereference", no_argument, NULL, 'L'}, {"dereference-args", no_argument, NULL, 'D'}, {"exclude", required_argument, 0, CHAR_MAX + 1}, {"exclude-from", required_argument, 0, 'X'}, {"human-readable", no_argument, NULL, 'h'}, {"si", no_argument, 0, 'H'}, {"kilobytes", no_argument, NULL, 'k'}, {"max-depth", required_argument, NULL, CHAR_MAX + 3}, {"megabytes", no_argument, NULL, 'm'}, {"one-file-system", no_argument, NULL, 'x'}, {"separate-dirs", no_argument, NULL, 'S'}, {"summarize", no_argument, NULL, 's'}, {"total", no_argument, NULL, 'c'}, {"help", no_argument, &show_help, 1}, {"version", no_argument, &show_version, 1}, {NULL, 0, NULL, 0} }; static void usage (int status, char *reason) { if (reason != NULL) fprintf (status == 0 ? stdout : stderr, "%s: %s\n", program_name, reason); if (status != 0) fprintf (stderr, _("Try `%s --help' for more information.\n"), program_name); else { printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name); printf (_("\ Summarize disk usage of each FILE, recursively for directories.\n\ \n\ -a, --all write counts for all files, not just directories\n\ --block-size=SIZE use SIZE-byte blocks\n\ -b, --bytes print size in bytes\n\ -c, --total produce a grand total\n\ -D, --dereference-args dereference PATHs when symbolic link\n\ -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\ -H, --si likewise, but use powers of 1000 not 1024\n\ -k, --kilobytes like --block-size=1024\n\ -l, --count-links count sizes many times if hard linked\n\ -L, --dereference dereference all symbolic links\n\ -m, --megabytes like --block-size=1048576\n\ -S, --separate-dirs do not include size of subdirectories\n\ -s, --summarize display only a total for each argument\n\ -x, --one-file-system skip directories on different filesystems\n\ -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\ --exclude=PAT Exclude files that match PAT.\n\ --max-depth=N print the total for a directory (or file, with --all)\n\ only if it is N or fewer levels below the command\n\ line argument; --max-depth=0 is the same as\n\ --summarize\n\ --help display this help and exit\n\ --version output version information and exit\n\ ")); puts (_("\nReport bugs to .")); close_stdout (); } exit (status); } int main (int argc, char **argv) { int c; char *cwd_only[2]; int max_depth_specified = 0; /* If nonzero, display only a total for each argument. */ int opt_summarize_only = 0; cwd_only[0] = "."; cwd_only[1] = NULL; program_name = argv[0]; setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); exclude = new_exclude (); xstat = lstat; human_block_size (getenv ("DU_BLOCK_SIZE"), 0, &output_block_size); while ((c = getopt_long (argc, argv, "abchHklmsxDLSX:", long_options, NULL)) != -1) { long int tmp_long; switch (c) { case 0: /* Long option. */ break; case 'a': opt_all = 1; break; case 'b': output_block_size = 1; break; case 'c': opt_combined_arguments = 1; break; case 'h': output_block_size = -1024; break; case 'H': output_block_size = -1000; break; case 'k': output_block_size = 1024; break; case CHAR_MAX + 3: /* --max-depth=N */ if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) != LONGINT_OK || tmp_long < 0 || tmp_long > INT_MAX) error (1, 0, _("invalid maximum depth `%s'"), optarg); max_depth_specified = 1; max_depth = (int) tmp_long; break; case 'm': output_block_size = 1024 * 1024; break; case 'l': opt_count_all = 1; break; case 's': opt_summarize_only = 1; break; case 'x': opt_one_file_system = 1; break; case 'D': opt_dereference_arguments = 1; break; case 'L': xstat = stat; break; case 'S': opt_separate_dirs = 1; break; case 'X': if (add_exclude_file (exclude, optarg, '\n') != 0) error (1, errno, "%s", optarg); break; case CHAR_MAX + 1: add_exclude (exclude, optarg); break; case CHAR_MAX + 2: human_block_size (optarg, 1, &output_block_size); break; default: usage (1, (char *) 0); } } if (show_version) { printf ("du (%s) %s\n", GNU_PACKAGE, VERSION); close_stdout (); exit (0); } if (show_help) usage (0, NULL); if (opt_all && opt_summarize_only) usage (1, _("cannot both summarize and show all entries")); if (opt_summarize_only && max_depth_specified && max_depth == 0) { error (0, 0, _("warning: summarizing is the same as using --max-depth=0")); } if (opt_summarize_only && max_depth_specified && max_depth != 0) { error (0, 0, _("warning: summarizing conflicts with --max-depth=%d"), max_depth); usage (1, NULL); } if (opt_summarize_only) max_depth = 0; /* Initialize the hash structure for inode numbers. */ hash_init (INITIAL_HASH_MODULE, INITIAL_ENTRY_TAB_SIZE); str_init (&path, INITIAL_PATH_SIZE); du_files (optind == argc ? cwd_only : argv + optind); close_stdout (); exit (exit_status); } /* Print N_BLOCKS followed by STRING on a line. NBLOCKS is the number of ST_NBLOCKSIZE-byte blocks; convert it to OUTPUT_BLOCK_SIZE units before printing. If OUTPUT_BLOCK_SIZE is negative, use a human readable notation instead. */ static void print_size (uintmax_t n_blocks, const char *string) { char buf[LONGEST_HUMAN_READABLE + 1]; printf ("%s\t%s\n", human_readable (n_blocks, buf, ST_NBLOCKSIZE, output_block_size), string); fflush (stdout); } /* Recursively print the sizes of the directories (and, if selected, files) named in FILES, the last entry of which is NULL. */ static void du_files (char **files) { struct saved_cwd cwd; ino_t initial_ino; /* Initial directory's inode. */ dev_t initial_dev; /* Initial directory's device. */ int i; /* Index in FILES. */ if (save_cwd (&cwd)) exit (1); /* Remember the inode and device number of the current directory. */ if (stat (".", &stat_buf)) error (1, errno, _("current directory")); initial_ino = stat_buf.st_ino; initial_dev = stat_buf.st_dev; for (i = 0; files[i]; i++) { char *arg; int s; arg = files[i]; /* Delete final slash in the argument, unless the slash is alone. */ s = strlen (arg) - 1; if (s != 0) { if (arg[s] == '/') arg[s] = 0; str_copyc (path, arg); } else if (arg[0] == '/') str_trunc (path, 0); /* Null path for root directory. */ else str_copyc (path, arg); if (!opt_combined_arguments) hash_reset (); count_entry (arg, 1, 0, 0); /* chdir if `count_entry' has changed the working directory. */ if (stat (".", &stat_buf)) error (1, errno, "."); if (stat_buf.st_ino != initial_ino || stat_buf.st_dev != initial_dev) { if (restore_cwd (&cwd, _("starting directory"), NULL)) exit (1); } } if (opt_combined_arguments) print_size (tot_size, _("total")); free_cwd (&cwd); } /* Print (if appropriate) the size (in units determined by `output_block_size') of file or directory ENT. Return the size of ENT in units of 512-byte blocks. TOP is one for external calls, zero for recursive calls. LAST_DEV is the device that the parent directory of ENT is on. DEPTH is the number of levels (in hierarchy) down from a command line argument. Don't print if DEPTH > max_depth. */ static uintmax_t count_entry (const char *ent, int top, dev_t last_dev, int depth) { uintmax_t size; if (((top && opt_dereference_arguments) ? stat (ent, &stat_buf) : (*xstat) (ent, &stat_buf)) < 0) { error (0, errno, "%s", path->text); exit_status = 1; return 0; } if (!opt_count_all && stat_buf.st_nlink > 1 && hash_insert (stat_buf.st_ino, stat_buf.st_dev)) return 0; /* Have counted this already. */ size = ST_NBLOCKS (stat_buf); tot_size += size; if (S_ISDIR (stat_buf.st_mode)) { unsigned pathlen; dev_t dir_dev; char *name_space; char *namep; struct saved_cwd cwd; int through_symlink; struct stat e_buf; dir_dev = stat_buf.st_dev; if (opt_one_file_system && !top && last_dev != dir_dev) return 0; /* Don't enter a new file system. */ #ifndef S_ISLNK # define S_ISLNK(s) 0 #endif /* If we're dereferencing symlinks and we're about to chdir through a symlink, remember the current directory so we can return to it later. In other cases, chdir ("..") works fine. */ through_symlink = (xstat == stat && lstat (ent, &e_buf) == 0 && S_ISLNK (e_buf.st_mode)); if (through_symlink && save_cwd (&cwd)) exit (1); if (chdir (ent) < 0) { error (0, errno, _("cannot change to directory %s"), path->text); exit_status = 1; return 0; } errno = 0; name_space = savedir (".", (unsigned int) stat_buf.st_size); if (name_space == NULL) { if (errno) { error (0, errno, "%s", path->text); if (through_symlink) { if (restore_cwd (&cwd, "..", path->text)) exit (1); free_cwd (&cwd); } else if (chdir ("..") < 0) error (1, errno, _("cannot change to `..' from directory %s"), path->text); exit_status = 1; return 0; } else error (1, 0, _("virtual memory exhausted")); } /* Remember the current path. */ str_concatc (path, "/"); pathlen = path->length; for (namep = name_space; *namep; namep += strlen (namep) + 1) { if (!excluded_filename (exclude, namep)) { str_concatc (path, namep); size += count_entry (namep, 0, dir_dev, depth + 1); str_trunc (path, pathlen); } } free (name_space); if (through_symlink) { restore_cwd (&cwd, "..", path->text); free_cwd (&cwd); } else if (chdir ("..") < 0) { error (1, errno, _("cannot change to `..' from directory %s"), path->text); } str_trunc (path, pathlen - 1); /* Remove the "/" we added. */ if (depth <= max_depth || top) print_size (size, path->length > 0 ? path->text : "/"); return opt_separate_dirs ? 0 : size; } else if ((opt_all && depth <= max_depth) || top) { /* FIXME: make this an option. */ int print_only_dir_size = 0; if (!print_only_dir_size) print_size (size, path->length > 0 ? path->text : "/"); } return size; } /* Allocate space for the hash structures, and set the global variable `htab' to point to it. The initial hash module is specified in MODULUS, and the number of entries are specified in ENTRY_TAB_SIZE. (The hash structure will be rebuilt when ENTRY_TAB_SIZE entries have been inserted, and MODULUS and ENTRY_TAB_SIZE in the global `htab' will be doubled.) */ static void hash_init (unsigned int modulus, unsigned int entry_tab_size) { struct htab *htab_r; htab_r = (struct htab *) xmalloc (sizeof (struct htab) + sizeof (struct entry *) * modulus); htab_r->entry_tab = (struct entry *) xmalloc (sizeof (struct entry) * entry_tab_size); htab_r->modulus = modulus; htab_r->entry_tab_size = entry_tab_size; htab = htab_r; hash_reset (); } /* Reset the hash structure in the global variable `htab' to contain no entries. */ static void hash_reset (void) { int i; struct entry **p; htab->first_free_entry = 0; p = htab->hash; for (i = htab->modulus; i > 0; i--) *p++ = NULL; } /* Insert an item (inode INO and device DEV) in the hash structure in the global variable `htab', if an entry with the same data was not found already. Return zero if the item was inserted and nonzero if it wasn't. */ static int hash_insert (ino_t ino, dev_t dev) { struct htab *htab_r = htab; /* Initially a copy of the global `htab'. */ if (htab_r->first_free_entry >= htab_r->entry_tab_size) { int i; struct entry *ep; unsigned modulus; unsigned entry_tab_size; /* Increase the number of hash entries, and re-hash the data. The method of shrimping and increasing is made to compactify the heap. If twice as much data would be allocated straightforwardly, we would never re-use a byte of memory. */ /* Let `htab' shrimp. Keep only the header, not the pointer vector. */ htab_r = (struct htab *) xrealloc ((char *) htab_r, sizeof (struct htab)); modulus = 2 * htab_r->modulus; entry_tab_size = 2 * htab_r->entry_tab_size; /* Increase the number of possible entries. */ htab_r->entry_tab = (struct entry *) xrealloc ((char *) htab_r->entry_tab, sizeof (struct entry) * entry_tab_size); /* Increase the size of htab again. */ htab_r = (struct htab *) xrealloc ((char *) htab_r, sizeof (struct htab) + sizeof (struct entry *) * modulus); htab_r->modulus = modulus; htab_r->entry_tab_size = entry_tab_size; htab = htab_r; i = htab_r->first_free_entry; /* Make the increased hash table empty. The entries are still available in htab->entry_tab. */ hash_reset (); /* Go through the entries and install them in the pointer vector htab->hash. The items are actually inserted in htab->entry_tab at the position where they already are. The htab->coll_link need however be updated. Could be made a little more efficient. */ for (ep = htab_r->entry_tab; i > 0; i--) { hash_insert2 (htab_r, ep->ino, ep->dev); ep++; } } return hash_insert2 (htab_r, ino, dev); } /* Insert INO and DEV in the hash structure HTAB, if not already present. Return zero if inserted and nonzero if it already existed. */ static int hash_insert2 (struct htab *ht, ino_t ino, dev_t dev) { struct entry **hp, *ep2, *ep; hp = &ht->hash[ino % ht->modulus]; ep2 = *hp; /* Collision? */ if (ep2 != NULL) { ep = ep2; /* Search for an entry with the same data. */ do { if (ep->ino == ino && ep->dev == dev) return 1; /* Found an entry with the same data. */ ep = ep->coll_link; } while (ep != NULL); /* Did not find it. */ } ep = *hp = &ht->entry_tab[ht->first_free_entry++]; ep->ino = ino; ep->dev = dev; ep->coll_link = ep2; /* `ep2' is NULL if no collision. */ return 0; } /* Initialize string S1 to hold SIZE characters. */ static void str_init (String **s1, unsigned int size) { String *s; s = (String *) xmalloc (sizeof (struct String)); s->text = xmalloc (size + 1); s->alloc = size; *s1 = s; } static void ensure_space (String *s, unsigned int size) { if (s->alloc < size) { s->text = xrealloc (s->text, size + 1); s->alloc = size; } } /* Assign the null-terminated C-string CSTR to S1. */ static void str_copyc (String *s1, char *cstr) { unsigned l = strlen (cstr); ensure_space (s1, l); strcpy (s1->text, cstr); s1->length = l; } static void str_concatc (String *s1, char *cstr) { unsigned l1 = s1->length; unsigned l2 = strlen (cstr); unsigned l = l1 + l2; ensure_space (s1, l); strcpy (s1->text + l1, cstr); s1->length = l; } /* Truncate the string S1 to have length LENGTH. */ static void str_trunc (String *s1, unsigned int length) { if (s1->length > length) { s1->text[length] = 0; s1->length = length; } }