diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2010-07-06 14:53:14 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2010-07-06 14:58:48 -0700 |
commit | fb1a26c3f64669a1b61740252c5db5fd5413c7e5 (patch) | |
tree | 994631f1859b0adbe74bff895919f1636f798e81 | |
parent | d5427265e30522cfda098bb82ad3d4bff0a0d2bd (diff) | |
download | coreutils-fb1a26c3f64669a1b61740252c5db5fd5413c7e5.tar.xz |
du: Hash with a mechanism that's simpler and takes less memory.
* gl/lib/dev-map.c, gl/lib/dev-map.h, gl/modules/dev-map: Remove.
* gl/lib/ino-map.c, gl/lib/ino-map.h, gl/modules/ino-map: New files.
* gl/modules/dev-map-tests, gl/tests/test-dev-map.c: Remove.
* gl/modules/ino-map-tests, gl/tests/test-ino-map.c: New files.
* gl/lib/di-set.h (struct di_set): Renamed from struct di_set_state,
and now private. All uses changed.
(_ATTRIBUTE_NONNULL_): Don't assume C99.
(di_set_alloc): Renamed from di_set_init, with no size arg.
Now allocates the object rather than initializing it.
For now, this no longer takes an initial size; we can put this
back later if it is needed.
* gl/lib/di-set.c: Include hash.h, ino-map.h, and limits.h instead of
stdio.h, assert.h, stdint.h, sys/types.h (di-set.h includes that
now), sys/stat.h, and verify.h.
(N_DEV_BITS_4, N_INO_BITS_4, N_DEV_BITS_8, N_INO_BITS_8): Remove.
(struct dev_ino_4, struct dev_ino_8, struct dev_ino_full): Remove.
(enum di_mode): Remove.
(hashint): New typedef.
(HASHINT_MAX, LARGE_INO_MIN): New macros.
(struct di_ent): Now maps a dev_t to a inode set, instead of
containing a union.
(struct dev_map_ent): Remove.
(struct di_set): New type.
(is_encoded_ptr, decode_ptr, di_ent_create): Remove.
(di_ent_hash, di_ent_compare, di_ent_free, di_set_alloc, di_set_free):
(di_set_insert): Adjust to new representation.
(di_ino_hash, map_device, map_inode_number): New functions.
* gl/modules/di-set (Depends-on): Replace dev-map with ino-map.
Remove 'verify'.
* gl/tests/test-di-set.c: Adjust to the above changes to API.
* src/du.c (INITIAL_DI_SET_SIZE): Remove.
(hash_ins, main): Adjust to new di-set API.
-rw-r--r-- | gl/lib/dev-map.c | 110 | ||||
-rw-r--r-- | gl/lib/dev-map.h | 23 | ||||
-rw-r--r-- | gl/lib/di-set.c | 353 | ||||
-rw-r--r-- | gl/lib/di-set.h | 28 | ||||
-rw-r--r-- | gl/lib/ino-map.c | 162 | ||||
-rw-r--r-- | gl/lib/ino-map.h | 14 | ||||
-rw-r--r-- | gl/modules/dev-map | 23 | ||||
-rw-r--r-- | gl/modules/dev-map-tests | 10 | ||||
-rw-r--r-- | gl/modules/di-set | 3 | ||||
-rw-r--r-- | gl/modules/ino-map | 24 | ||||
-rw-r--r-- | gl/modules/ino-map-tests | 10 | ||||
-rw-r--r-- | gl/tests/test-di-set.c | 31 | ||||
-rw-r--r-- | gl/tests/test-ino-map.c (renamed from gl/tests/test-dev-map.c) | 34 | ||||
-rw-r--r-- | src/du.c | 21 |
14 files changed, 402 insertions, 444 deletions
diff --git a/gl/lib/dev-map.c b/gl/lib/dev-map.c deleted file mode 100644 index 363f5af72..000000000 --- a/gl/lib/dev-map.c +++ /dev/null @@ -1,110 +0,0 @@ -/* Map a dev_t device number to a small integer. - - Copyright 2009-2010 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -/* written by Jim Meyering */ - -#include <config.h> -#include "dev-map.h" - -#include <stdio.h> -#include <assert.h> -#include <stdint.h> -#include <stdlib.h> - -struct dev_map_ent -{ - dev_t dev; - uint32_t mapped_dev; -}; - -enum { INITIAL_DEV_MAP_TABLE_SIZE = 31 }; - -static size_t -dev_hash (void const *x, size_t table_size) -{ - struct dev_map_ent const *p = x; - return (uintmax_t) p->dev % table_size; -} - -/* Compare two dev_map_ent structs on "dev". - Return true if they are the same. */ -static bool -dev_compare (void const *x, void const *y) -{ - struct dev_map_ent const *a = x; - struct dev_map_ent const *b = y; - return a->dev == b->dev ? true : false; -} - -/* Initialize state. */ -int -dev_map_init (struct dev_map *dev_map) -{ - assert (dev_map != NULL); - dev_map->n_device = 0; - dev_map->dev_map = hash_initialize (INITIAL_DEV_MAP_TABLE_SIZE, NULL, - dev_hash, dev_compare, free); - return dev_map->dev_map ? 0 : -1; -} - -void -dev_map_free (struct dev_map *dev_map) -{ - hash_free (dev_map->dev_map); -} - -/* Insert device number, DEV, into the map, DEV_MAP if it's not there already, - and return the nonnegative, mapped and usually smaller, number. - If DEV is already in DEV_MAP, return the existing mapped value. - Upon memory allocation failure, return -1. */ -int -dev_map_insert (struct dev_map *dev_map, dev_t dev) -{ - /* Attempt to insert <DEV,n_device> in the map. - Possible outcomes: - - DEV was already there, with a different necessarily smaller value - - DEV was not there, (thus was just inserted) - - insert failed: out of memory - Return -1 on out of memory. - */ - - struct dev_map_ent *ent_from_table; - struct dev_map_ent *ent = malloc (sizeof *ent); - if (!ent) - return -1; - - ent->dev = dev; - ent->mapped_dev = dev_map->n_device; - - ent_from_table = hash_insert (dev_map->dev_map, ent); - if (ent_from_table == NULL) - { - /* Insertion failed due to lack of memory. */ - return -1; - } - - if (ent_from_table == ent) - { - /* Insertion succeeded: new device. */ - return dev_map->n_device++; - } - - /* That DEV value is already in the table, so ENT was not inserted. - Free it and return the already-associated value. */ - free (ent); - return ent_from_table->mapped_dev; -} diff --git a/gl/lib/dev-map.h b/gl/lib/dev-map.h deleted file mode 100644 index f093d908f..000000000 --- a/gl/lib/dev-map.h +++ /dev/null @@ -1,23 +0,0 @@ -#include <stddef.h> -#include <sys/types.h> -#include <sys/stat.h> -#include "hash.h" - -struct dev_map -{ - /* KEY,VAL pair, where KEY is the raw st_dev value - and VAL is the small number that maps to. */ - struct hash_table *dev_map; - size_t n_device; -}; - -#undef _ATTRIBUTE_NONNULL_ -#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__ -# define _ATTRIBUTE_NONNULL_(m) __attribute__ ((__nonnull__ (m))) -#else -# define _ATTRIBUTE_NONNULL_(m) -#endif - -int dev_map_init (struct dev_map *) _ATTRIBUTE_NONNULL_ (1); -void dev_map_free (struct dev_map *) _ATTRIBUTE_NONNULL_ (1); -int dev_map_insert (struct dev_map *, dev_t) _ATTRIBUTE_NONNULL_ (1); diff --git a/gl/lib/di-set.c b/gl/lib/di-set.c index 3c4717b73..e0e2b24dd 100644 --- a/gl/lib/di-set.c +++ b/gl/lib/di-set.c @@ -15,262 +15,221 @@ You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ -/* written by Jim Meyering */ +/* written by Paul Eggert and Jim Meyering */ #include <config.h> #include "di-set.h" -#include <stdio.h> -#include <assert.h> -#include <stdint.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> - -#include "verify.h" - -/* Set operations for device-inode pairs stored in a space-efficient manner. - A naive mapping uses 16 bytes to save a single st_dev, st_ino pair. - However, in many applications, the vast majority of actual device,inode - number pairs can be efficiently compressed to fit in 8 or even 4 bytes, - by using a separate table to map a relatively small number of devices - to small integers. */ - -#define N_DEV_BITS_4 5 -#define N_INO_BITS_4 (32 - N_DEV_BITS_4 - 2 - 1) - -#define N_DEV_BITS_8 8 -#define N_INO_BITS_8 (64 - N_DEV_BITS_8 - 2 - 1) - -/* Note how the last bit is always set. - This is required, in order to be able to distinguish - an encoded di_ent value from a malloc-returned pointer, - which must be 4-byte-aligned or better. */ -struct dev_ino_4 -{ - uint32_t mode:2; /* must be first */ - uint32_t short_ino:N_INO_BITS_4; - uint32_t mapped_dev:N_DEV_BITS_4; - uint32_t always_set:1; -}; -verify (N_DEV_BITS_4 <= 8 * sizeof (int)); -verify (sizeof (struct dev_ino_4) == 4); - -struct dev_ino_8 -{ - uint32_t mode:2; /* must be first */ - uint64_t short_ino:N_INO_BITS_8; - uint32_t mapped_dev:N_DEV_BITS_8; - uint32_t always_set:1; -}; -verify (sizeof (struct dev_ino_8) == 8); - -struct dev_ino_full -{ - uint32_t mode:2; /* must be first */ - dev_t dev; - ino_t ino; -}; +#include "hash.h" +#include "ino-map.h" -enum di_mode -{ - DI_MODE_4 = 1, - DI_MODE_8 = 2, - DI_MODE_FULL = 3 -}; +#include <limits.h> +#include <stdlib.h> -/* - di_mode raw_inode mapped dev always_set - \____________|_______________\_____/ - 4-byte | 2| 25 | 5 |1| mapped_dev - `----------------------------------------------------|-----. - 8-byte | 2| 53 | 8 |1| - `----------------------------------------------------------' -*/ +/* The hash package hashes "void *", but this package wants to hash + integers. Use integers that are as large as possible, but no + larger than void *, so that they can be cast to void * and back + without losing information. */ +typedef size_t hashint; +#define HASHINT_MAX ((hashint) -1) + +/* Integers represent inode numbers. Integers in the range + 1..(LARGE_INO_MIN-1) represent inode numbers directly. (The hash + package does not work with null pointers, so inode 0 cannot be used + as a key.) To find the representations of other inode numbers, map + them through INO_MAP. */ +#define LARGE_INO_MIN (HASHINT_MAX / 2) + +/* Set operations for device-inode pairs stored in a space-efficient + manner. Use a two-level hash table. The top level hashes by + device number, as there are typically a small number of devices. + The lower level hashes by mapped inode numbers. In the typical + case where the inode number is positive and small, the inode number + maps to itself, masquerading as a void * value; otherwise, its + value is the result of hashing the inode value through INO_MAP. */ + +/* A pair that maps a device number to a set of inode numbers. */ struct di_ent { - union - { - struct dev_ino_4 di4; - struct dev_ino_8 di8; - struct dev_ino_full full; - uint32_t u32; - uint64_t u64; - void *ptr; - } u; -}; - -struct dev_map_ent -{ dev_t dev; - uint32_t mapped_dev; + struct hash_table *ino_set; }; -static inline bool -is_encoded_ptr (struct di_ent const *v) +/* A two-level hash table that manages and indexes these pairs. */ +struct di_set { - return (size_t) v % 4; -} + /* Map device numbers to sets of inode number representatives. */ + struct hash_table *dev_map; -static struct di_ent -decode_ptr (struct di_ent const *v) -{ - if (!is_encoded_ptr (v)) - return *v; + /* If nonnull, map large inode numbers to their small + representatives. If null, there are no large inode numbers in + this set. */ + struct ino_map *ino_map; - struct di_ent di; - di.u.ptr = (void *) v; - return di; -} + /* Cache of the most recently allocated and otherwise-unused storage + for probing this table. */ + struct di_ent *probe; +}; +/* Hash a device-inode-set entry. */ static size_t di_ent_hash (void const *x, size_t table_size) { - struct di_ent e = decode_ptr (x); - return (e.u.di4.mode == DI_MODE_4 - ? e.u.u32 - : (e.u.di4.mode == DI_MODE_8 - ? e.u.u64 - : e.u.full.ino)) % table_size; + struct di_ent const *p = x; + dev_t dev = p->dev; + + /* Exclusive-OR the words of DEV into H. This avoids loss of info, + without using a wider % that could be quite slow. */ + size_t h = dev; + int i; + for (i = 1; i < sizeof dev / sizeof h + (sizeof dev % sizeof h != 0); i++) + h ^= dev >>= CHAR_BIT * sizeof h; + + return h % table_size; } -/* Compare two di_ent structs. - Return true if they are the same. */ +/* Return true if two device-inode-set entries are the same. */ static bool di_ent_compare (void const *x, void const *y) { - struct di_ent a = decode_ptr (x); - struct di_ent b = decode_ptr (y); - if (a.u.di4.mode != b.u.di4.mode) - return false; - - if (a.u.di4.mode == DI_MODE_4) - return (a.u.di4.short_ino == b.u.di4.short_ino - && a.u.di4.mapped_dev == b.u.di4.mapped_dev); - - if (a.u.di8.mode == DI_MODE_8) - return (a.u.di8.short_ino == b.u.di8.short_ino - && a.u.di8.mapped_dev == b.u.di8.mapped_dev); - - return (a.u.full.ino == b.u.full.ino - && a.u.full.dev == b.u.full.dev); + struct di_ent const *a = x; + struct di_ent const *b = y; + return a->dev == b->dev; } +/* Free a device-inode-set entry. */ static void di_ent_free (void *v) { - if ( ! is_encoded_ptr (v)) - free (v); + struct di_ent *a = v; + hash_free (a->ino_set); + free (a); } -int -di_set_init (struct di_set_state *dis, size_t initial_size) +/* Create a set of device-inode pairs. Return NULL on allocation failure. */ +struct di_set * +di_set_alloc (void) { - if (dev_map_init (&dis->dev_map) < 0) - return -1; + struct di_set *dis = malloc (sizeof *dis); + if (dis) + { + enum { INITIAL_DEV_MAP_SIZE = 11 }; + dis->dev_map = hash_initialize (INITIAL_DEV_MAP_SIZE, NULL, + di_ent_hash, di_ent_compare, + di_ent_free); + if (! dis->dev_map) + { + free (dis); + return NULL; + } + dis->ino_map = NULL; + dis->probe = NULL; + } - dis->di_set = hash_initialize (initial_size, NULL, - di_ent_hash, di_ent_compare, di_ent_free); - return dis->di_set ? 0 : -1; + return dis; } +/* Free a set of device-inode pairs. */ void -di_set_free (struct di_set_state *dis) +di_set_free (struct di_set *dis) { - dev_map_free (&dis->dev_map); - hash_free (dis->di_set); + hash_free (dis->dev_map); + free (dis->ino_map); + free (dis->probe); + free (dis); } -/* Given a device-inode set, DIS, create an entry for the DEV,INO - pair, and store it in *V. If possible, encode DEV,INO into the pointer - itself, but if not, allocate space for a full "struct di_ent" and set *V - to that pointer. Upon memory allocation failure, return -1. - Otherwise return 0. */ -int -di_ent_create (struct di_set_state *dis, - dev_t dev, ino_t ino, - struct di_ent **v) +/* Hash an encoded inode number I. */ +static size_t +di_ino_hash (void const *i, size_t table_size) { - static int prev_m = -1; - static dev_t prev_dev = -1; - struct di_ent di_ent; - int mapped_dev; + return (hashint) i % table_size; +} - if (dev == prev_dev) - mapped_dev = prev_m; +/* Using the DIS table, map a device to a hash table that represents + a set of inode numbers. Return NULL on error. */ +static struct hash_table * +map_device (struct di_set *dis, dev_t dev) +{ + /* Find space for the probe, reusing the cache if available. */ + struct di_ent *ent; + struct di_ent *probe = dis->probe; + if (probe) + { + /* If repeating a recent query, return the cached result. */ + if (probe->dev == dev) + return probe->ino_set; + } else { - mapped_dev = dev_map_insert (&dis->dev_map, dev); - if (mapped_dev < 0) - return -1; - prev_dev = dev; - prev_m = mapped_dev; + dis->probe = probe = malloc (sizeof *probe); + if (! probe) + return NULL; } - if (mapped_dev < (1 << N_DEV_BITS_4) - && ino < (1 << N_INO_BITS_4)) + /* Probe for the device. */ + probe->dev = dev; + ent = hash_insert (dis->dev_map, probe); + if (! ent) + return NULL; + + if (ent != probe) { -#if lint - /* When this struct is smaller than a pointer, initialize - the pointer so tools like valgrind don't complain about - the uninitialized bytes. */ - if (sizeof di_ent.u.di4 < sizeof di_ent.u.ptr) - di_ent.u.ptr = NULL; -#endif - di_ent.u.di4.mode = DI_MODE_4; - di_ent.u.di4.short_ino = ino; - di_ent.u.di4.mapped_dev = mapped_dev; - di_ent.u.di4.always_set = 1; - *v = di_ent.u.ptr; + /* Use the existing entry. */ + probe->ino_set = ent->ino_set; } - else if (mapped_dev < (1 << N_DEV_BITS_8) - && ino < ((uint64_t) 1 << N_INO_BITS_8)) + else { - di_ent.u.di8.mode = DI_MODE_8; - di_ent.u.di8.short_ino = ino; - di_ent.u.di8.mapped_dev = mapped_dev; - di_ent.u.di8.always_set = 1; - *v = di_ent.u.ptr; + enum { INITIAL_INO_SET_SIZE = 1021 }; + + /* Prepare to allocate a new probe next time; this one is in use. */ + dis->probe = NULL; + + /* DEV is new; allocate an inode set for it. */ + probe->ino_set = hash_initialize (INITIAL_INO_SET_SIZE, NULL, + di_ino_hash, NULL, NULL); } - else + + return probe->ino_set; +} + +/* Using the DIS table, map an inode number to a mapped value. + Return INO_MAP_INSERT_FAILURE on error. */ +static hashint +map_inode_number (struct di_set *dis, ino_t ino) +{ + if (0 < ino && ino < LARGE_INO_MIN) + return ino; + + if (! dis->ino_map) { - /* Handle the case in which INO is too large or in which (far less - likely) we encounter hard-linked files on 2^N_DEV_BITS_8 - different devices. */ - struct di_ent *p = malloc (sizeof *p); - if (!p) - return -1; - assert ((size_t) p % 4 == 0); - p->u.full.mode = DI_MODE_FULL; - p->u.full.ino = ino; - p->u.full.dev = dev; - *v = p; + dis->ino_map = ino_map_alloc (LARGE_INO_MIN); + if (! dis->ino_map) + return INO_MAP_INSERT_FAILURE; } - return 0; + return ino_map_insert (dis->ino_map, ino); } -/* Attempt to insert the DEV,INO pair into the set, DIS. - If it matches a pair already in DIS, don't modify DIS and return 0. +/* Attempt to insert the DEV,INO pair into the set DIS. + If it matches a pair already in DIS, keep that pair and return 0. Otherwise, if insertion is successful, return 1. Upon any failure return -1. */ int -di_set_insert (struct di_set_state *dis, dev_t dev, ino_t ino) +di_set_insert (struct di_set *dis, dev_t dev, ino_t ino) { - struct di_ent *v; - if (di_ent_create (dis, dev, ino, &v) < 0) - return -1; + hashint i; - int err = hash_insert0 (dis->di_set, v, NULL); - if (err == -1) /* Insertion failed due to lack of memory. */ + /* Map the device number to a set of inodes. */ + struct hash_table *ino_set = map_device (dis, dev); + if (! ino_set) return -1; - if (err == 1) /* Insertion succeeded. */ - return 1; - - /* That pair is already in the table, so ENT was not inserted. Free it. */ - if (! is_encoded_ptr (v)) - free (v); + /* Map the inode number to a small representative I. */ + i = map_inode_number (dis, ino); + if (i == INO_MAP_INSERT_FAILURE) + return -1; - return 0; + /* Put I into the inode set. */ + return hash_insert0 (ino_set, (void *) i, NULL); } diff --git a/gl/lib/di-set.h b/gl/lib/di-set.h index f90d0ddc7..d30a31e92 100644 --- a/gl/lib/di-set.h +++ b/gl/lib/di-set.h @@ -1,28 +1,12 @@ -#include "dev-map.h" - -struct di_set_state -{ - /* A map to help compact device numbers. */ - struct dev_map dev_map; - - /* A set of compact dev,inode pairs. */ - struct hash_table *di_set; -}; +#include <sys/types.h> #undef _ATTRIBUTE_NONNULL_ #if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__ -# define _ATTRIBUTE_NONNULL_(m,...) __attribute__ ((__nonnull__ (m))) +# define _ATTRIBUTE_NONNULL_(m) __attribute__ ((__nonnull__ (m))) #else -# define _ATTRIBUTE_NONNULL_(m,...) +# define _ATTRIBUTE_NONNULL_(m) #endif -int di_set_init (struct di_set_state *, size_t) _ATTRIBUTE_NONNULL_ (1); -void di_set_free (struct di_set_state *) _ATTRIBUTE_NONNULL_ (1); -int di_set_insert (struct di_set_state *, dev_t, ino_t) - _ATTRIBUTE_NONNULL_ (1); - -struct di_ent; -int di_ent_create (struct di_set_state *di_set_state, - dev_t dev, ino_t ino, - struct di_ent **di_ent) - _ATTRIBUTE_NONNULL_ (1,4); +struct di_set *di_set_alloc (void); +int di_set_insert (struct di_set *, dev_t, ino_t) _ATTRIBUTE_NONNULL_ (1); +void di_set_free (struct di_set *) _ATTRIBUTE_NONNULL_ (1); diff --git a/gl/lib/ino-map.c b/gl/lib/ino-map.c new file mode 100644 index 000000000..c86898365 --- /dev/null +++ b/gl/lib/ino-map.c @@ -0,0 +1,162 @@ +/* Map an ino_t inode number to a small integer. + + Copyright 2009, 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* written by Paul Eggert and Jim Meyering */ + +#include <config.h> +#include "ino-map.h" + +#include "hash.h" +#include "verify.h" + +#include <limits.h> +#include <stdlib.h> + +/* A pair that maps an inode number to a mapped inode number; the + latter is a small unique ID for the former. */ +struct ino_map_ent +{ + ino_t ino; + size_t mapped_ino; +}; + +/* A table that manages and indexes these pairs. */ +struct ino_map +{ + /* A table of KEY,VAL pairs, where KEY is the raw ino_t value and + VAL is the small number that it maps to. */ + struct hash_table *map; + + /* The next mapped inode number to hand out. */ + size_t next_mapped_ino; + + /* Cache of the most recently allocated and otherwise-unused storage + for probing the table. */ + struct ino_map_ent *probe; +}; + +/* Hash an inode map entry. */ +static size_t +ino_hash (void const *x, size_t table_size) +{ + struct ino_map_ent const *p = x; + ino_t ino = p->ino; + + /* Exclusive-OR the words of INO into H. This avoids loss of info, + without using a wider % that could be quite slow. */ + size_t h = ino; + int i; + for (i = 1; i < sizeof ino / sizeof h + (sizeof ino % sizeof h != 0); i++) + h ^= ino >>= CHAR_BIT * sizeof h; + + return h % table_size; +} + +/* Return true if two inode map entries are the same. */ +static bool +ino_compare (void const *x, void const *y) +{ + struct ino_map_ent const *a = x; + struct ino_map_ent const *b = y; + return a->ino == b->ino; +} + +/* Allocate an inode map that will hand out integers starting with + NEXT_MAPPED_INO. Return NULL if memory is exhausted. */ +struct ino_map * +ino_map_alloc (size_t next_mapped_ino) +{ + struct ino_map *im = malloc (sizeof *im); + + if (im) + { + enum { INITIAL_INO_MAP_TABLE_SIZE = 1021 }; + im->map = hash_initialize (INITIAL_INO_MAP_TABLE_SIZE, NULL, + ino_hash, ino_compare, free); + if (! im->map) + { + free (im); + return NULL; + } + im->next_mapped_ino = next_mapped_ino; + im->probe = NULL; + } + + return im; +} + +/* Free an inode map. */ +void +ino_map_free (struct ino_map *map) +{ + hash_free (map->map); + free (map->probe); + free (map); +} + + +/* Insert into MAP the inode number INO if it's not there already, + and return its nonnegative mapped inode number. + If INO is already in MAP, return the existing mapped inode number. + Return INO_MAP_INSERT_FAILURE on memory or counter exhaustion. */ +size_t +ino_map_insert (struct ino_map *im, ino_t ino) +{ + struct ino_map_ent *ent; + + /* Find space for the probe, reusing the cache if available. */ + struct ino_map_ent *probe = im->probe; + if (probe) + { + /* If repeating a recent query, return the cached result. */ + if (probe->ino == ino) + return probe->mapped_ino; + } + else + { + im->probe = probe = malloc (sizeof *probe); + if (! probe) + return INO_MAP_INSERT_FAILURE; + } + + probe->ino = ino; + ent = hash_insert (im->map, probe); + if (! ent) + return INO_MAP_INSERT_FAILURE; + + if (ent != probe) + { + /* Use the existing entry. */ + probe->mapped_ino = ent->mapped_ino; + } + else + { + /* If adding 1 to map->next_mapped_ino would cause it to + overflow to zero, then it must equal INO_MAP_INSERT_FAILURE, + which is the value that should be returned in that case. + Verify that this works. */ + verify (INO_MAP_INSERT_FAILURE + 1 == 0); + + /* Prepare to allocate a new probe next time; this one is in use. */ + im->probe = NULL; + + /* INO is new; allocate a mapped inode number for it. */ + probe->mapped_ino = im->next_mapped_ino++; + } + + return probe->mapped_ino; +} diff --git a/gl/lib/ino-map.h b/gl/lib/ino-map.h new file mode 100644 index 000000000..661b78a55 --- /dev/null +++ b/gl/lib/ino-map.h @@ -0,0 +1,14 @@ +#include <sys/types.h> + +#undef _ATTRIBUTE_NONNULL_ +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__ +# define _ATTRIBUTE_NONNULL_(m) __attribute__ ((__nonnull__ (m))) +#else +# define _ATTRIBUTE_NONNULL_(m) +#endif + +#define INO_MAP_INSERT_FAILURE ((size_t) -1) + +struct ino_map *ino_map_alloc (size_t); +void ino_map_free (struct ino_map *) _ATTRIBUTE_NONNULL_ (1); +size_t ino_map_insert (struct ino_map *, ino_t) _ATTRIBUTE_NONNULL_ (1); diff --git a/gl/modules/dev-map b/gl/modules/dev-map deleted file mode 100644 index 91f437b27..000000000 --- a/gl/modules/dev-map +++ /dev/null @@ -1,23 +0,0 @@ -Description: -maintain a mapping of dev_t numbers to small integers - -Files: -lib/dev-map.c -lib/dev-map.h - -Depends-on: -hash - -configure.ac: - -Makefile.am: -lib_SOURCES += dev-map.c dev-map.h - -Include: -"dev-map.h" - -License -GPL - -Maintainer: -Jim Meyering diff --git a/gl/modules/dev-map-tests b/gl/modules/dev-map-tests deleted file mode 100644 index 4bec2e6b2..000000000 --- a/gl/modules/dev-map-tests +++ /dev/null @@ -1,10 +0,0 @@ -Files: -tests/test-dev-map.c - -Depends-on: - -configure.ac: - -Makefile.am: -TESTS += test-dev-map -check_PROGRAMS += test-dev-map diff --git a/gl/modules/di-set b/gl/modules/di-set index fe5277858..562db14a4 100644 --- a/gl/modules/di-set +++ b/gl/modules/di-set @@ -6,9 +6,8 @@ lib/di-set.c lib/di-set.h Depends-on: -dev-map +ino-map hash -verify configure.ac: diff --git a/gl/modules/ino-map b/gl/modules/ino-map new file mode 100644 index 000000000..06ee51d5b --- /dev/null +++ b/gl/modules/ino-map @@ -0,0 +1,24 @@ +Description: +maintain a mapping of ino_t numbers to small integers + +Files: +lib/ino-map.c +lib/ino-map.h + +Depends-on: +hash +verify + +configure.ac: + +Makefile.am: +lib_SOURCES += ino-map.c ino-map.h + +Include: +"ino-map.h" + +License +GPL + +Maintainer: +Jim Meyering diff --git a/gl/modules/ino-map-tests b/gl/modules/ino-map-tests new file mode 100644 index 000000000..fa10b4ffc --- /dev/null +++ b/gl/modules/ino-map-tests @@ -0,0 +1,10 @@ +Files: +tests/test-ino-map.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-ino-map +check_PROGRAMS += test-ino-map diff --git a/gl/tests/test-di-set.c b/gl/tests/test-di-set.c index 4d5823ed8..e5fb6cb21 100644 --- a/gl/tests/test-di-set.c +++ b/gl/tests/test-di-set.c @@ -1,4 +1,4 @@ -/* Test the dev-map module. +/* Test the di-set module. Copyright (C) 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -36,41 +36,21 @@ #include "di-set.h" -/* FIXME: ugly duplication of code from di-set.c */ -#define N_DEV_BITS_4 5 -#define N_INO_BITS_4 (32 - N_DEV_BITS_4 - 2 - 1) - -#define N_DEV_BITS_8 8 -#define N_INO_BITS_8 (64 - N_DEV_BITS_8 - 2 - 1) - int main (void) { /* set_program_name (argv[0]); placate overzealous "syntax-check" test. */ - size_t initial_size = 61; - /* "real" code might prefer to avoid the allocation here, simply - declaring "struct di_set_state dis;", do a global substitution, - s/\<dis\>/\&dis/, and remove the final free. */ - struct di_set_state *dis = malloc (sizeof *dis); + struct di_set *dis = di_set_alloc (); ASSERT (dis); - ASSERT (di_set_init (dis, initial_size) == 0); - - struct di_ent *di_ent; - ASSERT (di_ent_create (dis, 1, 1, &di_ent) == 0); - ASSERT (di_ent_create (dis, 1 << N_DEV_BITS_4, 1, &di_ent) == 0); - ASSERT (di_ent_create (dis, 1, 1 << N_INO_BITS_4, &di_ent) == 0); - ASSERT (di_ent_create (dis, 1, - (uint64_t) 1 << N_INO_BITS_8, &di_ent) == 0); - free (di_ent); ASSERT (di_set_insert (dis, 2, 5) == 1); /* first insertion succeeds */ ASSERT (di_set_insert (dis, 2, 5) == 0); /* duplicate fails */ ASSERT (di_set_insert (dis, 3, 5) == 1); /* diff dev, duplicate inode is ok */ ASSERT (di_set_insert (dis, 2, 8) == 1); /* same dev, different inode is ok */ - /* very large inode number */ - ASSERT (di_set_insert (dis, 5, (uint64_t) 1 << 63) == 1); - ASSERT (di_set_insert (dis, 5, (uint64_t) 1 << 63) == 0); /* dup */ + /* very large (or negative) inode number */ + ASSERT (di_set_insert (dis, 5, (ino_t) -1) == 1); + ASSERT (di_set_insert (dis, 5, (ino_t) -1) == 0); /* dup */ unsigned int i; for (i = 0; i < 3000; i++) @@ -79,7 +59,6 @@ main (void) ASSERT (di_set_insert (dis, 9, i) == 0); /* duplicate fails */ di_set_free (dis); - free (dis); return 0; } diff --git a/gl/tests/test-dev-map.c b/gl/tests/test-ino-map.c index 98ba6301b..2b44602e2 100644 --- a/gl/tests/test-dev-map.c +++ b/gl/tests/test-ino-map.c @@ -1,4 +1,4 @@ -/* Test the dev-map module. +/* Test the ino-map module. Copyright (C) 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -34,30 +34,30 @@ } \ while (0) -#include "dev-map.h" - -/* Risky: this is also defined in di-set.c; they should match. */ -#define N_DEV_BITS_4 5 +#include "ino-map.h" int main () { /* set_program_name (argv[0]); placate overzealous "syntax-check" test. */ - struct dev_map dev_map; - ASSERT (dev_map_init (&dev_map) == 0); - - ASSERT (dev_map_insert (&dev_map, 42) == 0); - ASSERT (dev_map_insert (&dev_map, 42) == 0); - ASSERT (dev_map_insert (&dev_map, 398) == 1); - ASSERT (dev_map_insert (&dev_map, 398) == 1); - - int32_t i; - for (i = 0; i < (1 << N_DEV_BITS_4); i++) + enum { INO_MAP_INIT = 123 }; + struct ino_map *ino_map = ino_map_alloc (INO_MAP_INIT); + ASSERT (ino_map != NULL); + + ASSERT (ino_map_insert (ino_map, 42) == INO_MAP_INIT); + ASSERT (ino_map_insert (ino_map, 42) == INO_MAP_INIT); + ASSERT (ino_map_insert (ino_map, 398) == INO_MAP_INIT + 1); + ASSERT (ino_map_insert (ino_map, 398) == INO_MAP_INIT + 1); + ASSERT (ino_map_insert (ino_map, 0) == INO_MAP_INIT + 2); + ASSERT (ino_map_insert (ino_map, 0) == INO_MAP_INIT + 2); + + int i; + for (i = 0; i < 100; i++) { - ASSERT (dev_map_insert (&dev_map, 10000+i) == 2+i); + ASSERT (ino_map_insert (ino_map, 10000 + i) == INO_MAP_INIT + 3 + i); } - dev_map_free (&dev_map); + ino_map_free (ino_map); return 0; } @@ -60,11 +60,8 @@ extern bool fts_debug; # define FTS_CROSS_CHECK(Fts) #endif -/* Initial size of the hash table. */ -enum { INITIAL_DI_SET_SIZE = 1021 }; - /* A set of dev/ino pairs. */ -static struct di_set_state di_set; +static struct di_set *di_set; /* Define a class for collecting directory information. */ @@ -337,14 +334,10 @@ Mandatory arguments to long options are mandatory for short options too.\n\ static bool hash_ins (ino_t ino, dev_t dev) { - int inserted = di_set_insert (&di_set, dev, ino); + int inserted = di_set_insert (di_set, dev, ino); if (inserted < 0) - { - /* Insertion failed due to lack of memory. */ - xalloc_die (); - } - - return inserted ? true : false; + xalloc_die (); + return inserted; } /* FIXME: this code is nearly identical to code in date.c */ @@ -905,7 +898,8 @@ main (int argc, char **argv) xalloc_die (); /* Initialize the set of dev,inode pairs. */ - if (di_set_init (&di_set, INITIAL_DI_SET_SIZE)) + di_set = di_set_alloc (); + if (!di_set) xalloc_die (); bit_flags |= symlink_deref_bits; @@ -977,6 +971,7 @@ main (int argc, char **argv) } argv_iter_free (ai); + di_set_free (di_set); if (files_from && (ferror (stdin) || fclose (stdin) != 0)) error (EXIT_FAILURE, 0, _("error reading %s"), quote (files_from)); @@ -984,7 +979,5 @@ main (int argc, char **argv) if (print_grand_total) print_size (&tot_dui, _("total")); - di_set_free (&di_set); - exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); } |