summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2010-07-06 14:53:14 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2010-07-06 14:58:48 -0700
commitfb1a26c3f64669a1b61740252c5db5fd5413c7e5 (patch)
tree994631f1859b0adbe74bff895919f1636f798e81
parentd5427265e30522cfda098bb82ad3d4bff0a0d2bd (diff)
downloadcoreutils-fb1a26c3f64669a1b61740252c5db5fd5413c7e5.tar.xz
du: Hash with a mechanism that's simpler and takes less memory.
* gl/lib/dev-map.c, gl/lib/dev-map.h, gl/modules/dev-map: Remove. * gl/lib/ino-map.c, gl/lib/ino-map.h, gl/modules/ino-map: New files. * gl/modules/dev-map-tests, gl/tests/test-dev-map.c: Remove. * gl/modules/ino-map-tests, gl/tests/test-ino-map.c: New files. * gl/lib/di-set.h (struct di_set): Renamed from struct di_set_state, and now private. All uses changed. (_ATTRIBUTE_NONNULL_): Don't assume C99. (di_set_alloc): Renamed from di_set_init, with no size arg. Now allocates the object rather than initializing it. For now, this no longer takes an initial size; we can put this back later if it is needed. * gl/lib/di-set.c: Include hash.h, ino-map.h, and limits.h instead of stdio.h, assert.h, stdint.h, sys/types.h (di-set.h includes that now), sys/stat.h, and verify.h. (N_DEV_BITS_4, N_INO_BITS_4, N_DEV_BITS_8, N_INO_BITS_8): Remove. (struct dev_ino_4, struct dev_ino_8, struct dev_ino_full): Remove. (enum di_mode): Remove. (hashint): New typedef. (HASHINT_MAX, LARGE_INO_MIN): New macros. (struct di_ent): Now maps a dev_t to a inode set, instead of containing a union. (struct dev_map_ent): Remove. (struct di_set): New type. (is_encoded_ptr, decode_ptr, di_ent_create): Remove. (di_ent_hash, di_ent_compare, di_ent_free, di_set_alloc, di_set_free): (di_set_insert): Adjust to new representation. (di_ino_hash, map_device, map_inode_number): New functions. * gl/modules/di-set (Depends-on): Replace dev-map with ino-map. Remove 'verify'. * gl/tests/test-di-set.c: Adjust to the above changes to API. * src/du.c (INITIAL_DI_SET_SIZE): Remove. (hash_ins, main): Adjust to new di-set API.
-rw-r--r--gl/lib/dev-map.c110
-rw-r--r--gl/lib/dev-map.h23
-rw-r--r--gl/lib/di-set.c353
-rw-r--r--gl/lib/di-set.h28
-rw-r--r--gl/lib/ino-map.c162
-rw-r--r--gl/lib/ino-map.h14
-rw-r--r--gl/modules/dev-map23
-rw-r--r--gl/modules/dev-map-tests10
-rw-r--r--gl/modules/di-set3
-rw-r--r--gl/modules/ino-map24
-rw-r--r--gl/modules/ino-map-tests10
-rw-r--r--gl/tests/test-di-set.c31
-rw-r--r--gl/tests/test-ino-map.c (renamed from gl/tests/test-dev-map.c)34
-rw-r--r--src/du.c21
14 files changed, 402 insertions, 444 deletions
diff --git a/gl/lib/dev-map.c b/gl/lib/dev-map.c
deleted file mode 100644
index 363f5af72..000000000
--- a/gl/lib/dev-map.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Map a dev_t device number to a small integer.
-
- Copyright 2009-2010 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-/* written by Jim Meyering */
-
-#include <config.h>
-#include "dev-map.h"
-
-#include <stdio.h>
-#include <assert.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-struct dev_map_ent
-{
- dev_t dev;
- uint32_t mapped_dev;
-};
-
-enum { INITIAL_DEV_MAP_TABLE_SIZE = 31 };
-
-static size_t
-dev_hash (void const *x, size_t table_size)
-{
- struct dev_map_ent const *p = x;
- return (uintmax_t) p->dev % table_size;
-}
-
-/* Compare two dev_map_ent structs on "dev".
- Return true if they are the same. */
-static bool
-dev_compare (void const *x, void const *y)
-{
- struct dev_map_ent const *a = x;
- struct dev_map_ent const *b = y;
- return a->dev == b->dev ? true : false;
-}
-
-/* Initialize state. */
-int
-dev_map_init (struct dev_map *dev_map)
-{
- assert (dev_map != NULL);
- dev_map->n_device = 0;
- dev_map->dev_map = hash_initialize (INITIAL_DEV_MAP_TABLE_SIZE, NULL,
- dev_hash, dev_compare, free);
- return dev_map->dev_map ? 0 : -1;
-}
-
-void
-dev_map_free (struct dev_map *dev_map)
-{
- hash_free (dev_map->dev_map);
-}
-
-/* Insert device number, DEV, into the map, DEV_MAP if it's not there already,
- and return the nonnegative, mapped and usually smaller, number.
- If DEV is already in DEV_MAP, return the existing mapped value.
- Upon memory allocation failure, return -1. */
-int
-dev_map_insert (struct dev_map *dev_map, dev_t dev)
-{
- /* Attempt to insert <DEV,n_device> in the map.
- Possible outcomes:
- - DEV was already there, with a different necessarily smaller value
- - DEV was not there, (thus was just inserted)
- - insert failed: out of memory
- Return -1 on out of memory.
- */
-
- struct dev_map_ent *ent_from_table;
- struct dev_map_ent *ent = malloc (sizeof *ent);
- if (!ent)
- return -1;
-
- ent->dev = dev;
- ent->mapped_dev = dev_map->n_device;
-
- ent_from_table = hash_insert (dev_map->dev_map, ent);
- if (ent_from_table == NULL)
- {
- /* Insertion failed due to lack of memory. */
- return -1;
- }
-
- if (ent_from_table == ent)
- {
- /* Insertion succeeded: new device. */
- return dev_map->n_device++;
- }
-
- /* That DEV value is already in the table, so ENT was not inserted.
- Free it and return the already-associated value. */
- free (ent);
- return ent_from_table->mapped_dev;
-}
diff --git a/gl/lib/dev-map.h b/gl/lib/dev-map.h
deleted file mode 100644
index f093d908f..000000000
--- a/gl/lib/dev-map.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <stddef.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "hash.h"
-
-struct dev_map
-{
- /* KEY,VAL pair, where KEY is the raw st_dev value
- and VAL is the small number that maps to. */
- struct hash_table *dev_map;
- size_t n_device;
-};
-
-#undef _ATTRIBUTE_NONNULL_
-#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__
-# define _ATTRIBUTE_NONNULL_(m) __attribute__ ((__nonnull__ (m)))
-#else
-# define _ATTRIBUTE_NONNULL_(m)
-#endif
-
-int dev_map_init (struct dev_map *) _ATTRIBUTE_NONNULL_ (1);
-void dev_map_free (struct dev_map *) _ATTRIBUTE_NONNULL_ (1);
-int dev_map_insert (struct dev_map *, dev_t) _ATTRIBUTE_NONNULL_ (1);
diff --git a/gl/lib/di-set.c b/gl/lib/di-set.c
index 3c4717b73..e0e2b24dd 100644
--- a/gl/lib/di-set.c
+++ b/gl/lib/di-set.c
@@ -15,262 +15,221 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
-/* written by Jim Meyering */
+/* written by Paul Eggert and Jim Meyering */
#include <config.h>
#include "di-set.h"
-#include <stdio.h>
-#include <assert.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "verify.h"
-
-/* Set operations for device-inode pairs stored in a space-efficient manner.
- A naive mapping uses 16 bytes to save a single st_dev, st_ino pair.
- However, in many applications, the vast majority of actual device,inode
- number pairs can be efficiently compressed to fit in 8 or even 4 bytes,
- by using a separate table to map a relatively small number of devices
- to small integers. */
-
-#define N_DEV_BITS_4 5
-#define N_INO_BITS_4 (32 - N_DEV_BITS_4 - 2 - 1)
-
-#define N_DEV_BITS_8 8
-#define N_INO_BITS_8 (64 - N_DEV_BITS_8 - 2 - 1)
-
-/* Note how the last bit is always set.
- This is required, in order to be able to distinguish
- an encoded di_ent value from a malloc-returned pointer,
- which must be 4-byte-aligned or better. */
-struct dev_ino_4
-{
- uint32_t mode:2; /* must be first */
- uint32_t short_ino:N_INO_BITS_4;
- uint32_t mapped_dev:N_DEV_BITS_4;
- uint32_t always_set:1;
-};
-verify (N_DEV_BITS_4 <= 8 * sizeof (int));
-verify (sizeof (struct dev_ino_4) == 4);
-
-struct dev_ino_8
-{
- uint32_t mode:2; /* must be first */
- uint64_t short_ino:N_INO_BITS_8;
- uint32_t mapped_dev:N_DEV_BITS_8;
- uint32_t always_set:1;
-};
-verify (sizeof (struct dev_ino_8) == 8);
-
-struct dev_ino_full
-{
- uint32_t mode:2; /* must be first */
- dev_t dev;
- ino_t ino;
-};
+#include "hash.h"
+#include "ino-map.h"
-enum di_mode
-{
- DI_MODE_4 = 1,
- DI_MODE_8 = 2,
- DI_MODE_FULL = 3
-};
+#include <limits.h>
+#include <stdlib.h>
-/*
- di_mode raw_inode mapped dev always_set
- \____________|_______________\_____/
- 4-byte | 2| 25 | 5 |1| mapped_dev
- `----------------------------------------------------|-----.
- 8-byte | 2| 53 | 8 |1|
- `----------------------------------------------------------'
-*/
+/* The hash package hashes "void *", but this package wants to hash
+ integers. Use integers that are as large as possible, but no
+ larger than void *, so that they can be cast to void * and back
+ without losing information. */
+typedef size_t hashint;
+#define HASHINT_MAX ((hashint) -1)
+
+/* Integers represent inode numbers. Integers in the range
+ 1..(LARGE_INO_MIN-1) represent inode numbers directly. (The hash
+ package does not work with null pointers, so inode 0 cannot be used
+ as a key.) To find the representations of other inode numbers, map
+ them through INO_MAP. */
+#define LARGE_INO_MIN (HASHINT_MAX / 2)
+
+/* Set operations for device-inode pairs stored in a space-efficient
+ manner. Use a two-level hash table. The top level hashes by
+ device number, as there are typically a small number of devices.
+ The lower level hashes by mapped inode numbers. In the typical
+ case where the inode number is positive and small, the inode number
+ maps to itself, masquerading as a void * value; otherwise, its
+ value is the result of hashing the inode value through INO_MAP. */
+
+/* A pair that maps a device number to a set of inode numbers. */
struct di_ent
{
- union
- {
- struct dev_ino_4 di4;
- struct dev_ino_8 di8;
- struct dev_ino_full full;
- uint32_t u32;
- uint64_t u64;
- void *ptr;
- } u;
-};
-
-struct dev_map_ent
-{
dev_t dev;
- uint32_t mapped_dev;
+ struct hash_table *ino_set;
};
-static inline bool
-is_encoded_ptr (struct di_ent const *v)
+/* A two-level hash table that manages and indexes these pairs. */
+struct di_set
{
- return (size_t) v % 4;
-}
+ /* Map device numbers to sets of inode number representatives. */
+ struct hash_table *dev_map;
-static struct di_ent
-decode_ptr (struct di_ent const *v)
-{
- if (!is_encoded_ptr (v))
- return *v;
+ /* If nonnull, map large inode numbers to their small
+ representatives. If null, there are no large inode numbers in
+ this set. */
+ struct ino_map *ino_map;
- struct di_ent di;
- di.u.ptr = (void *) v;
- return di;
-}
+ /* Cache of the most recently allocated and otherwise-unused storage
+ for probing this table. */
+ struct di_ent *probe;
+};
+/* Hash a device-inode-set entry. */
static size_t
di_ent_hash (void const *x, size_t table_size)
{
- struct di_ent e = decode_ptr (x);
- return (e.u.di4.mode == DI_MODE_4
- ? e.u.u32
- : (e.u.di4.mode == DI_MODE_8
- ? e.u.u64
- : e.u.full.ino)) % table_size;
+ struct di_ent const *p = x;
+ dev_t dev = p->dev;
+
+ /* Exclusive-OR the words of DEV into H. This avoids loss of info,
+ without using a wider % that could be quite slow. */
+ size_t h = dev;
+ int i;
+ for (i = 1; i < sizeof dev / sizeof h + (sizeof dev % sizeof h != 0); i++)
+ h ^= dev >>= CHAR_BIT * sizeof h;
+
+ return h % table_size;
}
-/* Compare two di_ent structs.
- Return true if they are the same. */
+/* Return true if two device-inode-set entries are the same. */
static bool
di_ent_compare (void const *x, void const *y)
{
- struct di_ent a = decode_ptr (x);
- struct di_ent b = decode_ptr (y);
- if (a.u.di4.mode != b.u.di4.mode)
- return false;
-
- if (a.u.di4.mode == DI_MODE_4)
- return (a.u.di4.short_ino == b.u.di4.short_ino
- && a.u.di4.mapped_dev == b.u.di4.mapped_dev);
-
- if (a.u.di8.mode == DI_MODE_8)
- return (a.u.di8.short_ino == b.u.di8.short_ino
- && a.u.di8.mapped_dev == b.u.di8.mapped_dev);
-
- return (a.u.full.ino == b.u.full.ino
- && a.u.full.dev == b.u.full.dev);
+ struct di_ent const *a = x;
+ struct di_ent const *b = y;
+ return a->dev == b->dev;
}
+/* Free a device-inode-set entry. */
static void
di_ent_free (void *v)
{
- if ( ! is_encoded_ptr (v))
- free (v);
+ struct di_ent *a = v;
+ hash_free (a->ino_set);
+ free (a);
}
-int
-di_set_init (struct di_set_state *dis, size_t initial_size)
+/* Create a set of device-inode pairs. Return NULL on allocation failure. */
+struct di_set *
+di_set_alloc (void)
{
- if (dev_map_init (&dis->dev_map) < 0)
- return -1;
+ struct di_set *dis = malloc (sizeof *dis);
+ if (dis)
+ {
+ enum { INITIAL_DEV_MAP_SIZE = 11 };
+ dis->dev_map = hash_initialize (INITIAL_DEV_MAP_SIZE, NULL,
+ di_ent_hash, di_ent_compare,
+ di_ent_free);
+ if (! dis->dev_map)
+ {
+ free (dis);
+ return NULL;
+ }
+ dis->ino_map = NULL;
+ dis->probe = NULL;
+ }
- dis->di_set = hash_initialize (initial_size, NULL,
- di_ent_hash, di_ent_compare, di_ent_free);
- return dis->di_set ? 0 : -1;
+ return dis;
}
+/* Free a set of device-inode pairs. */
void
-di_set_free (struct di_set_state *dis)
+di_set_free (struct di_set *dis)
{
- dev_map_free (&dis->dev_map);
- hash_free (dis->di_set);
+ hash_free (dis->dev_map);
+ free (dis->ino_map);
+ free (dis->probe);
+ free (dis);
}
-/* Given a device-inode set, DIS, create an entry for the DEV,INO
- pair, and store it in *V. If possible, encode DEV,INO into the pointer
- itself, but if not, allocate space for a full "struct di_ent" and set *V
- to that pointer. Upon memory allocation failure, return -1.
- Otherwise return 0. */
-int
-di_ent_create (struct di_set_state *dis,
- dev_t dev, ino_t ino,
- struct di_ent **v)
+/* Hash an encoded inode number I. */
+static size_t
+di_ino_hash (void const *i, size_t table_size)
{
- static int prev_m = -1;
- static dev_t prev_dev = -1;
- struct di_ent di_ent;
- int mapped_dev;
+ return (hashint) i % table_size;
+}
- if (dev == prev_dev)
- mapped_dev = prev_m;
+/* Using the DIS table, map a device to a hash table that represents
+ a set of inode numbers. Return NULL on error. */
+static struct hash_table *
+map_device (struct di_set *dis, dev_t dev)
+{
+ /* Find space for the probe, reusing the cache if available. */
+ struct di_ent *ent;
+ struct di_ent *probe = dis->probe;
+ if (probe)
+ {
+ /* If repeating a recent query, return the cached result. */
+ if (probe->dev == dev)
+ return probe->ino_set;
+ }
else
{
- mapped_dev = dev_map_insert (&dis->dev_map, dev);
- if (mapped_dev < 0)
- return -1;
- prev_dev = dev;
- prev_m = mapped_dev;
+ dis->probe = probe = malloc (sizeof *probe);
+ if (! probe)
+ return NULL;
}
- if (mapped_dev < (1 << N_DEV_BITS_4)
- && ino < (1 << N_INO_BITS_4))
+ /* Probe for the device. */
+ probe->dev = dev;
+ ent = hash_insert (dis->dev_map, probe);
+ if (! ent)
+ return NULL;
+
+ if (ent != probe)
{
-#if lint
- /* When this struct is smaller than a pointer, initialize
- the pointer so tools like valgrind don't complain about
- the uninitialized bytes. */
- if (sizeof di_ent.u.di4 < sizeof di_ent.u.ptr)
- di_ent.u.ptr = NULL;
-#endif
- di_ent.u.di4.mode = DI_MODE_4;
- di_ent.u.di4.short_ino = ino;
- di_ent.u.di4.mapped_dev = mapped_dev;
- di_ent.u.di4.always_set = 1;
- *v = di_ent.u.ptr;
+ /* Use the existing entry. */
+ probe->ino_set = ent->ino_set;
}
- else if (mapped_dev < (1 << N_DEV_BITS_8)
- && ino < ((uint64_t) 1 << N_INO_BITS_8))
+ else
{
- di_ent.u.di8.mode = DI_MODE_8;
- di_ent.u.di8.short_ino = ino;
- di_ent.u.di8.mapped_dev = mapped_dev;
- di_ent.u.di8.always_set = 1;
- *v = di_ent.u.ptr;
+ enum { INITIAL_INO_SET_SIZE = 1021 };
+
+ /* Prepare to allocate a new probe next time; this one is in use. */
+ dis->probe = NULL;
+
+ /* DEV is new; allocate an inode set for it. */
+ probe->ino_set = hash_initialize (INITIAL_INO_SET_SIZE, NULL,
+ di_ino_hash, NULL, NULL);
}
- else
+
+ return probe->ino_set;
+}
+
+/* Using the DIS table, map an inode number to a mapped value.
+ Return INO_MAP_INSERT_FAILURE on error. */
+static hashint
+map_inode_number (struct di_set *dis, ino_t ino)
+{
+ if (0 < ino && ino < LARGE_INO_MIN)
+ return ino;
+
+ if (! dis->ino_map)
{
- /* Handle the case in which INO is too large or in which (far less
- likely) we encounter hard-linked files on 2^N_DEV_BITS_8
- different devices. */
- struct di_ent *p = malloc (sizeof *p);
- if (!p)
- return -1;
- assert ((size_t) p % 4 == 0);
- p->u.full.mode = DI_MODE_FULL;
- p->u.full.ino = ino;
- p->u.full.dev = dev;
- *v = p;
+ dis->ino_map = ino_map_alloc (LARGE_INO_MIN);
+ if (! dis->ino_map)
+ return INO_MAP_INSERT_FAILURE;
}
- return 0;
+ return ino_map_insert (dis->ino_map, ino);
}
-/* Attempt to insert the DEV,INO pair into the set, DIS.
- If it matches a pair already in DIS, don't modify DIS and return 0.
+/* Attempt to insert the DEV,INO pair into the set DIS.
+ If it matches a pair already in DIS, keep that pair and return 0.
Otherwise, if insertion is successful, return 1.
Upon any failure return -1. */
int
-di_set_insert (struct di_set_state *dis, dev_t dev, ino_t ino)
+di_set_insert (struct di_set *dis, dev_t dev, ino_t ino)
{
- struct di_ent *v;
- if (di_ent_create (dis, dev, ino, &v) < 0)
- return -1;
+ hashint i;
- int err = hash_insert0 (dis->di_set, v, NULL);
- if (err == -1) /* Insertion failed due to lack of memory. */
+ /* Map the device number to a set of inodes. */
+ struct hash_table *ino_set = map_device (dis, dev);
+ if (! ino_set)
return -1;
- if (err == 1) /* Insertion succeeded. */
- return 1;
-
- /* That pair is already in the table, so ENT was not inserted. Free it. */
- if (! is_encoded_ptr (v))
- free (v);
+ /* Map the inode number to a small representative I. */
+ i = map_inode_number (dis, ino);
+ if (i == INO_MAP_INSERT_FAILURE)
+ return -1;
- return 0;
+ /* Put I into the inode set. */
+ return hash_insert0 (ino_set, (void *) i, NULL);
}
diff --git a/gl/lib/di-set.h b/gl/lib/di-set.h
index f90d0ddc7..d30a31e92 100644
--- a/gl/lib/di-set.h
+++ b/gl/lib/di-set.h
@@ -1,28 +1,12 @@
-#include "dev-map.h"
-
-struct di_set_state
-{
- /* A map to help compact device numbers. */
- struct dev_map dev_map;
-
- /* A set of compact dev,inode pairs. */
- struct hash_table *di_set;
-};
+#include <sys/types.h>
#undef _ATTRIBUTE_NONNULL_
#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__
-# define _ATTRIBUTE_NONNULL_(m,...) __attribute__ ((__nonnull__ (m)))
+# define _ATTRIBUTE_NONNULL_(m) __attribute__ ((__nonnull__ (m)))
#else
-# define _ATTRIBUTE_NONNULL_(m,...)
+# define _ATTRIBUTE_NONNULL_(m)
#endif
-int di_set_init (struct di_set_state *, size_t) _ATTRIBUTE_NONNULL_ (1);
-void di_set_free (struct di_set_state *) _ATTRIBUTE_NONNULL_ (1);
-int di_set_insert (struct di_set_state *, dev_t, ino_t)
- _ATTRIBUTE_NONNULL_ (1);
-
-struct di_ent;
-int di_ent_create (struct di_set_state *di_set_state,
- dev_t dev, ino_t ino,
- struct di_ent **di_ent)
- _ATTRIBUTE_NONNULL_ (1,4);
+struct di_set *di_set_alloc (void);
+int di_set_insert (struct di_set *, dev_t, ino_t) _ATTRIBUTE_NONNULL_ (1);
+void di_set_free (struct di_set *) _ATTRIBUTE_NONNULL_ (1);
diff --git a/gl/lib/ino-map.c b/gl/lib/ino-map.c
new file mode 100644
index 000000000..c86898365
--- /dev/null
+++ b/gl/lib/ino-map.c
@@ -0,0 +1,162 @@
+/* Map an ino_t inode number to a small integer.
+
+ Copyright 2009, 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* written by Paul Eggert and Jim Meyering */
+
+#include <config.h>
+#include "ino-map.h"
+
+#include "hash.h"
+#include "verify.h"
+
+#include <limits.h>
+#include <stdlib.h>
+
+/* A pair that maps an inode number to a mapped inode number; the
+ latter is a small unique ID for the former. */
+struct ino_map_ent
+{
+ ino_t ino;
+ size_t mapped_ino;
+};
+
+/* A table that manages and indexes these pairs. */
+struct ino_map
+{
+ /* A table of KEY,VAL pairs, where KEY is the raw ino_t value and
+ VAL is the small number that it maps to. */
+ struct hash_table *map;
+
+ /* The next mapped inode number to hand out. */
+ size_t next_mapped_ino;
+
+ /* Cache of the most recently allocated and otherwise-unused storage
+ for probing the table. */
+ struct ino_map_ent *probe;
+};
+
+/* Hash an inode map entry. */
+static size_t
+ino_hash (void const *x, size_t table_size)
+{
+ struct ino_map_ent const *p = x;
+ ino_t ino = p->ino;
+
+ /* Exclusive-OR the words of INO into H. This avoids loss of info,
+ without using a wider % that could be quite slow. */
+ size_t h = ino;
+ int i;
+ for (i = 1; i < sizeof ino / sizeof h + (sizeof ino % sizeof h != 0); i++)
+ h ^= ino >>= CHAR_BIT * sizeof h;
+
+ return h % table_size;
+}
+
+/* Return true if two inode map entries are the same. */
+static bool
+ino_compare (void const *x, void const *y)
+{
+ struct ino_map_ent const *a = x;
+ struct ino_map_ent const *b = y;
+ return a->ino == b->ino;
+}
+
+/* Allocate an inode map that will hand out integers starting with
+ NEXT_MAPPED_INO. Return NULL if memory is exhausted. */
+struct ino_map *
+ino_map_alloc (size_t next_mapped_ino)
+{
+ struct ino_map *im = malloc (sizeof *im);
+
+ if (im)
+ {
+ enum { INITIAL_INO_MAP_TABLE_SIZE = 1021 };
+ im->map = hash_initialize (INITIAL_INO_MAP_TABLE_SIZE, NULL,
+ ino_hash, ino_compare, free);
+ if (! im->map)
+ {
+ free (im);
+ return NULL;
+ }
+ im->next_mapped_ino = next_mapped_ino;
+ im->probe = NULL;
+ }
+
+ return im;
+}
+
+/* Free an inode map. */
+void
+ino_map_free (struct ino_map *map)
+{
+ hash_free (map->map);
+ free (map->probe);
+ free (map);
+}
+
+
+/* Insert into MAP the inode number INO if it's not there already,
+ and return its nonnegative mapped inode number.
+ If INO is already in MAP, return the existing mapped inode number.
+ Return INO_MAP_INSERT_FAILURE on memory or counter exhaustion. */
+size_t
+ino_map_insert (struct ino_map *im, ino_t ino)
+{
+ struct ino_map_ent *ent;
+
+ /* Find space for the probe, reusing the cache if available. */
+ struct ino_map_ent *probe = im->probe;
+ if (probe)
+ {
+ /* If repeating a recent query, return the cached result. */
+ if (probe->ino == ino)
+ return probe->mapped_ino;
+ }
+ else
+ {
+ im->probe = probe = malloc (sizeof *probe);
+ if (! probe)
+ return INO_MAP_INSERT_FAILURE;
+ }
+
+ probe->ino = ino;
+ ent = hash_insert (im->map, probe);
+ if (! ent)
+ return INO_MAP_INSERT_FAILURE;
+
+ if (ent != probe)
+ {
+ /* Use the existing entry. */
+ probe->mapped_ino = ent->mapped_ino;
+ }
+ else
+ {
+ /* If adding 1 to map->next_mapped_ino would cause it to
+ overflow to zero, then it must equal INO_MAP_INSERT_FAILURE,
+ which is the value that should be returned in that case.
+ Verify that this works. */
+ verify (INO_MAP_INSERT_FAILURE + 1 == 0);
+
+ /* Prepare to allocate a new probe next time; this one is in use. */
+ im->probe = NULL;
+
+ /* INO is new; allocate a mapped inode number for it. */
+ probe->mapped_ino = im->next_mapped_ino++;
+ }
+
+ return probe->mapped_ino;
+}
diff --git a/gl/lib/ino-map.h b/gl/lib/ino-map.h
new file mode 100644
index 000000000..661b78a55
--- /dev/null
+++ b/gl/lib/ino-map.h
@@ -0,0 +1,14 @@
+#include <sys/types.h>
+
+#undef _ATTRIBUTE_NONNULL_
+#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__
+# define _ATTRIBUTE_NONNULL_(m) __attribute__ ((__nonnull__ (m)))
+#else
+# define _ATTRIBUTE_NONNULL_(m)
+#endif
+
+#define INO_MAP_INSERT_FAILURE ((size_t) -1)
+
+struct ino_map *ino_map_alloc (size_t);
+void ino_map_free (struct ino_map *) _ATTRIBUTE_NONNULL_ (1);
+size_t ino_map_insert (struct ino_map *, ino_t) _ATTRIBUTE_NONNULL_ (1);
diff --git a/gl/modules/dev-map b/gl/modules/dev-map
deleted file mode 100644
index 91f437b27..000000000
--- a/gl/modules/dev-map
+++ /dev/null
@@ -1,23 +0,0 @@
-Description:
-maintain a mapping of dev_t numbers to small integers
-
-Files:
-lib/dev-map.c
-lib/dev-map.h
-
-Depends-on:
-hash
-
-configure.ac:
-
-Makefile.am:
-lib_SOURCES += dev-map.c dev-map.h
-
-Include:
-"dev-map.h"
-
-License
-GPL
-
-Maintainer:
-Jim Meyering
diff --git a/gl/modules/dev-map-tests b/gl/modules/dev-map-tests
deleted file mode 100644
index 4bec2e6b2..000000000
--- a/gl/modules/dev-map-tests
+++ /dev/null
@@ -1,10 +0,0 @@
-Files:
-tests/test-dev-map.c
-
-Depends-on:
-
-configure.ac:
-
-Makefile.am:
-TESTS += test-dev-map
-check_PROGRAMS += test-dev-map
diff --git a/gl/modules/di-set b/gl/modules/di-set
index fe5277858..562db14a4 100644
--- a/gl/modules/di-set
+++ b/gl/modules/di-set
@@ -6,9 +6,8 @@ lib/di-set.c
lib/di-set.h
Depends-on:
-dev-map
+ino-map
hash
-verify
configure.ac:
diff --git a/gl/modules/ino-map b/gl/modules/ino-map
new file mode 100644
index 000000000..06ee51d5b
--- /dev/null
+++ b/gl/modules/ino-map
@@ -0,0 +1,24 @@
+Description:
+maintain a mapping of ino_t numbers to small integers
+
+Files:
+lib/ino-map.c
+lib/ino-map.h
+
+Depends-on:
+hash
+verify
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += ino-map.c ino-map.h
+
+Include:
+"ino-map.h"
+
+License
+GPL
+
+Maintainer:
+Jim Meyering
diff --git a/gl/modules/ino-map-tests b/gl/modules/ino-map-tests
new file mode 100644
index 000000000..fa10b4ffc
--- /dev/null
+++ b/gl/modules/ino-map-tests
@@ -0,0 +1,10 @@
+Files:
+tests/test-ino-map.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-ino-map
+check_PROGRAMS += test-ino-map
diff --git a/gl/tests/test-di-set.c b/gl/tests/test-di-set.c
index 4d5823ed8..e5fb6cb21 100644
--- a/gl/tests/test-di-set.c
+++ b/gl/tests/test-di-set.c
@@ -1,4 +1,4 @@
-/* Test the dev-map module.
+/* Test the di-set module.
Copyright (C) 2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
@@ -36,41 +36,21 @@
#include "di-set.h"
-/* FIXME: ugly duplication of code from di-set.c */
-#define N_DEV_BITS_4 5
-#define N_INO_BITS_4 (32 - N_DEV_BITS_4 - 2 - 1)
-
-#define N_DEV_BITS_8 8
-#define N_INO_BITS_8 (64 - N_DEV_BITS_8 - 2 - 1)
-
int
main (void)
{
/* set_program_name (argv[0]); placate overzealous "syntax-check" test. */
- size_t initial_size = 61;
- /* "real" code might prefer to avoid the allocation here, simply
- declaring "struct di_set_state dis;", do a global substitution,
- s/\<dis\>/\&dis/, and remove the final free. */
- struct di_set_state *dis = malloc (sizeof *dis);
+ struct di_set *dis = di_set_alloc ();
ASSERT (dis);
- ASSERT (di_set_init (dis, initial_size) == 0);
-
- struct di_ent *di_ent;
- ASSERT (di_ent_create (dis, 1, 1, &di_ent) == 0);
- ASSERT (di_ent_create (dis, 1 << N_DEV_BITS_4, 1, &di_ent) == 0);
- ASSERT (di_ent_create (dis, 1, 1 << N_INO_BITS_4, &di_ent) == 0);
- ASSERT (di_ent_create (dis, 1,
- (uint64_t) 1 << N_INO_BITS_8, &di_ent) == 0);
- free (di_ent);
ASSERT (di_set_insert (dis, 2, 5) == 1); /* first insertion succeeds */
ASSERT (di_set_insert (dis, 2, 5) == 0); /* duplicate fails */
ASSERT (di_set_insert (dis, 3, 5) == 1); /* diff dev, duplicate inode is ok */
ASSERT (di_set_insert (dis, 2, 8) == 1); /* same dev, different inode is ok */
- /* very large inode number */
- ASSERT (di_set_insert (dis, 5, (uint64_t) 1 << 63) == 1);
- ASSERT (di_set_insert (dis, 5, (uint64_t) 1 << 63) == 0); /* dup */
+ /* very large (or negative) inode number */
+ ASSERT (di_set_insert (dis, 5, (ino_t) -1) == 1);
+ ASSERT (di_set_insert (dis, 5, (ino_t) -1) == 0); /* dup */
unsigned int i;
for (i = 0; i < 3000; i++)
@@ -79,7 +59,6 @@ main (void)
ASSERT (di_set_insert (dis, 9, i) == 0); /* duplicate fails */
di_set_free (dis);
- free (dis);
return 0;
}
diff --git a/gl/tests/test-dev-map.c b/gl/tests/test-ino-map.c
index 98ba6301b..2b44602e2 100644
--- a/gl/tests/test-dev-map.c
+++ b/gl/tests/test-ino-map.c
@@ -1,4 +1,4 @@
-/* Test the dev-map module.
+/* Test the ino-map module.
Copyright (C) 2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
@@ -34,30 +34,30 @@
} \
while (0)
-#include "dev-map.h"
-
-/* Risky: this is also defined in di-set.c; they should match. */
-#define N_DEV_BITS_4 5
+#include "ino-map.h"
int
main ()
{
/* set_program_name (argv[0]); placate overzealous "syntax-check" test. */
- struct dev_map dev_map;
- ASSERT (dev_map_init (&dev_map) == 0);
-
- ASSERT (dev_map_insert (&dev_map, 42) == 0);
- ASSERT (dev_map_insert (&dev_map, 42) == 0);
- ASSERT (dev_map_insert (&dev_map, 398) == 1);
- ASSERT (dev_map_insert (&dev_map, 398) == 1);
-
- int32_t i;
- for (i = 0; i < (1 << N_DEV_BITS_4); i++)
+ enum { INO_MAP_INIT = 123 };
+ struct ino_map *ino_map = ino_map_alloc (INO_MAP_INIT);
+ ASSERT (ino_map != NULL);
+
+ ASSERT (ino_map_insert (ino_map, 42) == INO_MAP_INIT);
+ ASSERT (ino_map_insert (ino_map, 42) == INO_MAP_INIT);
+ ASSERT (ino_map_insert (ino_map, 398) == INO_MAP_INIT + 1);
+ ASSERT (ino_map_insert (ino_map, 398) == INO_MAP_INIT + 1);
+ ASSERT (ino_map_insert (ino_map, 0) == INO_MAP_INIT + 2);
+ ASSERT (ino_map_insert (ino_map, 0) == INO_MAP_INIT + 2);
+
+ int i;
+ for (i = 0; i < 100; i++)
{
- ASSERT (dev_map_insert (&dev_map, 10000+i) == 2+i);
+ ASSERT (ino_map_insert (ino_map, 10000 + i) == INO_MAP_INIT + 3 + i);
}
- dev_map_free (&dev_map);
+ ino_map_free (ino_map);
return 0;
}
diff --git a/src/du.c b/src/du.c
index 7c02c8619..739be73ea 100644
--- a/src/du.c
+++ b/src/du.c
@@ -60,11 +60,8 @@ extern bool fts_debug;
# define FTS_CROSS_CHECK(Fts)
#endif
-/* Initial size of the hash table. */
-enum { INITIAL_DI_SET_SIZE = 1021 };
-
/* A set of dev/ino pairs. */
-static struct di_set_state di_set;
+static struct di_set *di_set;
/* Define a class for collecting directory information. */
@@ -337,14 +334,10 @@ Mandatory arguments to long options are mandatory for short options too.\n\
static bool
hash_ins (ino_t ino, dev_t dev)
{
- int inserted = di_set_insert (&di_set, dev, ino);
+ int inserted = di_set_insert (di_set, dev, ino);
if (inserted < 0)
- {
- /* Insertion failed due to lack of memory. */
- xalloc_die ();
- }
-
- return inserted ? true : false;
+ xalloc_die ();
+ return inserted;
}
/* FIXME: this code is nearly identical to code in date.c */
@@ -905,7 +898,8 @@ main (int argc, char **argv)
xalloc_die ();
/* Initialize the set of dev,inode pairs. */
- if (di_set_init (&di_set, INITIAL_DI_SET_SIZE))
+ di_set = di_set_alloc ();
+ if (!di_set)
xalloc_die ();
bit_flags |= symlink_deref_bits;
@@ -977,6 +971,7 @@ main (int argc, char **argv)
}
argv_iter_free (ai);
+ di_set_free (di_set);
if (files_from && (ferror (stdin) || fclose (stdin) != 0))
error (EXIT_FAILURE, 0, _("error reading %s"), quote (files_from));
@@ -984,7 +979,5 @@ main (int argc, char **argv)
if (print_grand_total)
print_size (&tot_dui, _("total"));
- di_set_free (&di_set);
-
exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}