diff options
-rw-r--r-- | gl/lib/di-set.c | 276 | ||||
-rw-r--r-- | gl/lib/di-set.h | 28 | ||||
-rw-r--r-- | gl/modules/di-set | 25 | ||||
-rw-r--r-- | gl/modules/di-set-tests | 10 | ||||
-rw-r--r-- | gl/tests/test-di-set.c | 85 |
5 files changed, 424 insertions, 0 deletions
diff --git a/gl/lib/di-set.c b/gl/lib/di-set.c new file mode 100644 index 000000000..3c4717b73 --- /dev/null +++ b/gl/lib/di-set.c @@ -0,0 +1,276 @@ +/* Set operations for device-inode pairs stored in a space-efficient manner. + + Copyright 2009-2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* written by Jim Meyering */ + +#include <config.h> +#include "di-set.h" + +#include <stdio.h> +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "verify.h" + +/* Set operations for device-inode pairs stored in a space-efficient manner. + A naive mapping uses 16 bytes to save a single st_dev, st_ino pair. + However, in many applications, the vast majority of actual device,inode + number pairs can be efficiently compressed to fit in 8 or even 4 bytes, + by using a separate table to map a relatively small number of devices + to small integers. */ + +#define N_DEV_BITS_4 5 +#define N_INO_BITS_4 (32 - N_DEV_BITS_4 - 2 - 1) + +#define N_DEV_BITS_8 8 +#define N_INO_BITS_8 (64 - N_DEV_BITS_8 - 2 - 1) + +/* Note how the last bit is always set. + This is required, in order to be able to distinguish + an encoded di_ent value from a malloc-returned pointer, + which must be 4-byte-aligned or better. */ +struct dev_ino_4 +{ + uint32_t mode:2; /* must be first */ + uint32_t short_ino:N_INO_BITS_4; + uint32_t mapped_dev:N_DEV_BITS_4; + uint32_t always_set:1; +}; +verify (N_DEV_BITS_4 <= 8 * sizeof (int)); +verify (sizeof (struct dev_ino_4) == 4); + +struct dev_ino_8 +{ + uint32_t mode:2; /* must be first */ + uint64_t short_ino:N_INO_BITS_8; + uint32_t mapped_dev:N_DEV_BITS_8; + uint32_t always_set:1; +}; +verify (sizeof (struct dev_ino_8) == 8); + +struct dev_ino_full +{ + uint32_t mode:2; /* must be first */ + dev_t dev; + ino_t ino; +}; + +enum di_mode +{ + DI_MODE_4 = 1, + DI_MODE_8 = 2, + DI_MODE_FULL = 3 +}; + +/* + di_mode raw_inode mapped dev always_set + \____________|_______________\_____/ + 4-byte | 2| 25 | 5 |1| mapped_dev + `----------------------------------------------------|-----. + 8-byte | 2| 53 | 8 |1| + `----------------------------------------------------------' +*/ +struct di_ent +{ + union + { + struct dev_ino_4 di4; + struct dev_ino_8 di8; + struct dev_ino_full full; + uint32_t u32; + uint64_t u64; + void *ptr; + } u; +}; + +struct dev_map_ent +{ + dev_t dev; + uint32_t mapped_dev; +}; + +static inline bool +is_encoded_ptr (struct di_ent const *v) +{ + return (size_t) v % 4; +} + +static struct di_ent +decode_ptr (struct di_ent const *v) +{ + if (!is_encoded_ptr (v)) + return *v; + + struct di_ent di; + di.u.ptr = (void *) v; + return di; +} + +static size_t +di_ent_hash (void const *x, size_t table_size) +{ + struct di_ent e = decode_ptr (x); + return (e.u.di4.mode == DI_MODE_4 + ? e.u.u32 + : (e.u.di4.mode == DI_MODE_8 + ? e.u.u64 + : e.u.full.ino)) % table_size; +} + +/* Compare two di_ent structs. + Return true if they are the same. */ +static bool +di_ent_compare (void const *x, void const *y) +{ + struct di_ent a = decode_ptr (x); + struct di_ent b = decode_ptr (y); + if (a.u.di4.mode != b.u.di4.mode) + return false; + + if (a.u.di4.mode == DI_MODE_4) + return (a.u.di4.short_ino == b.u.di4.short_ino + && a.u.di4.mapped_dev == b.u.di4.mapped_dev); + + if (a.u.di8.mode == DI_MODE_8) + return (a.u.di8.short_ino == b.u.di8.short_ino + && a.u.di8.mapped_dev == b.u.di8.mapped_dev); + + return (a.u.full.ino == b.u.full.ino + && a.u.full.dev == b.u.full.dev); +} + +static void +di_ent_free (void *v) +{ + if ( ! is_encoded_ptr (v)) + free (v); +} + +int +di_set_init (struct di_set_state *dis, size_t initial_size) +{ + if (dev_map_init (&dis->dev_map) < 0) + return -1; + + dis->di_set = hash_initialize (initial_size, NULL, + di_ent_hash, di_ent_compare, di_ent_free); + return dis->di_set ? 0 : -1; +} + +void +di_set_free (struct di_set_state *dis) +{ + dev_map_free (&dis->dev_map); + hash_free (dis->di_set); +} + +/* Given a device-inode set, DIS, create an entry for the DEV,INO + pair, and store it in *V. If possible, encode DEV,INO into the pointer + itself, but if not, allocate space for a full "struct di_ent" and set *V + to that pointer. Upon memory allocation failure, return -1. + Otherwise return 0. */ +int +di_ent_create (struct di_set_state *dis, + dev_t dev, ino_t ino, + struct di_ent **v) +{ + static int prev_m = -1; + static dev_t prev_dev = -1; + struct di_ent di_ent; + int mapped_dev; + + if (dev == prev_dev) + mapped_dev = prev_m; + else + { + mapped_dev = dev_map_insert (&dis->dev_map, dev); + if (mapped_dev < 0) + return -1; + prev_dev = dev; + prev_m = mapped_dev; + } + + if (mapped_dev < (1 << N_DEV_BITS_4) + && ino < (1 << N_INO_BITS_4)) + { +#if lint + /* When this struct is smaller than a pointer, initialize + the pointer so tools like valgrind don't complain about + the uninitialized bytes. */ + if (sizeof di_ent.u.di4 < sizeof di_ent.u.ptr) + di_ent.u.ptr = NULL; +#endif + di_ent.u.di4.mode = DI_MODE_4; + di_ent.u.di4.short_ino = ino; + di_ent.u.di4.mapped_dev = mapped_dev; + di_ent.u.di4.always_set = 1; + *v = di_ent.u.ptr; + } + else if (mapped_dev < (1 << N_DEV_BITS_8) + && ino < ((uint64_t) 1 << N_INO_BITS_8)) + { + di_ent.u.di8.mode = DI_MODE_8; + di_ent.u.di8.short_ino = ino; + di_ent.u.di8.mapped_dev = mapped_dev; + di_ent.u.di8.always_set = 1; + *v = di_ent.u.ptr; + } + else + { + /* Handle the case in which INO is too large or in which (far less + likely) we encounter hard-linked files on 2^N_DEV_BITS_8 + different devices. */ + struct di_ent *p = malloc (sizeof *p); + if (!p) + return -1; + assert ((size_t) p % 4 == 0); + p->u.full.mode = DI_MODE_FULL; + p->u.full.ino = ino; + p->u.full.dev = dev; + *v = p; + } + + return 0; +} + +/* Attempt to insert the DEV,INO pair into the set, DIS. + If it matches a pair already in DIS, don't modify DIS and return 0. + Otherwise, if insertion is successful, return 1. + Upon any failure return -1. */ +int +di_set_insert (struct di_set_state *dis, dev_t dev, ino_t ino) +{ + struct di_ent *v; + if (di_ent_create (dis, dev, ino, &v) < 0) + return -1; + + int err = hash_insert0 (dis->di_set, v, NULL); + if (err == -1) /* Insertion failed due to lack of memory. */ + return -1; + + if (err == 1) /* Insertion succeeded. */ + return 1; + + /* That pair is already in the table, so ENT was not inserted. Free it. */ + if (! is_encoded_ptr (v)) + free (v); + + return 0; +} diff --git a/gl/lib/di-set.h b/gl/lib/di-set.h new file mode 100644 index 000000000..f90d0ddc7 --- /dev/null +++ b/gl/lib/di-set.h @@ -0,0 +1,28 @@ +#include "dev-map.h" + +struct di_set_state +{ + /* A map to help compact device numbers. */ + struct dev_map dev_map; + + /* A set of compact dev,inode pairs. */ + struct hash_table *di_set; +}; + +#undef _ATTRIBUTE_NONNULL_ +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__ +# define _ATTRIBUTE_NONNULL_(m,...) __attribute__ ((__nonnull__ (m))) +#else +# define _ATTRIBUTE_NONNULL_(m,...) +#endif + +int di_set_init (struct di_set_state *, size_t) _ATTRIBUTE_NONNULL_ (1); +void di_set_free (struct di_set_state *) _ATTRIBUTE_NONNULL_ (1); +int di_set_insert (struct di_set_state *, dev_t, ino_t) + _ATTRIBUTE_NONNULL_ (1); + +struct di_ent; +int di_ent_create (struct di_set_state *di_set_state, + dev_t dev, ino_t ino, + struct di_ent **di_ent) + _ATTRIBUTE_NONNULL_ (1,4); diff --git a/gl/modules/di-set b/gl/modules/di-set new file mode 100644 index 000000000..fe5277858 --- /dev/null +++ b/gl/modules/di-set @@ -0,0 +1,25 @@ +Description: +manipulate sets of device-inode pairs efficiently + +Files: +lib/di-set.c +lib/di-set.h + +Depends-on: +dev-map +hash +verify + +configure.ac: + +Makefile.am: +lib_SOURCES += di-set.c di-set.h + +Include: +"di-set.h" + +License +GPL + +Maintainer: +Jim Meyering diff --git a/gl/modules/di-set-tests b/gl/modules/di-set-tests new file mode 100644 index 000000000..d60f7fdc6 --- /dev/null +++ b/gl/modules/di-set-tests @@ -0,0 +1,10 @@ +Files: +tests/test-di-set.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-di-set +check_PROGRAMS += test-di-set diff --git a/gl/tests/test-di-set.c b/gl/tests/test-di-set.c new file mode 100644 index 000000000..4d5823ed8 --- /dev/null +++ b/gl/tests/test-di-set.c @@ -0,0 +1,85 @@ +/* Test the dev-map module. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Jim Meyering. */ + +#include <config.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> + +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#include "di-set.h" + +/* FIXME: ugly duplication of code from di-set.c */ +#define N_DEV_BITS_4 5 +#define N_INO_BITS_4 (32 - N_DEV_BITS_4 - 2 - 1) + +#define N_DEV_BITS_8 8 +#define N_INO_BITS_8 (64 - N_DEV_BITS_8 - 2 - 1) + +int +main (void) +{ + /* set_program_name (argv[0]); placate overzealous "syntax-check" test. */ + size_t initial_size = 61; + /* "real" code might prefer to avoid the allocation here, simply + declaring "struct di_set_state dis;", do a global substitution, + s/\<dis\>/\&dis/, and remove the final free. */ + struct di_set_state *dis = malloc (sizeof *dis); + ASSERT (dis); + ASSERT (di_set_init (dis, initial_size) == 0); + + struct di_ent *di_ent; + ASSERT (di_ent_create (dis, 1, 1, &di_ent) == 0); + ASSERT (di_ent_create (dis, 1 << N_DEV_BITS_4, 1, &di_ent) == 0); + ASSERT (di_ent_create (dis, 1, 1 << N_INO_BITS_4, &di_ent) == 0); + ASSERT (di_ent_create (dis, 1, + (uint64_t) 1 << N_INO_BITS_8, &di_ent) == 0); + free (di_ent); + + ASSERT (di_set_insert (dis, 2, 5) == 1); /* first insertion succeeds */ + ASSERT (di_set_insert (dis, 2, 5) == 0); /* duplicate fails */ + ASSERT (di_set_insert (dis, 3, 5) == 1); /* diff dev, duplicate inode is ok */ + ASSERT (di_set_insert (dis, 2, 8) == 1); /* same dev, different inode is ok */ + + /* very large inode number */ + ASSERT (di_set_insert (dis, 5, (uint64_t) 1 << 63) == 1); + ASSERT (di_set_insert (dis, 5, (uint64_t) 1 << 63) == 0); /* dup */ + + unsigned int i; + for (i = 0; i < 3000; i++) + ASSERT (di_set_insert (dis, 9, i) == 1); + for (i = 0; i < 3000; i++) + ASSERT (di_set_insert (dis, 9, i) == 0); /* duplicate fails */ + + di_set_free (dis); + free (dis); + + return 0; +} |