diff options
author | Pádraig Brady <P@draigBrady.com> | 2013-11-07 17:00:56 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2013-11-27 01:43:12 +0000 |
commit | 569b4edd18cddb5a8cc1f9549a7c1eed91b674f7 (patch) | |
tree | 00acb5a1a0da66ae42e6b61f2befd144c716f466 | |
parent | ba6582e95ce2a041423e1ff34c93abe7b4702332 (diff) | |
download | coreutils-569b4edd18cddb5a8cc1f9549a7c1eed91b674f7.tar.xz |
shred: provide --remove methods to avoid excessive syncing
A sync operation is very often expensive. For illustration,
I timed the following Python script, which indicated that
each ext4 directory sync took about 2ms on an SSD and about
12ms on a traditional disk.
  import os
  d=os.open(".", os.O_DIRECTORY|os.O_RDONLY)
  for i in range(1000):
      os.fdatasync(d)
So syncing for each character for each file can result
in significant delays. Often this overhead is redundant,
as only the data is sensitive and not the file name.
Even if the names are sensitive, your file system may
employ synchronous metadata updates, which also makes
explicit syncing redundant.
* tests/misc/shred-remove.sh: Ensure all the new parameters
actually unlink the file.
* doc/coreutils.texi (shred invocation): Describe the new
parameters to the --remove option.
* src/shred.c (Usage): Likewise.
(main): Parse the new options.
(wipename): Inspect the new enum to see which of
the now optional tasks to perform.
* NEWS: Mention the new feature.
* THANKS.in: Add reporter Joseph D. Wagner.
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | THANKS.in | 1 | ||||
-rw-r--r-- | doc/coreutils.texi | 15 | ||||
-rw-r--r-- | src/shred.c | 49 | ||||
-rwxr-xr-x | tests/misc/shred-remove.sh | 19 |
5 files changed, 72 insertions, 15 deletions
@@ -76,6 +76,9 @@ GNU coreutils NEWS -*- outline -*- csplit accepts a new option: --suppressed-matched, to elide the lines used to identify the split points. + shred accepts new parameters to the --remove option to give greater + control over that operation, which can greatly reduce sync overhead. + shuf accepts a new option: --repetitions (-r), to allow repetitions of input items in the permuted output. @@ -316,6 +316,7 @@ Jon Peatfield J.S.Peatfield@damtp.cam.ac.uk Joost van Baal joostvb@xs4all.nl Jordi Pujol jordipujolp@gmail.com Jorge Stolfi stolfi@ic.unicamp.br +Joseph D. Wagner joe@josephdwagner.info Joseph S. Myers jsm28@cam.ac.uk Josh Triplett josh@freedesktop.org Joshua Hudson joshudson@gmail.com diff --git a/doc/coreutils.texi b/doc/coreutils.texi index c7258ff5d..7a5023101 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -9563,12 +9563,25 @@ the whole file. @var{bytes} can be followed by a size specification like @samp{K}, @samp{M}, or @samp{G} to specify a multiple. @xref{Block size}. @item -u -@itemx --remove +@itemx --remove[=@var{how}] @opindex -u @opindex --remove +@opindex --remove=unlink +@opindex --remove=wipe +@opindex --remove=wipesync @cindex removing files after shredding After shredding a file, truncate it (if possible) and then remove it. If a file has multiple links, only the named links will be removed. +Often the file name is less sensitive than the file data, in which case +the optional @var{how} parameter gives control of how to more efficiently +remove each directory entry. +The @samp{unlink} parameter will just use a standard unlink call, +@samp{wipe} will also first obfuscate bytes in the name, and +@samp{wipesync} will also sync each obfuscated byte in the name to disk. +Note @samp{wipesync} is the default method, but can be expensive, +requiring a sync for every character in every file. This can become +significant with many files, or is redundant if your file system provides +synchronous metadata updates. 
@item -v @itemx --verbose diff --git a/src/shred.c b/src/shred.c index 95a255a5b..6f5b4da65 100644 --- a/src/shred.c +++ b/src/shred.c @@ -82,6 +82,7 @@ #include <sys/types.h> #include "system.h" +#include "argmatch.h" #include "xstrtol.h" #include "error.h" #include "fcntl--.h" @@ -104,12 +105,30 @@ enum { SECTOR_SIZE = 512 }; enum { SECTOR_MASK = SECTOR_SIZE - 1 }; verify (0 < SECTOR_SIZE && (SECTOR_SIZE & SECTOR_MASK) == 0); +enum remove_method +{ + remove_none = 0, /* the default: only wipe data. */ + remove_unlink, /* don't obfuscate name, just unlink. */ + remove_wipe, /* obfuscate name before unlink. */ + remove_wipesync /* obfuscate name, syncing each byte, before unlink. */ +}; + +static char const *const remove_args[] = +{ + "unlink", "wipe", "wipesync", NULL +}; + +static enum remove_method const remove_methods[] = +{ + remove_unlink, remove_wipe, remove_wipesync +}; + struct Options { bool force; /* -f flag: chmod files if necessary */ size_t n_iterations; /* -n flag: Number of iterations */ off_t size; /* -s flag: size of file */ - bool remove_file; /* -u flag: remove file after shredding */ + enum remove_method remove_file; /* -u flag: remove file after shredding */ bool verbose; /* -v flag: Print progress */ bool exact; /* -x flag: Do not round up file size */ bool zero_fill; /* -z flag: Add a final zero pass */ @@ -129,7 +148,7 @@ static struct option const long_opts[] = {"iterations", required_argument, NULL, 'n'}, {"size", required_argument, NULL, 's'}, {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION}, - {"remove", no_argument, NULL, 'u'}, + {"remove", optional_argument, NULL, 'u'}, {"verbose", no_argument, NULL, 'v'}, {"zero", no_argument, NULL, 'z'}, {GETOPT_HELP_OPTION_DECL}, @@ -159,7 +178,7 @@ for even very expensive hardware probing to recover the data.\n\ -s, --size=N shred this many bytes (suffixes like K, M, G accepted)\n\ "), DEFAULT_PASSES); fputs (_("\ - -u, --remove truncate and remove file after overwriting\n\ + -u, 
--remove[=HOW] truncate and remove file after overwriting; See below\n\ -v, --verbose show progress\n\ -x, --exact do not round file sizes up to the next full block;\n\ this is the default for non-regular files\n\ @@ -173,8 +192,12 @@ If FILE is -, shred standard output.\n\ \n\ Delete FILE(s) if --remove (-u) is specified. The default is not to remove\n\ the files because it is common to operate on device files like /dev/hda,\n\ -and those files usually should not be removed. When operating on regular\n\ -files, most people use the --remove option.\n\ +and those files usually should not be removed.\n\ +The optional HOW parameter indicates how to remove a directory entry:\n\ +'unlink' => use a standard unlink call.\n\ +'wipe' => also first obfuscate bytes in the name.\n\ +'wipesync' => also sync each obfuscated byte to disk.\n\ +The default mode is 'wipesync', but note it can be expensive.\n\ \n\ "), stdout); fputs (_("\ @@ -965,8 +988,8 @@ incname (char *name, size_t len) /* * Repeatedly rename a file with shorter and shorter names, - * to obliterate all traces of the file name on any system that - * adds a trailing delimiter to on-disk file names and reuses + * to obliterate all traces of the file name (and length) on any system + * that adds a trailing delimiter to on-disk file names and reuses * the same directory slot. Finally, unlink it. * The passed-in filename is modified in place to the new filename. 
* (Which is unlinked if this function succeeds, but is still present if @@ -999,13 +1022,15 @@ wipename (char *oldname, char const *qoldname, struct Options const *flags) char *qdir = xstrdup (quotearg_colon (dir)); bool first = true; bool ok = true; + int dir_fd = -1; - int dir_fd = open (dir, O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK); + if (flags->remove_file == remove_wipesync) + dir_fd = open (dir, O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK); if (flags->verbose) error (0, 0, _("%s: removing"), qoldname); - while (len) + while ((flags->remove_file != remove_unlink) && len) { memset (base, nameset[0], len); base[len] = 0; @@ -1175,7 +1200,11 @@ main (int argc, char **argv) break; case 'u': - flags.remove_file = true; + if (optarg == NULL) + flags.remove_file = remove_wipesync; + else + flags.remove_file = XARGMATCH ("--remove", optarg, + remove_args, remove_methods); break; case 's': diff --git a/tests/misc/shred-remove.sh b/tests/misc/shred-remove.sh index 891dc2d21..5ccfbe295 100755 --- a/tests/misc/shred-remove.sh +++ b/tests/misc/shred-remove.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Exercise a bug that was fixed in shred-4.0l +# Exercise shred --remove # Copyright (C) 1999-2013 Free Software Foundation, Inc. @@ -21,16 +21,27 @@ print_ver_ shred skip_if_root_ # The length of the basename is what matters. -# In this case, shred would try to rename the file 256^10 times +# In this case, shred-4.0l would try to rename the file 256^10 times # before terminating. file=0123456789 touch $file || framework_failure_ chmod u-w $file || framework_failure_ - # This would take so long that it appears to infloop # when using version from fileutils-4.0k. # When the command completes, expect it to fail. 
-shred -u $file > /dev/null 2>&1 && fail=1 || : +shred -u $file > /dev/null 2>&1 && fail=1 +rm -f $file || framework_failure_ + +# Ensure all --remove methods at least unlink the file +for mode in '' '=unlink' '=wipe' '=wipesync'; do + touch $file || framework_failure_ + shred -n0 --remove"$mode" $file || fail=1 + test -e $file && fail=1 +done + +# Ensure incorrect params are diagnosed +touch $file || framework_failure_ +shred -n0 --remove=none $file 2>/dev/null && fail=1 Exit $fail |