summary refs log tree commit diff
diff options
context:
space:
mode:
author Pádraig Brady <P@draigBrady.com> 2013-11-07 17:00:56 +0000
committer Pádraig Brady <P@draigBrady.com> 2013-11-27 01:43:12 +0000
commit 569b4edd18cddb5a8cc1f9549a7c1eed91b674f7 (patch)
tree 00acb5a1a0da66ae42e6b61f2befd144c716f466
parent ba6582e95ce2a041423e1ff34c93abe7b4702332 (diff)
download coreutils-569b4edd18cddb5a8cc1f9549a7c1eed91b674f7.tar.xz
shred: provide --remove methods to avoid excessive syncing
A sync operation is very often expensive.  For illustration
I timed the following python script which indicated that
each ext4 dir sync was taking about 2ms and 12ms, on an
SSD and traditional disk respectively.

  import os
  d=os.open(".", os.O_DIRECTORY|os.O_RDONLY)
  for i in range(1000):
    os.fdatasync(d)

So syncing for each character for each file can result
in significant delays.  Often this overhead is redundant,
as only the data is sensitive and not the file name.
Even if the names are sensitive, your file system may
employ synchronous metadata updates, which also makes
explicit syncing redundant.

* tests/misc/shred-remove.sh: Ensure all the new parameters
actually unlink the file.
* doc/coreutils.texi (shred invocation): Describe the new
parameters to the --remove option.
* src/shred.c (Usage): Likewise.
(main): Parse the new options.
(wipename): Inspect the new enum to see which of the now
optional tasks to perform.
* NEWS: Mention the new feature.
* THANKS.in: Add reporter Joseph D. Wagner
-rw-r--r-- NEWS 3
-rw-r--r-- THANKS.in 1
-rw-r--r-- doc/coreutils.texi 15
-rw-r--r-- src/shred.c 49
-rwxr-xr-x tests/misc/shred-remove.sh 19
5 files changed, 72 insertions, 15 deletions
diff --git a/NEWS b/NEWS
index 5791a2701..f9f3a9e8d 100644
--- a/NEWS
+++ b/NEWS
@@ -76,6 +76,9 @@ GNU coreutils NEWS -*- outline -*-
csplit accepts a new option: --suppressed-matched, to elide the lines
used to identify the split points.
+ shred accepts new parameters to the --remove option to give greater
+ control over that operation, which can greatly reduce sync overhead.
+
shuf accepts a new option: --repetitions (-r), to allow repetitions
of input items in the permuted output.
diff --git a/THANKS.in b/THANKS.in
index 891b376e0..658837601 100644
--- a/THANKS.in
+++ b/THANKS.in
@@ -316,6 +316,7 @@ Jon Peatfield J.S.Peatfield@damtp.cam.ac.uk
Joost van Baal joostvb@xs4all.nl
Jordi Pujol jordipujolp@gmail.com
Jorge Stolfi stolfi@ic.unicamp.br
+Joseph D. Wagner joe@josephdwagner.info
Joseph S. Myers jsm28@cam.ac.uk
Josh Triplett josh@freedesktop.org
Joshua Hudson joshudson@gmail.com
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index c7258ff5d..7a5023101 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -9563,12 +9563,25 @@ the whole file. @var{bytes} can be followed by a size specification like
@samp{K}, @samp{M}, or @samp{G} to specify a multiple. @xref{Block size}.
@item -u
-@itemx --remove
+@itemx --remove[=@var{how}]
@opindex -u
@opindex --remove
+@opindex --remove=unlink
+@opindex --remove=wipe
+@opindex --remove=wipesync
@cindex removing files after shredding
After shredding a file, truncate it (if possible) and then remove it.
If a file has multiple links, only the named links will be removed.
+Often the file name is less sensitive than the file data, in which case
+the optional @var{how} parameter gives control of how to more efficiently
+remove each directory entry.
+The @samp{unlink} parameter will just use a standard unlink call,
+@samp{wipe} will also first obfuscate bytes in the name, and
+@samp{wipesync} will also sync each obfuscated byte in the name to disk.
+Note @samp{wipesync} is the default method, but can be expensive,
+requiring a sync for every character in every file. This can become
+significant with many files, or is redundant if your file system provides
+synchronous metadata updates.
@item -v
@itemx --verbose
diff --git a/src/shred.c b/src/shred.c
index 95a255a5b..6f5b4da65 100644
--- a/src/shred.c
+++ b/src/shred.c
@@ -82,6 +82,7 @@
#include <sys/types.h>
#include "system.h"
+#include "argmatch.h"
#include "xstrtol.h"
#include "error.h"
#include "fcntl--.h"
@@ -104,12 +105,30 @@ enum { SECTOR_SIZE = 512 };
enum { SECTOR_MASK = SECTOR_SIZE - 1 };
verify (0 < SECTOR_SIZE && (SECTOR_SIZE & SECTOR_MASK) == 0);
+enum remove_method
+{
+ remove_none = 0, /* the default: only wipe data. */
+ remove_unlink, /* don't obfuscate name, just unlink. */
+ remove_wipe, /* obfuscate name before unlink. */
+ remove_wipesync /* obfuscate name, syncing each byte, before unlink. */
+};
+
+static char const *const remove_args[] =
+{
+ "unlink", "wipe", "wipesync", NULL
+};
+
+static enum remove_method const remove_methods[] =
+{
+ remove_unlink, remove_wipe, remove_wipesync
+};
+
struct Options
{
bool force; /* -f flag: chmod files if necessary */
size_t n_iterations; /* -n flag: Number of iterations */
off_t size; /* -s flag: size of file */
- bool remove_file; /* -u flag: remove file after shredding */
+ enum remove_method remove_file; /* -u flag: remove file after shredding */
bool verbose; /* -v flag: Print progress */
bool exact; /* -x flag: Do not round up file size */
bool zero_fill; /* -z flag: Add a final zero pass */
@@ -129,7 +148,7 @@ static struct option const long_opts[] =
{"iterations", required_argument, NULL, 'n'},
{"size", required_argument, NULL, 's'},
{"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
- {"remove", no_argument, NULL, 'u'},
+ {"remove", optional_argument, NULL, 'u'},
{"verbose", no_argument, NULL, 'v'},
{"zero", no_argument, NULL, 'z'},
{GETOPT_HELP_OPTION_DECL},
@@ -159,7 +178,7 @@ for even very expensive hardware probing to recover the data.\n\
-s, --size=N shred this many bytes (suffixes like K, M, G accepted)\n\
"), DEFAULT_PASSES);
fputs (_("\
- -u, --remove truncate and remove file after overwriting\n\
+ -u, --remove[=HOW] truncate and remove file after overwriting; See below\n\
-v, --verbose show progress\n\
-x, --exact do not round file sizes up to the next full block;\n\
this is the default for non-regular files\n\
@@ -173,8 +192,12 @@ If FILE is -, shred standard output.\n\
\n\
Delete FILE(s) if --remove (-u) is specified. The default is not to remove\n\
the files because it is common to operate on device files like /dev/hda,\n\
-and those files usually should not be removed. When operating on regular\n\
-files, most people use the --remove option.\n\
+and those files usually should not be removed.\n\
+The optional HOW parameter indicates how to remove a directory entry:\n\
+'unlink' => use a standard unlink call.\n\
+'wipe' => also first obfuscate bytes in the name.\n\
+'wipesync' => also sync each obfuscated byte to disk.\n\
+The default mode is 'wipesync', but note it can be expensive.\n\
\n\
"), stdout);
fputs (_("\
@@ -965,8 +988,8 @@ incname (char *name, size_t len)
/*
* Repeatedly rename a file with shorter and shorter names,
- * to obliterate all traces of the file name on any system that
- * adds a trailing delimiter to on-disk file names and reuses
+ * to obliterate all traces of the file name (and length) on any system
+ * that adds a trailing delimiter to on-disk file names and reuses
* the same directory slot. Finally, unlink it.
* The passed-in filename is modified in place to the new filename.
* (Which is unlinked if this function succeeds, but is still present if
@@ -999,13 +1022,15 @@ wipename (char *oldname, char const *qoldname, struct Options const *flags)
char *qdir = xstrdup (quotearg_colon (dir));
bool first = true;
bool ok = true;
+ int dir_fd = -1;
- int dir_fd = open (dir, O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK);
+ if (flags->remove_file == remove_wipesync)
+ dir_fd = open (dir, O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK);
if (flags->verbose)
error (0, 0, _("%s: removing"), qoldname);
- while (len)
+ while ((flags->remove_file != remove_unlink) && len)
{
memset (base, nameset[0], len);
base[len] = 0;
@@ -1175,7 +1200,11 @@ main (int argc, char **argv)
break;
case 'u':
- flags.remove_file = true;
+ if (optarg == NULL)
+ flags.remove_file = remove_wipesync;
+ else
+ flags.remove_file = XARGMATCH ("--remove", optarg,
+ remove_args, remove_methods);
break;
case 's':
diff --git a/tests/misc/shred-remove.sh b/tests/misc/shred-remove.sh
index 891dc2d21..5ccfbe295 100755
--- a/tests/misc/shred-remove.sh
+++ b/tests/misc/shred-remove.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Exercise a bug that was fixed in shred-4.0l
+# Exercise shred --remove
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
@@ -21,16 +21,27 @@ print_ver_ shred
skip_if_root_
# The length of the basename is what matters.
-# In this case, shred would try to rename the file 256^10 times
+# In this case, shred-4.0l would try to rename the file 256^10 times
# before terminating.
file=0123456789
touch $file || framework_failure_
chmod u-w $file || framework_failure_
-
# This would take so long that it appears to infloop
# when using version from fileutils-4.0k.
# When the command completes, expect it to fail.
-shred -u $file > /dev/null 2>&1 && fail=1 || :
+shred -u $file > /dev/null 2>&1 && fail=1
+rm -f $file || framework_failure_
+
+# Ensure all --remove methods at least unlink the file
+for mode in '' '=unlink' '=wipe' '=wipesync'; do
+ touch $file || framework_failure_
+ shred -n0 --remove"$mode" $file || fail=1
+ test -e $file && fail=1
+done
+
+# Ensure incorrect params are diagnosed
+touch $file || framework_failure_
+shred -n0 --remove=none $file 2>/dev/null && fail=1
Exit $fail