summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2010-12-16 00:03:29 -0800
committerPaul Eggert <eggert@cs.ucla.edu>2010-12-16 00:04:32 -0800
commitf3c584d1e08646704cb46f4e5b6afc4afef3f70a (patch)
tree7a15e6158943421dca1153c3018d7aa6fdd21c1e
parent33a5de5a0efd73abbb532dbd09f5afd23c511484 (diff)
downloadcoreutils-f3c584d1e08646704cb46f4e5b6afc4afef3f70a.tar.xz
sort: don't dump core when merging from input twice
* NEWS: Document this.
* src/sort.c (avoid_trashing_input): The previous fix to this
function didn't fix all the problems with this code.  Replace it
with something simpler: just copy the input file.  This doesn't
change the number of files, so return void instead of the updated
file count.  Caller changed.
* tests/misc/sort-merge-fdlimit: Test for the bug.
-rw-r--r--NEWS2
-rw-r--r--src/sort.c42
-rwxr-xr-xtests/misc/sort-merge-fdlimit20
3 files changed, 39 insertions, 25 deletions
diff --git a/NEWS b/NEWS
index e98b815bc..429a1b76a 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,8 @@ GNU coreutils NEWS -*- outline -*-
sort --compress no longer mishandles subprocesses' exit statuses.
+ sort -m -o f f ... f no longer dumps core when file descriptors are limited.
+
** New features
split accepts the --number option to generate a specific number of files.
diff --git a/src/sort.c b/src/sort.c
index 3321ddb6d..f53e64d9c 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -3549,12 +3549,12 @@ sortlines (struct line *restrict lines, size_t nthreads,
pthread_mutex_destroy (&node->lock);
}
-/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
- the same as OUTFILE. If found, merge the found instances (and perhaps
- some other files) into a temporary file so that it can in turn be
- merged into OUTFILE without destroying OUTFILE before it is completely
- read. Return the new value of NFILES, which differs from the old if
- some merging occurred.
+/* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are
+ the same as OUTFILE. If found, replace each with the same
+ temporary copy that can be merged into OUTFILE without destroying
+ OUTFILE before it is completely read. This temporary copy does not
+ count as a merge temp, so don't worry about incrementing NTEMPS in
+ the caller; final cleanup will remove it, not zaptemp.
This test ensures that an otherwise-erroneous use like
"sort -m -o FILE ... FILE ..." copies FILE before writing to it.
@@ -3566,13 +3566,15 @@ sortlines (struct line *restrict lines, size_t nthreads,
Catching these obscure cases would slow down performance in
common cases. */
-static size_t
+static void
avoid_trashing_input (struct sortfile *files, size_t ntemps,
size_t nfiles, char const *outfile)
{
size_t i;
bool got_outstat = false;
struct stat outstat;
+ char const *tempcopy = NULL;
+ pid_t pid IF_LINT (= 0);
for (i = ntemps; i < nfiles; i++)
{
@@ -3603,27 +3605,17 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps,
if (same)
{
- FILE *tftp;
- pid_t pid;
- char *temp = create_temp (&tftp, &pid);
- size_t num_merged = 0;
- do
+ if (! tempcopy)
{
- num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp);
- files[i].name = temp;
- files[i].pid = pid;
-
- memmove (&files[i + 1], &files[i + num_merged],
- (nfiles - (i + num_merged)) * sizeof *files);
- ntemps += 1;
- nfiles -= num_merged - 1;;
- i += num_merged;
+ FILE *tftp;
+ tempcopy = create_temp (&tftp, &pid);
+ mergefiles (&files[i], 0, 1, tftp, tempcopy);
}
- while (i < nfiles);
+
+ files[i].name = tempcopy;
+ files[i].pid = pid;
}
}
-
- return nfiles;
}
/* Merge the input FILES. NTEMPS is the number of files at the
@@ -3693,7 +3685,7 @@ merge (struct sortfile *files, size_t ntemps, size_t nfiles,
nfiles -= in - out;
}
- nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
+ avoid_trashing_input (files, ntemps, nfiles, output_file);
/* We aren't guaranteed that this final mergefiles will work, therefore we
try to merge into the output, and then merge as much as we can into a
diff --git a/tests/misc/sort-merge-fdlimit b/tests/misc/sort-merge-fdlimit
index 1ed50952f..b0ab71cec 100755
--- a/tests/misc/sort-merge-fdlimit
+++ b/tests/misc/sort-merge-fdlimit
@@ -50,4 +50,24 @@ for randsource in '' --random-source=some-data; do
|| ! grep "open failed" err/merge-random-err) || fail=1
done
+# 'sort -m' should work in a limited file descriptor
+# environment when the output is repeatedly one of its inputs.
+# In coreutils 8.7 and earlier, 'sort' would dump core on this test.
+#
+# This test uses 'exec' to redirect file descriptors rather than
+# ordinary redirection on the 'sort' command. This is intended to
+# work around bugs in OpenBSD /bin/sh, and some other sh variants,
+# that squirrel away file descriptors before closing them; see
+# <http://lists.gnu.org/archive/html/bug-tar/2010-10/msg00075.html>.
+# This test finds the bug only with shells that do not close FDs on
+# exec, and will miss the bug (if present) on other shells, but it's
+# not easy to fix this without running afoul of the OpenBSD-like sh bugs.
+(seq 6 && echo 6) >exp || fail=1
+echo 6 >out || fail=1
+(exec 3<&- 4<&- 5<&- 6</dev/null 7<&6 8<&6 9<&6 &&
+ ulimit -n 10 &&
+ sort -n -m --batch-size=7 -o out out in/1 in/2 in/3 in/4 in/5 out
+) &&
+compare out exp || fail=1
+
Exit $fail