From f3c584d1e08646704cb46f4e5b6afc4afef3f70a Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Thu, 16 Dec 2010 00:03:29 -0800 Subject: sort: don't dump core when merging from input twice * NEWS: Document this. * src/sort.c (avoid_trashing_input): The previous fix to this function didn't fix all the problems with this code. Replace it with something simpler: just copy the input file. This doesn't change the number of files, so return void instead of the updated file count. Caller changed. * tests/misc/sort-merge-fdlimit: Test for the bug. --- NEWS | 2 ++ src/sort.c | 42 +++++++++++++++++------------------------- tests/misc/sort-merge-fdlimit | 20 ++++++++++++++++++++ 3 files changed, 39 insertions(+), 25 deletions(-) diff --git a/NEWS b/NEWS index e98b815bc..429a1b76a 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,8 @@ GNU coreutils NEWS -*- outline -*- sort --compress no longer mishandles subprocesses' exit statuses. + sort -m -o f f ... f no longer dumps core when file descriptors are limited. + ** New features split accepts the --number option to generate a specific number of files. diff --git a/src/sort.c b/src/sort.c index 3321ddb6d..f53e64d9c 100644 --- a/src/sort.c +++ b/src/sort.c @@ -3549,12 +3549,12 @@ sortlines (struct line *restrict lines, size_t nthreads, pthread_mutex_destroy (&node->lock); } -/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is - the same as OUTFILE. If found, merge the found instances (and perhaps - some other files) into a temporary file so that it can in turn be - merged into OUTFILE without destroying OUTFILE before it is completely - read. Return the new value of NFILES, which differs from the old if - some merging occurred. +/* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are + the same as OUTFILE. If found, replace each with the same + temporary copy that can be merged into OUTFILE without destroying + OUTFILE before it is completely read. This temporary copy does not + count as a merge temp, so don't worry about incrementing NTEMPS in + the caller; final cleanup will remove it, not zaptemp. This test ensures that an otherwise-erroneous use like "sort -m -o FILE ... FILE ..." copies FILE before writing to it. @@ -3566,13 +3566,15 @@ sortlines (struct line *restrict lines, size_t nthreads, Catching these obscure cases would slow down performance in common cases. */ -static size_t +static void avoid_trashing_input (struct sortfile *files, size_t ntemps, size_t nfiles, char const *outfile) { size_t i; bool got_outstat = false; struct stat outstat; + char const *tempcopy = NULL; + pid_t pid IF_LINT (= 0); for (i = ntemps; i < nfiles; i++) { @@ -3603,27 +3605,17 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps, if (same) { - FILE *tftp; - pid_t pid; - char *temp = create_temp (&tftp, &pid); - size_t num_merged = 0; - do + if (! tempcopy) { - num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp); - files[i].name = temp; - files[i].pid = pid; - - memmove (&files[i + 1], &files[i + num_merged], - (nfiles - (i + num_merged)) * sizeof *files); - ntemps += 1; - nfiles -= num_merged - 1;; - i += num_merged; + FILE *tftp; + tempcopy = create_temp (&tftp, &pid); + mergefiles (&files[i], 0, 1, tftp, tempcopy); } - while (i < nfiles); + + files[i].name = tempcopy; + files[i].pid = pid; } } - - return nfiles; } /* Merge the input FILES. NTEMPS is the number of files at the @@ -3693,7 +3685,7 @@ merge (struct sortfile *files, size_t ntemps, size_t nfiles, nfiles -= in - out; } - nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file); + avoid_trashing_input (files, ntemps, nfiles, output_file); /* We aren't guaranteed that this final mergefiles will work, therefore we try to merge into the output, and then merge as much as we can into a diff --git a/tests/misc/sort-merge-fdlimit b/tests/misc/sort-merge-fdlimit index 1ed50952f..b0ab71cec 100755 --- a/tests/misc/sort-merge-fdlimit +++ b/tests/misc/sort-merge-fdlimit @@ -50,4 +50,24 @@ for randsource in '' --random-source=some-data; do || ! grep "open failed" err/merge-random-err) || fail=1 done +# 'sort -m' should work in a limited file descriptor +# environment when the output is repeatedly one of its inputs. +# In coreutils 8.7 and earlier, 'sort' would dump core on this test. +# +# This test uses 'exec' to redirect file descriptors rather than +# ordinary redirection on the 'sort' command. This is intended to +# work around bugs in OpenBSD /bin/sh, and some other sh variants, +# that squirrel away file descriptors before closing them; see +# . +# This test finds the bug only with shells that do not close FDs on +# exec, and will miss the bug (if present) on other shells, but it's +# not easy to fix this without running afoul of the OpenBSD-like sh bugs. +(seq 6 && echo 6) >exp || fail=1 +echo 6 >out || fail=1 +(exec 3<&- 4<&- 5<&- 6