summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS 5
-rw-r--r-- THANKS.in 1
-rw-r--r-- src/sort.c 44
-rwxr-xr-x tests/misc/sort 11
4 files changed, 57 insertions, 4 deletions
diff --git a/NEWS b/NEWS
index 012a63379..f39a76ae6 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,11 @@ GNU coreutils NEWS -*- outline -*-
certain options like -a, -l, -t and -x.
[This bug was present in "the beginning".]
+ sort -u could fail to output one or more result lines.
+ For example, this command would fail to print "1":
+ (yes 7 | head -11; echo 1) | sort --p=1 -S32b -u
+ [bug introduced in coreutils-8.6]
+
** New features
rm now accepts the --dir (-d) option which makes it remove empty directories.
diff --git a/THANKS.in b/THANKS.in
index 5db443bce..a73620171 100644
--- a/THANKS.in
+++ b/THANKS.in
@@ -508,6 +508,7 @@ Primoz PETERLIN primozz.peterlin@gmail.com
Rainer Orth ro@TechFak.Uni-Bielefeld.DE
Ralf W. Stephan stephan@tmt.de
Ralph Loader loader@maths.ox.ac.uk
+Rasmus Borup Hansen rbh@intomics.com
Raul Miller moth@magenta.com
Raúl Núñez de Arenas Coronado raul@pleyades.net
Richard A Downing richard.downing@bcs.org.uk
diff --git a/src/sort.c b/src/sort.c
index d362dc54a..c2d2d49e7 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -262,6 +262,9 @@ struct merge_node_queue
when popping. */
};
+/* Used to implement --unique (-u). */
+static struct line saved_line;
+
/* FIXME: None of these tables work with multibyte character sets.
Also, there are many other bugs when handling multibyte characters.
One way to fix this is to rewrite 'sort' to use wide characters
@@ -1702,6 +1705,14 @@ limfield (struct line const *line, struct keyfield const *key)
return ptr;
}
+/* Return true if LINE and the buffer BUF of length LEN overlap.
+   LINE's text is the LINE->length bytes starting at LINE->text.
+   The two ranges are treated as disjoint only when one of them
+   ends at or before the address where the other begins. */
+static inline bool
+overlap (char const *buf, size_t len, struct line const *line)
+{
+ char const *line_end = line->text + line->length; /* one past LINE's last byte */
+ return !(line_end <= buf || buf + len <= line->text);
+}
+
/* Fill BUF reading from FP, moving buf->left bytes from the end
of buf->buf to the beginning first. If EOF is reached and the
file wasn't terminated by a newline, supply one. Set up BUF's line
@@ -1742,6 +1753,33 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)
rest of the input file consists entirely of newlines,
except that the last byte is not a newline. */
size_t readsize = (avail - 1) / (line_bytes + 1);
+
+ /* With --unique, when we're about to read into a buffer that
+ overlaps the saved "preceding" line (saved_line), copy the line's
+ .text member to a realloc'd-as-needed temporary buffer and adjust
+ the line's key-defining members if they're set. */
+ if (unique && overlap (ptr, readsize, &saved_line))
+ {
+ /* Copy saved_line.text into a buffer where it won't be clobbered
+ and if KEY is non-NULL, adjust saved_line.key* to match. */
+ static char *safe_text;
+ static size_t safe_text_n_alloc;
+ if (safe_text_n_alloc < saved_line.length)
+ {
+ safe_text_n_alloc = saved_line.length;
+ safe_text = x2nrealloc (safe_text, &safe_text_n_alloc, 1);
+ }
+ memcpy (safe_text, saved_line.text, saved_line.length);
+ if (key)
+ {
+ #define s saved_line
+ s.keybeg = safe_text + (s.keybeg - s.text);
+ s.keylim = safe_text + (s.keylim - s.text);
+ #undef s
+ }
+ saved_line.text = safe_text;
+ }
+
size_t bytes_read = fread (ptr, 1, readsize, fp);
char *ptrlim = ptr + bytes_read;
char *p;
@@ -3348,13 +3386,11 @@ queue_pop (struct merge_node_queue *queue)
static void
write_unique (struct line const *line, FILE *tfp, char const *temp_output)
{
- static struct line saved;
-
if (unique)
{
- if (saved.text && ! compare (line, &saved))
+ if (saved_line.text && ! compare (line, &saved_line))
return;
- saved = *line;
+ saved_line = *line;
}
write_line (line, tfp, temp_output);
diff --git a/tests/misc/sort b/tests/misc/sort
index 5d15d7572..4e5116155 100755
--- a/tests/misc/sort
+++ b/tests/misc/sort
@@ -227,6 +227,17 @@ my @Tests =
["15d", '-i -u', {IN=>"\1a\na\n"}, {OUT=>"\1a\n"}],
["15e", '-i -u', {IN=>"a\n\1\1\1\1\1a\1\1\1\1\n"}, {OUT=>"a\n"}],
+# This would fail (printing only the 7) for 8.6..8.18.
+# Use --parallel=1 for reproducibility, and a small buffer size
+# to let us trigger the problem with a smaller input.
+["unique-1", '--p=1 -S32b -u', {IN=>"7\n"x11 . "1\n"}, {OUT=>"1\n7\n"}],
+# Demonstrate that 8.19's key-spec-adjusting code is required.
+# These are more finicky in that they are arch-dependent.
+["unique-key-i686", '-u -k2,2 --p=1 -S32b',
+ {IN=>"a 7\n"x10 . "b 1\n"}, {OUT=>"b 1\na 7\n"}],
+["unique-key-x86_64", '-u -k2,2 --p=1 -S32b',
+ {IN=>"a 7\n"x11 . "b 1\n"}, {OUT=>"b 1\na 7\n"}],
+
# From Erick Branderhorst -- fixed around 1.19e
["16a", '-f',
{IN=>"éminence\nüberhaupt\n's-Gravenhage\naëroclub\nAag\naagtappels\n"},