summary | refs | log | tree | commit | diff
path: root/src/sort.c
diff options
context:
space:
mode:
author: Jim Meyering <meyering@redhat.com> 2012-08-15 12:30:44 +0200
committer: Jim Meyering <meyering@redhat.com> 2012-08-17 09:52:19 +0200
commit: eb3f5b3b3de8c6ca005a701f09bff43d778aece7 (patch)
tree: 49f8c3d7206fda8a2bd2979a73ba07ad69b2065e /src/sort.c
parent: 0c98bfa3596c4a9f87b3c07976c47246a8ad8684 (diff)
download: coreutils-eb3f5b3b3de8c6ca005a701f09bff43d778aece7.tar.xz
sort: sort --unique (-u) could cause data loss
sort -u could omit one or more lines of expected output. This bug arose because sort recorded the most recently printed line by reference, and if you were unlucky, the storage for that line would be reused (overwritten) as additional input was read into memory. If you were doubly unlucky, the new value of the "saved" line would match the very next line, and that next line would also be the first in a series of identical, not-yet-printed lines; the corrupted "saved" line value would then result in the omission of all matching lines.

* src/sort.c (saved_line): New static/global, renamed and moved from...
(write_unique): ...here. Old name was "saved", which was too generic for its new role as file-scoped global.
(fillbuf): With --unique, when we're about to read into a buffer that overlaps the saved "preceding" line (saved_line), copy the line's .text member to a realloc'd-as-needed temporary buffer and adjust the line's key-defining members if they're set.
(overlap): New function.
* tests/misc/sort: New tests.
* NEWS (Bug fixes): Mention it.
* THANKS.in: Update.

Bug introduced via commit v8.5-89-g9face83.
Reported by Rasmus Borup Hansen in http://thread.gmane.org/gmane.comp.gnu.coreutils.bugs/23173/focus=24647
Diffstat (limited to 'src/sort.c')
-rw-r--r-- src/sort.c 44
1 file changed, 40 insertions, 4 deletions
diff --git a/src/sort.c b/src/sort.c
index d362dc54a..c2d2d49e7 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -262,6 +262,9 @@ struct merge_node_queue
when popping. */
};
+/* Used to implement --unique (-u). */
+static struct line saved_line;
+
/* FIXME: None of these tables work with multibyte character sets.
Also, there are many other bugs when handling multibyte characters.
One way to fix this is to rewrite 'sort' to use wide characters
@@ -1702,6 +1705,14 @@ limfield (struct line const *line, struct keyfield const *key)
return ptr;
}
+/* Return true if LINE and the buffer BUF of length LEN overlap. */
+static inline bool
+overlap (char const *buf, size_t len, struct line const *line)
+{
+ char const *line_end = line->text + line->length;
+ return !(line_end <= buf || buf + len <= line->text);
+}
+
/* Fill BUF reading from FP, moving buf->left bytes from the end
of buf->buf to the beginning first. If EOF is reached and the
file wasn't terminated by a newline, supply one. Set up BUF's line
@@ -1742,6 +1753,33 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)
rest of the input file consists entirely of newlines,
except that the last byte is not a newline. */
size_t readsize = (avail - 1) / (line_bytes + 1);
+
+ /* With --unique, when we're about to read into a buffer that
+ overlaps the saved "preceding" line (saved_line), copy the line's
+ .text member to a realloc'd-as-needed temporary buffer and adjust
+ the line's key-defining members if they're set. */
+ if (unique && overlap (ptr, readsize, &saved_line))
+ {
+ /* Copy saved_line.text into a buffer where it won't be clobbered
+ and if KEY is non-NULL, adjust saved_line.key* to match. */
+ static char *safe_text;
+ static size_t safe_text_n_alloc;
+ if (safe_text_n_alloc < saved_line.length)
+ {
+ safe_text_n_alloc = saved_line.length;
+ safe_text = x2nrealloc (safe_text, &safe_text_n_alloc, 1);
+ }
+ memcpy (safe_text, saved_line.text, saved_line.length);
+ if (key)
+ {
+ #define s saved_line
+ s.keybeg = safe_text + (s.keybeg - s.text);
+ s.keylim = safe_text + (s.keylim - s.text);
+ #undef s
+ }
+ saved_line.text = safe_text;
+ }
+
size_t bytes_read = fread (ptr, 1, readsize, fp);
char *ptrlim = ptr + bytes_read;
char *p;
@@ -3348,13 +3386,11 @@ queue_pop (struct merge_node_queue *queue)
static void
write_unique (struct line const *line, FILE *tfp, char const *temp_output)
{
- static struct line saved;
-
if (unique)
{
- if (saved.text && ! compare (line, &saved))
+ if (saved_line.text && ! compare (line, &saved_line))
return;
- saved = *line;
+ saved_line = *line;
}
write_line (line, tfp, temp_output);