summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErich Eckner <git@eckner.net>2016-11-29 10:08:31 +0100
committerErich Eckner <git@eckner.net>2017-02-16 13:23:49 +0100
commit698617e7eb710a3da51ff4edaf44274109544e4a (patch)
tree315d63c949846233f6e91d70e4728cefba185c8b
parente74f109de104c1966d2d4863aedfb25eefe0358f (diff)
downloadcoreutils-698617e7eb710a3da51ff4edaf44274109544e4a.tar.xz
sort: split off even more source lines useful for uniq
-rw-r--r--src/sort-uniq-keyfuncs.c164
-rw-r--r--src/sort.c164
2 files changed, 164 insertions, 164 deletions
diff --git a/src/sort-uniq-keyfuncs.c b/src/sort-uniq-keyfuncs.c
index 196045bc0..d661aefe3 100644
--- a/src/sort-uniq-keyfuncs.c
+++ b/src/sort-uniq-keyfuncs.c
@@ -37,6 +37,9 @@ static bool hard_LC_COLLATE;
/* The kind of blanks for '-b' to skip in various options. */
enum blanktype { bl_start, bl_end, bl_both };
+/* The character marking end of line. Default to \n. */
+static char eolchar = '\n';
+
/* Lines are held in core as counted strings. */
struct line
{
@@ -46,6 +49,22 @@ struct line
char *keylim; /* Limit of first key. */
};
+/* Input buffers. */
+struct buffer
+{
+ char *buf; /* Dynamically allocated buffer,
+ partitioned into 3 regions:
+ - input data;
+ - unused area;
+ - an array of lines, in reverse order. */
+ size_t used; /* Number of bytes used for input data. */
+ size_t nlines; /* Number of lines in the line array. */
+ size_t alloc; /* Number of bytes allocated. */
+ size_t left; /* Number of bytes left from previous reads. */
+ size_t line_bytes; /* Number of bytes to reserve for each line. */
+ bool eof; /* An EOF has been read. */
+};
+
/* Sort key. */
struct keyfield
{
@@ -116,6 +135,14 @@ static struct month monthtab[] =
{"SEP", 9}
};
+/* Minimum size for a merge or check buffer. */
+#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
+
+/* The number of bytes needed for a merge or check buffer, which can
+ function relatively efficiently even if it holds only one line. If
+ a longer line is seen, this value is increased. */
+static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
+
/* Flag to reverse the order of all comparisons. */
static bool reverse;
@@ -139,6 +166,23 @@ static bool unique;
/* List of key field comparisons to be tried. */
static struct keyfield *keylist;
+static void sort_die (char const *, char const *) ATTRIBUTE_NORETURN;
+static void
+sort_die (char const *message, char const *file)
+{
+ die (SORT_FAILURE, errno, "%s: %s", message,
+ quotef (file ? file : _("standard output")));
+}
+
+/* Return one past the limit of the line array. */
+
+static inline struct line *
+buffer_linelim (struct buffer const *buf)
+{
+ void *linelim = buf->buf + buf->alloc;
+ return linelim;
+}
+
/* Return a pointer to the first character of the field specified
by KEY in LINE. */
@@ -283,6 +327,126 @@ limfield (struct line const *line, struct keyfield const *key)
return ptr;
}
+/* Fill BUF reading from FP, moving buf->left bytes from the end
+ of buf->buf to the beginning first. If EOF is reached and the
+ file wasn't terminated by a newline, supply one. Set up BUF's line
+ table too. FILE is the name of the file corresponding to FP.
+ Return true if some input was read. */
+
+static bool
+fillbuf (struct buffer *buf, FILE *fp, char const *file)
+{
+ struct keyfield const *key = keylist;
+ char eol = eolchar;
+ size_t line_bytes = buf->line_bytes;
+ size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
+
+ if (buf->eof)
+ return false;
+
+ if (buf->used != buf->left)
+ {
+ memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
+ buf->used = buf->left;
+ buf->nlines = 0;
+ }
+
+ while (true)
+ {
+ char *ptr = buf->buf + buf->used;
+ struct line *linelim = buffer_linelim (buf);
+ struct line *line = linelim - buf->nlines;
+ size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
+ char *line_start = buf->nlines ? line->text + line->length : buf->buf;
+
+ while (line_bytes + 1 < avail)
+ {
+ /* Read as many bytes as possible, but do not read so many
+ bytes that there might not be enough room for the
+ corresponding line array. The worst case is when the
+ rest of the input file consists entirely of newlines,
+ except that the last byte is not a newline. */
+ size_t readsize = (avail - 1) / (line_bytes + 1);
+ size_t bytes_read = fread (ptr, 1, readsize, fp);
+ char *ptrlim = ptr + bytes_read;
+ char *p;
+ avail -= bytes_read;
+
+ if (bytes_read != readsize)
+ {
+ if (ferror (fp))
+ sort_die (_("read failed"), file);
+ if (feof (fp))
+ {
+ buf->eof = true;
+ if (buf->buf == ptrlim)
+ return false;
+ if (line_start != ptrlim && ptrlim[-1] != eol)
+ *ptrlim++ = eol;
+ }
+ }
+
+ /* Find and record each line in the just-read input. */
+ while ((p = memchr (ptr, eol, ptrlim - ptr)))
+ {
+ /* Delimit the line with NUL. This eliminates the need to
+ temporarily replace the last byte with NUL when calling
+ xmemcoll(), which increases performance. */
+ *p = '\0';
+ ptr = p + 1;
+ line--;
+ line->text = line_start;
+ line->length = ptr - line_start;
+ mergesize = MAX (mergesize, line->length);
+ avail -= line_bytes;
+
+ if (key)
+ {
+ /* Precompute the position of the first key for
+ efficiency. */
+ line->keylim = (key->eword == SIZE_MAX
+ ? p
+ : limfield (line, key));
+
+ if (key->sword != SIZE_MAX)
+ line->keybeg = begfield (line, key);
+ else
+ {
+ if (key->skipsblanks)
+ while (blanks[to_uchar (*line_start)])
+ line_start++;
+ line->keybeg = line_start;
+ }
+ }
+
+ line_start = ptr;
+ }
+
+ ptr = ptrlim;
+ if (buf->eof)
+ break;
+ }
+
+ buf->used = ptr - buf->buf;
+ buf->nlines = buffer_linelim (buf) - line;
+ if (buf->nlines != 0)
+ {
+ buf->left = ptr - line_start;
+ merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
+ return true;
+ }
+
+ {
+ /* The current input line is too long to fit in the buffer.
+ Increase the buffer size and try again, keeping it properly
+ aligned. */
+ size_t line_alloc = buf->alloc / sizeof (struct line);
+ buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
+ buf->alloc = line_alloc * sizeof (struct line);
+ }
+ }
+}
+
/* Table that maps characters to order-of-magnitude values. */
static char const unit_order[UCHAR_LIM] =
{
diff --git a/src/sort.c b/src/sort.c
index 6f28b63ae..e2a3c7e3e 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -139,25 +139,6 @@ enum
#if HAVE_NL_LANGINFO
static bool hard_LC_TIME;
#endif
-/* The character marking end of line. Default to \n. */
-static char eolchar = '\n';
-
-/* Input buffers. */
-struct buffer
-{
- char *buf; /* Dynamically allocated buffer,
- partitioned into 3 regions:
- - input data;
- - unused area;
- - an array of lines, in reverse order. */
- size_t used; /* Number of bytes used for input data. */
- size_t nlines; /* Number of lines in the line array. */
- size_t alloc; /* Number of bytes allocated. */
- size_t left; /* Number of bytes left from previous reads. */
- size_t line_bytes; /* Number of bytes to reserve for each line. */
- bool eof; /* An EOF has been read. */
-};
-
/* Binary merge tree node. */
struct merge_node
{
@@ -191,17 +172,9 @@ static struct line saved_line;
/* During the merge phase, the number of files to merge at once. */
#define NMERGE_DEFAULT 16
-/* Minimum size for a merge or check buffer. */
-#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
-
/* Minimum sort size; the code might not work with smaller sizes. */
#define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
-/* The number of bytes needed for a merge or check buffer, which can
- function relatively efficiently even if it holds only one line. If
- a longer line is seen, this value is increased. */
-static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
-
/* The approximate maximum number of bytes of main memory to use, as
specified by the user. Zero if the user has not specified a size. */
static size_t sort_size;
@@ -266,14 +239,6 @@ async_safe_die (int errnum, const char *errstr)
/* Report MESSAGE for FILE, then clean up and exit.
If FILE is null, it represents standard output. */
-static void sort_die (char const *, char const *) ATTRIBUTE_NORETURN;
-static void
-sort_die (char const *message, char const *file)
-{
- die (SORT_FAILURE, errno, "%s: %s", message,
- quotef (file ? file : _("standard output")));
-}
-
void
usage (int status)
{
@@ -1445,135 +1410,6 @@ initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
buf->eof = false;
}
-/* Return one past the limit of the line array. */
-
-static inline struct line *
-buffer_linelim (struct buffer const *buf)
-{
- void *linelim = buf->buf + buf->alloc;
- return linelim;
-}
-
-/* Fill BUF reading from FP, moving buf->left bytes from the end
- of buf->buf to the beginning first. If EOF is reached and the
- file wasn't terminated by a newline, supply one. Set up BUF's line
- table too. FILE is the name of the file corresponding to FP.
- Return true if some input was read. */
-
-static bool
-fillbuf (struct buffer *buf, FILE *fp, char const *file)
-{
- struct keyfield const *key = keylist;
- char eol = eolchar;
- size_t line_bytes = buf->line_bytes;
- size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
-
- if (buf->eof)
- return false;
-
- if (buf->used != buf->left)
- {
- memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
- buf->used = buf->left;
- buf->nlines = 0;
- }
-
- while (true)
- {
- char *ptr = buf->buf + buf->used;
- struct line *linelim = buffer_linelim (buf);
- struct line *line = linelim - buf->nlines;
- size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
- char *line_start = buf->nlines ? line->text + line->length : buf->buf;
-
- while (line_bytes + 1 < avail)
- {
- /* Read as many bytes as possible, but do not read so many
- bytes that there might not be enough room for the
- corresponding line array. The worst case is when the
- rest of the input file consists entirely of newlines,
- except that the last byte is not a newline. */
- size_t readsize = (avail - 1) / (line_bytes + 1);
- size_t bytes_read = fread (ptr, 1, readsize, fp);
- char *ptrlim = ptr + bytes_read;
- char *p;
- avail -= bytes_read;
-
- if (bytes_read != readsize)
- {
- if (ferror (fp))
- sort_die (_("read failed"), file);
- if (feof (fp))
- {
- buf->eof = true;
- if (buf->buf == ptrlim)
- return false;
- if (line_start != ptrlim && ptrlim[-1] != eol)
- *ptrlim++ = eol;
- }
- }
-
- /* Find and record each line in the just-read input. */
- while ((p = memchr (ptr, eol, ptrlim - ptr)))
- {
- /* Delimit the line with NUL. This eliminates the need to
- temporarily replace the last byte with NUL when calling
- xmemcoll(), which increases performance. */
- *p = '\0';
- ptr = p + 1;
- line--;
- line->text = line_start;
- line->length = ptr - line_start;
- mergesize = MAX (mergesize, line->length);
- avail -= line_bytes;
-
- if (key)
- {
- /* Precompute the position of the first key for
- efficiency. */
- line->keylim = (key->eword == SIZE_MAX
- ? p
- : limfield (line, key));
-
- if (key->sword != SIZE_MAX)
- line->keybeg = begfield (line, key);
- else
- {
- if (key->skipsblanks)
- while (blanks[to_uchar (*line_start)])
- line_start++;
- line->keybeg = line_start;
- }
- }
-
- line_start = ptr;
- }
-
- ptr = ptrlim;
- if (buf->eof)
- break;
- }
-
- buf->used = ptr - buf->buf;
- buf->nlines = buffer_linelim (buf) - line;
- if (buf->nlines != 0)
- {
- buf->left = ptr - line_start;
- merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
- return true;
- }
-
- {
- /* The current input line is too long to fit in the buffer.
- Increase the buffer size and try again, keeping it properly
- aligned. */
- size_t line_alloc = buf->alloc / sizeof (struct line);
- buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
- buf->alloc = line_alloc * sizeof (struct line);
- }
- }
-}
-
/* Initialize the randomly chosen MD5 state. */
static void