From ea48c0baf9ed2f4d0abb1a7657fe7042f9b03ca8 Mon Sep 17 00:00:00 2001 From: Erich Eckner Date: Tue, 29 Nov 2016 10:08:31 +0100 Subject: sort: split off even more source lines useful for uniq --- src/sort.c | 164 ------------------------------------------------------------- 1 file changed, 164 deletions(-) (limited to 'src/sort.c') diff --git a/src/sort.c b/src/sort.c index 6f28b63ae..e2a3c7e3e 100644 --- a/src/sort.c +++ b/src/sort.c @@ -139,25 +139,6 @@ enum #if HAVE_NL_LANGINFO static bool hard_LC_TIME; #endif -/* The character marking end of line. Default to \n. */ -static char eolchar = '\n'; - -/* Input buffers. */ -struct buffer -{ - char *buf; /* Dynamically allocated buffer, - partitioned into 3 regions: - - input data; - - unused area; - - an array of lines, in reverse order. */ - size_t used; /* Number of bytes used for input data. */ - size_t nlines; /* Number of lines in the line array. */ - size_t alloc; /* Number of bytes allocated. */ - size_t left; /* Number of bytes left from previous reads. */ - size_t line_bytes; /* Number of bytes to reserve for each line. */ - bool eof; /* An EOF has been read. */ -}; - /* Binary merge tree node. */ struct merge_node { @@ -191,17 +172,9 @@ static struct line saved_line; /* During the merge phase, the number of files to merge at once. */ #define NMERGE_DEFAULT 16 -/* Minimum size for a merge or check buffer. */ -#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line)) - /* Minimum sort size; the code might not work with smaller sizes. */ #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE) -/* The number of bytes needed for a merge or check buffer, which can - function relatively efficiently even if it holds only one line. If - a longer line is seen, this value is increased. */ -static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024); - /* The approximate maximum number of bytes of main memory to use, as specified by the user. Zero if the user has not specified a size. */ static size_t sort_size; @@ -266,14 +239,6 @@ async_safe_die (int errnum, const char *errstr) /* Report MESSAGE for FILE, then clean up and exit. If FILE is null, it represents standard output. */ -static void sort_die (char const *, char const *) ATTRIBUTE_NORETURN; -static void -sort_die (char const *message, char const *file) -{ - die (SORT_FAILURE, errno, "%s: %s", message, - quotef (file ? file : _("standard output"))); -} - void usage (int status) { @@ -1445,135 +1410,6 @@ initbuf (struct buffer *buf, size_t line_bytes, size_t alloc) buf->eof = false; } -/* Return one past the limit of the line array. */ - -static inline struct line * -buffer_linelim (struct buffer const *buf) -{ - void *linelim = buf->buf + buf->alloc; - return linelim; -} - -/* Fill BUF reading from FP, moving buf->left bytes from the end - of buf->buf to the beginning first. If EOF is reached and the - file wasn't terminated by a newline, supply one. Set up BUF's line - table too. FILE is the name of the file corresponding to FP. - Return true if some input was read. */ - -static bool -fillbuf (struct buffer *buf, FILE *fp, char const *file) -{ - struct keyfield const *key = keylist; - char eol = eolchar; - size_t line_bytes = buf->line_bytes; - size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE; - - if (buf->eof) - return false; - - if (buf->used != buf->left) - { - memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); - buf->used = buf->left; - buf->nlines = 0; - } - - while (true) - { - char *ptr = buf->buf + buf->used; - struct line *linelim = buffer_linelim (buf); - struct line *line = linelim - buf->nlines; - size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr; - char *line_start = buf->nlines ? line->text + line->length : buf->buf; - - while (line_bytes + 1 < avail) - { - /* Read as many bytes as possible, but do not read so many - bytes that there might not be enough room for the - corresponding line array. The worst case is when the - rest of the input file consists entirely of newlines, - except that the last byte is not a newline. */ - size_t readsize = (avail - 1) / (line_bytes + 1); - size_t bytes_read = fread (ptr, 1, readsize, fp); - char *ptrlim = ptr + bytes_read; - char *p; - avail -= bytes_read; - - if (bytes_read != readsize) - { - if (ferror (fp)) - sort_die (_("read failed"), file); - if (feof (fp)) - { - buf->eof = true; - if (buf->buf == ptrlim) - return false; - if (line_start != ptrlim && ptrlim[-1] != eol) - *ptrlim++ = eol; - } - } - - /* Find and record each line in the just-read input. */ - while ((p = memchr (ptr, eol, ptrlim - ptr))) - { - /* Delimit the line with NUL. This eliminates the need to - temporarily replace the last byte with NUL when calling - xmemcoll(), which increases performance. */ - *p = '\0'; - ptr = p + 1; - line--; - line->text = line_start; - line->length = ptr - line_start; - mergesize = MAX (mergesize, line->length); - avail -= line_bytes; - - if (key) - { - /* Precompute the position of the first key for - efficiency. */ - line->keylim = (key->eword == SIZE_MAX - ? p - : limfield (line, key)); - - if (key->sword != SIZE_MAX) - line->keybeg = begfield (line, key); - else - { - if (key->skipsblanks) - while (blanks[to_uchar (*line_start)]) - line_start++; - line->keybeg = line_start; - } - } - - line_start = ptr; - } - - ptr = ptrlim; - if (buf->eof) - break; - } - - buf->used = ptr - buf->buf; - buf->nlines = buffer_linelim (buf) - line; - if (buf->nlines != 0) - { - buf->left = ptr - line_start; - merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE; - return true; - } - - { - /* The current input line is too long to fit in the buffer. - Increase the buffer size and try again, keeping it properly - aligned. */ - size_t line_alloc = buf->alloc / sizeof (struct line); - buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line)); - buf->alloc = line_alloc * sizeof (struct line); - } - } -} - /* Initialize the randomly chosen MD5 state. */ static void -- cgit v1.2.3-54-g00ecf