diff options
Diffstat (limited to 'src/sort.c')
-rw-r--r-- | src/sort.c | 275 |
1 files changed, 1 insertions, 274 deletions
diff --git a/src/sort.c b/src/sort.c index b7a6f7aec..7610393a0 100644 --- a/src/sort.c +++ b/src/sort.c @@ -38,8 +38,6 @@ #include "hard-locale.h" #include "hash.h" #include "heap.h" -#include "ignore-value.h" -#include "mbswidth.h" #include "nproc.h" #include "physmem.h" #include "posixver.h" @@ -139,6 +137,7 @@ enum #if HAVE_NL_LANGINFO static bool hard_LC_TIME; #endif + /* Binary merge tree node. */ struct merge_node { @@ -175,10 +174,6 @@ static struct line saved_line; /* Minimum sort size; the code might not work with smaller sizes. */ #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE) -/* The approximate maximum number of bytes of main memory to use, as - specified by the user. Zero if the user has not specified a size. */ -static size_t sort_size; - /* The initial allocation factor for non-regular files. This is used, e.g., when reading from a pipe. Don't make it too big, since it is multiplied by ~130 to @@ -195,15 +190,9 @@ static size_t temp_dir_count; /* Number of allocated slots in temp_dirs. */ static size_t temp_dir_alloc; -/* Nonzero if any of the input files are the standard input. */ -static bool have_read_stdin; - /* Program used to (de)compress temp files. Must accept -d. */ static char const *compress_program; -/* Annotate the output with extra info to aid the user. */ -static bool debug; - /* Maximum number of files to merge in one go. If more than this number are present, temp files will be used. */ static unsigned int nmerge = NMERGE_DEFAULT; @@ -236,9 +225,6 @@ async_safe_die (int errnum, const char *errstr) _exit (SORT_FAILURE); } -/* Report MESSAGE for FILE, then clean up and exit. - If FILE is null, it represents standard output. */ - void usage (int status) { @@ -717,120 +703,6 @@ create_temp_file (int *pfd, bool survive_fd_exhaustion) return node; } -/* Return a stream for FILE, opened with mode HOW. A null FILE means - standard output; HOW should be "w". When opening for input, "-" - means standard input. To avoid confusion, do not return file - descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when - opening an ordinary FILE. Return NULL if unsuccessful. - - fadvise() is used to specify an access pattern for input files. - There are a few hints we could possibly provide, - and after careful testing it was decided that - specifying POSIX_FADV_SEQUENTIAL was not detrimental - to any cases. On Linux 2.6.31, this option doubles - the size of read ahead performed and thus was seen to - benefit these cases: - Merging - Sorting with a smaller internal buffer - Reading from faster flash devices - - In _addition_ one could also specify other hints... - - POSIX_FADV_WILLNEED was tested, but Linux 2.6.31 - at least uses that to _synchronously_ prepopulate the cache - with the specified range. While sort does need to - read all of its input before outputting, a synchronous - read of the whole file up front precludes any processing - that sort could do in parallel with the system doing - read ahead of the data. This was seen to have negative effects - in a couple of cases: - Merging - Sorting with a smaller internal buffer - Note this option was seen to shorten the runtime for sort - on a multicore system with lots of RAM and other processes - competing for CPU. It could be argued that more explicit - scheduling hints with 'nice' et. al. are more appropriate - for this situation. - - POSIX_FADV_NOREUSE is a possibility as it could lower - the priority of input data in the cache as sort will - only need to process it once. However its functionality - has changed over Linux kernel versions and as of 2.6.31 - it does nothing and thus we can't depend on what it might - do in future. - - POSIX_FADV_DONTNEED is not appropriate for user specified - input files, but for temp files we do want to drop the - cache immediately after processing. This is done implicitly - however when the files are unlinked. */ - -static FILE * -stream_open (char const *file, char const *how) -{ - FILE *fp; - - if (*how == 'r') - { - if (STREQ (file, "-")) - { - have_read_stdin = true; - fp = stdin; - } - else - fp = fopen (file, how); - fadvise (fp, FADVISE_SEQUENTIAL); - } - else if (*how == 'w') - { - if (file && ftruncate (STDOUT_FILENO, 0) != 0) - die (SORT_FAILURE, errno, _("%s: error truncating"), - quotef (file)); - fp = stdout; - } - else - assert (!"unexpected mode passed to stream_open"); - - return fp; -} - -/* Same as stream_open, except always return a non-null value; die on - failure. */ - -static FILE * -xfopen (char const *file, char const *how) -{ - FILE *fp = stream_open (file, how); - if (!fp) - sort_die (_("open failed"), file); - return fp; -} - -/* Close FP, whose name is FILE, and report any errors. */ - -static void -xfclose (FILE *fp, char const *file) -{ - switch (fileno (fp)) - { - case STDIN_FILENO: - /* Allow reading stdin from tty more than once. */ - if (feof (fp)) - clearerr (fp); - break; - - case STDOUT_FILENO: - /* Don't close stdout just yet. close_stdout does that. */ - if (fflush (fp) != 0) - sort_die (_("fflush failed"), file); - break; - - default: - if (fclose (fp) != 0) - sort_die (_("close failed"), file); - break; - } -} - static void move_fd_or_die (int oldfd, int newfd) { @@ -1398,110 +1270,6 @@ random_md5_state_init (char const *random_source) md5_process_bytes (buf, sizeof buf, &random_md5_state); } -/* Return the printable width of the block of memory starting at - TEXT and ending just before LIM, counting each tab as one byte. - FIXME: Should we generally be counting non printable chars? */ - -static size_t -debug_width (char const *text, char const *lim) -{ - size_t width = mbsnwidth (text, lim - text, 0); - while (text < lim) - width += (*text++ == '\t'); - return width; -} - -/* For debug mode, "underline" a key at the - specified offset and screen width. */ - -static void -mark_key (size_t offset, size_t width) -{ - while (offset--) - putchar (' '); - - if (!width) - printf (_("^ no match for key\n")); - else - { - do - putchar ('_'); - while (--width); - - putchar ('\n'); - } -} - -/* For LINE, output a debugging line that underlines KEY in LINE. - If KEY is null, underline the whole line. */ - -static void -debug_key (struct line const *line, struct keyfield const *key) -{ - char *text = line->text; - char *beg = text; - char *lim = text + line->length - 1; - - if (key) - { - if (key->sword != SIZE_MAX) - beg = begfield (line, key); - if (key->eword != SIZE_MAX) - lim = limfield (line, key); - - if ((key->skipsblanks && key->sword == SIZE_MAX) - || key->month || key_numeric (key)) - { - char saved = *lim; - *lim = '\0'; - - while (blanks[to_uchar (*beg)]) - beg++; - - char *tighter_lim = beg; - - if (lim < beg) - tighter_lim = lim; - else if (key->month) - getmonth (beg, &tighter_lim); - else if (key->general_numeric) - ignore_value (strtold (beg, &tighter_lim)); - else if (key->numeric || key->human_numeric) - { - char const *p = beg + (beg < lim && *beg == '-'); - unsigned char max_digit = traverse_raw_number (&p); - if ('0' <= max_digit) - { - unsigned char ch = *p; - tighter_lim = (char *) p - + (key->human_numeric && unit_order[ch]); - } - } - else - tighter_lim = lim; - - *lim = saved; - lim = tighter_lim; - } - } - - size_t offset = debug_width (text, beg); - size_t width = debug_width (beg, lim); - mark_key (offset, width); -} - -/* Debug LINE by underlining its keys. */ - -static void -debug_line (struct line const *line) -{ - struct keyfield const *key = keylist; - - do - debug_key (line, key); - while (key && ((key = key->next) || ! (unique || stable))); -} - /* Return whether sorting options specified for key. */ static bool @@ -1655,45 +1423,6 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) error (0, 0, _("option '-r' only applies to last-resort comparison")); } -/* Write LINE to output stream FP; the output file's name is - OUTPUT_FILE if OUTPUT_FILE is non-null, and is the standard output - otherwise. If debugging is enabled and FP is standard output, - append some debugging information. */ - -static void -write_line (struct line const *line, FILE *fp, char const *output_file) -{ - char *buf = line->text; - size_t n_bytes = line->length; - char *ebuf = buf + n_bytes; - - if (!output_file && debug) - { - /* Convert TAB to '>' and EOL to \n, and then output debugging info. */ - char const *c = buf; - - while (c < ebuf) - { - char wc = *c++; - if (wc == '\t') - wc = '>'; - else if (c == ebuf) - wc = '\n'; - if (fputc (wc, fp) == EOF) - sort_die (_("write failed"), output_file); - } - - debug_line (line); - } - else - { - ebuf[-1] = eolchar; - if (fwrite (buf, 1, n_bytes, fp) != n_bytes) - sort_die (_("write failed"), output_file); - ebuf[-1] = '\0'; - } -} - /* Check that the lines read from FILE_NAME come in order. Return true if they are in order. If CHECKONLY == 'c', also print a diagnostic (FILE_NAME, line number, contents of line) to stderr if @@ -2912,8 +2641,6 @@ sort (char *const *files, size_t nfiles, char const *output_file, reap_all (); } -/* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */ - /* Report incompatible options. */ static void incompatible_options (char const *) ATTRIBUTE_NORETURN; |