summary refs log tree commit diff
path: root/src/sort.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/sort.c')
-rw-r--r-- src/sort.c 275
1 files changed, 1 insertions, 274 deletions
diff --git a/src/sort.c b/src/sort.c
index b7a6f7aec..7610393a0 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -38,8 +38,6 @@
#include "hard-locale.h"
#include "hash.h"
#include "heap.h"
-#include "ignore-value.h"
-#include "mbswidth.h"
#include "nproc.h"
#include "physmem.h"
#include "posixver.h"
@@ -139,6 +137,7 @@ enum
#if HAVE_NL_LANGINFO
static bool hard_LC_TIME;
#endif
+
/* Binary merge tree node. */
struct merge_node
{
@@ -175,10 +174,6 @@ static struct line saved_line;
/* Minimum sort size; the code might not work with smaller sizes. */
#define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
-/* The approximate maximum number of bytes of main memory to use, as
- specified by the user. Zero if the user has not specified a size. */
-static size_t sort_size;
-
/* The initial allocation factor for non-regular files.
This is used, e.g., when reading from a pipe.
Don't make it too big, since it is multiplied by ~130 to
@@ -195,15 +190,9 @@ static size_t temp_dir_count;
/* Number of allocated slots in temp_dirs. */
static size_t temp_dir_alloc;
-/* Nonzero if any of the input files are the standard input. */
-static bool have_read_stdin;
-
/* Program used to (de)compress temp files. Must accept -d. */
static char const *compress_program;
-/* Annotate the output with extra info to aid the user. */
-static bool debug;
-
/* Maximum number of files to merge in one go. If more than this
number are present, temp files will be used. */
static unsigned int nmerge = NMERGE_DEFAULT;
@@ -236,9 +225,6 @@ async_safe_die (int errnum, const char *errstr)
_exit (SORT_FAILURE);
}
-/* Report MESSAGE for FILE, then clean up and exit.
- If FILE is null, it represents standard output. */
-
void
usage (int status)
{
@@ -717,120 +703,6 @@ create_temp_file (int *pfd, bool survive_fd_exhaustion)
return node;
}
-/* Return a stream for FILE, opened with mode HOW. A null FILE means
- standard output; HOW should be "w". When opening for input, "-"
- means standard input. To avoid confusion, do not return file
- descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
- opening an ordinary FILE. Return NULL if unsuccessful.
-
- fadvise() is used to specify an access pattern for input files.
- There are a few hints we could possibly provide,
- and after careful testing it was decided that
- specifying POSIX_FADV_SEQUENTIAL was not detrimental
- to any cases. On Linux 2.6.31, this option doubles
- the size of read ahead performed and thus was seen to
- benefit these cases:
- Merging
- Sorting with a smaller internal buffer
- Reading from faster flash devices
-
- In _addition_ one could also specify other hints...
-
- POSIX_FADV_WILLNEED was tested, but Linux 2.6.31
- at least uses that to _synchronously_ prepopulate the cache
- with the specified range. While sort does need to
- read all of its input before outputting, a synchronous
- read of the whole file up front precludes any processing
- that sort could do in parallel with the system doing
- read ahead of the data. This was seen to have negative effects
- in a couple of cases:
- Merging
- Sorting with a smaller internal buffer
- Note this option was seen to shorten the runtime for sort
- on a multicore system with lots of RAM and other processes
- competing for CPU. It could be argued that more explicit
- scheduling hints with 'nice' et al. are more appropriate
- for this situation.
-
- POSIX_FADV_NOREUSE is a possibility as it could lower
- the priority of input data in the cache as sort will
- only need to process it once. However its functionality
- has changed over Linux kernel versions and as of 2.6.31
- it does nothing and thus we can't depend on what it might
- do in future.
-
- POSIX_FADV_DONTNEED is not appropriate for user specified
- input files, but for temp files we do want to drop the
- cache immediately after processing. This is done implicitly
- however when the files are unlinked. */
-
-static FILE *
-stream_open (char const *file, char const *how)
-{
- FILE *fp;
-
- if (*how == 'r')
- {
- if (STREQ (file, "-"))
- {
- have_read_stdin = true;
- fp = stdin;
- }
- else
- fp = fopen (file, how);
- fadvise (fp, FADVISE_SEQUENTIAL);
- }
- else if (*how == 'w')
- {
- if (file && ftruncate (STDOUT_FILENO, 0) != 0)
- die (SORT_FAILURE, errno, _("%s: error truncating"),
- quotef (file));
- fp = stdout;
- }
- else
- assert (!"unexpected mode passed to stream_open");
-
- return fp;
-}
-
-/* Same as stream_open, except always return a non-null value; die on
- failure. */
-
-static FILE *
-xfopen (char const *file, char const *how)
-{
- FILE *fp = stream_open (file, how);
- if (!fp)
- sort_die (_("open failed"), file);
- return fp;
-}
-
-/* Close FP, whose name is FILE, and report any errors. */
-
-static void
-xfclose (FILE *fp, char const *file)
-{
- switch (fileno (fp))
- {
- case STDIN_FILENO:
- /* Allow reading stdin from tty more than once. */
- if (feof (fp))
- clearerr (fp);
- break;
-
- case STDOUT_FILENO:
- /* Don't close stdout just yet. close_stdout does that. */
- if (fflush (fp) != 0)
- sort_die (_("fflush failed"), file);
- break;
-
- default:
- if (fclose (fp) != 0)
- sort_die (_("close failed"), file);
- break;
- }
-}
-
static void
move_fd_or_die (int oldfd, int newfd)
{
@@ -1398,110 +1270,6 @@ random_md5_state_init (char const *random_source)
md5_process_bytes (buf, sizeof buf, &random_md5_state);
}
-/* Return the printable width of the block of memory starting at
- TEXT and ending just before LIM, counting each tab as one byte.
- FIXME: Should we generally be counting non-printable chars? */
-
-static size_t
-debug_width (char const *text, char const *lim)
-{
- size_t width = mbsnwidth (text, lim - text, 0);
- while (text < lim)
- width += (*text++ == '\t');
- return width;
-}
-
-/* For debug mode, "underline" a key at the
- specified offset and screen width. */
-
-static void
-mark_key (size_t offset, size_t width)
-{
- while (offset--)
- putchar (' ');
-
- if (!width)
- printf (_("^ no match for key\n"));
- else
- {
- do
- putchar ('_');
- while (--width);
-
- putchar ('\n');
- }
-}
-
-/* For LINE, output a debugging line that underlines KEY in LINE.
- If KEY is null, underline the whole line. */
-
-static void
-debug_key (struct line const *line, struct keyfield const *key)
-{
- char *text = line->text;
- char *beg = text;
- char *lim = text + line->length - 1;
-
- if (key)
- {
- if (key->sword != SIZE_MAX)
- beg = begfield (line, key);
- if (key->eword != SIZE_MAX)
- lim = limfield (line, key);
-
- if ((key->skipsblanks && key->sword == SIZE_MAX)
- || key->month || key_numeric (key))
- {
- char saved = *lim;
- *lim = '\0';
-
- while (blanks[to_uchar (*beg)])
- beg++;
-
- char *tighter_lim = beg;
-
- if (lim < beg)
- tighter_lim = lim;
- else if (key->month)
- getmonth (beg, &tighter_lim);
- else if (key->general_numeric)
- ignore_value (strtold (beg, &tighter_lim));
- else if (key->numeric || key->human_numeric)
- {
- char const *p = beg + (beg < lim && *beg == '-');
- unsigned char max_digit = traverse_raw_number (&p);
- if ('0' <= max_digit)
- {
- unsigned char ch = *p;
- tighter_lim = (char *) p
- + (key->human_numeric && unit_order[ch]);
- }
- }
- else
- tighter_lim = lim;
-
- *lim = saved;
- lim = tighter_lim;
- }
- }
-
- size_t offset = debug_width (text, beg);
- size_t width = debug_width (beg, lim);
- mark_key (offset, width);
-}
-
-/* Debug LINE by underlining its keys. */
-
-static void
-debug_line (struct line const *line)
-{
- struct keyfield const *key = keylist;
-
- do
- debug_key (line, key);
- while (key && ((key = key->next) || ! (unique || stable)));
-}
-
/* Return whether sorting options specified for key. */
static bool
@@ -1655,45 +1423,6 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)
error (0, 0, _("option '-r' only applies to last-resort comparison"));
}
-/* Write LINE to output stream FP; the output file's name is
- OUTPUT_FILE if OUTPUT_FILE is non-null, and is the standard output
- otherwise. If debugging is enabled and FP is standard output,
- append some debugging information. */
-
-static void
-write_line (struct line const *line, FILE *fp, char const *output_file)
-{
- char *buf = line->text;
- size_t n_bytes = line->length;
- char *ebuf = buf + n_bytes;
-
- if (!output_file && debug)
- {
- /* Convert TAB to '>' and EOL to \n, and then output debugging info. */
- char const *c = buf;
-
- while (c < ebuf)
- {
- char wc = *c++;
- if (wc == '\t')
- wc = '>';
- else if (c == ebuf)
- wc = '\n';
- if (fputc (wc, fp) == EOF)
- sort_die (_("write failed"), output_file);
- }
-
- debug_line (line);
- }
- else
- {
- ebuf[-1] = eolchar;
- if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
- sort_die (_("write failed"), output_file);
- ebuf[-1] = '\0';
- }
-}
-
/* Check that the lines read from FILE_NAME come in order. Return
true if they are in order. If CHECKONLY == 'c', also print a
diagnostic (FILE_NAME, line number, contents of line) to stderr if
@@ -2912,8 +2641,6 @@ sort (char *const *files, size_t nfiles, char const *output_file,
reap_all ();
}
-/* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
-
/* Report incompatible options. */
static void incompatible_options (char const *) ATTRIBUTE_NORETURN;