summary refs log tree commit diff
diff options
context:
space:
mode:
author Erich Eckner <git@eckner.net> 2016-11-30 11:18:30 +0100
committer Erich Eckner <git@eckner.net> 2017-02-16 13:23:49 +0100
commit 24aff479581963a3dbd87c86d27c4e2aaac27507 (patch)
tree 752667987d14662279cf592cc09da5cddfe66676
parent 34e84e57aa987b7c39ba8f285c8ce602c6cf63ee (diff)
download coreutils-24aff479581963a3dbd87c86d27c4e2aaac27507.tar.xz
uniq: compiles, but does the wrong thing - for now
-rw-r--r-- src/uniq.c 291
1 files changed, 134 insertions, 157 deletions
diff --git a/src/uniq.c b/src/uniq.c
index d76fa7ed2..ecf05825e 100644
--- a/src/uniq.c
+++ b/src/uniq.c
@@ -43,16 +43,6 @@
proper_name ("Richard M. Stallman"), \
proper_name ("David MacKenzie")
-#define SWAP_LINES(A, B) \
- do \
- { \
- struct line *_tmp; \
- _tmp = (A); \
- (A) = (B); \
- (B) = _tmp; \
- } \
- while (0)
-
/* True if the LC_COLLATE locale is hard. */
static bool hard_LC_COLLATE;
@@ -152,6 +142,7 @@ static struct option const longopts[] =
{"all-repeated", optional_argument, NULL, 'D'},
{"group", optional_argument, NULL, GROUP_OPTION},
{"ignore-case", no_argument, NULL, 'i'},
+ {"key", required_argument, NULL, 'k'},
{"unique", no_argument, NULL, 'u'},
{"skip-fields", required_argument, NULL, 'f'},
{"skip-chars", required_argument, NULL, 's'},
@@ -201,6 +192,7 @@ With no options, matching lines are merged to the first occurrence.\n\
"), stdout);
fputs (_("\
-i, --ignore-case ignore differences in case when comparing\n\
+ -k, --key=... see 'sort' for details\n\
-s, --skip-chars=N avoid comparing the first N characters\n\
-u, --unique only print unique lines\n\
"), stdout);
@@ -257,30 +249,6 @@ size_opt (char const *opt, char const *msgid)
return MIN (size, SIZE_MAX);
}
-/* Given a line LINE,
- return a pointer to the beginning of the line's field to be compared. */
-
-static char * _GL_ATTRIBUTE_PURE
-find_field (struct line const *line)
-{
- size_t count;
- char const *lp = line->text;
- size_t size = line->length - 1;
- size_t i = 0;
-
- for (count = 0; count < skip_fields && i < size; count++)
- {
- while (i < size && field_sep (lp[i]))
- i++;
- while (i < size && !field_sep (lp[i]))
- i++;
- }
-
- i += MIN (skip_chars, size - i);
-
- return line->text + i;
-}
-
/* Output the line in line LINE to standard output
provided that the switches say it should be output.
MATCH is true if the line matches the previous line.
@@ -289,7 +257,7 @@ find_field (struct line const *line)
static void
writeline (struct line const *line,
- bool match, uintmax_t linecount)
+ bool match, uintmax_t linecount, FILE *outfp, const char *outfile)
{
if (! (linecount == 0 ? output_unique
: !match ? output_first_repeated
@@ -297,9 +265,10 @@ writeline (struct line const *line,
return;
if (countmode == count_occurrences)
- printf ("%7" PRIuMAX " ", linecount + 1);
+ fprintf (outfp, "%7" PRIuMAX " ", linecount + 1);
- fwrite (line->text, sizeof (char), line->length, stdout);
+ fwrite (line->text, sizeof (char), line->length, outfp);
+ fputc (eolchar, outfp);
}
/* Process input file INFILE with output to OUTFILE.
@@ -308,154 +277,158 @@ writeline (struct line const *line,
static void
check_file (const char *infile, const char *outfile, char delimiter)
{
- struct buffer buf;
- struct line *thisline, *prevline;
-
- if (! (STREQ (infile, "-") || freopen (infile, "r", stdin)))
- die (EXIT_FAILURE, errno, "%s", quotef (infile));
- if (! (STREQ (outfile, "-") || freopen (outfile, "w", stdout)))
- die (EXIT_FAILURE, errno, "%s", quotef (outfile));
-
- fadvise (stdin, FADVISE_SEQUENTIAL);
-
+ FILE *infp = xfopen (infile, "r");
+ FILE *outfp = xfopen (STREQ(outfile,"-") ? NULL : outfile, "w");
+ struct buffer buf; /* Input buffer. */
+ struct line temp; /* Copy of previous line. */
+ size_t alloc = 0;
+ struct keyfield const *key = keylist;
initbuf (&buf, sizeof (struct line),
MAX (merge_buffer_size, sort_size));
- thisline = lb1.buf;
- prevline = lb2.buf;
+ struct line const *line = buffer_linelim (&buf);
+ struct line const *linebase = line - buf.nlines;
- initbuffer (thisline);
- initbuffer (prevline);
+ bool the_same; /* Is the line equal to the last one? */
+ bool first_group_printed = false;
+ uintmax_t match_count = 0;
+ bool first_delimiter = true;
- /* The duplication in the following 'if' and 'else' blocks is an
- optimization to distinguish between when we can print input
- lines immediately (1. & 2.) or not.
+ temp.text = NULL;
- 1. --group => all input lines are printed.
- checking for unique/duplicated lines is used only for printing
- group separators.
+ while (fillbuf (&buf, infp, infile))
+ {
+ line = buffer_linelim (&buf);
+ linebase = line - buf.nlines;
- 2. The default case in which none of these options has been specified:
- --count, --repeated, --all-repeated, --unique
- In the default case, this optimization lets uniq output each different
- line right away, without waiting to see if the next one is different.
+ /* Check if the line saved from the old buffer contents is
+ equal to the first line of the new buffer. */
+ the_same = alloc && ! compare (&temp, line - 1);
- 3. All other cases.
- */
- if (output_unique && output_first_repeated && countmode == count_none)
- {
- char *prevfield IF_LINT ( = NULL);
- size_t prevlen IF_LINT ( = 0);
- bool first_group_printed = false;
+ if (linebase < line)
+ goto print_something_if_appropriate;
- while (!feof (stdin))
+/*
{
- char *thisfield;
- size_t thislen;
- bool new_group;
+ found_disorder:
+ {
+ if (checkonly == 'c')
+ {
+ struct line const *disorder_line = line - 1;
+ uintmax_t disorder_line_number =
+ buffer_linelim (&buf) - disorder_line + line_number;
+ char hr_buf[INT_BUFSIZE_BOUND (disorder_line_number)];
+ fprintf (stderr, _("%s: %s:%s: disorder: "),
+ program_name, file_name,
+ umaxtostr (disorder_line_number, hr_buf));
+ write_line (disorder_line, stderr, _("standard error"));
+ }
- if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
+ ordered = false;
break;
+ }
+ } */
- thisfield = find_field (thisline);
- thislen = thisline->length - 1 - (thisfield - thisline->text);
-
- new_group = (prevline->length == 0
- || different (thisfield, prevfield, thislen, prevlen));
-
- if (new_group && grouping != GM_NONE
- && (grouping == GM_PREPEND || grouping == GM_BOTH
- || (first_group_printed && (grouping == GM_APPEND
- || grouping == GM_SEPARATE))))
- putchar (delimiter);
-
- if (new_group || grouping != GM_NONE)
- {
- fwrite (thisline->text, sizeof (char),
- thisline->length, stdout);
-
- SWAP_LINES (prevline, thisline);
- prevfield = thisfield;
- prevlen = thislen;
- first_group_printed = true;
- }
- }
- if ((grouping == GM_BOTH || grouping == GM_APPEND) && first_group_printed)
- putchar (delimiter);
- }
- else
- {
- char *prevfield;
- size_t prevlen;
- uintmax_t match_count = 0;
- bool first_delimiter = true;
-
- if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
- goto closefiles;
- prevfield = find_field (prevline);
- prevlen = prevline->length - 1 - (prevfield - prevline->text);
-
- while (!feof (stdin))
+ /* Compare each line in the buffer with its successor. */
+ while (linebase < --line)
{
- bool match;
- char *thisfield;
- size_t thislen;
- if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
- {
- if (ferror (stdin))
- goto closefiles;
- break;
- }
- thisfield = find_field (thisline);
- thislen = thisline->length - 1 - (thisfield - thisline->text);
- match = !different (thisfield, prevfield, thislen, prevlen);
- match_count += match;
+ the_same = ! compare (line, line - 1);
- if (match_count == UINTMAX_MAX)
+ print_something_if_appropriate:
{
- if (count_occurrences)
- die (EXIT_FAILURE, 0, _("too many repeated lines"));
- match_count--;
- }
- if (delimit_groups != DM_NONE)
- {
- if (!match)
+ if (output_unique && output_first_repeated && countmode == count_none)
{
- if (match_count) /* a previous match */
- first_delimiter = false; /* Only used when DM_SEPARATE */
+ if (! the_same && grouping != GM_NONE
+ && (grouping == GM_PREPEND || grouping == GM_BOTH
+ || (first_group_printed && (grouping == GM_APPEND
+ || grouping == GM_SEPARATE))))
+ fputc (delimiter, outfp);
+
+ if (! the_same || grouping != GM_NONE)
+ {
+ write_line (line-1, outfp, outfile);
+
+ first_group_printed = true;
+ }
}
- else if (match_count == 1)
+ else
{
- if ((delimit_groups == DM_PREPEND)
- || (delimit_groups == DM_SEPARATE
- && !first_delimiter))
- putchar (delimiter);
+ match_count += the_same;
+
+ if (match_count == UINTMAX_MAX)
+ {
+ if (count_occurrences)
+ die (EXIT_FAILURE, 0, _("too many repeated lines"));
+ match_count--;
+ }
+
+ if (delimit_groups != DM_NONE)
+ {
+ if (! the_same)
+ {
+ if (match_count) /* a previous match */
+ first_delimiter = false; /* Only used when DM_SEPARATE */
+ }
+ else if (match_count == 1)
+ {
+ if ((delimit_groups == DM_PREPEND)
+ || (delimit_groups == DM_SEPARATE
+ && !first_delimiter))
+ fputc (delimiter, outfp);
+ }
+ }
+
+ if (! the_same || output_later_repeated)
+ {
+ writeline (line-1, the_same, match_count, outfp, outfile);
+ if (! the_same)
+ match_count = 0;
+ }
}
}
+ }
- if (!match || output_later_repeated)
+ /* Save the last line of the buffer. */
+ if (alloc < line->length)
+ {
+ do
{
- writeline (prevline, match, match_count);
- SWAP_LINES (prevline, thisline);
- prevfield = thisfield;
- prevlen = thislen;
- if (!match)
- match_count = 0;
+ alloc *= 2;
+ if (! alloc)
+ {
+ alloc = line->length;
+ break;
+ }
}
- }
+ while (alloc < line->length);
- writeline (prevline, false, match_count);
+ free (temp.text);
+ temp.text = xmalloc (alloc);
+ }
+ memcpy (temp.text, line->text, line->length);
+ temp.length = line->length;
+ if (key)
+ {
+ temp.keybeg = temp.text + (line->keybeg - line->text);
+ temp.keylim = temp.text + (line->keylim - line->text);
+ }
}
- closefiles:
- if (ferror (stdin) || fclose (stdin) != 0)
- die (EXIT_FAILURE, 0, _("error reading %s"), quoteaf (infile));
-
- /* stdout is handled via the atexit-invoked close_stdout function. */
+ if (output_unique && output_first_repeated && countmode == count_none)
+ {
+ if ((grouping == GM_BOTH || grouping == GM_APPEND) && first_group_printed)
+ fputc (delimiter, outfp);
+ }
+ else
+ {
+ writeline (line-1, false, match_count, outfp, outfile);
+ }
- free (lb1.text);
- free (lb2.text);
+ xfclose (infp, infile);
+ xfclose (outfp, outfile);
+ free (buf.buf);
+ free (temp.text);
}
enum Skip_field_option_type
@@ -503,7 +476,7 @@ main (int argc, char **argv)
if (optc == -1
|| (posixly_correct && nfiles != 0)
|| ((optc = getopt_long (argc, argv,
- "-0123456789Dcdf:is:uw:z", longopts, NULL))
+ "-0123456789Dcdf:iks:uw:z", longopts, NULL))
== -1))
{
if (argc <= optind)
@@ -597,6 +570,10 @@ main (int argc, char **argv)
ignore_case = true;
break;
+ case 'k':
+ add_key();
+ break;
+
case 's':
skip_chars = size_opt (optarg,
N_("invalid number of bytes to skip"));