summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBernhard Voelker <mail@bernhard-voelker.de>2016-11-22 22:03:47 +0100
committerBernhard Voelker <mail@bernhard-voelker.de>2016-11-22 22:03:47 +0100
commitb50a151346c42816034b5c26266eb753b7dbe737 (patch)
tree0ee2b87aec984af4593d9a31301828c24f110dfb
parent812877bfcb34edbff1ba554555bd2ddb613a22cc (diff)
downloadcoreutils-b50a151346c42816034b5c26266eb753b7dbe737.tar.xz
comm: add --total option
* src/comm.c (total_option): Add bool variable for the new option. (TOTAL_OPTION): Add enum value. (long_options): Add array element for the new option. (usage): Document the new option here. (compare_files): Count the lines in total[3], and output the summary at the end. (main): Accept the new option. * doc/coreutils.texi (comm invocation): Document it. * tests/misc/comm.pl: Test it. While at it, improve the test data to have 1 unique line in the first file, 2 unique lines in the second file, and 3 common lines. * NEWS (New Features): Mention the new option. Fixes http://bugs.gnu.org/24929
-rw-r--r--NEWS2
-rw-r--r--doc/coreutils.texi31
-rw-r--r--src/comm.c49
-rwxr-xr-xtests/misc/comm.pl53
4 files changed, 115 insertions, 20 deletions
diff --git a/NEWS b/NEWS
index 41c1e3c8c..edfbdfa1d 100644
--- a/NEWS
+++ b/NEWS
@@ -114,6 +114,8 @@ GNU coreutils NEWS -*- outline -*-
** New Features
+ comm now accepts the --total option to output a summary at the end.
+
date now accepts the --debug option, to annotate the parsed date string,
display timezone information, and warn about potential misuse.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index d0694fdd0..521ac3923 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -5174,6 +5174,37 @@ rather than the default of a single TAB character.
The delimiter @var{str} may not be empty.
+@item --total
+Output a summary at the end.
+
+Similar to the regular output,
+column one contains the total number of lines unique to @var{file1},
+column two contains the total number of lines unique to @var{file2}, and
+column three contains the total number of lines common to both files,
+followed by the word @samp{total} in the additional column four.
+
+In the following example, @command{comm} omits the regular output
+(@option{-123}), thus just printing the summary:
+
+@example
+$ printf '%s\n' a b c d e > file1
+$ printf '%s\n' b c d e f g > file2
+$ comm --total -123 file1 file2
+1 2 4 total
+@end example
+
+This option is a GNU extension. Portable scripts should use @command{wc} to
+get the totals, e.g. for the above example files:
+
+@example
+$ comm -23 file1 file2 | wc -l # number of lines only in file1
+1
+$ comm -13 file1 file2 | wc -l # number of lines only in file2
+2
+$ comm -12 file1 file2 | wc -l # number of lines common to both files
+4
+@end example
+
@optZeroTerminated
@end table
diff --git a/src/comm.c b/src/comm.c
index eab81328b..095ee1d2d 100644
--- a/src/comm.c
+++ b/src/comm.c
@@ -63,6 +63,9 @@ static bool issued_disorder_warning[2];
/* line delimiter. */
static unsigned char delim = '\n';
+/* If true, print a summary. */
+static bool total_option;
+
/* If nonzero, check that the input is correctly ordered. */
static enum
{
@@ -82,7 +85,8 @@ enum
{
CHECK_ORDER_OPTION = CHAR_MAX + 1,
NOCHECK_ORDER_OPTION,
- OUTPUT_DELIMITER_OPTION
+ OUTPUT_DELIMITER_OPTION,
+ TOTAL_OPTION
};
static struct option const long_options[] =
@@ -90,6 +94,7 @@ static struct option const long_options[] =
{"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
{"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
{"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
+ {"total", no_argument, NULL, TOTAL_OPTION},
{"zero-terminated", no_argument, NULL, 'z'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
@@ -137,6 +142,9 @@ and column three contains lines common to both files.\n\
--output-delimiter=STR separate columns with STR\n\
"), stdout);
fputs (_("\
+ --total output a summary\n\
+"), stdout);
+ fputs (_("\
-z, --zero-terminated line delimiter is NUL, not newline\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
@@ -263,6 +271,9 @@ compare_files (char **infiles)
/* streams[i] holds the input stream for file i. */
FILE *streams[2];
+ /* Counters for the summary. */
+ uintmax_t total[] = {0, 0, 0};
+
int i, j;
/* Initialize the storage. */
@@ -317,14 +328,26 @@ compare_files (char **infiles)
/* Output the line that is lesser. */
if (order == 0)
- writeline (thisline[1], stdout, 3);
+ {
+ /* Line is seen in both files. */
+ total[2]++;
+ writeline (thisline[1], stdout, 3);
+ }
else
{
seen_unpairable = true;
if (order <= 0)
- writeline (thisline[0], stdout, 1);
+ {
+ /* Line is seen in file 1 only. */
+ total[0]++;
+ writeline (thisline[0], stdout, 1);
+ }
else
- writeline (thisline[1], stdout, 2);
+ {
+ /* Line is seen in file 2 only. */
+ total[1]++;
+ writeline (thisline[1], stdout, 2);
+ }
}
/* Step the file the line came from.
@@ -365,6 +388,19 @@ compare_files (char **infiles)
for (i = 0; i < 2; i++)
if (fclose (streams[i]) != 0)
die (EXIT_FAILURE, errno, "%s", quotef (infiles[i]));
+
+ if (total_option)
+ {
+ /* Print the summary, minding the column and line delimiters. */
+ char buf1[INT_BUFSIZE_BOUND (uintmax_t)];
+ char buf2[INT_BUFSIZE_BOUND (uintmax_t)];
+ char buf3[INT_BUFSIZE_BOUND (uintmax_t)];
+ printf ("%s%s%s%s%s%s%s%c",
+ umaxtostr (total[0], buf1), col_sep,
+ umaxtostr (total[1], buf2), col_sep,
+ umaxtostr (total[2], buf3), col_sep,
+ _("total"), delim);
+ }
}
int
@@ -388,6 +424,7 @@ main (int argc, char **argv)
seen_unpairable = false;
issued_disorder_warning[0] = issued_disorder_warning[1] = false;
check_input_order = CHECK_ORDER_DEFAULT;
+ total_option = false;
while ((c = getopt_long (argc, argv, "123z", long_options, NULL)) != -1)
switch (c)
@@ -423,6 +460,10 @@ main (int argc, char **argv)
col_sep_len = *optarg ? strlen (optarg) : 1;
break;
+ case TOTAL_OPTION:
+ total_option = true;
+ break;
+
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
diff --git a/tests/misc/comm.pl b/tests/misc/comm.pl
index c5cd27f39..fdec3d62c 100755
--- a/tests/misc/comm.pl
+++ b/tests/misc/comm.pl
@@ -27,37 +27,50 @@ my $prog = 'comm';
# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
-my @inputs = ({IN=>{a=>"1\n3"}}, {IN=>{b=>"2\n3"}});
-my @zinputs = ({IN=>{za=>"1\0003"}}, {IN=>{zb=>"2\0003"}});
+my @inputs = ({IN=>{a=>"1\n3\n3\n3"}}, {IN=>{b=>"2\n2\n3\n3\n3"}});
+my @zinputs = ({IN=>{za=>"1\0003\0003\0003"}},
+ {IN=>{zb=>"2\0002\0003\0003\0003"}});
my @Tests =
(
# basic operation
- ['basic', @inputs, {OUT=>"1\n\t2\n\t\t3\n"} ],
- ['zbasic', '-z', @zinputs, {OUT=>"1\0\t2\0\t\t3\0"} ],
+ ['basic', @inputs, {OUT=>"1\n\t2\n\t2\n\t\t3\n\t\t3\n\t\t3\n"} ],
+ ['zbasic', '-z', @zinputs, {OUT=>"1\0\t2\0\t2\0\t\t3\0\t\t3\0\t\t3\0"} ],
# suppress lines unique to file 1
- ['opt-1', '-1', @inputs, {OUT=>"2\n\t3\n"} ],
- ['zopt-1', '-z', '-1', @zinputs, {OUT=>"2\0\t3\0"} ],
+ ['opt-1', '-1', @inputs, {OUT=>"2\n2\n\t3\n\t3\n\t3\n"} ],
+ ['zopt-1', '-z', '-1', @zinputs, {OUT=>"2\0002\000\t3\000\t3\000\t3\000"} ],
# suppress lines unique to file 2
- ['opt-2', '-2', @inputs, {OUT=>"1\n\t3\n"} ],
+ ['opt-2', '-2', @inputs, {OUT=>"1\n\t3\n\t3\n\t3\n"} ],
+ ['zopt-2', '-z', '-2', @zinputs, {OUT=>"1\000\t3\000\t3\000\t3\000"} ],
# suppress lines that appear in both files
- ['opt-3', '-3', @inputs, {OUT=>"1\n\t2\n"} ],
+ ['opt-3', '-3', @inputs, {OUT=>"1\n\t2\n\t2\n"} ],
+ ['zopt-3', '-z', '-3', @zinputs, {OUT=>"1\000\t2\000\t2\000"} ],
# suppress lines unique to file 1 and lines unique to file 2
- ['opt-12', '-1', '-2', @inputs, {OUT=>"3\n"} ],
+ ['opt-12', '-1', '-2', @inputs, {OUT=>"3\n3\n3\n"} ],
+ ['zopt-12', '-12z', @zinputs, {OUT=>"3\0003\0003\000"} ],
# suppress lines unique to file 1 and those that appear in both files
- ['opt-13', '-1', '-3', @inputs, {OUT=>"2\n"} ],
+ ['opt-13', '-1', '-3', @inputs, {OUT=>"2\n2\n"} ],
+ ['zopt-13', '-13z', @zinputs, {OUT=>"2\0002\000"} ],
# suppress lines unique to file 2 and those that appear in both files
['opt-23', '-2', '-3', @inputs, {OUT=>"1\n"} ],
+ ['zopt-23', '-23z', @zinputs, {OUT=>"1\000"} ],
- # suppress all output (really?)
+ # suppress all output
['opt-123', '-1', '-2', '-3', @inputs, {OUT=>""} ],
+ # show summary: 1 only in file1, 2 only in file2, 3 in both files
+ ['total-all', '--total', @inputs, {OUT=>"1\n\t2\n\t2\n\t\t3\n\t\t3\n\t\t3\n"
+ . "1\t2\t3\ttotal\n"} ],
+
+ # show summary only, suppressing regular output
+ ['total-123', '--total', '-123', @inputs, {OUT=>"1\t2\t3\ttotal\n"} ],
+
# invalid missing command line argument (1)
['missing-arg1', $inputs[0], {EXIT=>1},
{ERR => "$prog: missing operand after 'a'\n"
@@ -128,17 +141,17 @@ my @Tests =
# alternate delimiter: ','
['delim-comma', '--output-delimiter=,', @inputs,
- {OUT=>"1\n,2\n,,3\n"} ],
+ {OUT=>"1\n,2\n,2\n,,3\n,,3\n,,3\n"} ],
# two-character alternate delimiter: '++'
['delim-2char', '--output-delimiter=++', @inputs,
- {OUT=>"1\n++2\n++++3\n"} ],
+ {OUT=>"1\n++2\n++2\n++++3\n++++3\n++++3\n"} ],
# NUL delimiter
['delim-empty', '--output-delimiter=', @inputs,
- {OUT=>"1\n\0002\n\000\0003\n"} ],
+ {OUT=>"1\n\0002\n\0002\n\000\0003\n\000\0003\n\000\0003\n"} ],
['zdelim-empty', '-z', '-z --output-delimiter=', @zinputs,
- {OUT=>"1\000\0002\000\000\0003\000"} ],
+ {OUT=>"1\000\0002\000\0002\000\000\0003\000\000\0003\000\000\0003\000"} ],
# invalid dual delimiter
['delim-dual', '--output-delimiter=,', '--output-delimiter=+', @inputs,
@@ -146,8 +159,16 @@ my @Tests =
# valid dual delimiter specification
['delim-dual2', '--output-delimiter=,', '--output-delimiter=,', @inputs,
- {OUT=>"1\n,2\n,,3\n"} ],
+ {OUT=>"1\n,2\n,2\n,,3\n,,3\n,,3\n"} ],
+
+ # show summary, zero-terminated
+ ['totalz-all', '--total', '-z', @zinputs,
+ {OUT=>"1\000\t2\000\t2\000\t\t3\000\t\t3\000\t\t3\000"
+ . "1\t2\t3\ttotal\000"} ],
+ # show summary only (-123), zero-terminated and with ',' as delimiter
+ ['totalz-123', '--total', '-z123', '--output-delimiter=,', @zinputs,
+ {OUT=>"1,2,3,total\000"} ],
);
my $save_temps = $ENV{DEBUG};