From b50a151346c42816034b5c26266eb753b7dbe737 Mon Sep 17 00:00:00 2001 From: Bernhard Voelker Date: Tue, 22 Nov 2016 22:03:47 +0100 Subject: comm: add --total option * src/comm.c (total_option): Add bool variable for the new option. (TOTAL_OPTION): Add enum value. (long_options): Add array element for the new option. (usage): Document the new option here. (compare_files): Count the lines in total[3], and output the summary at the end. (main): Accept the new option. * doc/coreutils.texi (comm invocation): Document it. * tests/misc/comm.pl: Test it. While at it, improve the test data to have 1 unique line in the first file, 2 unique lines in the second file, and 3 common lines. * NEWS (New Features): Mention the new option. Fixes http://bugs.gnu.org/24929 --- NEWS | 2 ++ doc/coreutils.texi | 31 +++++++++++++++++++++++++++++++ src/comm.c | 49 +++++++++++++++++++++++++++++++++++++++++++++---- tests/misc/comm.pl | 53 +++++++++++++++++++++++++++++++++++++---------------- 4 files changed, 115 insertions(+), 20 deletions(-) diff --git a/NEWS b/NEWS index 41c1e3c8c..edfbdfa1d 100644 --- a/NEWS +++ b/NEWS @@ -114,6 +114,8 @@ GNU coreutils NEWS -*- outline -*- ** New Features + comm now accepts the --total option to output a summary at the end. + date now accepts the --debug option, to annotate the parsed date string, display timezone information, and warn about potential misuse. diff --git a/doc/coreutils.texi b/doc/coreutils.texi index d0694fdd0..521ac3923 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -5174,6 +5174,37 @@ rather than the default of a single TAB character. The delimiter @var{str} may not be empty. +@item --total +Output a summary at the end. 
+ +Similar to the regular output, +column one contains the total number of lines unique to @var{file1}, +column two contains the total number of lines unique to @var{file2}, and +column three contains the total number of lines common to both files, +followed by the word @samp{total} in the additional column four. + +In the following example, @command{comm} omits the regular output +(@option{-123}), thus just printing the summary: + +@example +$ printf '%s\n' a b c d e > file1 +$ printf '%s\n' b c d e f g > file2 +$ comm --total -123 file1 file2 +1 2 4 total +@end example + +This option is a GNU extension. Portable scripts should use @command{wc} to +get the totals, e.g. for the above example files: + +@example +$ comm -23 file1 file2 | wc -l # number of lines only in file1 +1 +$ comm -13 file1 file2 | wc -l # number of lines only in file2 +2 +$ comm -12 file1 file2 | wc -l # number of lines common to both files +4 +@end example + @optZeroTerminated @end table diff --git a/src/comm.c b/src/comm.c index eab81328b..095ee1d2d 100644 --- a/src/comm.c +++ b/src/comm.c @@ -63,6 +63,9 @@ static bool issued_disorder_warning[2]; /* line delimiter. */ static unsigned char delim = '\n'; +/* If true, print a summary. */ +static bool total_option; + /* If nonzero, check that the input is correctly ordered. 
*/ static enum { @@ -82,7 +85,8 @@ enum { CHECK_ORDER_OPTION = CHAR_MAX + 1, NOCHECK_ORDER_OPTION, - OUTPUT_DELIMITER_OPTION + OUTPUT_DELIMITER_OPTION, + TOTAL_OPTION }; static struct option const long_options[] = @@ -90,6 +94,7 @@ static struct option const long_options[] = {"check-order", no_argument, NULL, CHECK_ORDER_OPTION}, {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION}, {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION}, + {"total", no_argument, NULL, TOTAL_OPTION}, {"zero-terminated", no_argument, NULL, 'z'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, @@ -135,6 +140,9 @@ and column three contains lines common to both files.\n\ "), stdout); fputs (_("\ --output-delimiter=STR separate columns with STR\n\ +"), stdout); + fputs (_("\ + --total output a summary\n\ "), stdout); fputs (_("\ -z, --zero-terminated line delimiter is NUL, not newline\n\ @@ -263,6 +271,9 @@ compare_files (char **infiles) /* streams[i] holds the input stream for file i. */ FILE *streams[2]; + /* Counters for the summary. */ + uintmax_t total[] = {0, 0, 0}; + int i, j; /* Initialize the storage. */ @@ -317,14 +328,26 @@ compare_files (char **infiles) /* Output the line that is lesser. */ if (order == 0) - writeline (thisline[1], stdout, 3); + { + /* Line is seen in both files. */ + total[2]++; + writeline (thisline[1], stdout, 3); + } else { seen_unpairable = true; if (order <= 0) - writeline (thisline[0], stdout, 1); + { + /* Line is seen in file 1 only. */ + total[0]++; + writeline (thisline[0], stdout, 1); + } else - writeline (thisline[1], stdout, 2); + { + /* Line is seen in file 2 only. */ + total[1]++; + writeline (thisline[1], stdout, 2); + } } /* Step the file the line came from. @@ -365,6 +388,19 @@ compare_files (char **infiles) for (i = 0; i < 2; i++) if (fclose (streams[i]) != 0) die (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); + + if (total_option) + { + /* Print the summary, minding the column and line delimiters. 
*/ + char buf1[INT_BUFSIZE_BOUND (uintmax_t)]; + char buf2[INT_BUFSIZE_BOUND (uintmax_t)]; + char buf3[INT_BUFSIZE_BOUND (uintmax_t)]; + printf ("%s%s%s%s%s%s%s%c", + umaxtostr (total[0], buf1), col_sep, + umaxtostr (total[1], buf2), col_sep, + umaxtostr (total[2], buf3), col_sep, + _("total"), delim); + } } int @@ -388,6 +424,7 @@ main (int argc, char **argv) seen_unpairable = false; issued_disorder_warning[0] = issued_disorder_warning[1] = false; check_input_order = CHECK_ORDER_DEFAULT; + total_option = false; while ((c = getopt_long (argc, argv, "123z", long_options, NULL)) != -1) switch (c) @@ -423,6 +460,10 @@ main (int argc, char **argv) col_sep_len = *optarg ? strlen (optarg) : 1; break; + case TOTAL_OPTION: + total_option = true; + break; + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); diff --git a/tests/misc/comm.pl b/tests/misc/comm.pl index c5cd27f39..fdec3d62c 100755 --- a/tests/misc/comm.pl +++ b/tests/misc/comm.pl @@ -27,37 +27,50 @@ my $prog = 'comm'; # Turn off localization of executable's output. 
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; -my @inputs = ({IN=>{a=>"1\n3"}}, {IN=>{b=>"2\n3"}}); -my @zinputs = ({IN=>{za=>"1\0003"}}, {IN=>{zb=>"2\0003"}}); +my @inputs = ({IN=>{a=>"1\n3\n3\n3"}}, {IN=>{b=>"2\n2\n3\n3\n3"}}); +my @zinputs = ({IN=>{za=>"1\0003\0003\0003"}}, + {IN=>{zb=>"2\0002\0003\0003\0003"}}); my @Tests = ( # basic operation - ['basic', @inputs, {OUT=>"1\n\t2\n\t\t3\n"} ], - ['zbasic', '-z', @zinputs, {OUT=>"1\0\t2\0\t\t3\0"} ], + ['basic', @inputs, {OUT=>"1\n\t2\n\t2\n\t\t3\n\t\t3\n\t\t3\n"} ], + ['zbasic', '-z', @zinputs, {OUT=>"1\0\t2\0\t2\0\t\t3\0\t\t3\0\t\t3\0"} ], # suppress lines unique to file 1 - ['opt-1', '-1', @inputs, {OUT=>"2\n\t3\n"} ], - ['zopt-1', '-z', '-1', @zinputs, {OUT=>"2\0\t3\0"} ], + ['opt-1', '-1', @inputs, {OUT=>"2\n2\n\t3\n\t3\n\t3\n"} ], + ['zopt-1', '-z', '-1', @zinputs, {OUT=>"2\0002\000\t3\000\t3\000\t3\000"} ], # suppress lines unique to file 2 - ['opt-2', '-2', @inputs, {OUT=>"1\n\t3\n"} ], + ['opt-2', '-2', @inputs, {OUT=>"1\n\t3\n\t3\n\t3\n"} ], + ['zopt-2', '-z', '-2', @zinputs, {OUT=>"1\000\t3\000\t3\000\t3\000"} ], # suppress lines that appear in both files - ['opt-3', '-3', @inputs, {OUT=>"1\n\t2\n"} ], + ['opt-3', '-3', @inputs, {OUT=>"1\n\t2\n\t2\n"} ], + ['zopt-3', '-z', '-3', @zinputs, {OUT=>"1\000\t2\000\t2\000"} ], # suppress lines unique to file 1 and lines unique to file 2 - ['opt-12', '-1', '-2', @inputs, {OUT=>"3\n"} ], + ['opt-12', '-1', '-2', @inputs, {OUT=>"3\n3\n3\n"} ], + ['zopt-12', '-12z', @zinputs, {OUT=>"3\0003\0003\000"} ], # suppress lines unique to file 1 and those that appear in both files - ['opt-13', '-1', '-3', @inputs, {OUT=>"2\n"} ], + ['opt-13', '-1', '-3', @inputs, {OUT=>"2\n2\n"} ], + ['zopt-13', '-13z', @zinputs, {OUT=>"2\0002\000"} ], # suppress lines unique to file 2 and those that appear in both files ['opt-23', '-2', '-3', @inputs, {OUT=>"1\n"} ], + ['zopt-23', '-23z', @zinputs, {OUT=>"1\000"} ], - # suppress all output (really?) 
+ # suppress all output ['opt-123', '-1', '-2', '-3', @inputs, {OUT=>""} ], + # show summary: 1 only in file1, 2 only in file2, 3 in both files + ['total-all', '--total', @inputs, {OUT=>"1\n\t2\n\t2\n\t\t3\n\t\t3\n\t\t3\n" + . "1\t2\t3\ttotal\n"} ], + + # show summary only, suppressing regular output + ['total-123', '--total', '-123', @inputs, {OUT=>"1\t2\t3\ttotal\n"} ], + # invalid missing command line argument (1) ['missing-arg1', $inputs[0], {EXIT=>1}, {ERR => "$prog: missing operand after 'a'\n" @@ -128,17 +141,17 @@ my @Tests = # alternate delimiter: ',' ['delim-comma', '--output-delimiter=,', @inputs, - {OUT=>"1\n,2\n,,3\n"} ], + {OUT=>"1\n,2\n,2\n,,3\n,,3\n,,3\n"} ], # two-character alternate delimiter: '++' ['delim-2char', '--output-delimiter=++', @inputs, - {OUT=>"1\n++2\n++++3\n"} ], + {OUT=>"1\n++2\n++2\n++++3\n++++3\n++++3\n"} ], # NUL delimiter ['delim-empty', '--output-delimiter=', @inputs, - {OUT=>"1\n\0002\n\000\0003\n"} ], + {OUT=>"1\n\0002\n\0002\n\000\0003\n\000\0003\n\000\0003\n"} ], ['zdelim-empty', '-z', '-z --output-delimiter=', @zinputs, - {OUT=>"1\000\0002\000\000\0003\000"} ], + {OUT=>"1\000\0002\000\0002\000\000\0003\000\000\0003\000\000\0003\000"} ], # invalid dual delimiter ['delim-dual', '--output-delimiter=,', '--output-delimiter=+', @inputs, @@ -146,8 +159,16 @@ my @Tests = # valid dual delimiter specification ['delim-dual2', '--output-delimiter=,', '--output-delimiter=,', @inputs, - {OUT=>"1\n,2\n,,3\n"} ], + {OUT=>"1\n,2\n,2\n,,3\n,,3\n,,3\n"} ], + + # show summary, zero-terminated + ['totalz-all', '--total', '-z', @zinputs, + {OUT=>"1\000\t2\000\t2\000\t\t3\000\t\t3\000\t\t3\000" + . "1\t2\t3\ttotal\000"} ], + # show summary only (-123), zero-terminated and with ',' as delimiter + ['totalz-123', '--total', '-z123', '--output-delimiter=,', @zinputs, + {OUT=>"1,2,3,total\000"} ], ); my $save_temps = $ENV{DEBUG}; -- cgit v1.2.3-70-g09d2