From ff2178bf30e3eda566cc1d1670768c6d6694a8ac Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Fri, 24 Jun 2016 21:48:29 -0400 Subject: maint: refactor common expand(1) and unexpand(1) code * src/expand.c, src/unexpand.c: Move global variables from here... * src/expand-common.h, src/expand-common.c: ... to here. * src/expand.c, src/unexpand.c: (parse_tab_stops, validate_tab_stops, next_file): Move identical functions to new module. (add_tab_stop): Move to new module, including additional code from 'unexpand' (keeping max_column_width) which will have no effect when used in 'expand'. Refactor common next-column calculation code into a new function 'get_next_tab_column'. * src/local.mk: (src_expand_SOURCES, src_unexpand_SOURCES): Add 'expand-common.c'; (noinst_HEADERS): Add 'expand-common.h'. * po/POTFILES.in: Add 'expand-common.c'. * tests/misc/expand.pl: Add more tests. * tests/misc/unexpand.pl: Likewise. * TODO: Move conclusions to above test after investigation. --- tests/misc/expand.pl | 120 ++++++++++++++++++++++++++++++++++++++++++++++++- tests/misc/unexpand.pl | 36 +++++++++++++++ 2 files changed, 155 insertions(+), 1 deletion(-) (limited to 'tests/misc') diff --git a/tests/misc/expand.pl b/tests/misc/expand.pl index a6b4a4d61..8a9cad144 100755 --- a/tests/misc/expand.pl +++ b/tests/misc/expand.pl @@ -18,7 +18,11 @@ use strict; +my $limits = getlimits (); +my $UINTMAX_OFLOW = $limits->{UINTMAX_OFLOW}; + (my $program_name = $0) =~ s|.*/||; +my $prog = 'expand'; # Turn off localization of executable's output. @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; @@ -27,13 +31,127 @@ my @Tests = ( ['t1', '--tabs=3', {IN=>"a\tb"}, {OUT=>"a b"}], ['t2', '--tabs=3,6,9', {IN=>"a\tb\tc\td\te"}, {OUT=>"a b c d e"}], + ['t3', '--tabs="3 6 9"', {IN=>"a\tb\tc\td\te"}, {OUT=>"a b c d e"}], + # Leading space/commas are silently ignored; Mixing space/commas is allowed. + # (a side-effect of allowing direct "-3,9" parameter). 
+ ['t4', '--tabs=", 3,6 9"', {IN=>"a\tb\tc\td\te"}, {OUT=>"a b c d e"}], + # tab stops parameter without values + ['t5', '--tabs=""', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + ['t6', '--tabs=","', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + ['t7', '--tabs=" "', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + + # Input field wider than the specified tab list + ['t8', '--tabs=6,9', {IN=>"a\tbbbbbbbbbbbbb\tc"}, + {OUT=>"a bbbbbbbbbbbbb c"}], + ['i1', '--tabs=3 -i', {IN=>"\ta\tb"}, {OUT=>" a\tb"}], ['i2', '--tabs=3 -i', {IN=>" \ta\tb"}, {OUT=>" a\tb"}], + + # Undocumented feature: + # treat "expand -7" as "expand --tabs 7" , + # and "expand -90" as "expand --tabs 90", + ['u1', '-3', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + ['u2', '-4 -9', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + ['u3', '-11', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + # Test all digits (for full code coverage) + ['u4', '-2 -6', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + ['u5', '-7', {IN=>"a\tb"}, {OUT=>"a b"}], + ['u6', '-8', {IN=>"a\tb"}, {OUT=>"a b"}], + # This syntax is handled internally as "-3, -9" + ['u7', '-3,9', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + + # Multiple non-empty files + ['f1', '--tabs=4', + {IN=>{"in1" => "a\tb\n"}}, {IN=>{"in2" => "c\td\n"}}, + {OUT=>"a b\nc d\n"}], + # Multiple files, first file is empty + ['f2', '--tabs=4', + {IN=>{"in1" => ""}}, {IN=>{"in2" => "c\td\n"}}, + {OUT=>"c d\n"}], + # Multiple files, second file is empty + ['f3', '--tabs=4', + {IN=>{"in1" => "a\tb\n"}}, {IN=>{"in2" => ""}}, + {OUT=>"a b\n"}], + + + # Test '\b' (back-space) - subtract one column. 
+ # + # Note: + # In a terminal window, 'expand' will appear to erase the 'a' characters + # due to overwriting them with spaces: + # + # $ printf 'aaa\b\b\bc\td\n' + # caa d + # $ printf 'aaa\b\b\bc\td\n' | expand + # c d + # + # However the characters are all printed: + # + # $ printf 'aaa\b\b\bc\td\n' | expand | od -An -ta + # a a a bs bs bs c sp sp sp sp sp sp sp d nl + # + # If users ever report a problem with these tests and just + # copy&paste from the terminal, their report will be confusing + # (the 'a' will not appear). + # + # To see an example, enable the 'b-confusing' test, and examine the + # reported log: + # + # expand.pl: test b-confusing: stdout mismatch + # *** b-confusing.2 Fri Jun 24 15:43:21 2016 + # --- b-confusing.O Fri Jun 24 15:43:21 2016 + # *************** + # *** 1 **** + # ! c d + # --- 1 ---- + # ! c d + # + # ['b-confusing','', {IN=>"aaa\b\b\bc\td\n"}, {OUT=>"c d\n"}], + + ['b1','', {IN=>"aaa\b\b\bc\td\n"}, {OUT=>"aaa\b\b\bc d\n"}], + + # \b as first character, when column is zero + ['b2','', {IN=>"\bc\td"}, {OUT=>"\bc d"}], + + # Testing tab list adjusted due to backspaces + # ('b3' is the baseline without backspaces). + ['b3','--tabs 2,4,6,10', + {IN=>"1\t2\t3\t4\t5\n" . + "a\tb\tc\td\te\n"}, + {OUT=>"1 2 3 4 5\n" . + "a b c d e\n"}], + + # On screen this will appear the same as 'b3' + ['b4','--tabs 2,4,6,10', + {IN=>"1\t2\t3\t4\t5\n" . + "a\tbHELLO\b\b\b\b\b\tc\td\te\n"}, + {OUT=>"1 2 3 4 5\n" . + "a bHELLO\b\b\b\b\b c d e\n"}], + + # On screen only 'bHE' will appear (LLO overwritten by spaces), + # 'c' should align with 4, 'd' with 5: + # 1 2 3 4 5 + # a bHE c d e + ['b5','--tabs 2,4,6,10', + {IN=>"1\t2\t3\t4\t5\n" . + "a\tbHELLO\b\b\b\tc\td\te\n"}, + {OUT=>"1 2 3 4 5\n" . 
+ "a bHELLO\b\b\b c d e\n"}], + + + # Test errors + ['e1', '--tabs="a"', {IN=>''}, {OUT=>''}, {EXIT=>1}, + {ERR => "$prog: tab size contains invalid character(s): 'a'\n"}], + ['e2', "-t $UINTMAX_OFLOW", {IN=>''}, {OUT=>''}, {EXIT=>1}, + {ERR => "$prog: tab stop is too large '$UINTMAX_OFLOW'\n"}], + ['e3', '--tabs=0', {IN=>''}, {OUT=>''}, {EXIT=>1}, + {ERR => "$prog: tab size cannot be 0\n"}], + ['e4', '--tabs=3,3', {IN=>''}, {OUT=>''}, {EXIT=>1}, + {ERR => "$prog: tab sizes must be ascending\n"}], ); my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; -my $prog = 'expand'; my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); exit $fail; diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl index c592c5a52..6ba6d405c 100755 --- a/tests/misc/unexpand.pl +++ b/tests/misc/unexpand.pl @@ -90,6 +90,42 @@ my @Tests = # setting of e.g., _POSIX2_VERSION=1. ['obs-ovflo', "-$limits->{UINTMAX_OFLOW}", {IN=>''}, {OUT=>''}, {EXIT => 1}, {ERR => "$prog: tab stop value is too large\n"}], + + + # Test input with backspaces '\b' ('bs1' is the baseline, without \b) + # Note: If users report errors in these tests, copy&pasting results from + # their terminal output might be confusing due to '\b' overwriting + # characters. For details see '\b' tests in 'expand.pl'. 
+ ['bs1', '-a -t4', {IN=>"aa c\n"}, {OUT=>"aa\tc\n"}], + ['bs2', '-a -t4', {IN=>"aa\b c\n"}, {OUT=>"aa\b c\n"}], + ['bs3', '-a -t4', {IN=>"aa\b c\n"}, {OUT=>"aa\b\tc\n"}], + ['bs4', '-a -t3', {IN=>"aa\b c\n"}, {OUT=>"aa\b\tc\n"}], + + # Undocumented feature: + # treat "unexpand -7" as "unexpand --first-only --tabs 7" , + # and "unexpand -90" as "unexpand --first-only --tabs 90", + ['u1', '-a -3', {IN=>"a b c"}, {OUT=>"a\tb\tc"}], + ['u2', '-a -4,9', {IN=>"a b c"}, {OUT=>"a\tb\tc"}], + ['u3', '-a -11', {IN=>"a b"}, {OUT=>"a\tb"}], + # Test all digits (for full code coverage) + ['u4', '-a -2,6', {IN=>"a b c"}, {OUT=>"a b\tc"}], + ['u5', '-a -7', {IN=>"a b"}, {OUT=>"a\tb"}], + ['u6', '-a -8', {IN=>"a b"}, {OUT=>"a\tb"}], + # This syntax is handled internally as "-3, -9" + ['u7', '-a -3,9', {IN=>"a b c"}, {OUT=>"a\tb\tc"}], + # Default (without -a) is --first-only: + ['u8', '-3', {IN=>" a b"}, {OUT=>"\ta b"}], + + # Arguably this should minimize translation as is done on Solaris. + # I.e., not modify the input. But since the result is equivalent, + # and to be consistent in output with older versions, we output + # a '\t' rather than a space for the second tab position. + # For more detailed comparison with other implementations see: + # http://lists.gnu.org/archive/html/coreutils/2016-06/msg00015.html + # http://lists.gnu.org/archive/html/coreutils/2016-07/msg00011.html + ['ts1', '-t8,9', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t\t y\n"}], + # There is no ambiguity here. This should always be the output. + ['ts2', '-t5,8', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t y\n"}], ); my $save_temps = $ENV{DEBUG}; -- cgit v1.2.3-70-g09d2