From ff2178bf30e3eda566cc1d1670768c6d6694a8ac Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Fri, 24 Jun 2016 21:48:29 -0400 Subject: maint: refactor common expand(1) and unexpand(1) code * src/expand.c, src/unexpand.c: Move global variables from here... * src/expand-common.h, src/expand-common.c: ... to here. * src/expand.c, src/unexpand.c: (parse_tab_stops, validate_tab_stops, next_file): Move identical functions to new module. (add_tab_stop): Move to new module, including additional code from 'unexpand' (keeping max_column_width) which will have no effect when used in 'expand'. Refactor common next-column calculation code into a new function 'get_next_tab_column'. * src/local.mk: (src_expand_SOURCES, src_unexpand_SOURCES): Add 'expand-common.c'; (noinst_HEADERS): Add 'expand-common.h'. * po/POTFILES.in: Add 'expand-common.c'. * tests/misc/expand.pl: Add more tests. * tests/misc/unexpand.pl: Likewise. * TODO: Move conclusions to above test after investigation. --- TODO | 4 - po/POTFILES.in | 1 + src/expand-common.c | 274 +++++++++++++++++++++++++++++++++++++++++++++++++ src/expand-common.h | 72 +++++++++++++ src/expand.c | 212 ++------------------------------------ src/local.mk | 4 + src/unexpand.c | 230 +++-------------------------------------- tests/misc/expand.pl | 120 +++++++++++++++++++++- tests/misc/unexpand.pl | 36 +++++++ 9 files changed, 529 insertions(+), 424 deletions(-) create mode 100644 src/expand-common.c create mode 100644 src/expand-common.h diff --git a/TODO b/TODO index de95e5a64..dc1a9e2a2 100644 --- a/TODO +++ b/TODO @@ -67,10 +67,6 @@ lib/strftime.c: Since %N is the only format that we need but that would expand /%(-_)?\d*N/ to the desired string and then pass the resulting string to glibc's strftime. -unexpand: [http://www.opengroup.org/onlinepubs/007908799/xcu/unexpand.html] - printf 'x\t \t y\n'|unexpand -t 8,9 should print its input, unmodified.
- printf 'x\t \t y\n'|unexpand -t 5,8 should print "x\ty\n" - sort: Investigate better sorting algorithms; see Knuth vol. 3. We tried list merge sort, but it was about 50% slower than the diff --git a/po/POTFILES.in b/po/POTFILES.in index 7583f635d..b8a28bdee 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -57,6 +57,7 @@ src/dirname.c src/du.c src/echo.c src/env.c +src/expand-common.c src/expand.c src/expr.c src/factor.c diff --git a/src/expand-common.c b/src/expand-common.c new file mode 100644 index 000000000..41d7003a4 --- /dev/null +++ b/src/expand-common.c @@ -0,0 +1,274 @@ +/* expand-common - common functionality for expand/unexpand + Copyright (C) 1989-2016 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include + +#include +#include +#include "system.h" +#include "error.h" +#include "fadvise.h" +#include "quote.h" +#include "xstrndup.h" + +#include "expand-common.h" + +/* If true, convert blanks even after nonblank characters have been + read on the line. */ +bool convert_entire_line = false; + +/* If nonzero, the size of all tab stops. If zero, use 'tab_list' instead. */ +static uintmax_t tab_size = 0; + +/* The maximum distance between tab stops. */ +size_t max_column_width; + +/* Array of the explicit column numbers of the tab stops; + after 'tab_list' is exhausted, each additional tab is replaced + by a space. The first column is column 0.
*/ +static uintmax_t *tab_list = NULL; + +/* The number of allocated entries in 'tab_list'. */ +static size_t n_tabs_allocated = 0; + +/* The index of the first invalid element of 'tab_list', + where the next element can be added. */ +static size_t first_free_tab = 0; + +/* Null-terminated array of input filenames. */ +static char **file_list = NULL; + +/* Default for 'file_list' if no files are given on the command line. */ +static char *stdin_argv[] = +{ + (char *) "-", NULL +}; + +/* True if we have ever read standard input. */ +static bool have_read_stdin = false; + +/* The desired exit status. */ +int exit_status = EXIT_SUCCESS; + + + +/* Add tab stop TABVAL to the end of 'tab_list'. */ +extern void +add_tab_stop (uintmax_t tabval) +{ + uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0; + uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0; + + if (first_free_tab == n_tabs_allocated) + tab_list = X2NREALLOC (tab_list, &n_tabs_allocated); + tab_list[first_free_tab++] = tabval; + + if (max_column_width < column_width) + { + if (SIZE_MAX < column_width) + error (EXIT_FAILURE, 0, _("tabs are too far apart")); + max_column_width = column_width; + } +} + +/* Add the comma or blank separated list of tab stops STOPS + to the list of tab stops. */ +extern void +parse_tab_stops (char const *stops) +{ + bool have_tabval = false; + uintmax_t tabval IF_LINT ( = 0); + char const *num_start IF_LINT ( = NULL); + bool ok = true; + + for (; *stops; stops++) + { + if (*stops == ',' || isblank (to_uchar (*stops))) + { + if (have_tabval) + add_tab_stop (tabval); + have_tabval = false; + } + else if (ISDIGIT (*stops)) + { + if (!have_tabval) + { + tabval = 0; + have_tabval = true; + num_start = stops; + } + + /* Detect overflow. 
*/ + if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t)) + { + size_t len = strspn (num_start, "0123456789"); + char *bad_num = xstrndup (num_start, len); + error (0, 0, _("tab stop is too large %s"), quote (bad_num)); + free (bad_num); + ok = false; + stops = num_start + len - 1; + } + } + else + { + error (0, 0, _("tab size contains invalid character(s): %s"), + quote (stops)); + ok = false; + break; + } + } + + if (!ok) + exit (EXIT_FAILURE); + + if (have_tabval) + add_tab_stop (tabval); +} + +/* Check that the list of tab stops TABS, with ENTRIES entries, + contains only nonzero, ascending values. */ + +static void +validate_tab_stops (uintmax_t const *tabs, size_t entries) +{ + uintmax_t prev_tab = 0; + size_t i; + + for (i = 0; i < entries; i++) + { + if (tabs[i] == 0) + error (EXIT_FAILURE, 0, _("tab size cannot be 0")); + if (tabs[i] <= prev_tab) + error (EXIT_FAILURE, 0, _("tab sizes must be ascending")); + prev_tab = tabs[i]; + } +} + +/* Called after all command-line options have been parsed, + and add_tab_stop/parse_tab_stops have been called. + Will validate the tab-stop values, + and set the final values to: + tab-stops = 8 (if no tab-stops given on command line) + tab-stops = N (if value N specified as the only value). + tab-stops = distinct values given on command line (if multiple values given). +*/ +extern void +finalize_tab_stops (void) +{ + validate_tab_stops (tab_list, first_free_tab); + + if (first_free_tab == 0) + tab_size = max_column_width = 8; + else if (first_free_tab == 1) + tab_size = tab_list[0]; + else + tab_size = 0; +} + + +extern uintmax_t +get_next_tab_column (const uintmax_t column, size_t* tab_index, + bool* last_tab) +{ + *last_tab = false; + + /* single tab-size - return multiples of it */ + if (tab_size) + return column + (tab_size - column % tab_size); + + /* multiple tab-sizes - iterate them until the tab position is beyond + the current input column. 
*/ + for ( ; *tab_index < first_free_tab ; (*tab_index)++ ) + { + uintmax_t tab = tab_list[*tab_index]; + if (column < tab) + return tab; + } + + *last_tab = true; + return 0; +} + + + + +/* Sets new file-list */ +extern void +set_file_list (char **list) +{ + have_read_stdin = false; + + if (!list) + file_list = stdin_argv; + else + file_list = list; +} + +/* Close the old stream pointer FP if it is non-NULL, + and return a new one opened to read the next input file. + Open a filename of '-' as the standard input. + Return NULL if there are no more input files. */ + +extern FILE * +next_file (FILE *fp) +{ + static char *prev_file; + char *file; + + if (fp) + { + if (ferror (fp)) + { + error (0, errno, "%s", quotef (prev_file)); + exit_status = EXIT_FAILURE; + } + if (STREQ (prev_file, "-")) + clearerr (fp); /* Also clear EOF. */ + else if (fclose (fp) != 0) + { + error (0, errno, "%s", quotef (prev_file)); + exit_status = EXIT_FAILURE; + } + } + + while ((file = *file_list++) != NULL) + { + if (STREQ (file, "-")) + { + have_read_stdin = true; + fp = stdin; + } + else + fp = fopen (file, "r"); + if (fp) + { + prev_file = file; + fadvise (fp, FADVISE_SEQUENTIAL); + return fp; + } + error (0, errno, "%s", quotef (file)); + exit_status = EXIT_FAILURE; + } + return NULL; +} + +/* Close standard input if it was read, exiting with an error on failure. */ +extern void +cleanup_file_list_stdin (void) +{ + if (have_read_stdin && fclose (stdin) != 0) + error (EXIT_FAILURE, errno, "-"); +} diff --git a/src/expand-common.h b/src/expand-common.h new file mode 100644 index 000000000..8cb207997 --- /dev/null +++ b/src/expand-common.h @@ -0,0 +1,72 @@ +/* expand-common - common functionality for expand/unexpand + + Copyright (C) 1989-2016 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* If true, convert blanks even after nonblank characters have been + read on the line. */ +extern bool convert_entire_line; + +/* The maximum distance between tab stops. */ +extern size_t max_column_width; + +/* Null-terminated array of input filenames. */ +//extern char **file_list; + +/* Default for 'file_list' if no files are given on the command line. */ +//extern char *stdin_argv[]; + +/* True if we have ever read standard input. */ +//extern bool have_read_stdin; + +/* The desired exit status. */ +extern int exit_status; + +/* Add tab stop TABVAL to the end of 'tab_list'. */ +extern void +add_tab_stop (uintmax_t tabval); + +/* Add the comma or blank separated list of tab stops STOPS + to the list of tab stops. */ +extern void +parse_tab_stops (char const *stops); + +/* TODO: Document */ +extern uintmax_t +get_next_tab_column (const uintmax_t column, size_t* tab_index, + bool* last_tab); + +/* Called after all command-line options have been parsed, + sets the final tab-stops values */ +extern void +finalize_tab_stops (void); + + + + +/* Sets new file-list */ +extern void +set_file_list (char **file_list); + +/* Close the old stream pointer FP if it is non-NULL, + and return a new one opened to read the next input file. + Open a filename of '-' as the standard input. + Return NULL if there are no more input files.
*/ +extern FILE * +next_file (FILE *fp); + +/* */ +extern void +cleanup_file_list_stdin (void); diff --git a/src/expand.c b/src/expand.c index e11f1afc7..9f84de825 100644 --- a/src/expand.c +++ b/src/expand.c @@ -39,49 +39,15 @@ #include #include "system.h" #include "error.h" -#include "fadvise.h" -#include "quote.h" #include "xstrndup.h" +#include "expand-common.h" + /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "expand" #define AUTHORS proper_name ("David MacKenzie") -/* If true, convert blanks even after nonblank characters have been - read on the line. */ -static bool convert_entire_line; - -/* If nonzero, the size of all tab stops. If zero, use 'tab_list' instead. */ -static uintmax_t tab_size; - -/* Array of the explicit column numbers of the tab stops; - after 'tab_list' is exhausted, each additional tab is replaced - by a space. The first column is column 0. */ -static uintmax_t *tab_list; - -/* The number of allocated entries in 'tab_list'. */ -static size_t n_tabs_allocated; - -/* The index of the first invalid element of 'tab_list', - where the next element can be added. */ -static size_t first_free_tab; - -/* Null-terminated array of input filenames. */ -static char **file_list; - -/* Default for 'file_list' if no files are given on the command line. */ -static char *stdin_argv[] = -{ - (char *) "-", NULL -}; - -/* True if we have ever read standard input. */ -static bool have_read_stdin; - -/* The desired exit status. */ -static int exit_status; - static char const shortopts[] = "it:0::1::2::3::4::5::6::7::8::9::"; static struct option const longopts[] = @@ -125,137 +91,6 @@ Convert tabs in each FILE to spaces, writing to standard output.\n\ exit (status); } -/* Add tab stop TABVAL to the end of 'tab_list'. 
*/ - -static void -add_tab_stop (uintmax_t tabval) -{ - if (first_free_tab == n_tabs_allocated) - tab_list = X2NREALLOC (tab_list, &n_tabs_allocated); - tab_list[first_free_tab++] = tabval; -} - -/* Add the comma or blank separated list of tab stops STOPS - to the list of tab stops. */ - -static void -parse_tab_stops (char const *stops) -{ - bool have_tabval = false; - uintmax_t tabval IF_LINT ( = 0); - char const *num_start IF_LINT ( = NULL); - bool ok = true; - - for (; *stops; stops++) - { - if (*stops == ',' || isblank (to_uchar (*stops))) - { - if (have_tabval) - add_tab_stop (tabval); - have_tabval = false; - } - else if (ISDIGIT (*stops)) - { - if (!have_tabval) - { - tabval = 0; - have_tabval = true; - num_start = stops; - } - - /* Detect overflow. */ - if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t)) - { - size_t len = strspn (num_start, "0123456789"); - char *bad_num = xstrndup (num_start, len); - error (0, 0, _("tab stop is too large %s"), quote (bad_num)); - free (bad_num); - ok = false; - stops = num_start + len - 1; - } - } - else - { - error (0, 0, _("tab size contains invalid character(s): %s"), - quote (stops)); - ok = false; - break; - } - } - - if (!ok) - exit (EXIT_FAILURE); - - if (have_tabval) - add_tab_stop (tabval); -} - -/* Check that the list of tab stops TABS, with ENTRIES entries, - contains only nonzero, ascending values. */ - -static void -validate_tab_stops (uintmax_t const *tabs, size_t entries) -{ - uintmax_t prev_tab = 0; - size_t i; - - for (i = 0; i < entries; i++) - { - if (tabs[i] == 0) - error (EXIT_FAILURE, 0, _("tab size cannot be 0")); - if (tabs[i] <= prev_tab) - error (EXIT_FAILURE, 0, _("tab sizes must be ascending")); - prev_tab = tabs[i]; - } -} - -/* Close the old stream pointer FP if it is non-NULL, - and return a new one opened to read the next input file. - Open a filename of '-' as the standard input. - Return NULL if there are no more input files. 
*/ - -static FILE * -next_file (FILE *fp) -{ - static char *prev_file; - char *file; - - if (fp) - { - if (ferror (fp)) - { - error (0, errno, "%s", quotef (prev_file)); - exit_status = EXIT_FAILURE; - } - if (STREQ (prev_file, "-")) - clearerr (fp); /* Also clear EOF. */ - else if (fclose (fp) != 0) - { - error (0, errno, "%s", quotef (prev_file)); - exit_status = EXIT_FAILURE; - } - } - - while ((file = *file_list++) != NULL) - { - if (STREQ (file, "-")) - { - have_read_stdin = true; - fp = stdin; - } - else - fp = fopen (file, "r"); - if (fp) - { - prev_file = file; - fadvise (fp, FADVISE_SEQUENTIAL); - return fp; - } - error (0, errno, "%s", quotef (file)); - exit_status = EXIT_FAILURE; - } - return NULL; -} /* Change tabs to spaces, writing to stdout. Read each file in 'file_list', in order. */ @@ -301,25 +136,13 @@ expand (void) { /* Column the next input tab stop is on. */ uintmax_t next_tab_column; + bool last_tab IF_LINT (=0); - if (tab_size) - next_tab_column = column + (tab_size - column % tab_size); - else - while (true) - if (tab_index == first_free_tab) - { - next_tab_column = column + 1; - break; - } - else - { - uintmax_t tab = tab_list[tab_index++]; - if (column < tab) - { - next_tab_column = tab; - break; - } - } + next_tab_column = get_next_tab_column (column, &tab_index, + &last_tab); + + if (last_tab) + next_tab_column = column + 1; if (next_tab_column < column) error (EXIT_FAILURE, 0, _("input line is too long")); @@ -369,12 +192,7 @@ main (int argc, char **argv) textdomain (PACKAGE); atexit (close_stdout); - - have_read_stdin = false; - exit_status = EXIT_SUCCESS; convert_entire_line = true; - tab_list = NULL; - first_free_tab = 0; while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) { @@ -410,21 +228,13 @@ main (int argc, char **argv) } } - validate_tab_stops (tab_list, first_free_tab); - - if (first_free_tab == 0) - tab_size = 8; - else if (first_free_tab == 1) - tab_size = tab_list[0]; - else - tab_size = 0; + 
finalize_tab_stops (); - file_list = (optind < argc ? &argv[optind] : stdin_argv); + set_file_list ( (optind < argc) ? &argv[optind] : NULL); expand (); - if (have_read_stdin && fclose (stdin) != 0) - error (EXIT_FAILURE, errno, "-"); + cleanup_file_list_stdin (); return exit_status; } diff --git a/src/local.mk b/src/local.mk index 12a0d55d4..6afaf836a 100644 --- a/src/local.mk +++ b/src/local.mk @@ -44,6 +44,7 @@ noinst_HEADERS = \ src/copy.h \ src/cp-hash.h \ src/dircolors.h \ + src/expand-common.h \ src/fiemap.h \ src/find-mount-point.h \ src/fs.h \ @@ -408,6 +409,9 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) src_ginstall_CPPFLAGS = -DENABLE_MATCHPATHCON=1 $(AM_CPPFLAGS) +src_expand_SOURCES = src/expand.c src/expand-common.c +src_unexpand_SOURCES = src/unexpand.c src/expand-common.c + # Ensure we don't link against libcoreutils.a as that lib is # not compiled with -fPIC which causes issues on 64 bit at least src_libstdbuf_so_LDADD = $(LIBINTL) diff --git a/src/unexpand.c b/src/unexpand.c index a75875659..a1317b173 100644 --- a/src/unexpand.c +++ b/src/unexpand.c @@ -40,51 +40,16 @@ #include #include "system.h" #include "error.h" -#include "fadvise.h" -#include "quote.h" #include "xstrndup.h" +#include "expand-common.h" + /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "unexpand" #define AUTHORS proper_name ("David MacKenzie") -/* If true, convert blanks even after nonblank characters have been - read on the line. */ -static bool convert_entire_line; - -/* If nonzero, the size of all tab stops. If zero, use 'tab_list' instead. */ -static size_t tab_size; - -/* The maximum distance between tab stops. */ -static size_t max_column_width; - -/* Array of the explicit column numbers of the tab stops; - after 'tab_list' is exhausted, the rest of the line is printed - unchanged. The first column is column 0. */ -static uintmax_t *tab_list; - -/* The number of allocated entries in 'tab_list'. 
*/ -static size_t n_tabs_allocated; - -/* The index of the first invalid element of 'tab_list', - where the next element can be added. */ -static size_t first_free_tab; -/* Null-terminated array of input filenames. */ -static char **file_list; - -/* Default for 'file_list' if no files are given on the command line. */ -static char *stdin_argv[] = -{ - (char *) "-", NULL -}; - -/* True if we have ever read standard input. */ -static bool have_read_stdin; - -/* The desired exit status. */ -static int exit_status; /* For long options that have no equivalent short option, use a non-character as a pseudo short option, starting with CHAR_MAX + 1. */ @@ -134,148 +99,6 @@ Convert blanks in each FILE to tabs, writing to standard output.\n\ exit (status); } -/* Add tab stop TABVAL to the end of 'tab_list'. */ - -static void -add_tab_stop (uintmax_t tabval) -{ - uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0; - uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0; - - if (first_free_tab == n_tabs_allocated) - tab_list = X2NREALLOC (tab_list, &n_tabs_allocated); - tab_list[first_free_tab++] = tabval; - - if (max_column_width < column_width) - { - if (SIZE_MAX < column_width) - error (EXIT_FAILURE, 0, _("tabs are too far apart")); - max_column_width = column_width; - } -} - -/* Add the comma or blank separated list of tab stops STOPS - to the list of tab stops. */ - -static void -parse_tab_stops (char const *stops) -{ - bool have_tabval = false; - uintmax_t tabval IF_LINT ( = 0); - char const *num_start IF_LINT ( = NULL); - bool ok = true; - - for (; *stops; stops++) - { - if (*stops == ',' || isblank (to_uchar (*stops))) - { - if (have_tabval) - add_tab_stop (tabval); - have_tabval = false; - } - else if (ISDIGIT (*stops)) - { - if (!have_tabval) - { - tabval = 0; - have_tabval = true; - num_start = stops; - } - - /* Detect overflow. 
*/ - if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t)) - { - size_t len = strspn (num_start, "0123456789"); - char *bad_num = xstrndup (num_start, len); - error (0, 0, _("tab stop is too large %s"), quote (bad_num)); - free (bad_num); - ok = false; - stops = num_start + len - 1; - } - } - else - { - error (0, 0, _("tab size contains invalid character(s): %s"), - quote (stops)); - ok = false; - break; - } - } - - if (!ok) - exit (EXIT_FAILURE); - - if (have_tabval) - add_tab_stop (tabval); -} - -/* Check that the list of tab stops TABS, with ENTRIES entries, - contains only nonzero, ascending values. */ - -static void -validate_tab_stops (uintmax_t const *tabs, size_t entries) -{ - uintmax_t prev_tab = 0; - size_t i; - - for (i = 0; i < entries; i++) - { - if (tabs[i] == 0) - error (EXIT_FAILURE, 0, _("tab size cannot be 0")); - if (tabs[i] <= prev_tab) - error (EXIT_FAILURE, 0, _("tab sizes must be ascending")); - prev_tab = tabs[i]; - } -} - -/* Close the old stream pointer FP if it is non-NULL, - and return a new one opened to read the next input file. - Open a filename of '-' as the standard input. - Return NULL if there are no more input files. */ - -static FILE * -next_file (FILE *fp) -{ - static char *prev_file; - char *file; - - if (fp) - { - if (ferror (fp)) - { - error (0, errno, "%s", quotef (prev_file)); - exit_status = EXIT_FAILURE; - } - if (STREQ (prev_file, "-")) - clearerr (fp); /* Also clear EOF. */ - else if (fclose (fp) != 0) - { - error (0, errno, "%s", quotef (prev_file)); - exit_status = EXIT_FAILURE; - } - } - - while ((file = *file_list++) != NULL) - { - if (STREQ (file, "-")) - { - have_read_stdin = true; - fp = stdin; - } - else - fp = fopen (file, "r"); - if (fp) - { - prev_file = file; - fadvise (fp, FADVISE_SEQUENTIAL); - return fp; - } - error (0, errno, "%s", quotef (file)); - exit_status = EXIT_FAILURE; - } - return NULL; -} - /* Change blanks to tabs, writing to stdout. Read each file in 'file_list', in order. 
*/ @@ -344,28 +167,13 @@ unexpand (void) if (blank) { - if (next_tab_column <= column) - { - if (tab_size) - next_tab_column = - column + (tab_size - column % tab_size); - else - while (true) - if (tab_index == first_free_tab) - { - convert = false; - break; - } - else - { - uintmax_t tab = tab_list[tab_index++]; - if (column < tab) - { - next_tab_column = tab; - break; - } - } - } + bool last_tab IF_LINT (=0); + + next_tab_column = get_next_tab_column (column, &tab_index, + &last_tab); + + if (last_tab) + convert = false; if (convert) { @@ -464,12 +272,6 @@ main (int argc, char **argv) atexit (close_stdout); - have_read_stdin = false; - exit_status = EXIT_SUCCESS; - convert_entire_line = false; - tab_list = NULL; - first_free_tab = 0; - while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL)) != -1) { @@ -512,21 +314,13 @@ main (int argc, char **argv) if (have_tabval) add_tab_stop (tabval); - validate_tab_stops (tab_list, first_free_tab); - - if (first_free_tab == 0) - tab_size = max_column_width = 8; - else if (first_free_tab == 1) - tab_size = tab_list[0]; - else - tab_size = 0; + finalize_tab_stops (); - file_list = (optind < argc ? &argv[optind] : stdin_argv); + set_file_list ( (optind < argc) ? &argv[optind] : NULL); unexpand (); - if (have_read_stdin && fclose (stdin) != 0) - error (EXIT_FAILURE, errno, "-"); + cleanup_file_list_stdin (); return exit_status; } diff --git a/tests/misc/expand.pl b/tests/misc/expand.pl index a6b4a4d61..8a9cad144 100755 --- a/tests/misc/expand.pl +++ b/tests/misc/expand.pl @@ -18,7 +18,11 @@ use strict; +my $limits = getlimits (); +my $UINTMAX_OFLOW = $limits->{UINTMAX_OFLOW}; + (my $program_name = $0) =~ s|.*/||; +my $prog = 'expand'; # Turn off localization of executable's output. 
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; @@ -27,13 +31,127 @@ my @Tests = ( ['t1', '--tabs=3', {IN=>"a\tb"}, {OUT=>"a b"}], ['t2', '--tabs=3,6,9', {IN=>"a\tb\tc\td\te"}, {OUT=>"a b c d e"}], + ['t3', '--tabs="3 6 9"', {IN=>"a\tb\tc\td\te"}, {OUT=>"a b c d e"}], + # Leading space/commas are silently ignored; Mixing space/commas is allowed. + # (a side-effect of allowing direct "-3,9" parameter). + ['t4', '--tabs=", 3,6 9"', {IN=>"a\tb\tc\td\te"}, {OUT=>"a b c d e"}], + # tab stops parameter without values + ['t5', '--tabs=""', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + ['t6', '--tabs=","', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + ['t7', '--tabs=" "', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + + # Input field wider than the specified tab list + ['t8', '--tabs=6,9', {IN=>"a\tbbbbbbbbbbbbb\tc"}, + {OUT=>"a bbbbbbbbbbbbb c"}], + ['i1', '--tabs=3 -i', {IN=>"\ta\tb"}, {OUT=>" a\tb"}], ['i2', '--tabs=3 -i', {IN=>" \ta\tb"}, {OUT=>" a\tb"}], + + # Undocumented feature: + # treat "expand -7" as "expand --tabs 7" , + # and "expand -90" as "expand --tabs 90", + ['u1', '-3', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + ['u2', '-4 -9', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + ['u3', '-11', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + # Test all digits (for full code coverage) + ['u4', '-2 -6', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + ['u5', '-7', {IN=>"a\tb"}, {OUT=>"a b"}], + ['u6', '-8', {IN=>"a\tb"}, {OUT=>"a b"}], + # This syntax is handled internally as "-3, -9" + ['u7', '-3,9', {IN=>"a\tb\tc"}, {OUT=>"a b c"}], + + # Multiple non-empty files + ['f1', '--tabs=4', + {IN=>{"in1" => "a\tb\n"}}, {IN=>{"in2" => "c\td\n"}}, + {OUT=>"a b\nc d\n"}], + # Multiple files, first file is empty + ['f2', '--tabs=4', + {IN=>{"in1" => ""}}, {IN=>{"in2" => "c\td\n"}}, + {OUT=>"c d\n"}], + # Multiple files, second file is empty + ['f3', '--tabs=4', + {IN=>{"in1" => "a\tb\n"}}, {IN=>{"in2" => ""}}, + {OUT=>"a b\n"}], + + + # Test '\b' (back-space) - subtract one column. 
+ # + # Note: + # In a terminal window, 'expand' will appear to erase the 'a' characters + # due to overwriting them with spaces: + # + # $ printf 'aaa\b\b\bc\td\n' + # caa d + # $ printf 'aaa\b\b\bc\td\n' | expand + # c d + # + # However the characters are all printed: + # + # $ printf 'aaa\b\b\bc\td\n' | expand | od -An -ta + # a a a bs bs bs c sp sp sp sp sp sp sp d nl + # + # If users ever report a problem with these tests and just + # copy&paste from the terminal, their report will be confusing + # (the 'a' will not appear). + # + # To see an example, enable the 'b-confusing' test, and examine the + # reported log: + # + # expand.pl: test b-confusing: stdout mismatch + # *** b-confusing.2 Fri Jun 24 15:43:21 2016 + # --- b-confusing.O Fri Jun 24 15:43:21 2016 + # *************** + # *** 1 **** + # ! c d + # --- 1 ---- + # ! c d + # + # ['b-confusing','', {IN=>"aaa\b\b\bc\td\n"}, {OUT=>"c d\n"}], + + ['b1','', {IN=>"aaa\b\b\bc\td\n"}, {OUT=>"aaa\b\b\bc d\n"}], + + # \b as first character, when column is zero + ['b2','', {IN=>"\bc\td"}, {OUT=>"\bc d"}], + + # Testing tab list adjusted due to backspaces + # ('b3' is the baseline without backspaces). + ['b3','--tabs 2,4,6,10', + {IN=>"1\t2\t3\t4\t5\n" . + "a\tb\tc\td\te\n"}, + {OUT=>"1 2 3 4 5\n" . + "a b c d e\n"}], + + # On screen this will appear the same as 'b3' + ['b4','--tabs 2,4,6,10', + {IN=>"1\t2\t3\t4\t5\n" . + "a\tbHELLO\b\b\b\b\b\tc\td\te\n"}, + {OUT=>"1 2 3 4 5\n" . + "a bHELLO\b\b\b\b\b c d e\n"}], + + # On screen only 'bHE' will appear (LLO overwritten by spaces), + # 'c' should align with 4, 'd' with 5: + # 1 2 3 4 5 + # a bHE c d e + ['b5','--tabs 2,4,6,10', + {IN=>"1\t2\t3\t4\t5\n" . + "a\tbHELLO\b\b\b\tc\td\te\n"}, + {OUT=>"1 2 3 4 5\n" .
+ "a bHELLO\b\b\b c d e\n"}], + + + # Test errors + ['e1', '--tabs="a"', {IN=>''}, {OUT=>''}, {EXIT=>1}, + {ERR => "$prog: tab size contains invalid character(s): 'a'\n"}], + ['e2', "-t $UINTMAX_OFLOW", {IN=>''}, {OUT=>''}, {EXIT=>1}, + {ERR => "$prog: tab stop is too large '$UINTMAX_OFLOW'\n"}], + ['e3', '--tabs=0', {IN=>''}, {OUT=>''}, {EXIT=>1}, + {ERR => "$prog: tab size cannot be 0\n"}], + ['e4', '--tabs=3,3', {IN=>''}, {OUT=>''}, {EXIT=>1}, + {ERR => "$prog: tab sizes must be ascending\n"}], ); my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; -my $prog = 'expand'; my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); exit $fail; diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl index c592c5a52..6ba6d405c 100755 --- a/tests/misc/unexpand.pl +++ b/tests/misc/unexpand.pl @@ -90,6 +90,42 @@ my @Tests = # setting of e.g., _POSIX2_VERSION=1. ['obs-ovflo', "-$limits->{UINTMAX_OFLOW}", {IN=>''}, {OUT=>''}, {EXIT => 1}, {ERR => "$prog: tab stop value is too large\n"}], + + + # Test input with backspaces '\b' ('bs1' is the baseline, without \b) + # Note: If users report errors in these tests, copy&pasting results from + # their terminal output might be confusing due to '\b' overriding + # characters. For details see '\b' tests in 'expand.pl'.
+ ['bs1', '-a -t4', {IN=>"aa c\n"}, {OUT=>"aa\tc\n"}], + ['bs2', '-a -t4', {IN=>"aa\b c\n"}, {OUT=>"aa\b c\n"}], + ['bs3', '-a -t4', {IN=>"aa\b c\n"}, {OUT=>"aa\b\tc\n"}], + ['bs4', '-a -t3', {IN=>"aa\b c\n"}, {OUT=>"aa\b\tc\n"}], + + # Undocumented feature: + # treat "unexpand -7" as "unexpand --first-only --tabs 7" , + # and "unexpand -90" as "unexpand --first-only --tabs 90", + ['u1', '-a -3', {IN=>"a b c"}, {OUT=>"a\tb\tc"}], + ['u2', '-a -4,9', {IN=>"a b c"}, {OUT=>"a\tb\tc"}], + ['u3', '-a -11', {IN=>"a b"}, {OUT=>"a\tb"}], + # Test all digits (for full code coverage) + ['u4', '-a -2,6', {IN=>"a b c"}, {OUT=>"a b\tc"}], + ['u5', '-a -7', {IN=>"a b"}, {OUT=>"a\tb"}], + ['u6', '-a -8', {IN=>"a b"}, {OUT=>"a\tb"}], + # This syntax is handled internally as "-3, -9" + ['u7', '-a -3,9', {IN=>"a b c"}, {OUT=>"a\tb\tc"}], + # Default (without -a) is --first-only: + ['u8', '-3', {IN=>" a b"}, {OUT=>"\ta b"}], + + # Arguably this should minimize translation as is done on Solaris. + # I.e., not modify the input. But since the result is equivalent, + # and to be consistent in output with older versions, we output + # a '\t' rather than a space for the second tab position. + # For more detailed comparison with other implementations see: + # http://lists.gnu.org/archive/html/coreutils/2016-06/msg00015.html + # http://lists.gnu.org/archive/html/coreutils/2016-07/msg00011.html + ['ts1', '-t8,9', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t\t y\n"}], + # There is no ambiguity here. This should always be the output. + ['ts2', '-t5,8', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t y\n"}], ); my $save_temps = $ENV{DEBUG}; -- cgit v1.2.3-54-g00ecf