summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAssaf Gordon <assafgordon@gmail.com>2016-06-24 21:48:29 -0400
committerPádraig Brady <P@draigBrady.com>2016-08-09 11:31:00 +0100
commitff2178bf30e3eda566cc1d1670768c6d6694a8ac (patch)
treee8e7c34c1250f671ca3b700799e96b428348138a
parent1c17f61ef993a5ee5fb0d3bc47b7b25782ae386c (diff)
downloadcoreutils-ff2178bf30e3eda566cc1d1670768c6d6694a8ac.tar.xz
maint: refactor common expand(1) and unexpand(1) code
* src/expand.c, src/unexpand.c: Move global variables from here... * src/expand-common.h, src/expand-common.c: ... to here. * src/expand.c, src/unexpand.c: (parse_tab_stops, validate_tab_stops, next_file): Move identical functions to new module. (add_tab_stop): Move to new module, including additional code from 'unexpand' (keeping max_column_width) which will have no effect when used in 'expand'. Refactor common next-column calculation code into a new function 'get_next_tab_column'. * src/local.mk: (src_expand_SOURCES, src_unexpand_SOURCES): Add 'expand-common.c'; (noinst_HEADERS): Add 'expand-common.h'. * po/POTFILES.in: Add 'expand-common.c'. * tests/misc/expand.pl: Add more tests. * tests/misc/unexpand.pl: Likewise. * TODO: Move conclusions to above test after investigation.
-rw-r--r--TODO4
-rw-r--r--po/POTFILES.in1
-rw-r--r--src/expand-common.c274
-rw-r--r--src/expand-common.h72
-rw-r--r--src/expand.c212
-rw-r--r--src/local.mk4
-rw-r--r--src/unexpand.c230
-rwxr-xr-xtests/misc/expand.pl120
-rwxr-xr-xtests/misc/unexpand.pl36
9 files changed, 529 insertions, 424 deletions
diff --git a/TODO b/TODO
index de95e5a64..dc1a9e2a2 100644
--- a/TODO
+++ b/TODO
@@ -67,10 +67,6 @@ lib/strftime.c: Since %N is the only format that we need but that
would expand /%(-_)?\d*N/ to the desired string and then pass the
resulting string to glibc's strftime.
-unexpand: [http://www.opengroup.org/onlinepubs/007908799/xcu/unexpand.html]
- printf 'x\t \t y\n'|unexpand -t 8,9 should print its input, unmodified.
- printf 'x\t \t y\n'|unexpand -t 5,8 should print "x\ty\n"
-
sort: Investigate better sorting algorithms; see Knuth vol. 3.
We tried list merge sort, but it was about 50% slower than the
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 7583f635d..b8a28bdee 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -57,6 +57,7 @@ src/dirname.c
src/du.c
src/echo.c
src/env.c
+src/expand-common.c
src/expand.c
src/expr.c
src/factor.c
diff --git a/src/expand-common.c b/src/expand-common.c
new file mode 100644
index 000000000..41d7003a4
--- /dev/null
+++ b/src/expand-common.c
@@ -0,0 +1,274 @@
+/* expand-common - common functionality for expand/unexpand
+ Copyright (C) 1989-2016 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <sys/types.h>
+#include "system.h"
+#include "error.h"
+#include "fadvise.h"
+#include "quote.h"
+#include "xstrndup.h"
+
+#include "expand-common.h"
+
+/* If true, convert blanks even after nonblank characters have been
+ read on the line. */
+bool convert_entire_line = false;
+
+/* If nonzero, the size of all tab stops. If zero, use 'tab_list' instead.
+   Set by finalize_tab_stops. */
+static uintmax_t tab_size = 0;
+
+/* The maximum distance between tab stops.  Updated by add_tab_stop as
+   stops are added; set to 8 by finalize_tab_stops when no tab stop
+   was given. */
+size_t max_column_width;
+
+/* Array of the explicit column numbers of the tab stops.
+   After 'tab_list' is exhausted, get_next_tab_column reports it through
+   its LAST_TAB argument: expand then replaces each additional tab
+   by a space, while unexpand stops converting.
+   The first column is column 0. */
+static uintmax_t *tab_list = NULL;
+
+/* The number of allocated entries in 'tab_list'. */
+static size_t n_tabs_allocated = 0;
+
+/* The index of the first invalid element of 'tab_list',
+ where the next element can be added. */
+static size_t first_free_tab = 0;
+
+/* Null-terminated array of input filenames; installed by set_file_list
+   and consumed, one entry per call, by next_file. */
+static char **file_list = NULL;
+
+/* Default for 'file_list' if no files are given on the command line. */
+static char *stdin_argv[] =
+{
+ (char *) "-", NULL
+};
+
+/* True if we have ever read standard input. */
+static bool have_read_stdin = false;
+
+/* The desired exit status.  next_file sets it to EXIT_FAILURE when a
+   file cannot be opened, read or closed. */
+int exit_status = EXIT_SUCCESS;
+
+
+
+/* Add tab stop TABVAL to the end of 'tab_list', reallocating the
+   array with X2NREALLOC when every allocated entry is in use.
+   Also raise 'max_column_width' to the distance between TABVAL and
+   the previously added stop (or column 0 for the first stop) when
+   that distance is larger, exiting with a diagnostic if it exceeds
+   SIZE_MAX.  Ordering is not checked here: a TABVAL below the
+   previous stop simply counts as a width of 0. */
+extern void
+add_tab_stop (uintmax_t tabval)
+{
+ uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
+ uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
+
+ if (first_free_tab == n_tabs_allocated)
+ tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
+ tab_list[first_free_tab++] = tabval;
+
+ if (max_column_width < column_width)
+ {
+ if (SIZE_MAX < column_width) /* would not fit in the size_t global */
+ error (EXIT_FAILURE, 0, _("tabs are too far apart"));
+ max_column_width = column_width;
+ }
+}
+
+/* Add the comma or blank separated list of tab stops STOPS
+ to the list of tab stops.
+   Each run of decimal digits is accumulated into one value and handed
+   to add_tab_stop; empty fields (consecutive separators) add nothing.
+   A value that overflows uintmax_t is diagnosed and scanning resumes
+   after it; any other character is diagnosed and ends the scan.
+   If any diagnostic was issued, exit with EXIT_FAILURE. */
+extern void
+parse_tab_stops (char const *stops)
+{
+ bool have_tabval = false;
+ uintmax_t tabval IF_LINT ( = 0);
+ char const *num_start IF_LINT ( = NULL);
+ bool ok = true;
+
+ for (; *stops; stops++)
+ {
+ if (*stops == ',' || isblank (to_uchar (*stops)))
+ {
+ if (have_tabval)
+ add_tab_stop (tabval);
+ have_tabval = false;
+ }
+ else if (ISDIGIT (*stops))
+ {
+ if (!have_tabval)
+ {
+ tabval = 0;
+ have_tabval = true;
+ num_start = stops; /* remembered for the overflow diagnostic */
+ }
+
+ /* Detect overflow. */
+ if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
+ {
+ size_t len = strspn (num_start, "0123456789");
+ char *bad_num = xstrndup (num_start, len);
+ error (0, 0, _("tab stop is too large %s"), quote (bad_num));
+ free (bad_num);
+ ok = false;
+ stops = num_start + len - 1; /* last digit; the loop's stops++ steps past it */
+ }
+ }
+ else
+ {
+ error (0, 0, _("tab size contains invalid character(s): %s"),
+ quote (stops));
+ ok = false;
+ break;
+ }
+ }
+
+ if (!ok)
+ exit (EXIT_FAILURE);
+
+ if (have_tabval) /* the final value has no trailing separator */
+ add_tab_stop (tabval);
+}
+
+/* Check that the list of tab stops TABS, with ENTRIES entries,
+ contains only nonzero, strictly ascending values.
+   Exit with a diagnostic on the first entry that is zero or that is
+   not greater than the entry before it.  An empty list is accepted. */
+
+static void
+validate_tab_stops (uintmax_t const *tabs, size_t entries)
+{
+ uintmax_t prev_tab = 0;
+ size_t i;
+
+ for (i = 0; i < entries; i++)
+ {
+ if (tabs[i] == 0) /* tested first so that 0 gets its own message */
+ error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
+ if (tabs[i] <= prev_tab)
+ error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
+ prev_tab = tabs[i];
+ }
+}
+
+/* Called after all command-line options have been parsed,
+ and add_tab_stop/parse_tab_stops have been called.
+ Will validate the tab-stop values (exiting with a diagnostic if they
+ are invalid), and set the final values to:
+ tab-stops = 8 (if no tab-stops given on command line);
+ 'max_column_width' is set to 8 as well in this case.
+ tab-stops = N (if value N specified as the only value).
+ tab-stops = distinct values given on command line (if multiple values given);
+ 'tab_size' is then 0 so that 'tab_list' is consulted instead.
+*/
+extern void
+finalize_tab_stops (void)
+{
+ validate_tab_stops (tab_list, first_free_tab);
+
+ if (first_free_tab == 0)
+ tab_size = max_column_width = 8;
+ else if (first_free_tab == 1)
+ tab_size = tab_list[0];
+ else
+ tab_size = 0;
+}
+
+/* Return the column of the first tab stop that lies beyond COLUMN.
+   With a single tab size this is the next multiple of 'tab_size';
+   the sum is unsigned and can wrap for a huge COLUMN (expand checks
+   for a result below COLUMN).
+   With an explicit tab list, advance *TAB_INDEX past every stop that
+   is not beyond COLUMN and return the first stop that is, leaving
+   *TAB_INDEX at that stop.  If the list holds no such stop, set
+   *LAST_TAB to true and return 0; in all other cases *LAST_TAB is
+   set to false. */
+extern uintmax_t
+get_next_tab_column (const uintmax_t column, size_t* tab_index,
+ bool* last_tab)
+{
+ *last_tab = false;
+
+ /* single tab-size - return multiples of it */
+ if (tab_size)
+ return column + (tab_size - column % tab_size);
+
+ /* multiple tab-sizes - iterate them until the tab position is beyond
+ the current input column. */
+ for ( ; *tab_index < first_free_tab ; (*tab_index)++ )
+ {
+ uintmax_t tab = tab_list[*tab_index];
+ if (column < tab)
+ return tab; /* *tab_index is not advanced past the returned stop */
+ }
+
+ *last_tab = true; /* tab list exhausted; the caller decides what to do */
+ return 0;
+}
+
+
+
+
+/* Set the list of input files that next_file will iterate over.
+   LIST is a NULL-terminated array of file names; if LIST itself is
+   NULL, the default list containing only "-" (standard input) is used.
+   Also reset the record of whether standard input has been read. */
+extern void
+set_file_list (char **list)
+{
+ have_read_stdin = false;
+
+ if (!list)
+ file_list = stdin_argv;
+ else
+ file_list = list;
+}
+
+/* Close the old stream pointer FP if it is non-NULL,
+ and return a new one opened to read the next input file.
+ Open a filename of '-' as the standard input.
+ Return NULL if there are no more input files.
+   Standard input is not closed here; only its error and end-of-file
+   indicators are cleared, and 'have_read_stdin' records its use.
+   Read, close and open failures are reported with error () and
+   recorded in 'exit_status'; a file that cannot be opened is skipped
+   and the next one is tried. */
+
+extern FILE *
+next_file (FILE *fp)
+{
+ static char *prev_file; /* name FP was opened from, kept across calls */
+ char *file;
+
+ if (fp)
+ {
+ if (ferror (fp))
+ {
+ error (0, errno, "%s", quotef (prev_file));
+ exit_status = EXIT_FAILURE;
+ }
+ if (STREQ (prev_file, "-"))
+ clearerr (fp); /* Also clear EOF. */
+ else if (fclose (fp) != 0)
+ {
+ error (0, errno, "%s", quotef (prev_file));
+ exit_status = EXIT_FAILURE;
+ }
+ }
+
+ while ((file = *file_list++) != NULL) /* consumes one list entry per iteration */
+ {
+ if (STREQ (file, "-"))
+ {
+ have_read_stdin = true;
+ fp = stdin;
+ }
+ else
+ fp = fopen (file, "r");
+ if (fp)
+ {
+ prev_file = file;
+ fadvise (fp, FADVISE_SEQUENTIAL);
+ return fp;
+ }
+ error (0, errno, "%s", quotef (file)); /* open failed; try the next file */
+ exit_status = EXIT_FAILURE;
+ }
+ return NULL;
+}
+
+/* Close standard input if next_file ever read from it.
+   If closing fails, report the error against the name "-" and exit
+   with EXIT_FAILURE.  Do nothing if standard input was never read. */
+extern void
+cleanup_file_list_stdin (void)
+{
+ if (have_read_stdin && fclose (stdin) != 0)
+ error (EXIT_FAILURE, errno, "-");
+}
diff --git a/src/expand-common.h b/src/expand-common.h
new file mode 100644
index 000000000..8cb207997
--- /dev/null
+++ b/src/expand-common.h
@@ -0,0 +1,72 @@
+/* expand-common - common functionality for expand/unexpand
+
+ Copyright (C) 1989-2016 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* If true, convert blanks even after nonblank characters have been
+ read on the line. */
+extern bool convert_entire_line;
+
+/* The maximum distance between tab stops. */
+extern size_t max_column_width;
+
+/* Null-terminated array of input filenames. */
+//extern char **file_list;
+
+/* Default for 'file_list' if no files are given on the command line. */
+//extern char *stdin_argv[];
+
+/* True if we have ever read standard input. */
+//extern bool have_read_stdin;
+
+/* The desired exit status. */
+extern int exit_status;
+
+/* Add tab stop TABVAL to the end of 'tab_list'. */
+extern void
+add_tab_stop (uintmax_t tabval);
+
+/* Add the comma or blank separated list of tab stops STOPS
+ to the list of tab stops. */
+extern void
+parse_tab_stops (char const *stops);
+
+/* Return the column of the first tab stop beyond COLUMN.
+   When an explicit list of tab stops is in use, *TAB_INDEX is the
+   position in that list to start searching from and is advanced past
+   the stops that are not beyond COLUMN.  *LAST_TAB is set to true,
+   and 0 is returned, when the list has no stop beyond COLUMN;
+   otherwise *LAST_TAB is set to false. */
+extern uintmax_t
+get_next_tab_column (const uintmax_t column, size_t* tab_index,
+ bool* last_tab);
+
+/* Called after all command-line options have been parsed,
+ sets the final tab-stops values */
+extern void
+finalize_tab_stops (void);
+
+
+
+
+/* Set the NULL-terminated list of input file names that next_file
+   iterates over.  A NULL FILE_LIST selects the default list, which
+   contains only "-" (standard input). */
+extern void
+set_file_list (char **file_list);
+
+/* Close the old stream pointer FP if it is non-NULL,
+ and return a new one opened to read the next input file.
+ Open a filename of '-' as the standard input.
+ Return NULL if there are no more input files. */
+extern FILE *
+next_file (FILE *fp);
+
+/* Close standard input if next_file ever read from it,
+   exiting with a failure status if the close fails. */
+extern void
+cleanup_file_list_stdin (void);
diff --git a/src/expand.c b/src/expand.c
index e11f1afc7..9f84de825 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -39,49 +39,15 @@
#include <sys/types.h>
#include "system.h"
#include "error.h"
-#include "fadvise.h"
-#include "quote.h"
#include "xstrndup.h"
+#include "expand-common.h"
+
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "expand"
#define AUTHORS proper_name ("David MacKenzie")
-/* If true, convert blanks even after nonblank characters have been
- read on the line. */
-static bool convert_entire_line;
-
-/* If nonzero, the size of all tab stops. If zero, use 'tab_list' instead. */
-static uintmax_t tab_size;
-
-/* Array of the explicit column numbers of the tab stops;
- after 'tab_list' is exhausted, each additional tab is replaced
- by a space. The first column is column 0. */
-static uintmax_t *tab_list;
-
-/* The number of allocated entries in 'tab_list'. */
-static size_t n_tabs_allocated;
-
-/* The index of the first invalid element of 'tab_list',
- where the next element can be added. */
-static size_t first_free_tab;
-
-/* Null-terminated array of input filenames. */
-static char **file_list;
-
-/* Default for 'file_list' if no files are given on the command line. */
-static char *stdin_argv[] =
-{
- (char *) "-", NULL
-};
-
-/* True if we have ever read standard input. */
-static bool have_read_stdin;
-
-/* The desired exit status. */
-static int exit_status;
-
static char const shortopts[] = "it:0::1::2::3::4::5::6::7::8::9::";
static struct option const longopts[] =
@@ -125,137 +91,6 @@ Convert tabs in each FILE to spaces, writing to standard output.\n\
exit (status);
}
-/* Add tab stop TABVAL to the end of 'tab_list'. */
-
-static void
-add_tab_stop (uintmax_t tabval)
-{
- if (first_free_tab == n_tabs_allocated)
- tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
- tab_list[first_free_tab++] = tabval;
-}
-
-/* Add the comma or blank separated list of tab stops STOPS
- to the list of tab stops. */
-
-static void
-parse_tab_stops (char const *stops)
-{
- bool have_tabval = false;
- uintmax_t tabval IF_LINT ( = 0);
- char const *num_start IF_LINT ( = NULL);
- bool ok = true;
-
- for (; *stops; stops++)
- {
- if (*stops == ',' || isblank (to_uchar (*stops)))
- {
- if (have_tabval)
- add_tab_stop (tabval);
- have_tabval = false;
- }
- else if (ISDIGIT (*stops))
- {
- if (!have_tabval)
- {
- tabval = 0;
- have_tabval = true;
- num_start = stops;
- }
-
- /* Detect overflow. */
- if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
- {
- size_t len = strspn (num_start, "0123456789");
- char *bad_num = xstrndup (num_start, len);
- error (0, 0, _("tab stop is too large %s"), quote (bad_num));
- free (bad_num);
- ok = false;
- stops = num_start + len - 1;
- }
- }
- else
- {
- error (0, 0, _("tab size contains invalid character(s): %s"),
- quote (stops));
- ok = false;
- break;
- }
- }
-
- if (!ok)
- exit (EXIT_FAILURE);
-
- if (have_tabval)
- add_tab_stop (tabval);
-}
-
-/* Check that the list of tab stops TABS, with ENTRIES entries,
- contains only nonzero, ascending values. */
-
-static void
-validate_tab_stops (uintmax_t const *tabs, size_t entries)
-{
- uintmax_t prev_tab = 0;
- size_t i;
-
- for (i = 0; i < entries; i++)
- {
- if (tabs[i] == 0)
- error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
- if (tabs[i] <= prev_tab)
- error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
- prev_tab = tabs[i];
- }
-}
-
-/* Close the old stream pointer FP if it is non-NULL,
- and return a new one opened to read the next input file.
- Open a filename of '-' as the standard input.
- Return NULL if there are no more input files. */
-
-static FILE *
-next_file (FILE *fp)
-{
- static char *prev_file;
- char *file;
-
- if (fp)
- {
- if (ferror (fp))
- {
- error (0, errno, "%s", quotef (prev_file));
- exit_status = EXIT_FAILURE;
- }
- if (STREQ (prev_file, "-"))
- clearerr (fp); /* Also clear EOF. */
- else if (fclose (fp) != 0)
- {
- error (0, errno, "%s", quotef (prev_file));
- exit_status = EXIT_FAILURE;
- }
- }
-
- while ((file = *file_list++) != NULL)
- {
- if (STREQ (file, "-"))
- {
- have_read_stdin = true;
- fp = stdin;
- }
- else
- fp = fopen (file, "r");
- if (fp)
- {
- prev_file = file;
- fadvise (fp, FADVISE_SEQUENTIAL);
- return fp;
- }
- error (0, errno, "%s", quotef (file));
- exit_status = EXIT_FAILURE;
- }
- return NULL;
-}
/* Change tabs to spaces, writing to stdout.
Read each file in 'file_list', in order. */
@@ -301,25 +136,13 @@ expand (void)
{
/* Column the next input tab stop is on. */
uintmax_t next_tab_column;
+ bool last_tab IF_LINT (=0);
- if (tab_size)
- next_tab_column = column + (tab_size - column % tab_size);
- else
- while (true)
- if (tab_index == first_free_tab)
- {
- next_tab_column = column + 1;
- break;
- }
- else
- {
- uintmax_t tab = tab_list[tab_index++];
- if (column < tab)
- {
- next_tab_column = tab;
- break;
- }
- }
+ next_tab_column = get_next_tab_column (column, &tab_index,
+ &last_tab);
+
+ if (last_tab)
+ next_tab_column = column + 1;
if (next_tab_column < column)
error (EXIT_FAILURE, 0, _("input line is too long"));
@@ -369,12 +192,7 @@ main (int argc, char **argv)
textdomain (PACKAGE);
atexit (close_stdout);
-
- have_read_stdin = false;
- exit_status = EXIT_SUCCESS;
convert_entire_line = true;
- tab_list = NULL;
- first_free_tab = 0;
while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
{
@@ -410,21 +228,13 @@ main (int argc, char **argv)
}
}
- validate_tab_stops (tab_list, first_free_tab);
-
- if (first_free_tab == 0)
- tab_size = 8;
- else if (first_free_tab == 1)
- tab_size = tab_list[0];
- else
- tab_size = 0;
+ finalize_tab_stops ();
- file_list = (optind < argc ? &argv[optind] : stdin_argv);
+ set_file_list ( (optind < argc) ? &argv[optind] : NULL);
expand ();
- if (have_read_stdin && fclose (stdin) != 0)
- error (EXIT_FAILURE, errno, "-");
+ cleanup_file_list_stdin ();
return exit_status;
}
diff --git a/src/local.mk b/src/local.mk
index 12a0d55d4..6afaf836a 100644
--- a/src/local.mk
+++ b/src/local.mk
@@ -44,6 +44,7 @@ noinst_HEADERS = \
src/copy.h \
src/cp-hash.h \
src/dircolors.h \
+ src/expand-common.h \
src/fiemap.h \
src/find-mount-point.h \
src/fs.h \
@@ -408,6 +409,9 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS)
src_ginstall_CPPFLAGS = -DENABLE_MATCHPATHCON=1 $(AM_CPPFLAGS)
+src_expand_SOURCES = src/expand.c src/expand-common.c
+src_unexpand_SOURCES = src/unexpand.c src/expand-common.c
+
# Ensure we don't link against libcoreutils.a as that lib is
# not compiled with -fPIC which causes issues on 64 bit at least
src_libstdbuf_so_LDADD = $(LIBINTL)
diff --git a/src/unexpand.c b/src/unexpand.c
index a75875659..a1317b173 100644
--- a/src/unexpand.c
+++ b/src/unexpand.c
@@ -40,51 +40,16 @@
#include <sys/types.h>
#include "system.h"
#include "error.h"
-#include "fadvise.h"
-#include "quote.h"
#include "xstrndup.h"
+#include "expand-common.h"
+
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "unexpand"
#define AUTHORS proper_name ("David MacKenzie")
-/* If true, convert blanks even after nonblank characters have been
- read on the line. */
-static bool convert_entire_line;
-
-/* If nonzero, the size of all tab stops. If zero, use 'tab_list' instead. */
-static size_t tab_size;
-
-/* The maximum distance between tab stops. */
-static size_t max_column_width;
-
-/* Array of the explicit column numbers of the tab stops;
- after 'tab_list' is exhausted, the rest of the line is printed
- unchanged. The first column is column 0. */
-static uintmax_t *tab_list;
-
-/* The number of allocated entries in 'tab_list'. */
-static size_t n_tabs_allocated;
-
-/* The index of the first invalid element of 'tab_list',
- where the next element can be added. */
-static size_t first_free_tab;
-/* Null-terminated array of input filenames. */
-static char **file_list;
-
-/* Default for 'file_list' if no files are given on the command line. */
-static char *stdin_argv[] =
-{
- (char *) "-", NULL
-};
-
-/* True if we have ever read standard input. */
-static bool have_read_stdin;
-
-/* The desired exit status. */
-static int exit_status;
/* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
@@ -134,148 +99,6 @@ Convert blanks in each FILE to tabs, writing to standard output.\n\
exit (status);
}
-/* Add tab stop TABVAL to the end of 'tab_list'. */
-
-static void
-add_tab_stop (uintmax_t tabval)
-{
- uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
- uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
-
- if (first_free_tab == n_tabs_allocated)
- tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
- tab_list[first_free_tab++] = tabval;
-
- if (max_column_width < column_width)
- {
- if (SIZE_MAX < column_width)
- error (EXIT_FAILURE, 0, _("tabs are too far apart"));
- max_column_width = column_width;
- }
-}
-
-/* Add the comma or blank separated list of tab stops STOPS
- to the list of tab stops. */
-
-static void
-parse_tab_stops (char const *stops)
-{
- bool have_tabval = false;
- uintmax_t tabval IF_LINT ( = 0);
- char const *num_start IF_LINT ( = NULL);
- bool ok = true;
-
- for (; *stops; stops++)
- {
- if (*stops == ',' || isblank (to_uchar (*stops)))
- {
- if (have_tabval)
- add_tab_stop (tabval);
- have_tabval = false;
- }
- else if (ISDIGIT (*stops))
- {
- if (!have_tabval)
- {
- tabval = 0;
- have_tabval = true;
- num_start = stops;
- }
-
- /* Detect overflow. */
- if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
- {
- size_t len = strspn (num_start, "0123456789");
- char *bad_num = xstrndup (num_start, len);
- error (0, 0, _("tab stop is too large %s"), quote (bad_num));
- free (bad_num);
- ok = false;
- stops = num_start + len - 1;
- }
- }
- else
- {
- error (0, 0, _("tab size contains invalid character(s): %s"),
- quote (stops));
- ok = false;
- break;
- }
- }
-
- if (!ok)
- exit (EXIT_FAILURE);
-
- if (have_tabval)
- add_tab_stop (tabval);
-}
-
-/* Check that the list of tab stops TABS, with ENTRIES entries,
- contains only nonzero, ascending values. */
-
-static void
-validate_tab_stops (uintmax_t const *tabs, size_t entries)
-{
- uintmax_t prev_tab = 0;
- size_t i;
-
- for (i = 0; i < entries; i++)
- {
- if (tabs[i] == 0)
- error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
- if (tabs[i] <= prev_tab)
- error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
- prev_tab = tabs[i];
- }
-}
-
-/* Close the old stream pointer FP if it is non-NULL,
- and return a new one opened to read the next input file.
- Open a filename of '-' as the standard input.
- Return NULL if there are no more input files. */
-
-static FILE *
-next_file (FILE *fp)
-{
- static char *prev_file;
- char *file;
-
- if (fp)
- {
- if (ferror (fp))
- {
- error (0, errno, "%s", quotef (prev_file));
- exit_status = EXIT_FAILURE;
- }
- if (STREQ (prev_file, "-"))
- clearerr (fp); /* Also clear EOF. */
- else if (fclose (fp) != 0)
- {
- error (0, errno, "%s", quotef (prev_file));
- exit_status = EXIT_FAILURE;
- }
- }
-
- while ((file = *file_list++) != NULL)
- {
- if (STREQ (file, "-"))
- {
- have_read_stdin = true;
- fp = stdin;
- }
- else
- fp = fopen (file, "r");
- if (fp)
- {
- prev_file = file;
- fadvise (fp, FADVISE_SEQUENTIAL);
- return fp;
- }
- error (0, errno, "%s", quotef (file));
- exit_status = EXIT_FAILURE;
- }
- return NULL;
-}
-
/* Change blanks to tabs, writing to stdout.
Read each file in 'file_list', in order. */
@@ -344,28 +167,13 @@ unexpand (void)
if (blank)
{
- if (next_tab_column <= column)
- {
- if (tab_size)
- next_tab_column =
- column + (tab_size - column % tab_size);
- else
- while (true)
- if (tab_index == first_free_tab)
- {
- convert = false;
- break;
- }
- else
- {
- uintmax_t tab = tab_list[tab_index++];
- if (column < tab)
- {
- next_tab_column = tab;
- break;
- }
- }
- }
+ bool last_tab IF_LINT (=0);
+
+ next_tab_column = get_next_tab_column (column, &tab_index,
+ &last_tab);
+
+ if (last_tab)
+ convert = false;
if (convert)
{
@@ -464,12 +272,6 @@ main (int argc, char **argv)
atexit (close_stdout);
- have_read_stdin = false;
- exit_status = EXIT_SUCCESS;
- convert_entire_line = false;
- tab_list = NULL;
- first_free_tab = 0;
-
while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
!= -1)
{
@@ -512,21 +314,13 @@ main (int argc, char **argv)
if (have_tabval)
add_tab_stop (tabval);
- validate_tab_stops (tab_list, first_free_tab);
-
- if (first_free_tab == 0)
- tab_size = max_column_width = 8;
- else if (first_free_tab == 1)
- tab_size = tab_list[0];
- else
- tab_size = 0;
+ finalize_tab_stops ();
- file_list = (optind < argc ? &argv[optind] : stdin_argv);
+ set_file_list ( (optind < argc) ? &argv[optind] : NULL);
unexpand ();
- if (have_read_stdin && fclose (stdin) != 0)
- error (EXIT_FAILURE, errno, "-");
+ cleanup_file_list_stdin ();
return exit_status;
}
diff --git a/tests/misc/expand.pl b/tests/misc/expand.pl
index a6b4a4d61..8a9cad144 100755
--- a/tests/misc/expand.pl
+++ b/tests/misc/expand.pl
@@ -18,7 +18,11 @@
use strict;
+my $limits = getlimits ();
+my $UINTMAX_OFLOW = $limits->{UINTMAX_OFLOW};
+
(my $program_name = $0) =~ s|.*/||;
+my $prog = 'expand';
# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
@@ -27,13 +31,127 @@ my @Tests =
(
['t1', '--tabs=3', {IN=>"a\tb"}, {OUT=>"a b"}],
['t2', '--tabs=3,6,9', {IN=>"a\tb\tc\td\te"}, {OUT=>"a b c d e"}],
+ ['t3', '--tabs="3 6 9"', {IN=>"a\tb\tc\td\te"}, {OUT=>"a b c d e"}],
+ # Leading space/commas are silently ignored; Mixing space/commas is allowed.
+ # (a side-effect of allowing direct "-3,9" parameter).
+ ['t4', '--tabs=", 3,6 9"', {IN=>"a\tb\tc\td\te"}, {OUT=>"a b c d e"}],
+ # tab stops parameter without values
+ ['t5', '--tabs=""', {IN=>"a\tb\tc"}, {OUT=>"a b c"}],
+ ['t6', '--tabs=","', {IN=>"a\tb\tc"}, {OUT=>"a b c"}],
+ ['t7', '--tabs=" "', {IN=>"a\tb\tc"}, {OUT=>"a b c"}],
+
+ # Input field wider than the specified tab list
+ ['t8', '--tabs=6,9', {IN=>"a\tbbbbbbbbbbbbb\tc"},
+ {OUT=>"a bbbbbbbbbbbbb c"}],
+
['i1', '--tabs=3 -i', {IN=>"\ta\tb"}, {OUT=>" a\tb"}],
['i2', '--tabs=3 -i', {IN=>" \ta\tb"}, {OUT=>" a\tb"}],
+
+ # Undocumented feature:
+ # treat "expand -7" as "expand --tabs 7" ,
+ # and "expand -90" as "expand --tabs 90",
+ ['u1', '-3', {IN=>"a\tb\tc"}, {OUT=>"a b c"}],
+ ['u2', '-4 -9', {IN=>"a\tb\tc"}, {OUT=>"a b c"}],
+ ['u3', '-11', {IN=>"a\tb\tc"}, {OUT=>"a b c"}],
+ # Test all digits (for full code coverage)
+ ['u4', '-2 -6', {IN=>"a\tb\tc"}, {OUT=>"a b c"}],
+ ['u5', '-7', {IN=>"a\tb"}, {OUT=>"a b"}],
+ ['u6', '-8', {IN=>"a\tb"}, {OUT=>"a b"}],
+ # This syntax is handled internally as "-3, -9"
+ ['u7', '-3,9', {IN=>"a\tb\tc"}, {OUT=>"a b c"}],
+
+ # Multiple non-empty files
+ ['f1', '--tabs=4',
+ {IN=>{"in1" => "a\tb\n"}}, {IN=>{"in2" => "c\td\n"}},
+ {OUT=>"a b\nc d\n"}],
+ # Multiple files, first file is empty
+ ['f2', '--tabs=4',
+ {IN=>{"in1" => ""}}, {IN=>{"in2" => "c\td\n"}},
+ {OUT=>"c d\n"}],
+ # Multiple files, second file is empty
+ ['f3', '--tabs=4',
+ {IN=>{"in1" => "a\tb\n"}}, {IN=>{"in2" => ""}},
+ {OUT=>"a b\n"}],
+
+
+ # Test '\b' (back-space) - subtract one column.
+ #
+ # Note:
+ # In a terminal window, 'expand' will appear to erase the 'a' characters
+ # due to overwriting them with spaces:
+ #
+ # $ printf 'aaa\b\b\bc\td\n'
+ # caa d
+ # $ printf 'aaa\b\b\bc\td\n' | expand
+ # c d
+ #
+ # However the characters are all printed:
+ #
+ # $ printf 'aaa\b\b\bc\td\n' | expand | od -An -ta
+ # a a a bs bs bs c sp sp sp sp sp sp sp d nl
+ #
+ # If users ever report a problem with these tests and just
+ # copy&paste from the terminal, their report will be confusing
+ # (the 'a' will not appear).
+ #
+ # To see an example, enable the 'b-confusing' test, and examine the
+ # reported log:
+ #
+ # expand.pl: test b-confusing: stdout mismatch
+ # *** b-confusing.2 Fri Jun 24 15:43:21 2016
+ # --- b-confusing.O Fri Jun 24 15:43:21 2016
+ # ***************
+ # *** 1 ****
+ # ! c d
+ # --- 1 ----
+ # ! c d
+ #
+ # ['b-confusing','', {IN=>"aaa\b\b\bc\td\n"}, {OUT=>"c d\n"}],
+
+ ['b1','', {IN=>"aaa\b\b\bc\td\n"}, {OUT=>"aaa\b\b\bc d\n"}],
+
+ # \b as first character, when column is zero
+ ['b2','', {IN=>"\bc\td"}, {OUT=>"\bc d"}],
+
+ # Testing tab list adjusted due to backspaces
+ # ('b3' is the baseline without backspaces).
+ ['b3','--tabs 2,4,6,10',
+ {IN=>"1\t2\t3\t4\t5\n" .
+ "a\tb\tc\td\te\n"},
+ {OUT=>"1 2 3 4 5\n" .
+ "a b c d e\n"}],
+
+ # On screen this will appear the same as 'b3'
+ ['b4','--tabs 2,4,6,10',
+ {IN=>"1\t2\t3\t4\t5\n" .
+ "a\tbHELLO\b\b\b\b\b\tc\td\te\n"},
+ {OUT=>"1 2 3 4 5\n" .
+ "a bHELLO\b\b\b\b\b c d e\n"}],
+
+ # On screen only 'bHE' will appear (LLO overwritten by spaces),
+ # 'c' should align with 4, 'd' with 5:
+ # 1 2 3 4 5
+ # a bHE c d e
+ ['b5','--tabs 2,4,6,10',
+ {IN=>"1\t2\t3\t4\t5\n" .
+ "a\tbHELLO\b\b\b\tc\td\te\n"},
+ {OUT=>"1 2 3 4 5\n" .
+ "a bHELLO\b\b\b c d e\n"}],
+
+
+ # Test errors
+ ['e1', '--tabs="a"', {IN=>''}, {OUT=>''}, {EXIT=>1},
+ {ERR => "$prog: tab size contains invalid character(s): 'a'\n"}],
+ ['e2', "-t $UINTMAX_OFLOW", {IN=>''}, {OUT=>''}, {EXIT=>1},
+ {ERR => "$prog: tab stop is too large '$UINTMAX_OFLOW'\n"}],
+ ['e3', '--tabs=0', {IN=>''}, {OUT=>''}, {EXIT=>1},
+ {ERR => "$prog: tab size cannot be 0\n"}],
+ ['e4', '--tabs=3,3', {IN=>''}, {OUT=>''}, {EXIT=>1},
+ {ERR => "$prog: tab sizes must be ascending\n"}],
);
my $save_temps = $ENV{DEBUG};
my $verbose = $ENV{VERBOSE};
-my $prog = 'expand';
my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
exit $fail;
diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl
index c592c5a52..6ba6d405c 100755
--- a/tests/misc/unexpand.pl
+++ b/tests/misc/unexpand.pl
@@ -90,6 +90,42 @@ my @Tests =
# setting of e.g., _POSIX2_VERSION=1.
['obs-ovflo', "-$limits->{UINTMAX_OFLOW}", {IN=>''}, {OUT=>''},
{EXIT => 1}, {ERR => "$prog: tab stop value is too large\n"}],
+
+
+ # Test input with backspaces '\b' ('bs1' is the baseline, without \b)
+ # Note: If users report errors in these tests, copy&pasting results from
+ # their terminal output might be confusing due to '\b' overriding
+ # characters. For details see '\b' tests in 'expand.pl'.
+ ['bs1', '-a -t4', {IN=>"aa c\n"}, {OUT=>"aa\tc\n"}],
+ ['bs2', '-a -t4', {IN=>"aa\b c\n"}, {OUT=>"aa\b c\n"}],
+ ['bs3', '-a -t4', {IN=>"aa\b c\n"}, {OUT=>"aa\b\tc\n"}],
+ ['bs4', '-a -t3', {IN=>"aa\b c\n"}, {OUT=>"aa\b\tc\n"}],
+
+ # Undocumented feature:
+ # treat "unexpand -7" as "unexpand --first-only --tabs 7" ,
+ # and "unexpand -90" as "unexpand --first-only --tabs 90",
+ ['u1', '-a -3', {IN=>"a b c"}, {OUT=>"a\tb\tc"}],
+ ['u2', '-a -4,9', {IN=>"a b c"}, {OUT=>"a\tb\tc"}],
+ ['u3', '-a -11', {IN=>"a b"}, {OUT=>"a\tb"}],
+ # Test all digits (for full code coverage)
+ ['u4', '-a -2,6', {IN=>"a b c"}, {OUT=>"a b\tc"}],
+ ['u5', '-a -7', {IN=>"a b"}, {OUT=>"a\tb"}],
+ ['u6', '-a -8', {IN=>"a b"}, {OUT=>"a\tb"}],
+ # This syntax is handled internally as "-3, -9"
+ ['u7', '-a -3,9', {IN=>"a b c"}, {OUT=>"a\tb\tc"}],
+ # Default (without -a) is --first-only:
+ ['u8', '-3', {IN=>" a b"}, {OUT=>"\ta b"}],
+
+ # Arguably this should minimize translation as is done on Solaris.
+ # I.e., not modify the input. But since the result is equivalent,
+ # and to be consistent in output with older versions, we output
+ # a '\t' rather than a space for the second tab position.
+ # For more detailed comparison with other implementations see:
+ # http://lists.gnu.org/archive/html/coreutils/2016-06/msg00015.html
+ # http://lists.gnu.org/archive/html/coreutils/2016-07/msg00011.html
+ ['ts1', '-t8,9', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t\t y\n"}],
+ # There is no ambiguity here. This should always be the output.
+ ['ts2', '-t5,8', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t y\n"}],
);
my $save_temps = $ENV{DEBUG};