From f55c9b41a254e3a7d37ae1d10491a410e22ff6b7 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 3 Aug 2004 23:27:20 +0000 Subject: Int cleanup and minor reorganization to be more like src/expand.c. Include quote.h, xstrndup.h. (TAB_STOP_SENTINEL): Increase from INT_MAX to INTMAX_MAX. (convert_entire_line, have_read_stdin, parse_tabstops, next_file, unexpand, main): Use bool for booleans. (tab_size, tab_list, add_tabstop, validate_tabstops, unexpand): Use uintmax_t for column counts. (first_free_tab, validate_tabstops, unexpand): Use size_t for sizes. (add_tabstop, parse_tabstops, main): Don't reserve UINTMAX_MAX as a tab stop. (parse_tabstops): Don't use ISBLANK on possibly-signed char. Detect overflow in tab stop string. (next_file, main): Use EXIT_FAILURE/EXIT_SUCCESS instead of 1/0. (unexpand): Concatenate input files the same way expand does. --- src/unexpand.c | 285 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 160 insertions(+), 125 deletions(-) diff --git a/src/unexpand.c b/src/unexpand.c index 2ac445966..81e405194 100644 --- a/src/unexpand.c +++ b/src/unexpand.c @@ -43,6 +43,8 @@ #include "system.h" #include "error.h" #include "posixver.h" +#include "quote.h" +#include "xstrndup.h" /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME "unexpand" @@ -50,51 +52,51 @@ #define AUTHORS "David MacKenzie" /* The number of bytes added at a time to the amount of memory - allocated for the output line. */ + allocated for the output line. */ #define OUTPUT_BLOCK 256 -/* The number of bytes added at a time to the amount of memory - allocated for the list of tabstops. */ -#define TABLIST_BLOCK 256 - /* A sentinel value that's placed at the end of the list of tab stops. This value must be a large number, but not so large that adding the - length of a line to it would cause the column variable to overflow. */ -#define TAB_STOP_SENTINEL INT_MAX + length of a line to it would cause the column variable to overflow. + FIXME: The algorithm isn't correct once the numbers get large; + also, no error is reported if overflow occurs. */ +#define TAB_STOP_SENTINEL INTMAX_MAX -/* The name this program was run with. */ +/* The name this program was run with. */ char *program_name; -/* If nonzero, convert blanks even after nonblank characters have been - read on the line. */ -static int convert_entire_line; +/* If true, convert blanks even after nonblank characters have been + read on the line. */ +static bool convert_entire_line; -/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */ -static int tab_size; +/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */ +static uintmax_t tab_size; /* Array of the explicit column numbers of the tab stops; after `tab_list' is exhausted, the rest of the line is printed - unchanged. The first column is column 0. */ -static int *tab_list; + unchanged. The first column is column 0. */ +static uintmax_t *tab_list; + +/* The number of allocated entries in `tab_list'. */ static size_t n_tabs_allocated; /* The index of the first invalid element of `tab_list', - where the next element can be added. */ -static int first_free_tab; + where the next element can be added. */ +static size_t first_free_tab; -/* Null-terminated array of input filenames. */ +/* Null-terminated array of input filenames. */ static char **file_list; -/* Default for `file_list' if no files are given on the command line. */ +/* Default for `file_list' if no files are given on the command line. */ static char *stdin_argv[] = { "-", NULL }; -/* Nonzero if we have ever read standard input. */ -static int have_read_stdin; +/* True if we have ever read standard input. */ +static bool have_read_stdin; -/* Status to return to the system. */ +/* The desired exit status. */ static int exit_status; /* For long options that have no equivalent short option, use a @@ -114,55 +116,115 @@ static struct option const longopts[] = {NULL, 0, NULL, 0} }; -/* Add tab stop TABVAL to the end of `tab_list', except - if TABVAL is -1, do nothing. */ +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, _("Try `%s --help' for more information.\n"), + program_name); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Convert spaces in each FILE to tabs, writing to standard output.\n\ +With no FILE, or when FILE is -, read standard input.\n\ +\n\ +"), stdout); + fputs (_("\ +Mandatory arguments to long options are mandatory for short options too.\n\ +"), stdout); + fputs (_("\ + -a, --all convert all whitespace, instead of just initial whitespace\n\ + --first-only convert only leading sequences of whitespace (overrides -a)\n\ + -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\ + -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); + } + exit (status); +} + +/* Add tab stop TABVAL to the end of `tab_list'. */ static void -add_tabstop (int tabval) +add_tabstop (uintmax_t tabval) { - if (tabval == -1) - return; if (first_free_tab == n_tabs_allocated) tab_list = x2nrealloc (tab_list, &n_tabs_allocated, sizeof *tab_list); tab_list[first_free_tab++] = tabval; } /* Add the comma or blank separated list of tabstops STOPS - to the list of tabstops. */ + to the list of tabstops. */ static void -parse_tabstops (const char *stops) +parse_tabstops (char const *stops) { - int tabval = -1; + bool have_tabval = false; + uintmax_t tabval IF_LINT (= 0); + char const *num_start IF_LINT (= NULL); + bool ok = true; for (; *stops; stops++) { - if (*stops == ',' || ISBLANK (*stops)) + if (*stops == ',' || ISBLANK (to_uchar (*stops))) { - add_tabstop (tabval); - tabval = -1; + if (have_tabval) + add_tabstop (tabval); + have_tabval = false; } else if (ISDIGIT (*stops)) { - if (tabval == -1) - tabval = 0; - tabval = tabval * 10 + *stops - '0'; + if (!have_tabval) + { + tabval = 0; + have_tabval = true; + num_start = stops; + } + { + /* Detect overflow. */ + uintmax_t new_t = 10 * tabval + *stops - '0'; + if (UINTMAX_MAX / 10 < tabval || new_t < tabval * 10) + { + size_t len = strspn (num_start, "0123456789"); + char *bad_num = xstrndup (num_start, len); + error (0, 0, _("tab stop is too large %s"), quote (bad_num)); + free (bad_num); + ok = false; + stops = num_start + len - 1; + } + tabval = new_t; + } } else - error (EXIT_FAILURE, 0, _("tab size contains an invalid character")); + { + error (0, 0, _("tab size contains invalid character(s): %s"), + quote (stops)); + ok = false; + break; + } } - add_tabstop (tabval); + if (!ok) + exit (EXIT_FAILURE); + + if (have_tabval) + add_tabstop (tabval); } /* Check that the list of tabstops TABS, with ENTRIES entries, - contains only nonzero, ascending values. */ + contains only nonzero, ascending values. */ static void -validate_tabstops (const int *tabs, int entries) +validate_tabstops (uintmax_t const *tabs, size_t entries) { - int prev_tab = 0; - int i; + uintmax_t prev_tab = 0; + size_t i; for (i = 0; i < entries; i++) { @@ -190,14 +252,14 @@ next_file (FILE *fp) if (ferror (fp)) { error (0, errno, "%s", prev_file); - exit_status = 1; + exit_status = EXIT_FAILURE; } if (fp == stdin) - clearerr (fp); /* Also clear EOF. */ + clearerr (fp); /* Also clear EOF. */ else if (fclose (fp) == EOF) { error (0, errno, "%s", prev_file); - exit_status = 1; + exit_status = EXIT_FAILURE; } } @@ -205,7 +267,7 @@ next_file (FILE *fp) { if (file[0] == '-' && file[1] == '\0') { - have_read_stdin = 1; + have_read_stdin = true; prev_file = file; return stdin; } @@ -216,27 +278,25 @@ next_file (FILE *fp) return fp; } error (0, errno, "%s", file); - exit_status = 1; + exit_status = EXIT_FAILURE; } return NULL; } /* Change spaces to tabs, writing to stdout. - Read each file in `file_list', in order. */ + Read each file in `file_list', in order. */ static void unexpand (void) { - FILE *fp; /* Input stream. */ - int c; /* Each input character. */ - /* Index in `tab_list' of next tabstop: */ - int tab_index = 0; /* For calculating width of pending tabs. */ - int print_tab_index = 0; /* For printing as many tabs as possible. */ - unsigned int column = 0; /* Column on screen of next char. */ - int next_tab_column; /* Column the next tab stop is on. */ - int convert = 1; /* If nonzero, perform translations. */ - unsigned int pending = 0; /* Pending columns of blanks. */ - int saved_errno; + FILE *fp; /* Input stream. */ + size_t tab_index = 0; /* Index in `tab_list' of next tabstop. */ + size_t print_tab_index = 0; /* For printing as many tabs as possible. */ + uintmax_t column = 0; /* Column of next char. */ + uintmax_t next_tab_column; /* Column the next tab stop is on. */ + bool convert = true; /* If true, perform translations. */ + uintmax_t pending = 0; /* Pending columns of blanks. */ + int saved_errno IF_LINT (= 0); fp = next_file ((FILE *) NULL); if (fp == NULL) @@ -247,8 +307,17 @@ unexpand (void) for (;;) { - c = getc (fp); - saved_errno = errno; + int c = getc (fp); + if (c == EOF) + { + fp = next_file (fp); + if (fp) + { + SET_BINARY2 (fileno (fp), STDOUT_FILENO); + continue; + } + saved_errno = errno; + } if (c == ' ' && convert && column < TAB_STOP_SENTINEL) { @@ -260,7 +329,7 @@ unexpand (void) if (tab_size == 0) { /* Do not let tab_index == first_free_tab; - stop when it is 1 less. */ + stop when it is 1 less. */ while (tab_index < first_free_tab - 1 && column >= tab_list[tab_index]) tab_index++; @@ -269,7 +338,7 @@ unexpand (void) tab_index++; if (column >= next_tab_column) { - convert = 0; /* Ran out of tab stops. */ + convert = false; /* Ran out of tab stops. */ goto flush_pend; } } @@ -284,7 +353,7 @@ unexpand (void) { flush_pend: /* Flush pending spaces. Print as many tabs as possible, - then print the rest as spaces. */ + then print the rest as spaces. */ if (pending == 1) { putchar (' '); @@ -296,7 +365,7 @@ unexpand (void) if (tab_size == 0) { /* Do not let print_tab_index == first_free_tab; - stop when it is 1 less. */ + stop when it is 1 less. */ while (print_tab_index < first_free_tab - 1 && column >= tab_list[print_tab_index]) print_tab_index++; @@ -329,14 +398,7 @@ unexpand (void) if (c == EOF) { errno = saved_errno; - fp = next_file (fp); - if (fp == NULL) - break; /* No more files. */ - else - { - SET_BINARY2 (fileno (fp), STDOUT_FILENO); - continue; - } + break; } if (convert) @@ -349,8 +411,7 @@ unexpand (void) else { ++column; - if (convert_entire_line == 0) - convert = 0; + convert &= convert_entire_line; } } @@ -360,54 +421,22 @@ unexpand (void) { tab_index = print_tab_index = 0; column = pending = 0; - convert = 1; + convert = true; } } } } -void -usage (int status) -{ - if (status != EXIT_SUCCESS) - fprintf (stderr, _("Try `%s --help' for more information.\n"), - program_name); - else - { - printf (_("\ -Usage: %s [OPTION]... [FILE]...\n\ -"), - program_name); - fputs (_("\ -Convert spaces in each FILE to tabs, writing to standard output.\n\ -With no FILE, or when FILE is -, read standard input.\n\ -\n\ -"), stdout); - fputs (_("\ -Mandatory arguments to long options are mandatory for short options too.\n\ -"), stdout); - fputs (_("\ - -a, --all convert all whitespace, instead of just initial whitespace\n\ - --first-only convert only leading sequences of whitespace (overrides -a)\n\ - -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\ - -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\ -"), stdout); - fputs (HELP_OPTION_DESCRIPTION, stdout); - fputs (VERSION_OPTION_DESCRIPTION, stdout); - printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); - } - exit (status); -} - int main (int argc, char **argv) { - int tabval = -1; /* Value of tabstop being read, or -1. */ - int c; /* Option character. */ + bool have_tabval = false; + uintmax_t tabval IF_LINT (= 0); + int c; - /* If nonzero, cancel the effect of any -a (explicit or implicit in -t), + /* If true, cancel the effect of any -a (explicit or implicit in -t), so that only leading white space will be considered. */ - int convert_first_only = 0; + bool convert_first_only = false; bool obsolete_tablist = false; @@ -419,9 +448,9 @@ main (int argc, char **argv) atexit (close_stdout); - have_read_stdin = 0; - exit_status = 0; - convert_entire_line = 0; + have_read_stdin = false; + exit_status = EXIT_SUCCESS; + convert_entire_line = false; tab_list = NULL; first_free_tab = 0; @@ -436,25 +465,29 @@ main (int argc, char **argv) case '?': usage (EXIT_FAILURE); case 'a': - convert_entire_line = 1; + convert_entire_line = true; break; case 't': - convert_entire_line = 1; + convert_entire_line = true; parse_tabstops (optarg); break; case CONVERT_FIRST_ONLY_OPTION: - convert_first_only = 1; + convert_first_only = true; break; case ',': - add_tabstop (tabval); - tabval = -1; + if (have_tabval) + add_tabstop (tabval); + have_tabval = false; obsolete_tablist = true; break; case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); default: - if (tabval == -1) - tabval = 0; + if (!have_tabval) + { + tabval = 0; + have_tabval = true; + } tabval = tabval * 10 + c - '0'; obsolete_tablist = true; break; @@ -469,9 +502,10 @@ main (int argc, char **argv) } if (convert_first_only) - convert_entire_line = 0; + convert_entire_line = false; - add_tabstop (tabval); + if (have_tabval) + add_tabstop (tabval); validate_tabstops (tab_list, first_free_tab); @@ -492,5 +526,6 @@ main (int argc, char **argv) if (have_read_stdin && fclose (stdin) == EOF) error (EXIT_FAILURE, errno, "-"); - exit (exit_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE); + + exit (exit_status); } -- cgit v1.2.3-70-g09d2