summaryrefslogtreecommitdiff
path: root/src/fmt.c
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2004-08-03 00:00:49 +0000
committerPaul Eggert <eggert@cs.ucla.edu>2004-08-03 00:00:49 +0000
commitc5c14638098256dcdad1c205299c49812a91e0aa (patch)
treeaa51968ed5f4eb35d45c53a7f67265a4b66daca4 /src/fmt.c
parent71377ec627ac67e7dd2bb0e8911b9d21a5465e45 (diff)
downloadcoreutils-c5c14638098256dcdad1c205299c49812a91e0aa.tar.xz
(COST, MAXWORDS): Add a comment describing some of
fmt's arbitrary limits. (TRUE, FALSE): Remove; all uses changed to (true, false). (main): Use bool for booleans. Limit maximum width to MAXCHARS / 2. Use xstrtoul, not xstrtol, to parse width. (copy_rest): Remove unnecessary cast. (get_prefix): Rewrite to avoid cast. (check_punctuation): Use char *, not unsigned char *; C89 requires this. Avoid off-by-one buffer read overrun when line is empty. (flush_paragraph): Don't assume wptr-parabuf is <= INT_MAX. Remove unnecessary casts.
Diffstat (limited to 'src/fmt.c')
-rw-r--r--src/fmt.c131
1 files changed, 63 insertions, 68 deletions
diff --git a/src/fmt.c b/src/fmt.c
index f14dc3517..be8390c58 100644
--- a/src/fmt.c
+++ b/src/fmt.c
@@ -56,7 +56,12 @@
or too long. The definition of SHORT_COST(n) should not be changed.
However, EQUIV(n) may need tuning. */
-typedef long COST;
+/* FIXME: "fmt" misbehaves given large inputs or options. One
+ possible workaround for part of the problem is to change COST to be
+ a floating-point type. There are other problems besides COST,
+ though; see MAXWORDS below. */
+
+typedef long int COST;
#define MAXCOST TYPE_MAXIMUM (COST)
@@ -100,8 +105,12 @@ typedef long COST;
#define LINE_CREDIT EQUIV(3)
/* Size of paragraph buffer, in words and characters. Longer paragraphs
- are handled neatly (cf. flush_paragraph()), so there's little to gain
- by making these larger. */
+ are handled neatly (cf. flush_paragraph()), so long as these values
+ are considerably greater than required by the width. These values
+ cannot be extended indefinitely: doing so would run into size limits
+ and/or cause more overflows in cost calculations. FIXME: Remove these
+ arbitrary limits. */
+
#define MAXWORDS 1000
#define MAXCHARS 5000
@@ -115,13 +124,6 @@ typedef long COST;
output. */
#define TABWIDTH 8
-/* Miscellaneous definitions. */
-
-#undef TRUE
-#define TRUE true
-#undef FALSE
-#define FALSE false
-
/* Word descriptor structure. */
typedef struct Word WORD;
@@ -171,16 +173,16 @@ const char *program_name;
/* Option values. */
-/* If TRUE, first 2 lines may have different indent (default FALSE). */
+/* If true, first 2 lines may have different indent (default false). */
static bool crown;
-/* If TRUE, first 2 lines _must_ have different indent (default FALSE). */
+/* If true, first 2 lines _must_ have different indent (default false). */
static bool tagged;
-/* If TRUE, each line is a paragraph on its own (default FALSE). */
+/* If true, each line is a paragraph on its own (default false). */
static bool split;
-/* If TRUE, don't preserve inter-word spacing (default FALSE). */
+/* If true, don't preserve inter-word spacing (default false). */
static bool uniform;
/* Prefix minus leading and trailing spaces (default ""). */
@@ -228,7 +230,7 @@ static WORD word[MAXWORDS];
word. */
static WORD *word_limit;
-/* If TRUE, current input file contains tab characters, and so tabs can be
+/* If true, current input file contains tab characters, and so tabs can be
used for white space on output. */
static bool tabs;
@@ -318,7 +320,8 @@ int
main (register int argc, register char **argv)
{
int optchar;
- int fail;
+ bool ok = true;
+ char const *max_width_option = NULL;
initialize_main (&argc, &argv);
program_name = argv[0];
@@ -328,30 +331,17 @@ main (register int argc, register char **argv)
atexit (close_stdout);
- crown = tagged = split = uniform = FALSE;
+ crown = tagged = split = uniform = false;
max_width = WIDTH;
prefix = "";
prefix_length = prefix_lead_space = prefix_full_length = 0;
if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
{
- const char *s = argv[1] + 1;
- max_width = 0;
- /* Old option syntax; a dash followed by one or more digits.
- Move past the number. */
- for (; ISDIGIT (*s); ++s)
- {
- int old_max = max_width;
- max_width = max_width * 10 + *s - '0';
- if (INT_MAX / 10 < old_max || max_width < old_max)
- break;
- }
+ /* Old option syntax; a dash followed by one or more digits. */
+ max_width_option = argv[1] + 1;
- if (*s)
- error (EXIT_FAILURE, 0, _("invalid width option: %s"),
- quote (argv[1]));
-
- /* Make the options we just parsed invisible to getopt. */
+ /* Make the option we just parsed invisible to getopt. */
argv[1] = argv[0];
argv++;
argc--;
@@ -370,29 +360,23 @@ main (register int argc, register char **argv)
usage (EXIT_FAILURE);
case 'c':
- crown = TRUE;
+ crown = true;
break;
case 's':
- split = TRUE;
+ split = true;
break;
case 't':
- tagged = TRUE;
+ tagged = true;
break;
case 'u':
- uniform = TRUE;
+ uniform = true;
break;
case 'w':
- {
- long int tmp_long;
- if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
- || tmp_long <= 0 || tmp_long > INT_MAX)
- error (EXIT_FAILURE, 0, _("invalid width: %s"), quote (optarg));
- max_width = (int) tmp_long;
- }
+ max_width_option = optarg;
break;
case 'p':
@@ -405,9 +389,20 @@ main (register int argc, register char **argv)
}
+ if (max_width_option)
+ {
+ /* Limit max_width to MAXCHARS / 2; otherwise, the resulting
+ output can be quite ugly. */
+ unsigned long int tmp;
+ if (! (xstrtoul (max_width_option, NULL, 10, &tmp, "") == LONGINT_OK
+ && tmp <= MAXCHARS / 2))
+ error (EXIT_FAILURE, 0, _("invalid width: %s"),
+ quote (max_width_option));
+ max_width = tmp;
+ }
+
best_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
- fail = 0;
if (optind == argc)
fmt (stdin);
else
@@ -427,20 +422,20 @@ main (register int argc, register char **argv)
if (fclose (in_stream) == EOF)
{
error (0, errno, "%s", file);
- fail = 1;
+ ok = false;
}
}
else
{
error (0, errno, _("cannot open %s for reading"),
quote (file));
- fail = 1;
+ ok = false;
}
}
}
}
- exit (fail ? EXIT_FAILURE : EXIT_SUCCESS);
+ exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}
/* Trim space from the front and back of the string P, yielding the prefix,
@@ -471,7 +466,7 @@ set_prefix (register char *p)
static void
fmt (FILE *f)
{
- tabs = FALSE;
+ tabs = false;
other_indent = 0;
next_char = get_prefix (f);
while (get_paragraph (f))
@@ -526,8 +521,8 @@ set_other_indent (bool same_paragraph)
If a prefix is in effect, it must be present at the same indent for
each line in the paragraph.
- Return FALSE if end-of-file was encountered before the start of a
- paragraph, else TRUE. */
+ Return false if end-of-file was encountered before the start of a
+ paragraph, else true. */
static bool
get_paragraph (FILE *f)
@@ -547,7 +542,7 @@ get_paragraph (FILE *f)
if (c == EOF)
{
next_char = EOF;
- return FALSE;
+ return false;
}
putchar ('\n');
c = get_prefix (f);
@@ -595,9 +590,9 @@ get_paragraph (FILE *f)
while (same_para (c) && in_column == other_indent)
c = get_line (f, c);
}
- (word_limit - 1)->period = (word_limit - 1)->final = TRUE;
+ (word_limit - 1)->period = (word_limit - 1)->final = true;
next_char = c;
- return TRUE;
+ return true;
}
/* Copy to the output a line that failed to match the prefix, or that
@@ -615,7 +610,7 @@ copy_rest (FILE *f, register int c)
{
put_space (next_prefix_indent);
for (s = prefix; out_column != in_column && *s; out_column++)
- putchar (*(unsigned char *)s++);
+ putchar (*s++);
put_space (in_column - out_column);
}
while (c != '\n' && c != EOF)
@@ -626,9 +621,9 @@ copy_rest (FILE *f, register int c)
return c;
}
-/* Return TRUE if a line whose first non-blank character after the
+/* Return true if a line whose first non-blank character after the
prefix (if any) is C could belong to the current paragraph,
- otherwise FALSE. */
+ otherwise false. */
static bool
same_para (register int c)
@@ -718,7 +713,8 @@ get_prefix (FILE *f)
next_prefix_indent = in_column;
for (p = prefix; *p != '\0'; p++)
{
- if (c != *(unsigned char *)p)
+ unsigned char pc = *p;
+ if (c != pc)
return c;
in_column++;
c = getc (f);
@@ -740,7 +736,7 @@ get_space (FILE *f, register int c)
in_column++;
else if (c == '\t')
{
- tabs = TRUE;
+ tabs = true;
in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
}
else
@@ -754,13 +750,13 @@ get_space (FILE *f, register int c)
static void
check_punctuation (register WORD *w)
{
- const unsigned char *start, *finish;
+ char const *start = w->text;
+ char const *finish = start + (w->length - 1);
+ unsigned char fin = *finish;
- start = (unsigned char *) w->text;
- finish = start + (w->length - 1);
w->paren = isopen (*start);
- w->punct = ISPUNCT (*finish);
- while (isclose (*finish) && finish > start)
+ w->punct = ISPUNCT (fin);
+ while (start < finish && isclose (*finish))
finish--;
w->period = isperiod (*finish);
}
@@ -780,7 +776,7 @@ flush_paragraph (void)
if (word_limit == word)
{
- printf ("%*s", (int) (wptr - parabuf), parabuf);
+ fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
wptr = parabuf;
return;
}
@@ -812,7 +808,7 @@ flush_paragraph (void)
/* Copy text of words down to start of parabuf -- we use memmove because
the source and target may overlap. */
- memmove (parabuf, split_point->text, (size_t) (wptr - split_point->text));
+ memmove (parabuf, split_point->text, wptr - split_point->text);
shift = split_point->text - parabuf;
wptr -= shift;
@@ -824,8 +820,7 @@ flush_paragraph (void)
/* Copy words from split_point down to word -- we use memmove because
the source and target may overlap. */
- memmove ((char *) word, (char *) split_point,
- (word_limit - split_point + 1) * sizeof (WORD));
+ memmove (word, split_point, (word_limit - split_point + 1) * sizeof *word);
word_limit -= split_point - word;
}