summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2010-11-10 20:34:52 -0800
committerJim Meyering <meyering@redhat.com>2010-11-11 08:16:24 +0100
commit6568b173db1c98f39a53eadd9b09e0a0e5c11920 (patch)
treeccbc21bc55d866e91db9e97453505d9c66263360
parent0339eb4598600024cedca14a7e882c1737dd2d82 (diff)
downloadcoreutils-6568b173db1c98f39a53eadd9b09e0a0e5c11920.tar.xz
csplit: do not rely on undefined behavior in printf formats
* doc/coreutils.texi (csplit invocation): Say that %d and %i are aliases for %u. * src/csplit.c (FLAG_THOUSANDS, FLAG_ALTERNATIVE): New constants. (get_format_flags): Now take char const * and int * and return size_t. It now stores info about the flags instead of merely scanning them. Also, it handles '0' correctly. Drop support for the undocumented '+' and ' ' flags since the value is unsigned. Add support for the (undocumented) "'" flag. All uses changed. (get_format_width, get_format_prec): Remove. (check_format_conv_type): Renamed from get_format_conv_type, with a different signature. It now converts the format to one that is compatible with unsigned int, and checks flags. All uses changed. (max_out): Have snprintf compute the number of bytes needed rather than attempting to do it ourselves (which doesn't work portably with outlandish formats such as %4294967296d). (check_format_conv_type, main): Check for overflow in size calculations. Don't assume size_t fits in unsigned int. * tests/misc/csplit: Check for proper handling of flags, with %0#6.3x. Coreutils 8.6 mishandles this somewhat-weird example.
-rw-r--r--doc/coreutils.texi3
-rw-r--r--src/csplit.c141
-rwxr-xr-xtests/misc/csplit15
3 files changed, 81 insertions, 78 deletions
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 8dfb069c0..ce56b0e5e 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3082,7 +3082,8 @@ specified, the suffix string must include exactly one
@code{printf(3)}-style conversion specification, possibly including
format specification flags, a field width, a precision specifications,
or all of these kinds of modifiers. The format letter must convert a
-binary integer argument to readable form; thus, only @samp{d}, @samp{i},
+binary unsigned integer argument to readable form. The format letters
+@samp{d} and @samp{i} are aliases for @samp{u}, and the
@samp{u}, @samp{o}, @samp{x}, and @samp{X} conversions are allowed. The
entire @var{suffix} is given (with the current output file number) to
@code{sprintf(3)} to form the file name suffixes for each of the
diff --git a/src/csplit.c b/src/csplit.c
index 948a795e2..531e492ed 100644
--- a/src/csplit.c
+++ b/src/csplit.c
@@ -1181,81 +1181,64 @@ parse_patterns (int argc, int start, char **argv)
}
}
-static unsigned int
-get_format_flags (char **format_ptr)
+
+
+/* Names for the printf format flags ' and #. These can be ORed together. */
+enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };
+
+/* Scan the printf format flags in FORMAT, storing info about the
+ flags into *FLAGS_PTR. Return the number of flags found. */
+static size_t
+get_format_flags (char const *format, int *flags_ptr)
{
- unsigned int count = 0;
+ int flags = 0;
- for (; **format_ptr; (*format_ptr)++)
+ for (size_t count = 0; ; count++)
{
- switch (**format_ptr)
+ switch (format[count])
{
case '-':
+ case '0':
break;
- case '+':
- case ' ':
- count |= 1;
+ case '\'':
+ flags |= FLAG_THOUSANDS;
break;
case '#':
- count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
+ flags |= FLAG_ALTERNATIVE;
break;
default:
+ *flags_ptr = flags;
return count;
}
}
- return count;
-}
-
-static size_t
-get_format_width (char **format_ptr)
-{
- unsigned long int val = 0;
-
- if (ISDIGIT (**format_ptr)
- && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
- || SIZE_MAX < val))
- error (EXIT_FAILURE, 0, _("invalid format width"));
-
- /* Allow for enough octal digits to represent the value of UINT_MAX,
- even if the field width is less than that. */
- return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
-}
-
-static size_t
-get_format_prec (char **format_ptr)
-{
- if (**format_ptr != '.')
- return 0;
- (*format_ptr)++;
-
- if (! ISDIGIT (**format_ptr))
- return 0;
- else
- {
- unsigned long int val;
- if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
- || SIZE_MAX < val)
- error (EXIT_FAILURE, 0, _("invalid format precision"));
- return val;
- }
}
+/* Check that the printf format conversion specifier *FORMAT is valid
+ and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i',
+ since the format will be used with an unsigned value. */
static void
-get_format_conv_type (char **format_ptr)
+check_format_conv_type (char *format, int flags)
{
- unsigned char ch = *(*format_ptr)++;
+ unsigned char ch = *format;
+ int compatible_flags = FLAG_THOUSANDS;
switch (ch)
{
case 'd':
case 'i':
- case 'o':
+ *format = 'u';
+ break;
+
case 'u':
+ break;
+
+ case 'o':
case 'x':
case 'X':
+ compatible_flags = FLAG_ALTERNATIVE;
break;
case 0:
@@ -1270,45 +1253,46 @@ get_format_conv_type (char **format_ptr)
error (EXIT_FAILURE, 0,
_("invalid conversion specifier in suffix: \\%.3o"), ch);
}
+
+ if (flags & ~ compatible_flags)
+ error (EXIT_FAILURE, 0,
+ _("invalid flags in conversion specification: %%%c%c"),
+ (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
}
+/* Return the maximum number of bytes that can be generated by
+ applying FORMAT to an unsigned int value. If the format is
+ invalid, diagnose the problem and exit. */
static size_t
max_out (char *format)
{
- size_t out_count = 0;
bool percent = false;
- while (*format)
- {
- if (*format++ != '%')
- out_count++;
- else if (*format == '%')
- {
- format++;
- out_count++;
- }
- else
- {
- if (percent)
- error (EXIT_FAILURE, 0,
- _("too many %% conversion specifications in suffix"));
- percent = true;
- out_count += get_format_flags (&format);
- {
- size_t width = get_format_width (&format);
- size_t prec = get_format_prec (&format);
-
- out_count += MAX (width, prec);
- }
- get_format_conv_type (&format);
- }
- }
+ for (char *f = format; *f; f++)
+ if (*f == '%' && *++f != '%')
+ {
+ if (percent)
+ error (EXIT_FAILURE, 0,
+ _("too many %% conversion specifications in suffix"));
+ percent = true;
+ unsigned int flags;
+ f += get_format_flags (f, &flags);
+ while (ISDIGIT (*f))
+ f++;
+ if (*f == '.')
+ while (ISDIGIT (*++f))
+ continue;
+ check_format_conv_type (f, flags);
+ }
if (! percent)
error (EXIT_FAILURE, 0,
_("missing %% conversion specification in suffix"));
- return out_count;
+ int maxlen = snprintf (NULL, 0, format, UINT_MAX);
+ if (! (0 <= maxlen && maxlen <= SIZE_MAX))
+ xalloc_die ();
+ return maxlen;
}
int
@@ -1349,7 +1333,7 @@ main (int argc, char **argv)
case 'n':
if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
- || val > INT_MAX)
+ || MIN (INT_MAX, SIZE_MAX) < val)
error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
digits = val;
break;
@@ -1380,11 +1364,14 @@ main (int argc, char **argv)
usage (EXIT_FAILURE);
}
- unsigned int max_digit_string_len
+ size_t prefix_len = strlen (prefix);
+ size_t max_digit_string_len
= (suffix
? max_out (suffix)
: MAX (INT_STRLEN_BOUND (unsigned int), digits));
- filename_space = xmalloc (strlen (prefix) + max_digit_string_len + 1);
+ if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
+ xalloc_die ();
+ filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
set_input_file (argv[optind++]);
diff --git a/tests/misc/csplit b/tests/misc/csplit
index bef722485..f365da335 100755
--- a/tests/misc/csplit
+++ b/tests/misc/csplit
@@ -67,6 +67,21 @@ EOF
compare err experr || fail=1
rm -f in out exp err experr
+# `echo | csplit -b '%0#6.3x' - 1' incorrectly warned about the format
+# up through coreutils 8.6.
+echo > in
+csplit -b '%0#6.3x' in 1 > out 2> err || fail=1
+cat <<EOF > exp
+0
+1
+EOF
+compare out exp || fail=1
+touch experr
+compare err experr || fail=1
+compare 'xx 000' experr || fail=1
+compare 'xx 0x001' in || fail=1
+rm -f in out exp err experr xx*
+
# make sure `csplit FILE 0' fails.
echo > in
csplit in 0 > out 2> err && fail=1