diff options
-rw-r--r-- | AUTHORS | 2 | ||||
-rw-r--r-- | NEWS | 5 | ||||
-rw-r--r-- | TODO | 5 | ||||
-rw-r--r-- | doc/coreutils.texi | 17 | ||||
-rw-r--r-- | src/Makefile.am | 3 | ||||
-rw-r--r-- | src/expr.c | 655 |
6 files changed, 594 insertions, 93 deletions
@@ -25,7 +25,7 @@ du: Torbjörn Granlund, David MacKenzie, Paul Eggert, Jim Meyering echo: Brian Fox, Chet Ramey env: Richard Mlynarik, David MacKenzie expand: David MacKenzie -expr: Mike Parker +expr: Mike Parker, James Youngman factor: Paul Rubin false: Jim Meyering fmt: Ross Paterson @@ -21,8 +21,9 @@ GNU coreutils NEWS -*- outline -*- With this new option, after a short read, dd repeatedly calls read, until it fills the incomplete block, reaches EOF, or encounters an error. - factor accepts arbitrarily large numbers and factors them using - Pollard's rho algorithm. + If the GNU MP library is available at configure time, factor and + expr support arbitrarily large numbers. Pollard's rho algorithm is + used to factor large numbers. ls now colorizes files with capabilities if libcap is available @@ -155,6 +155,11 @@ remove all uses of the `register' keyword: Done. add a maint.mk rule remove or adjust chown's --changes option, since it can't always do what it currently says it does. +Support arbitrary-precision arithmetic in those tools for which it +makes sense. Factor and expr already support this via libgmp. +The "test" program is covered via its string-based comparison of +integers. To be converted: seq. + Adapt tools like wc, tr, fmt, etc. (most of the textutils) to be multibyte aware. The problem is that I want to avoid duplicating significant blocks of logic, yet I also want to incur only minimal diff --git a/doc/coreutils.texi b/doc/coreutils.texi index fd1e79e09..f5a2e41a1 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -11066,8 +11066,21 @@ may be used for grouping in the usual manner. You must quote parentheses and many operators to avoid the shell evaluating them, however. -The only options are @option{--help} and @option{--version}. @xref{Common -options}. Options must precede operands. +By default, @command{expr} performs operations using native arithmetic +types, but if a numeric overflow occurs and @command{expr} was built +with support for the GNU MP library, @command{expr}, it will switch to +using arbitrary-precision arithmetic. + +Apart from @option{--help} and @option{--version} (@pxref{Common +options}), the following options are supported: + +@table @samp +@item --bignum +Forces the use of the GNU MP library. +@item --no-bignum +Forces the use of native operations only. In the event of a numeric +overflow, @command{expr} will fail, even if GNU MP is available. +@end table @cindex exit status of @command{expr} Exit status: diff --git a/src/Makefile.am b/src/Makefile.am index 7410653ef..359d18ec6 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -130,6 +130,9 @@ seq_LDADD = $(LDADD) $(POW_LIB) nanosec_libs = $(LDADD) $(POW_LIB) $(LIB_NANOSLEEP) # for various GMP functions +expr_LDADD = $(LDADD) $(LIB_GMP) + +# for various GMP functions factor_LDADD = $(LDADD) $(LIB_GMP) sleep_LDADD = $(nanosec_libs) diff --git a/src/expr.c b/src/expr.c index c342291dc..bf55a4029 100644 --- a/src/expr.c +++ b/src/expr.c @@ -15,6 +15,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* Author: Mike Parker. + Modified for arbitrary-precision calculation by James Youngman. This program evaluates expressions. Each token (operator, operand, parenthesis) of the expression must be a seperate argument. The @@ -32,8 +33,11 @@ #include <sys/types.h> #include "system.h" +#include <assert.h> #include <regex.h> -#include "long-options.h" +#if HAVE_GMP +#include <gmp.h> +#endif #include "error.h" #include "quotearg.h" #include "strnumcmp.h" @@ -42,7 +46,7 @@ /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME "expr" -#define AUTHORS proper_name ("Mike Parker") +#define AUTHORS proper_name ("Mike Parker"), proper_name ("James Youngman") /* Exit statuses. */ enum @@ -57,10 +61,14 @@ enum EXPR_FAILURE }; -/* The kinds of value we can have. */ +/* The kinds of value we can have. + In the comments below, a variable is described as "arithmetic" if + it is either integer or mp_integer. Variables are of type mp_integer + only if GNU MP is available, but the type designator is always defined. */ enum valtype { integer, + mp_integer, string }; typedef enum valtype TYPE; @@ -71,7 +79,12 @@ struct valinfo TYPE type; /* Which kind. */ union { /* The value itself. */ - intmax_t i; + /* We could use intmax_t but that would integrate less well with GMP, + since GMP has mpz_set_si but no intmax_t equivalent. */ + signed long int i; +#if HAVE_GMP + mpz_t z; +#endif char *s; } u; }; @@ -85,6 +98,34 @@ static bool nomoreargs (void); static bool null (VALUE *v); static void printv (VALUE *v); +/* Arithmetic is done in one of three modes. + + The --bignum option forces all arithmetic to use bignums other than + string indexing (mode==MP_ALWAYS). The --no-bignum option forces + all arithmetic to use native types rather than bignums + (mode==MP_NEVER). + + The default mode is MP_AUTO if GMP is available and MP_NEVER if + not. Most functions will process a bignum if one is found, but + will not convert a native integer to a string if the mode is + MP_NEVER. */ +enum arithmetic_mode + { + MP_NEVER, /* Never use bignums */ +#if HAVE_GMP + MP_ALWAYS, /* Always use bignums. */ + MP_AUTO, /* Switch if result would otherwise overflow */ +#endif + }; +static enum arithmetic_mode mode = +#if HAVE_GMP + MP_AUTO +#else + MP_NEVER +#endif + ; + + void usage (int status) { @@ -99,6 +140,10 @@ Usage: %s EXPRESSION\n\ "), program_name, program_name); putchar ('\n'); + fputs (_("\ + --bignum always use arbitrary-precision arithmetic\n\ + --no-bignum always use single-precision arithmetic\n"), + stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); fputs (_("\ @@ -175,13 +220,37 @@ syntax_error (void) static void integer_overflow (char op) { - error (EXPR_FAILURE, ERANGE, "%c", op); + error (EXPR_FAILURE, 0, + _("arithmetic operation %c produced an out of range value, " + "but arbitrary-precision arithmetic is not available"), op); +} + +static void +string_too_long (void) +{ + error (EXPR_FAILURE, ERANGE, _("string too long")); } +enum +{ + USE_BIGNUM = CHAR_MAX + 1, + NO_USE_BIGNUM +}; + +static struct option const long_options[] = +{ + {"bignum", no_argument, NULL, USE_BIGNUM}, + {"no-bignum", no_argument, NULL, NO_USE_BIGNUM}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {NULL, 0, NULL, 0} +}; + int main (int argc, char **argv) { VALUE *v; + int c; initialize_main (&argc, &argv); set_program_name (argv[0]); @@ -192,23 +261,48 @@ main (int argc, char **argv) initialize_exit_failure (EXPR_FAILURE); atexit (close_stdout); - parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE_NAME, VERSION, - usage, AUTHORS, (char const *) NULL); - /* The above handles --help and --version. - Since there is no other invocation of getopt, handle `--' here. */ - if (argc > 1 && STREQ (argv[1], "--")) + /* The argument -0 should not result in an error message. */ + opterr = 0; + + while ((c = getopt_long (argc, argv, "+", long_options, NULL)) != -1) { - --argc; - ++argv; + /* "expr -0" should interpret the -0 as an integer argument. + arguments like --foo should also be interpreted as a string + argument to be "evaluated". + */ + if ('?' == c) + { + --optind; + break; + } + else + switch (c) + { + case USE_BIGNUM: +#if HAVE_GMP + mode = MP_ALWAYS; +#else + error (0, 0, _("arbitrary-precision support is not available")); +#endif + break; + + case NO_USE_BIGNUM: + mode = MP_NEVER; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + } } - if (argc <= 1) + if (argc <= optind) { error (0, 0, _("missing operand")); usage (EXPR_INVALID); } - args = argv + 1; + args = argv + optind; v = eval (true); if (!nomoreargs ()) @@ -221,9 +315,19 @@ main (int argc, char **argv) /* Return a VALUE for I. */ static VALUE * -int_value (intmax_t i) +int_value (long int i) { VALUE *v = xmalloc (sizeof *v); +#if HAVE_GMP + if (mode == MP_ALWAYS) + { + /* all integer values are handled as bignums. */ + mpz_init_set_si (v->u.z, i); + v->type = mp_integer; + return v; + } +#endif + v->type = integer; v->u.i = i; return v; @@ -240,13 +344,42 @@ str_value (char const *s) return v; } + +static VALUE * +substr_value (char const *s, size_t len, size_t pos, size_t nchars_wanted) +{ + if (pos >= len) + return str_value (""); + else + { + VALUE *v = xmalloc (sizeof *v); + size_t vlen = MIN (nchars_wanted, len - pos + 1); + char *vlim; + v->type = string; + v->u.s = xmalloc (vlen + 1); + vlim = mempcpy (v->u.s, s + pos, vlen); + *vlim = '\0'; + return v; + } +} + + /* Free VALUE V, including structure components. */ static void freev (VALUE *v) { if (v->type == string) - free (v->u.s); + { + free (v->u.s); + } + else if (v->type == mp_integer) + { + assert (mode != MP_NEVER); +#if HAVE_GMP + mpz_clear (v->u.z); +#endif + } free (v); } @@ -255,22 +388,24 @@ freev (VALUE *v) static void printv (VALUE *v) { - char *p; - char buf[INT_BUFSIZE_BOUND (intmax_t)]; - switch (v->type) { case integer: - p = imaxtostr (v->u.i, buf); + printf ("%ld\n", v->u.i); break; case string: - p = v->u.s; + puts (v->u.s); break; +#if HAVE_GMP + case mp_integer: + mpz_out_str (stdout, 10, v->u.z); + putchar ('\n'); + break; +#endif default: abort (); } - puts (p); } /* Return true if V is a null-string or zero-number. */ @@ -282,6 +417,10 @@ null (VALUE *v) { case integer: return v->u.i == 0; +#if HAVE_GMP + case mp_integer: + return mpz_sgn (v->u.z) == 0; +#endif case string: { char const *cp = v->u.s; @@ -324,14 +463,29 @@ looks_like_integer (char const *cp) static void tostring (VALUE *v) { - char buf[INT_BUFSIZE_BOUND (intmax_t)]; + char buf[INT_BUFSIZE_BOUND (long int)]; switch (v->type) { case integer: - v->u.s = xstrdup (imaxtostr (v->u.i, buf)); + snprintf (buf, sizeof buf, "%ld", v->u.i); + v->u.s = xstrdup (buf); v->type = string; break; +#if HAVE_GMP + case mp_integer: + { + char *s = mpz_get_str (NULL, 10, v->u.z); + if (!s) + { + xalloc_die (); + } + mpz_clear (v->u.z); + v->u.s = s; + v->type = string; + } + break; +#endif case string: break; default: @@ -339,7 +493,8 @@ tostring (VALUE *v) } } -/* Coerce V to an integer value. Return true on success, false on failure. */ +/* Coerce V to an arithmetic value. + Return true on success, false on failure. */ static bool toarith (VALUE *v) @@ -347,18 +502,40 @@ toarith (VALUE *v) switch (v->type) { case integer: + case mp_integer: return true; + case string: { - intmax_t value; + long int value; if (! looks_like_integer (v->u.s)) return false; - if (xstrtoimax (v->u.s, NULL, 10, &value, NULL) != LONGINT_OK) - error (EXPR_FAILURE, ERANGE, "%s", v->u.s); - free (v->u.s); - v->u.i = value; - v->type = integer; + if (xstrtol (v->u.s, NULL, 10, &value, NULL) != LONGINT_OK) + { +#if HAVE_GMP + if (mode != MP_NEVER) + { + char *s = v->u.s; + if (mpz_init_set_str (v->u.z, s, 10)) + abort (); /* Bug in looks_like_integer, perhaps. */ + v->type = mp_integer; + free (s); + } + else + { + error (EXPR_FAILURE, ERANGE, "%s", v->u.s); + } +#else + error (EXPR_FAILURE, ERANGE, "%s", v->u.s); +#endif + } + else + { + free (v->u.s); + v->u.i = value; + v->type = integer; + } return true; } default: @@ -366,6 +543,85 @@ toarith (VALUE *v) } } +/* Convert V into an integer (that is, a non-arbitrary-precision value) + Return true on success, false on failure. */ +static bool +toint (VALUE *v) +{ + if (!toarith (v)) + return false; +#if HAVE_GMP + if (v->type == mp_integer) + { + if (mpz_fits_slong_p (v->u.z)) + { + long value = mpz_get_si (v->u.z); + mpz_clear (v->u.z); + v->u.i = value; + v->type = integer; + } + else + return false; /* value was too big. */ + } +#else + if (v->type == mp_integer) + abort (); /* should not happen. */ +#endif + return true; +} + +/* Extract a size_t value from a positive arithmetic value, V. + The extracted value is stored in *VAL. */ +static bool +getsize (const VALUE *v, size_t *val, bool *negative) +{ + if (v->type == integer) + { + if (v->u.i < 0) + { + *negative = true; + return false; + } + else + { + *negative = false; + *val = v->u.i; + return true; + } + } + else if (v->type == mp_integer) + { +#if HAVE_GMP + if (mpz_sgn (v->u.z) < 0) + { + *negative = true; + return false; + } + else if (mpz_fits_ulong_p (v->u.z)) + { + unsigned long ul; + ul = mpz_get_ui (v->u.z); + *val = ul; + return true; + } + else + { + *negative = false; + return false; + } +#else + abort (); +#endif + + } + else + { + abort (); /* should not pass a string. */ + } +} + + + /* Return true and advance if the next token matches STR exactly. STR must not be NULL. */ @@ -544,13 +800,41 @@ eval6 (bool evaluate) } else if (nextarg ("index")) { + size_t pos, len; + l = eval6 (evaluate); r = eval6 (evaluate); tostring (l); tostring (r); - v = int_value (strcspn (l->u.s, r->u.s) + 1); - if (v->u.i == strlen (l->u.s) + 1) - v->u.i = 0; + pos = strcspn (l->u.s, r->u.s); + len = strlen (l->u.s); + if (pos == len) + { + v = int_value (0); + } + else + { + if (pos < LONG_MAX) + { + v = int_value (pos + 1); + } + else + { +#if HAVE_GMP + if (mode != MP_NEVER + && pos < ULONG_MAX) + { + v = xmalloc (sizeof *v); + mpz_init_set_ui (v->u.z, pos+1); + v->type = mp_integer; + } + else + string_too_long (); +#else + string_too_long (); +#endif + } + } freev (l); freev (r); return v; @@ -563,19 +847,32 @@ eval6 (bool evaluate) i2 = eval6 (evaluate); tostring (l); llen = strlen (l->u.s); - if (!toarith (i1) || !toarith (i2) - || llen < i1->u.i - || i1->u.i <= 0 || i2->u.i <= 0) + + if (!toarith (i1) || !toarith (i2)) v = str_value (""); else { - size_t vlen = MIN (i2->u.i, llen - i1->u.i + 1); - char *vlim; - v = xmalloc (sizeof *v); - v->type = string; - v->u.s = xmalloc (vlen + 1); - vlim = mempcpy (v->u.s, l->u.s + i1->u.i - 1, vlen); - *vlim = '\0'; + size_t pos, len; + bool negative = false; + + if (getsize (i1, &pos, &negative)) + if (getsize (i2, &len, &negative)) + if (pos == 0 || len == 0) + v = str_value (""); + else + v = substr_value (l->u.s, llen, pos-1, len); + else + if (negative) + v = str_value (""); + else + error (EXPR_FAILURE, ERANGE, + _("string offset is too large")); + else + if (negative) + v = str_value (""); + else + error (EXPR_FAILURE, ERANGE, + _("substring length too large")); } freev (l); freev (i1); @@ -618,6 +915,171 @@ eval5 (bool evaluate) } } + +#if HAVE_GMP +static void +promote (VALUE *x) +{ + if (x->type == integer) + mpz_init_set_si (x->u.z, x->u.i); +} +#endif + +/* L = L * R. Both L and R are arithmetic. */ +static void +domult (VALUE *l, VALUE *r) +{ + if (l->type == integer && r->type == integer) + { + long int val = 0; + val = l->u.i * r->u.i; + if (! (l->u.i == 0 || r->u.i == 0 + || ((val < 0) == ((l->u.i < 0) ^ (r->u.i < 0)) + && val / l->u.i == r->u.i))) + { + /* Result would (did) overflow. Handle with MP if available. */ + if (mode != MP_NEVER) + { +#if HAVE_GMP + mpz_init_set_si (l->u.z, l->u.i); + mpz_mul_si (l->u.z, l->u.z, r->u.i); /* L*=R */ + l->type = mp_integer; +#endif + } + else + { + integer_overflow ('*'); + } + } + else + { + l->u.i = val; + } + } + else + { + /* At least one operand is already mp_integer, so promote the other. */ +#if HAVE_GMP + /* We could use mpz_mul_si here if R is not already mp_integer, + but for the moment we'll try to minimise code paths. */ + if (l->type == integer) + mpz_init_set_si (l->u.z, l->u.i); + if (r->type == integer) + mpz_init_set_si (r->u.z, r->u.i); + l->type = r->type = mp_integer; + mpz_mul (l->u.z, l->u.z, r->u.z); /* L*=R */ +#else + abort (); +#endif + } +} + +/* L = L / R or (if WANT_MODULUS) L = L % R */ +static void +dodivide (VALUE *l, VALUE *r, bool want_modulus) +{ + if (r->type == integer && r->u.i == 0) + error (EXPR_INVALID, 0, _("division by zero")); +#if HAVE_GMP + if (r->type == mp_integer && mpz_sgn (r->u.z) == 0) + error (EXPR_INVALID, 0, _("division by zero")); +#endif + if (l->type == integer && r->type == integer) + { + if (l->u.i < - INT_MAX && r->u.i == -1) + { + /* Some x86-style hosts raise an exception for + INT_MIN / -1 and INT_MIN % -1, so handle these + problematic cases specially. */ + if (want_modulus) + { + /* X mod -1 is zero for all negative X. + Although strictly this is implementation-defined, + we don't want to coredump, so we avoid the calculation. */ + l->u.i = 0; + return; + } + else + { + if (mode != MP_NEVER) + { +#if HAVE_GMP + /* Handle the case by promoting. */ + mpz_init_set_si (l->u.z, l->u.i); + l->type = mp_integer; +#endif + } + else + { + integer_overflow ('/'); + } + } + } + else + { + l->u.i = want_modulus ? l->u.i % r->u.i : l->u.i / r->u.i; + return; + } + } + /* If we get to here, at least one operand is mp_integer + and R is not 0. */ +#if HAVE_GMP + { + bool round_up = false; /* do we round up? */ + int sign_l, sign_r; + promote (l); + promote (r); + sign_l = mpz_sgn (l->u.z); + sign_r = mpz_sgn (r->u.z); + + if (!want_modulus) + { + if (!sign_l) + { + mpz_set_si (l->u.z, 0); + } + else if (sign_l < 0 || sign_r < 0) + { + /* At least one operand is negative. For integer arithmetic, + it's platform-dependent if the operation rounds up or down. + We mirror what the implementation does. */ + switch ((3*sign_l) / (2*sign_r)) + { + case 2: /* round toward +inf. */ + case -1: /* round toward +inf. */ + mpz_cdiv_q (l->u.z, l->u.z, r->u.z); + break; + case -2: /* round toward -inf. */ + case 1: /* round toward -inf */ + mpz_fdiv_q (l->u.z, l->u.z, r->u.z); + break; + default: + abort (); + } + } + else + { + /* Both operands positive. Round toward -inf. */ + mpz_fdiv_q (l->u.z, l->u.z, r->u.z); + } + } + else + { + mpz_mod (l->u.z, l->u.z, r->u.z); /* L = L % R */ + + /* If either operand is negative, it's platform-dependent if + the remainer is positive or negative. We mirror what the + implementation does. */ + if (sign_l % sign_r < 0) + mpz_neg (l->u.z, l->u.z); /* L = (-L) */ + } + } +#else + abort (); +#endif +} + + /* Handle *, /, % operators. */ static VALUE * @@ -626,7 +1088,6 @@ eval4 (bool evaluate) VALUE *l; VALUE *r; enum { multiply, divide, mod } fxn; - intmax_t val = 0; #ifdef EVAL_TRACE trace ("eval4"); @@ -647,37 +1108,71 @@ eval4 (bool evaluate) { if (!toarith (l) || !toarith (r)) error (EXPR_INVALID, 0, _("non-numeric argument")); - if (fxn == multiply) + switch (fxn) { - val = l->u.i * r->u.i; - if (! (l->u.i == 0 || r->u.i == 0 - || ((val < 0) == ((l->u.i < 0) ^ (r->u.i < 0)) - && val / l->u.i == r->u.i))) - integer_overflow ('*'); + case multiply: + domult (l, r); + break; + case divide: + case mod: + dodivide (l, r, fxn==mod); + break; } - else + } + freev (r); + } +} + +/* L = L + R, or L = L - R */ +static void +doadd (VALUE *l, VALUE *r, bool add) +{ + long int val = 0; + + if (!toarith (l) || !toarith (r)) + error (EXPR_INVALID, 0, _("non-numeric argument")); + if (l->type == integer && r->type == integer) + { + if (add) + { + val = l->u.i + r->u.i; + if ((val < l->u.i) == (r->u.i < 0)) { - if (r->u.i == 0) - error (EXPR_INVALID, 0, _("division by zero")); - if (l->u.i < - INTMAX_MAX && r->u.i == -1) - { - /* Some x86-style hosts raise an exception for - INT_MIN / -1 and INT_MIN % -1, so handle these - problematic cases specially. */ - if (fxn == divide) - integer_overflow ('/'); - val = 0; - } - else - val = fxn == divide ? l->u.i / r->u.i : l->u.i % r->u.i; + l->u.i = val; + return; } } - freev (l); - freev (r); - l = int_value (val); + else + { + val = l->u.i - r->u.i; + if ((l->u.i < val) == (r->u.i < 0)) + { + l->u.i = val; + return; + } + } + } + /* If we get to here, either the operation overflowed or at least + one operand is an mp_integer. */ + if (mode != MP_NEVER) + { +#if HAVE_GMP + promote (l); + promote (r); + if (add) + mpz_add (l->u.z, l->u.z, r->u.z); + else + mpz_sub (l->u.z, l->u.z, r->u.z); +#endif + } + else + { + integer_overflow ('-'); } } + + /* Handle +, - operators. */ static VALUE * @@ -685,8 +1180,7 @@ eval3 (bool evaluate) { VALUE *l; VALUE *r; - enum { plus, minus } fxn; - intmax_t val = 0; + bool add; #ifdef EVAL_TRACE trace ("eval3"); @@ -695,32 +1189,17 @@ eval3 (bool evaluate) while (1) { if (nextarg ("+")) - fxn = plus; + add = true; else if (nextarg ("-")) - fxn = minus; + add = false; else return l; r = eval4 (evaluate); if (evaluate) { - if (!toarith (l) || !toarith (r)) - error (EXPR_INVALID, 0, _("non-numeric argument")); - if (fxn == plus) - { - val = l->u.i + r->u.i; - if ((val < l->u.i) != (r->u.i < 0)) - integer_overflow ('+'); - } - else - { - val = l->u.i - r->u.i; - if ((l->u.i < val) != (r->u.i < 0)) - integer_overflow ('-'); - } + doadd (l, r, add); } - freev (l); freev (r); - l = int_value (val); } } |