summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAssaf Gordon <assafgordon@gmail.com>2012-12-06 22:30:23 +0000
committerPádraig Brady <P@draigBrady.com>2013-02-04 23:40:32 +0000
commit8a303a8963dd255566f1625243ff19b029e0ecec (patch)
treea22eb5e5885b4044e244f2b0bc0387bf04b0849b /src
parentbe7932e863de07c4c7e4fc3c1db3eb6d04ba9af5 (diff)
downloadcoreutils-8a303a8963dd255566f1625243ff19b029e0ecec.tar.xz
numfmt: a new command to format numbers
* AUTHORS: Add my name. * NEWS: Mention the new program. * README: Reference the new program. * src/numfmt.c: New file. * src/.gitignore: Ignore the new binary. * build-aux/gen-lists-of-programs.sh: Update. * scripts/git-hooks/commit-msg: Allow numfmt: commit prefix. * po/POTFILES.in: Add new c file. * tests/misc/numfmt.pl: A new test file giving >93% coverage. * tests/local.mk: Reference the new test. * man/.gitignore: Ignore the new man page. * man/local.mk: Reference the new man page. * man/numfmt.x: A new template. * doc/coreutils.texi: Document the new command.
Diffstat (limited to 'src')
-rw-r--r--src/.gitignore1
-rw-r--r--src/numfmt.c1522
2 files changed, 1523 insertions, 0 deletions
diff --git a/src/.gitignore b/src/.gitignore
index 18cccc1d7..25573dfa2 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -59,6 +59,7 @@ nice
nl
nohup
nproc
+numfmt
od
paste
pathchk
diff --git a/src/numfmt.c b/src/numfmt.c
new file mode 100644
index 000000000..5929dbab5
--- /dev/null
+++ b/src/numfmt.c
@@ -0,0 +1,1522 @@
+/* Reformat numbers like 11505426432 to the more human-readable 11G
+ Copyright (C) 2012 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <float.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <langinfo.h>
+
+#include "mbsalign.h"
+#include "argmatch.h"
+#include "error.h"
+#include "quote.h"
+#include "system.h"
+#include "xstrtol.h"
+#include "xstrndup.h"
+
+/* The official name of this program (e.g., no 'g' prefix). */
+#define PROGRAM_NAME "numfmt"
+
+#define AUTHORS proper_name ("Assaf Gordon")
+
+/* Exit code when some numbers fail to convert. */
+enum { EXIT_CONVERSION_WARNINGS = 2 };
+
+enum
+{
+ FROM_OPTION = CHAR_MAX + 1,
+ FROM_UNIT_OPTION,
+ TO_OPTION,
+ TO_UNIT_OPTION,
+ ROUND_OPTION,
+ SUFFIX_OPTION,
+ GROUPING_OPTION,
+ PADDING_OPTION,
+ FIELD_OPTION,
+ DEBUG_OPTION,
+ DEV_DEBUG_OPTION,
+ HEADER_OPTION,
+ FORMAT_OPTION,
+ INVALID_OPTION
+};
+
+enum scale_type
+{
+ scale_none, /* the default: no scaling. */
+ scale_auto, /* --from only. */
+ scale_SI,
+ scale_IEC,
+ scale_IEC_I /* 'i' suffix is required. */
+};
+
+static char const *const scale_from_args[] =
+{
+ "none", "auto", "si", "iec", "iec-i", NULL
+};
+
+static enum scale_type const scale_from_types[] =
+{
+ scale_none, scale_auto, scale_SI, scale_IEC, scale_IEC_I
+};
+
+static char const *const scale_to_args[] =
+{
+ "none", "si", "iec", "iec-i", NULL
+};
+
+static enum scale_type const scale_to_types[] =
+{
+ scale_none, scale_SI, scale_IEC, scale_IEC_I
+};
+
+
+enum round_type
+{
+ round_ceiling,
+ round_floor,
+ round_from_zero,
+ round_to_zero,
+ round_nearest,
+};
+
+static char const *const round_args[] =
+{
+ "up", "down", "from-zero", "towards-zero", "nearest", NULL
+};
+
+static enum round_type const round_types[] =
+{
+ round_ceiling, round_floor, round_from_zero, round_to_zero, round_nearest
+};
+
+
+enum inval_type
+{
+ inval_abort,
+ inval_fail,
+ inval_warn,
+ inval_ignore
+};
+
+static char const *const inval_args[] =
+{
+ "abort", "fail", "warn", "ignore", NULL
+};
+
+static enum inval_type const inval_types[] =
+{
+ inval_abort, inval_fail, inval_warn, inval_ignore
+};
+
+static struct option const longopts[] =
+{
+ {"from", required_argument, NULL, FROM_OPTION},
+ {"from-unit", required_argument, NULL, FROM_UNIT_OPTION},
+ {"to", required_argument, NULL, TO_OPTION},
+ {"to-unit", required_argument, NULL, TO_UNIT_OPTION},
+ {"round", required_argument, NULL, ROUND_OPTION},
+ {"padding", required_argument, NULL, PADDING_OPTION},
+ {"suffix", required_argument, NULL, SUFFIX_OPTION},
+ {"grouping", no_argument, NULL, GROUPING_OPTION},
+ {"delimiter", required_argument, NULL, 'd'},
+ {"field", required_argument, NULL, FIELD_OPTION},
+ {"debug", no_argument, NULL, DEBUG_OPTION},
+ {"-devdebug", no_argument, NULL, DEV_DEBUG_OPTION},
+ {"header", optional_argument, NULL, HEADER_OPTION},
+ {"format", required_argument, NULL, FORMAT_OPTION},
+ {"invalid", required_argument, NULL, INVALID_OPTION},
+ {GETOPT_HELP_OPTION_DECL},
+ {GETOPT_VERSION_OPTION_DECL},
+ {NULL, 0, NULL, 0}
+};
+
+/* If delimiter has this value, blanks separate fields. */
+enum { DELIMITER_DEFAULT = CHAR_MAX + 1 };
+
+/* Maximum number of digits we can safely handle
+ without precision loss, if scaling is 'none'. */
+enum { MAX_UNSCALED_DIGITS = 18 };
+
+/* Maximum number of digits we can work with.
+ This is equivalent to 999Y.
+ NOTE: 'long double' can handle more than that, but there's
+ no official suffix assigned beyond Yotta (1000^8). */
+enum { MAX_ACCEPTABLE_DIGITS = 27 };
+
+static enum scale_type scale_from = scale_none;
+static enum scale_type scale_to = scale_none;
+static enum round_type _round = round_from_zero;
+static enum inval_type _invalid = inval_abort;
+static const char *suffix = NULL;
+static uintmax_t from_unit_size = 1;
+static uintmax_t to_unit_size = 1;
+static int grouping = 0;
+static char *padding_buffer = NULL;
+static size_t padding_buffer_size = 0;
+static long int padding_width = 0;
+static const char *format_str = NULL;
+static char *format_str_prefix = NULL;
+static char *format_str_suffix = NULL;
+
+/* By default, any conversion error will terminate the program. */
+static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
+
+
+/* auto-pad each line based on skipped whitespace. */
+static int auto_padding = 0;
+static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
+static long int field = 1;
+static int delimiter = DELIMITER_DEFAULT;
+
+/* if non-zero, the first 'header' lines from STDIN are skipped. */
+static uintmax_t header = 0;
+
+/* Debug for users: print warnings to STDERR about possible
+ error (similar to sort's debug). */
+static int debug = 0;
+
+/* debugging for developers - to be removed in final version? */
+static int dev_debug = 0;
+
+/* will be set according to the current locale. */
+static const char *decimal_point;
+static int decimal_point_length;
+
+
+static inline int
+default_scale_base (enum scale_type scale)
+{
+ switch (scale)
+ {
+ case scale_IEC:
+ case scale_IEC_I:
+ return 1024;
+
+ case scale_none:
+ case scale_auto:
+ case scale_SI:
+ default:
+ return 1000;
+ }
+}
+
+static inline int
+valid_suffix (const char suf)
+{
+ static const char *valid_suffixes = "KMGTPEZY";
+ return (strchr (valid_suffixes, suf) != NULL);
+}
+
+static inline int
+suffix_power (const char suf)
+{
+ switch (suf)
+ {
+ case 'K': /* kilo or kibi. */
+ return 1;
+
+ case 'M': /* mega or mebi. */
+ return 2;
+
+ case 'G': /* giga or gibi. */
+ return 3;
+
+ case 'T': /* tera or tebi. */
+ return 4;
+
+ case 'P': /* peta or pebi. */
+ return 5;
+
+ case 'E': /* exa or exbi. */
+ return 6;
+
+ case 'Z': /* zetta or 2**70. */
+ return 7;
+
+ case 'Y': /* yotta or 2**80. */
+ return 8;
+
+ default: /* should never happen. assert? */
+ return 0;
+ }
+}
+
+static inline const char *
+suffix_power_character (unsigned int power)
+{
+ switch (power)
+ {
+ case 0:
+ return "";
+
+ case 1:
+ return "K";
+
+ case 2:
+ return "M";
+
+ case 3:
+ return "G";
+
+ case 4:
+ return "T";
+
+ case 5:
+ return "P";
+
+ case 6:
+ return "E";
+
+ case 7:
+ return "Z";
+
+ case 8:
+ return "Y";
+
+ default:
+ return "(error)";
+ }
+}
+
+/* Similar to 'powl(3)' but without requiring 'libm'. */
+static long double
+powerld (long double base, unsigned int x)
+{
+ long double result = base;
+ if (x == 0)
+ return 1; /* note for test coverage: this is never
+ reached, as 'powerld' won't be called if
+ there's no suffix, hence, no "power". */
+
+ /* TODO: check for overflow, inf? */
+ while (--x)
+ result *= base;
+ return result;
+}
+
+/* Similar to 'fabs(3)' but without requiring 'libm'. */
+static inline long double
+absld (long double val)
+{
+ return val < 0 ? -val : val;
+}
+
+/* Scale down 'val', returns 'updated val' and 'x', such that
+ val*base^X = original val
+ Similar to "frexpl(3)" but without requiring 'libm',
+ allowing only integer scale, limited functionality and error checking. */
+static long double
+expld (long double val, unsigned int base, unsigned int /*output */ *x)
+{
+ unsigned int power = 0;
+
+ if (val >= -LDBL_MAX && val <= LDBL_MAX)
+ {
+ while (absld (val) >= base)
+ {
+ ++power;
+ val /= base;
+ }
+ }
+ if (x)
+ *x = power;
+ return val;
+}
+
+/* EXTREMELY limited 'ceil' - without 'libm'.
+ Assumes values that fit in intmax_t. */
+static inline intmax_t
+simple_round_ceiling (long double val)
+{
+ intmax_t intval = val;
+ if (intval < val)
+ intval++;
+ return intval;
+}
+
+/* EXTREMELY limited 'floor' - without 'libm'.
+ Assumes values that fit in intmax_t. */
+static inline intmax_t
+simple_round_floor (long double val)
+{
+ return -simple_round_ceiling (-val);
+}
+
+/* EXTREMELY limited 'round away from zero'.
+ Assumes values that fit in intmax_t. */
+static inline intmax_t
+simple_round_from_zero (long double val)
+{
+ return val < 0 ? simple_round_floor (val) : simple_round_ceiling (val);
+}
+
+/* EXTREMELY limited 'round away to zero'.
+ Assumes values that fit in intmax_t. */
+static inline intmax_t
+simple_round_to_zero (long double val)
+{
+ return val;
+}
+
+/* EXTREMELY limited 'round' - without 'libm'.
+ Assumes values that fit in intmax_t. */
+static inline intmax_t
+simple_round_nearest (long double val)
+{
+ return val < 0 ? val - 0.5 : val + 0.5;
+}
+
+static inline intmax_t
+simple_round (long double val, enum round_type t)
+{
+ switch (t)
+ {
+ case round_ceiling:
+ return simple_round_ceiling (val);
+
+ case round_floor:
+ return simple_round_floor (val);
+
+ case round_from_zero:
+ return simple_round_from_zero (val);
+
+ case round_to_zero:
+ return simple_round_to_zero (val);
+
+ case round_nearest:
+ return simple_round_nearest (val);
+
+ default:
+ /* to silence the compiler - this should never happen. */
+ return 0;
+ }
+}
+
+enum simple_strtod_error
+{
+ SSE_OK = 0,
+ SSE_OK_PRECISION_LOSS,
+ SSE_OVERFLOW,
+ SSE_INVALID_NUMBER,
+
+ /* the following are returned by 'simple_strtod_human'. */
+ SSE_VALID_BUT_FORBIDDEN_SUFFIX,
+ SSE_INVALID_SUFFIX,
+ SSE_MISSING_I_SUFFIX
+};
+
+/* Read an *integer* INPUT_STR,
+ but return the integer value in a 'long double' VALUE
+ hence, no UINTMAX_MAX limitation.
+ NEGATIVE is updated, and is stored separately from the VALUE
+ so that signbit() isn't required to determine the sign of -0..
+ ENDPTR is required (unlike strtod) and is used to store a pointer
+ to the character after the last character used in the conversion.
+
+ Note locale'd grouping is not supported,
+ nor is skipping of white-space supported.
+
+ Returns:
+ SSE_OK - valid number.
+ SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
+ SSE_OVERFLOW - if more than 27 digits (999Y) were used.
+ SSE_INVALID_NUMBER - if no digits were found. */
+static enum simple_strtod_error
+simple_strtod_int (const char *input_str,
+ char **endptr, long double *value, bool *negative)
+{
+ enum simple_strtod_error e = SSE_OK;
+
+ long double val = 0;
+ unsigned int digits = 0;
+
+ if (*input_str == '-')
+ {
+ input_str++;
+ *negative = true;
+ }
+ else
+ *negative = false;
+
+ *endptr = (char *) input_str;
+ while (*endptr && isdigit (**endptr))
+ {
+ int digit = (**endptr) - '0';
+
+ /* can this happen in some strange locale? */
+ if (digit < 0 || digit > 9)
+ return SSE_INVALID_NUMBER;
+
+ if (digits > MAX_UNSCALED_DIGITS)
+ e = SSE_OK_PRECISION_LOSS;
+
+ ++digits;
+ if (digits > MAX_ACCEPTABLE_DIGITS)
+ return SSE_OVERFLOW;
+
+ val *= 10;
+ val += digit;
+
+ ++(*endptr);
+ }
+ if (digits == 0)
+ return SSE_INVALID_NUMBER;
+ if (*negative)
+ val = -val;
+
+ if (value)
+ *value = val;
+
+ return e;
+}
+
+/* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]",
+ and return the value in a 'long double' VALUE.
+ ENDPTR is required (unlike strtod) and is used to store a pointer
+ to the character after the last character used in the conversion.
+ PRECISION is optional and used to indicate fractions are present.
+
+ Note locale'd grouping is not supported,
+ nor is skipping of white-space supported.
+
+ Returns:
+ SSE_OK - valid number.
+ SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
+ SSE_OVERFLOW - if more than 27 digits (999Y) were used.
+ SSE_INVALID_NUMBER - if no digits were found. */
+static enum simple_strtod_error
+simple_strtod_float (const char *input_str,
+ char **endptr,
+ long double *value,
+ size_t *precision)
+{
+ bool negative;
+ enum simple_strtod_error e = SSE_OK;
+
+ if (precision)
+ *precision = 0;
+
+ /* TODO: accept locale'd grouped values for the integral part. */
+ e = simple_strtod_int (input_str, endptr, value, &negative);
+ if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
+ return e;
+
+
+ /* optional decimal point + fraction. */
+ if (STREQ_LEN (*endptr, decimal_point, decimal_point_length))
+ {
+ char *ptr2;
+ long double val_frac = 0;
+ bool neg_frac;
+
+ (*endptr) += decimal_point_length;
+ enum simple_strtod_error e2 =
+ simple_strtod_int (*endptr, &ptr2, &val_frac, &neg_frac);
+ if (e2 != SSE_OK && e2 != SSE_OK_PRECISION_LOSS)
+ return e2;
+ if (e2 == SSE_OK_PRECISION_LOSS)
+ e = e2; /* propagate warning. */
+ if (neg_frac)
+ return SSE_INVALID_NUMBER;
+
+ /* number of digits in the fractions. */
+ size_t exponent = ptr2 - *endptr;
+
+ val_frac = ((long double) val_frac) / powerld (10, exponent);
+
+ if (value)
+ {
+ if (negative)
+ *value -= val_frac;
+ else
+ *value += val_frac;
+ }
+
+ if (precision)
+ *precision = exponent;
+
+ *endptr = ptr2;
+ }
+ return e;
+}
+
+/* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix",
+ and return the value in a 'long double' VALUE,
+ with the precision of the input returned in PRECISION.
+ ENDPTR is required (unlike strtod) and is used to store a pointer
+ to the character after the last character used in the conversion.
+ ALLOWED_SCALING determines the scaling supported.
+
+ TODO:
+ support locale'd grouping
+ accept scentific and hex floats (probably use strtold directly)
+
+ Returns:
+ SSE_OK - valid number.
+ SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
+ SSE_OVERFLOW - if more than 27 digits (999Y) were used.
+ SSE_INVALID_NUMBER - if no digits were found.
+ SSE_VALID_BUT_FORBIDDEN_SUFFIX
+ SSE_INVALID_SUFFIX
+ SSE_MISSING_I_SUFFIX */
+static enum simple_strtod_error
+simple_strtod_human (const char *input_str,
+ char **endptr, long double *value, size_t *precision,
+ enum scale_type allowed_scaling)
+{
+ int power = 0;
+ /* 'scale_auto' is checked below. */
+ int scale_base = default_scale_base (allowed_scaling);
+
+ if (dev_debug)
+ error (0, 0, _("simple_strtod_human:\n input string: '%s'\n "
+ "locale decimal-point: '%s'\n"), input_str, decimal_point);
+
+ enum simple_strtod_error e =
+ simple_strtod_float (input_str, endptr, value, precision);
+ if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
+ return e;
+
+ if (dev_debug)
+ error (0, 0, _(" parsed numeric value: %Lf\n"
+ " input precision = %d\n"), *value, (int)*precision);
+
+ if (**endptr != '\0')
+ {
+ /* process suffix. */
+
+ /* Skip any blanks between the number and suffix. */
+ while (isblank (**endptr))
+ (*endptr)++;
+
+ if (!valid_suffix (**endptr))
+ return SSE_INVALID_SUFFIX;
+
+ if (allowed_scaling == scale_none)
+ return SSE_VALID_BUT_FORBIDDEN_SUFFIX;
+
+ power = suffix_power (**endptr);
+ (*endptr)++; /* skip first suffix character. */
+
+ if (allowed_scaling == scale_auto && **endptr == 'i')
+ {
+ /* auto-scaling enabled, and the first suffix character
+ is followed by an 'i' (e.g. Ki, Mi, Gi). */
+ scale_base = 1024;
+ (*endptr)++; /* skip second ('i') suffix character. */
+ if (dev_debug)
+ error (0, 0, _(" Auto-scaling, found 'i', switching to base %d\n"),
+ scale_base);
+ }
+
+ *precision = 0; /* Reset, to select precision based on scale. */
+ }
+
+ if (allowed_scaling == scale_IEC_I)
+ {
+ if (**endptr == 'i')
+ (*endptr)++;
+ else
+ return SSE_MISSING_I_SUFFIX;
+ }
+
+ long double multiplier = powerld (scale_base, power);
+
+ if (dev_debug)
+ error (0, 0, _(" suffix power=%d^%d = %Lf\n"),
+ scale_base, power, multiplier);
+
+ /* TODO: detect loss of precision and overflows. */
+ (*value) = (*value) * multiplier;
+
+ if (dev_debug)
+ error (0, 0, _(" returning value: %Lf (%LG)\n"), *value, *value);
+
+ return e;
+}
+
+
+static void
+simple_strtod_fatal (enum simple_strtod_error err, char const *input_str)
+{
+ char const *msgid = NULL;
+
+ switch (err)
+ {
+ case SSE_OK_PRECISION_LOSS:
+ case SSE_OK:
+ /* should never happen - this function isn't called when OK. */
+ abort ();
+
+ case SSE_OVERFLOW:
+ msgid = N_("value too large to be converted: '%s'");
+ break;
+
+ case SSE_INVALID_NUMBER:
+ msgid = N_("invalid number: '%s'");
+ break;
+
+ case SSE_VALID_BUT_FORBIDDEN_SUFFIX:
+ msgid = N_("rejecting suffix in input: '%s' (consider using --from)");
+ break;
+
+ case SSE_INVALID_SUFFIX:
+ msgid = N_("invalid suffix in input: '%s'");
+ break;
+
+ case SSE_MISSING_I_SUFFIX:
+ msgid = N_("missing 'i' suffix in input: '%s' (e.g Ki/Mi/Gi)");
+ break;
+
+ }
+
+ if (_invalid != inval_ignore)
+ error (conv_exit_code, 0, gettext (msgid), input_str);
+}
+
+/* Convert VAL to a human format string in BUF. */
+static void
+double_to_human (long double val, int precision,
+ char *buf, size_t buf_size,
+ enum scale_type scale, int group, enum round_type round)
+{
+ if (dev_debug)
+ error (0, 0, _("double_to_human:\n"));
+
+ if (scale == scale_none)
+ {
+ val *= powerld (10, precision);
+ val = simple_round (val, round);
+ val /= powerld (10, precision);
+
+ if (dev_debug)
+ error (0, 0,
+ (group) ?
+ _(" no scaling, returning (grouped) value: %'.*Lf\n") :
+ _(" no scaling, returning value: %.*Lf\n"), precision, val);
+
+ int i = snprintf (buf, buf_size, (group) ? "%'.*Lf" : "%.*Lf",
+ precision, val);
+ if (i < 0 || i >= (int) buf_size)
+ error (EXIT_FAILURE, 0,
+ _("failed to prepare value '%Lf' for printing"), val);
+ return;
+ }
+
+ /* Scaling requested by user. */
+ double scale_base = default_scale_base (scale);
+
+ /* Normalize val to scale. */
+ unsigned int power = 0;
+ val = expld (val, scale_base, &power);
+ if (dev_debug)
+ error (0, 0, _(" scaled value to %Lf * %0.f ^ %d\n"),
+ val, scale_base, power);
+
+ /* Perform rounding. */
+ int ten_or_less = 0;
+ if (absld (val) < 10)
+ {
+ /* for values less than 10, we allow one decimal-point digit,
+ so adjust before rounding. */
+ ten_or_less = 1;
+ val *= 10;
+ }
+ val = simple_round (val, round);
+ /* two special cases after rounding:
+ 1. a "999.99" can turn into 1000 - so scale down
+ 2. a "9.99" can turn into 10 - so don't display decimal-point. */
+ if (absld (val) >= scale_base)
+ {
+ val /= scale_base;
+ power++;
+ }
+ if (ten_or_less)
+ val /= 10;
+
+ /* should "7.0" be printed as "7" ?
+ if removing the ".0" is preferred, enable the fourth condition. */
+ int show_decimal_point = (val != 0) && (absld (val) < 10) && (power > 0);
+ /* && (absld (val) > simple_round_floor (val))) */
+
+ if (dev_debug)
+ error (0, 0, _(" after rounding, value=%Lf * %0.f ^ %d\n"),
+ val, scale_base, power);
+
+ snprintf (buf, buf_size, (show_decimal_point) ? "%.1Lf%s" : "%.0Lf%s",
+ val, suffix_power_character (power));
+
+ if (scale == scale_IEC_I && power > 0)
+ strncat (buf, "i", buf_size - strlen (buf) - 1);
+
+ if (dev_debug)
+ error (0, 0, _(" returning value: '%s'\n"), buf);
+
+ return;
+}
+
+/* Convert a string of decimal digits, N_STRING, with an optional suffix
+ to an integral value. Upon successful conversion, return that value.
+ If it cannot be converted, give a diagnostic and exit. */
+static uintmax_t
+unit_to_umax (const char *n_string)
+{
+ strtol_error s_err;
+ char *end = NULL;
+ uintmax_t n;
+
+ s_err = xstrtoumax (n_string, &end, 10, &n, "KMGTPEZY");
+
+ if (s_err != LONGINT_OK || *end || n == 0)
+ error (EXIT_FAILURE, 0, _("invalid unit size: '%s'"), n_string);
+
+ return n;
+}
+
+
+static void
+setup_padding_buffer (size_t min_size)
+{
+ if (padding_buffer_size > min_size)
+ return;
+
+ padding_buffer_size = min_size + 1;
+ padding_buffer = realloc (padding_buffer, padding_buffer_size);
+ if (!padding_buffer)
+ error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"),
+ padding_buffer_size);
+}
+
+void
+usage (int status)
+{
+ if (status != EXIT_SUCCESS)
+ emit_try_help ();
+ else
+ {
+ printf (_("\
+Usage: %s [OPTIONS] [NUMBER]\n\
+"), program_name);
+ fputs (_("\
+Reformat NUMBER(s) from stdin or command arguments.\n\
+"), stdout);
+ emit_mandatory_arg_note ();
+ fputs (_("\
+ --from=UNIT auto-scale input numbers to UNITs. Default is 'none'.\n\
+ See UNIT below.\n\
+ --from-unit=N specify the input unit size (instead of the default 1).\n\
+ --to=UNIT auto-scale output numbers to UNITs.\n\
+ See UNIT below.\n\
+ --to-unit=N the output unit size (instead of the default 1).\n\
+ --round=METHOD the rounding method to use when scaling. METHOD can be:\n\
+ up, down, from-zero (default), towards-zero, nearest\n\
+ --suffix=SUFFIX add SUFFIX to output numbers, and accept optional SUFFIX\n\
+ in input numbers.\n\
+ --padding=N pad the output to N characters.\n\
+ Positive N will right-aligned. Negative N will left-align.\n\
+ Note: if the output is wider than N, padding is ignored.\n\
+ Default is to automatically pad if whitespace is found.\n\
+ --grouping group digits together (e.g. 1,000,000).\n\
+ Uses the locale-defined grouping (i.e. have no effect\n\
+ in C/POSIX locales).\n\
+ --header[=N] print (without converting) the first N header lines.\n\
+ N defaults to 1 if not specified.\n\
+ --field N replace the number in input field N (default is 1)\n\
+ -d, --delimiter=X use X instead of whitespace for field delimiter\n\
+ --format=FORMAT use printf style floating-point FORMAT.\n\
+ See FORMAT below for details.\n\
+ --invalid=MODE failure mode for invalid numbers: MODE can be:\n\
+ abort (the default), fail, warn, ignore.\n\
+ --debug print warnings about invalid input.\n\
+ \n\
+"), stdout);
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
+
+
+ fputs (_("\
+\n\
+UNIT options:\n\
+ none No auto-scaling is done. Suffixes will trigger an error.\n\
+ auto Accept optional single-letter/two-letter suffix:\n\
+ 1K = 1000\n\
+ 1Ki = 1024\n\
+ 1G = 1000000\n\
+ 1Gi = 1048576\n\
+ si Accept optional single letter suffix:\n\
+ 1K = 1000\n\
+ 1G = 1000000\n\
+ ...\n\
+ iec Accept optional single letter suffix:\n\
+ 1K = 1024\n\
+ 1G = 1048576\n\
+ ...\n\
+ iec-i Accept optional two-letter suffix:\n\
+ 1Ki = 1024\n\
+ 1Gi = 1048576\n\
+ ...\n\
+\n\
+"), stdout);
+
+ fputs (_("\
+\n\
+FORMAT must be suitable for printing one floating-point argument '%f'.\n\
+Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
+Optional width value (%10f) will pad output. Optional negative width values\n\
+(%-10f) will left-pad output.\n\
+\n\
+"), stdout);
+
+ printf (_("\
+\n\
+Exit status is 0 if all input numbers were successfully converted.\n\
+By default, %s will stop at the first conversion error with exit status 2.\n\
+With --invalid='fail' a warning is printed for each conversion error\n\
+and the exit status is 2. With --invalid='warn' each conversion error is\n\
+diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\
+errors are not diagnosed and the exit status is 0.\n\
+\n\
+"), program_name);
+
+
+
+ printf (_("\
+\n\
+Examples:\n\
+ $ %s --to=si 1000\n\
+ -> \"1.0K\"\n\
+ $ %s --to=iec 2048\n\
+ -> \"2.0K\"\n\
+ $ %s --to=iec-i 4096\n\
+ -> \"4.0Ki\"\n\
+ $ echo 1K | %s --from=si\n\
+ -> \"1000\"\n\
+ $ echo 1K | %s --from=iec\n\
+ -> \"1024\"\n\
+ $ df | %s --header --field 2 --to=si\n\
+ $ ls -l | %s --header --field 5 --to=iec\n\
+ $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
+ $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n\
+"),
+ program_name, program_name, program_name,
+ program_name, program_name, program_name,
+ program_name, program_name, program_name);
+ emit_ancillary_info ();
+ }
+ exit (status);
+}
+
+/* Given 'fmt' (a printf(3) compatible format string), extracts the following:
+ 1. padding (e.g. %20f)
+ 2. alignment (e.g. %-20f)
+ 3. grouping (e.g. %'f)
+
+ Only a limited subset of printf(3) syntax is supported.
+
+ TODO:
+ support .precision
+ support %e %g etc. rather than just %f
+
+ NOTES:
+ 1. This function sets the global variables:
+ padding_width, padding_alignment, grouping,
+ format_str_prefix, format_str_suffix
+ 2. The function aborts on any errors. */
+static void
+parse_format_string (char const *fmt)
+{
+ size_t i;
+ size_t prefix_len = 0;
+ size_t suffix_pos;
+ long int pad = 0;
+ char *endptr = NULL;
+
+ for (i = 0; !(fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1)
+ {
+ if (!fmt[i])
+ error (EXIT_FAILURE, 0,
+ _("format %s has no %% directive"), quote (fmt));
+ prefix_len++;
+ }
+
+ i++;
+ i += strspn (fmt + i, " ");
+ if (fmt[i] == '\'')
+ {
+ grouping = 1;
+ i++;
+ }
+ i += strspn (fmt + i, " ");
+ errno = 0;
+ pad = strtol (fmt + i, &endptr, 10);
+ if (errno != 0)
+ error (EXIT_FAILURE, 0,
+ _("invalid format %s (width overflow)"), quote (fmt));
+
+ if (endptr != (fmt + i) && pad != 0)
+ {
+ if (pad < 0)
+ {
+ padding_alignment = MBS_ALIGN_LEFT;
+ padding_width = -pad;
+ }
+ else
+ {
+ padding_width = pad;
+ }
+ }
+ i = endptr - fmt;
+
+ if (fmt[i] == '\0')
+ error (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt));
+
+ if (fmt[i] != 'f')
+ error (EXIT_FAILURE, 0, _("invalid format %s,"
+ " directive must be %%['][-][N]f"),
+ quote (fmt));
+ i++;
+ suffix_pos = i;
+
+ for (; fmt[i] != '\0'; i += (fmt[i] == '%') + 1)
+ if (fmt[i] == '%' && fmt[i + 1] != '%')
+ error (EXIT_FAILURE, 0, _("format %s has too many %% directives"),
+ quote (fmt));
+
+ if (prefix_len)
+ {
+ format_str_prefix = xstrndup (fmt, prefix_len);
+ if (!format_str_prefix)
+ error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"),
+ prefix_len + 1);
+ }
+ if (fmt[suffix_pos] != '\0')
+ {
+ format_str_suffix = strdup (fmt + suffix_pos);
+ if (!format_str_suffix)
+ error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"),
+ strlen (fmt + suffix_pos));
+ }
+
+ if (dev_debug)
+ error (0, 0, _("format String:\n input: %s\n grouping: %s\n"
+ " padding width: %zu\n alignment: %s\n"
+ " prefix: '%s'\n suffix: '%s'\n"),
+ quote (fmt), (grouping) ? "yes" : "no",
+ padding_width,
+ (padding_alignment == MBS_ALIGN_LEFT) ? "Left" : "Right",
+ format_str_prefix, format_str_suffix);
+}
+
+/* Parse a numeric value (with optional suffix) from a string.
+ Returns a long double value, with input precision.
+
+ If there's an error converting the string to value - exits with
+ an error.
+
+ If there are any trailing characters after the number
+ (besides a valid suffix) - exits with an error. */
+static enum simple_strtod_error
+parse_human_number (const char *str, long double /*output */ *value,
+ size_t *precision)
+{
+ char *ptr = NULL;
+
+ enum simple_strtod_error e =
+ simple_strtod_human (str, &ptr, value, precision, scale_from);
+ if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
+ {
+ simple_strtod_fatal (e, str);
+ return e;
+ }
+
+ if (ptr && *ptr != '\0')
+ {
+ if (_invalid != inval_ignore)
+ error (conv_exit_code, 0, _("invalid suffix in input '%s': '%s'"),
+ str, ptr);
+ e = SSE_INVALID_SUFFIX;
+ }
+ return e;
+}
+
+
+/* Print the given VAL, using the requested representation.
+ The number is printed to STDOUT, with padding and alignment. */
+static int
+prepare_padded_number (const long double val, size_t precision)
+{
+ /* Generate Output. */
+ char buf[128];
+
+ /* Can't reliably print too-large values without auto-scaling. */
+ unsigned int x;
+ expld (val, 10, &x);
+ if (scale_to == scale_none && x > MAX_UNSCALED_DIGITS)
+ {
+ if (_invalid != inval_ignore)
+ error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
+ " (consider using --to)"), val);
+ return 0;
+ }
+
+ if (x > MAX_ACCEPTABLE_DIGITS - 1)
+ {
+ if (_invalid != inval_ignore)
+ error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
+ " (cannot handle values > 999Y)"), val);
+ return 0;
+ }
+
+ double_to_human (val, precision, buf, sizeof (buf), scale_to, grouping,
+ _round);
+ if (suffix)
+ strncat (buf, suffix, sizeof (buf) - strlen (buf) -1);
+
+ if (dev_debug)
+ error (0, 0, _("formatting output:\n value: %Lf\n humanized: '%s'\n"),
+ val, buf);
+
+
+ if (padding_width && strlen (buf) < padding_width)
+ {
+ size_t w = padding_width;
+ mbsalign (buf, padding_buffer, padding_buffer_size, &w,
+ padding_alignment, MBA_UNIBYTE_ONLY);
+
+ if (dev_debug)
+ error (0, 0, _(" After padding: '%s'\n"), padding_buffer);
+
+ }
+ else
+ {
+ setup_padding_buffer (strlen (buf) + 1);
+ strcpy (padding_buffer, buf);
+ }
+
+ return 1;
+}
+
+static void
+print_padded_number (void)
+{
+ if (format_str_prefix)
+ fputs (format_str_prefix, stdout);
+
+ fputs (padding_buffer, stdout);
+
+ if (format_str_suffix)
+ fputs (format_str_suffix, stdout);
+}
+
+/* Converts the TEXT number string to the requested representation,
+ and handles automatic suffix addition. */
+static int
+process_suffixed_number (char *text, long double *result, size_t *precision)
+{
+ if (suffix && strlen (text) > strlen (suffix))
+ {
+ char *possible_suffix = text + strlen (text) - strlen (suffix);
+
+ if (STREQ (suffix, possible_suffix))
+ {
+ /* trim suffix, ONLY if it's at the end of the text. */
+ *possible_suffix = '\0';
+ if (dev_debug)
+ error (0, 0, _("trimming suffix '%s'\n"), suffix);
+ }
+ else
+ {
+ if (dev_debug)
+ error (0, 0, _("no valid suffix found\n"));
+ }
+ }
+
+ /* Skip white space - always. */
+ char *p = text;
+ while (*p && isblank (*p))
+ ++p;
+ const unsigned int skip_count = text - p;
+
+ /* setup auto-padding. */
+ if (auto_padding)
+ {
+ if (skip_count > 0 || field > 1)
+ {
+ padding_width = strlen (text);
+ setup_padding_buffer (padding_width);
+ }
+ else
+ {
+ padding_width = 0;
+ }
+ if (dev_debug)
+ error (0, 0, _("setting Auto-Padding to %ld characters\n"),
+ padding_width);
+ }
+
+ long double val = 0;
+ enum simple_strtod_error e = parse_human_number (p, &val, precision);
+ if (e == SSE_OK_PRECISION_LOSS && debug)
+ error (0, 0, _("large input value '%s': possible precision loss"), p);
+
+ if (from_unit_size != 1 || to_unit_size != 1)
+ val = (val * from_unit_size) / to_unit_size;
+
+ *result = val;
+
+ return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
+}
+
+/* Skip the requested number of fields in the input string.
+ Returns a pointer to the *delimiter* of the requested field,
+ or a pointer to NUL (if reached the end of the string). */
+static inline char *
+__attribute ((pure))
+skip_fields (char *buf, int fields)
+{
+ char *ptr = buf;
+ if (delimiter != DELIMITER_DEFAULT)
+ {
+ if (*ptr == delimiter)
+ fields--;
+ while (*ptr && fields--)
+ {
+ while (*ptr && *ptr == delimiter)
+ ++ptr;
+ while (*ptr && *ptr != delimiter)
+ ++ptr;
+ }
+ }
+ else
+ while (*ptr && fields--)
+ {
+ while (*ptr && isblank (*ptr))
+ ++ptr;
+ while (*ptr && !isblank (*ptr))
+ ++ptr;
+ }
+ return ptr;
+}
+
+/* Parse a delimited string, and extracts the requested field.
+ NOTE: the input buffer is modified.
+
+ TODO:
+ Maybe support multiple fields, though can always pipe output
+ into another numfmt to process other fields.
+ Maybe default to processing all fields rather than just first?
+
+ Output:
+ _PREFIX, _DATA, _SUFFIX will point to the relevant positions
+ in the input string, or be NULL if such a part doesn't exist. */
+static void
+extract_fields (char *line, int _field,
+ char ** _prefix, char ** _data, char ** _suffix)
+{
+ char *ptr = line;
+ *_prefix = NULL;
+ *_data = NULL;
+ *_suffix = NULL;
+
+ if (dev_debug)
+ error (0, 0, _("extracting Fields:\n input: '%s'\n field: %d\n"),
+ line, _field);
+
+ if (field > 1)
+ {
+ /* skip the requested number of fields. */
+ *_prefix = line;
+ ptr = skip_fields (line, field - 1);
+ if (*ptr == '\0')
+ {
+ /* not enough fields in the input - print warning? */
+ if (dev_debug)
+ error (0, 0, _(" TOO FEW FIELDS!\n prefix: '%s'\n"), *_prefix);
+ return;
+ }
+
+ *ptr = '\0';
+ ++ptr;
+ }
+
+ *_data = ptr;
+ *_suffix = skip_fields (*_data, 1);
+ if (**_suffix)
+ {
+ /* there is a suffix (i.e. the field is not the last on the line),
+ so null-terminate the _data before it. */
+ **_suffix = '\0';
+ ++(*_suffix);
+ }
+ else
+ *_suffix = NULL;
+
+ if (dev_debug)
+ error (0, 0, _(" prefix: '%s'\n number: '%s'\n suffix: '%s'\n"),
+ *_prefix, *_data, *_suffix);
+}
+
+
+/* Convert a number in a given line of text.
+ NEWLINE specifies whether to output a '\n' for this "line". */
+static int
+process_line (char *line, bool newline)
+{
+ char *pre, *num, *suf;
+ long double val = 0;
+ size_t precision = 0;
+ int valid_number = 0;
+
+ extract_fields (line, field, &pre, &num, &suf);
+ if (!num)
+ if (_invalid != inval_ignore)
+ error (conv_exit_code, 0, _("input line is too short, "
+ "no numbers found to convert in field %ld"),
+ field);
+
+ if (num)
+ {
+ valid_number = process_suffixed_number (num, &val, &precision);
+ if (valid_number)
+ valid_number = prepare_padded_number (val, precision);
+ }
+
+ if (pre)
+ fputs (pre, stdout);
+
+ if (pre && num)
+ fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout);
+
+ if (valid_number)
+ {
+ print_padded_number ();
+ }
+ else
+ {
+ if (num)
+ fputs (num, stdout);
+ }
+
+ if (suf)
+ {
+ fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout);
+ fputs (suf, stdout);
+ }
+
+ if (newline)
+ putchar ('\n');
+
+ return valid_number;
+}
+
+int
+main (int argc, char **argv)
+{
+ int valid_numbers = 1;
+
+ initialize_main (&argc, &argv);
+ set_program_name (argv[0]);
+ setlocale (LC_ALL, "");
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+
+ decimal_point = nl_langinfo (RADIXCHAR);
+ if (decimal_point == NULL || strlen (decimal_point) == 0)
+ decimal_point = ".";
+ decimal_point_length = strlen (decimal_point);
+
+ atexit (close_stdout);
+
+ while (true)
+ {
+ int c = getopt_long (argc, argv, "d:", longopts, NULL);
+
+ if (c == -1)
+ break;
+
+ switch (c)
+ {
+ case FROM_OPTION:
+ scale_from = XARGMATCH ("--from", optarg,
+ scale_from_args, scale_from_types);
+ break;
+
+ case FROM_UNIT_OPTION:
+ from_unit_size = unit_to_umax (optarg);
+ break;
+
+ case TO_OPTION:
+ scale_to =
+ XARGMATCH ("--to", optarg, scale_to_args, scale_to_types);
+ break;
+
+ case TO_UNIT_OPTION:
+ to_unit_size = unit_to_umax (optarg);
+ break;
+
+ case ROUND_OPTION:
+ _round = XARGMATCH ("--round", optarg, round_args, round_types);
+ break;
+
+ case GROUPING_OPTION:
+ grouping = 1;
+ break;
+
+ case PADDING_OPTION:
+ if (xstrtol (optarg, NULL, 10, &padding_width, "") != LONGINT_OK
+ || padding_width == 0)
+ error (EXIT_FAILURE, 0, _("invalid padding value '%s'"), optarg);
+ if (padding_width < 0)
+ {
+ padding_alignment = MBS_ALIGN_LEFT;
+ padding_width = -padding_width;
+ }
+ /* TODO: We probably want to apply a specific --padding
+ to --header lines too. */
+ break;
+
+ case FIELD_OPTION:
+ if (xstrtol (optarg, NULL, 10, &field, "") != LONGINT_OK
+ || field <= 0)
+ error (EXIT_FAILURE, 0, _("invalid field value '%s'"), optarg);
+ break;
+
+ case 'd':
+ /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
+ if (optarg[0] != '\0' && optarg[1] != '\0')
+ error (EXIT_FAILURE, 0,
+ _("the delimiter must be a single character"));
+ delimiter = optarg[0];
+ break;
+
+ case SUFFIX_OPTION:
+ suffix = optarg;
+ break;
+
+ case DEBUG_OPTION:
+ debug = 1;
+ break;
+
+ case DEV_DEBUG_OPTION:
+ dev_debug = 1;
+ debug = 1;
+ break;
+
+ case HEADER_OPTION:
+ if (optarg)
+ {
+ if (xstrtoumax (optarg, NULL, 10, &header, "") != LONGINT_OK
+ || header == 0)
+ error (EXIT_FAILURE, 0, _("invalid header value '%s'"),
+ optarg);
+ }
+ else
+ {
+ header = 1;
+ }
+ break;
+
+ case FORMAT_OPTION:
+ format_str = optarg;
+ break;
+
+ case INVALID_OPTION:
+ _invalid = XARGMATCH ("--invalid", optarg, inval_args, inval_types);
+ break;
+
+ case_GETOPT_HELP_CHAR;
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+ default:
+ usage (EXIT_FAILURE);
+ }
+ }
+
+ if (format_str != NULL && grouping)
+ error (EXIT_FAILURE, 0, _("--grouping cannot be combined with --format"));
+ if (format_str != NULL && padding_width > 0)
+ error (EXIT_FAILURE, 0, _("--padding cannot be combined with --format"));
+
+ /* Warn about no-op. */
+ if (debug && scale_from == scale_none && scale_to == scale_none
+ && !grouping && (padding_width == 0) && (format_str == NULL))
+ error (0, 0, _("no conversion option specified"));
+
+ if (format_str)
+ parse_format_string (format_str);
+
+ if (grouping)
+ {
+ if (scale_to != scale_none)
+ error (EXIT_FAILURE, 0, _("grouping cannot be combined with --to"));
+ if (debug && (strlen (nl_langinfo (THOUSEP)) == 0))
+ error (0, 0, _("grouping has no effect in this locale"));
+ }
+
+
+ setup_padding_buffer (padding_width);
+ auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT);
+
+ if (_invalid != inval_abort)
+ conv_exit_code = 0;
+
+ if (argc > optind)
+ {
+ if (debug && header)
+ error (0, 0, _("--header ignored with command-line input"));
+
+ for (; optind < argc; optind++)
+ valid_numbers &= process_line (argv[optind], true);
+ }
+ else
+ {
+ char *line = NULL;
+ size_t line_allocated = 0;
+ ssize_t len;
+
+ while (header-- && getline (&line, &line_allocated, stdin) > 0)
+ fputs (line, stdout);
+
+ while ((len = getline (&line, &line_allocated, stdin)) > 0)
+ {
+ bool newline = line[len - 1] == '\n';
+ if (newline)
+ line[len - 1] = '\0';
+ valid_numbers &= process_line (line, newline);
+ }
+
+ IF_LINT (free (line));
+
+ if (ferror (stdin))
+ error (0, errno, _("error reading input"));
+ }
+
+ free (padding_buffer);
+ free (format_str_prefix);
+ free (format_str_suffix);
+
+
+ if (debug && !valid_numbers)
+ error (0, 0, _("failed to convert some of the input numbers"));
+
+ int exit_status = EXIT_SUCCESS;
+ if (!valid_numbers && _invalid != inval_warn && _invalid != inval_ignore)
+ exit_status = EXIT_CONVERSION_WARNINGS;
+
+ exit (exit_status);
+}