diff options
author | Assaf Gordon <assafgordon@gmail.com> | 2012-12-06 22:30:23 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2013-02-04 23:40:32 +0000 |
commit | 8a303a8963dd255566f1625243ff19b029e0ecec (patch) | |
tree | a22eb5e5885b4044e244f2b0bc0387bf04b0849b /src | |
parent | be7932e863de07c4c7e4fc3c1db3eb6d04ba9af5 (diff) | |
download | coreutils-8a303a8963dd255566f1625243ff19b029e0ecec.tar.xz |
numfmt: a new command to format numbers
* AUTHORS: Add my name.
* NEWS: Mention the new program.
* README: Reference the new program.
* src/numfmt.c: New file.
* src/.gitignore: Ignore the new binary.
* build-aux/gen-lists-of-programs.sh: Update.
* scripts/git-hooks/commit-msg: Allow numfmt: commit prefix.
* po/POTFILES.in: Add new c file.
* tests/misc/numfmt.pl: A new test file giving >93% coverage.
* tests/local.mk: Reference the new test.
* man/.gitignore: Ignore the new man page.
* man/local.mk: Reference the new man page.
* man/numfmt.x: A new template.
* doc/coreutils.texi: Document the new command.
Diffstat (limited to 'src')
-rw-r--r-- | src/.gitignore | 1 | ||||
-rw-r--r-- | src/numfmt.c | 1522 |
2 files changed, 1523 insertions, 0 deletions
diff --git a/src/.gitignore b/src/.gitignore index 18cccc1d7..25573dfa2 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -59,6 +59,7 @@ nice nl nohup nproc +numfmt od paste pathchk diff --git a/src/numfmt.c b/src/numfmt.c new file mode 100644 index 000000000..5929dbab5 --- /dev/null +++ b/src/numfmt.c @@ -0,0 +1,1522 @@ +/* Reformat numbers like 11505426432 to the more human-readable 11G + Copyright (C) 2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> +#include <float.h> +#include <getopt.h> +#include <stdio.h> +#include <sys/types.h> +#include <langinfo.h> + +#include "mbsalign.h" +#include "argmatch.h" +#include "error.h" +#include "quote.h" +#include "system.h" +#include "xstrtol.h" +#include "xstrndup.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "numfmt" + +#define AUTHORS proper_name ("Assaf Gordon") + +/* Exit code when some numbers fail to convert. */ +enum { EXIT_CONVERSION_WARNINGS = 2 }; + +enum +{ + FROM_OPTION = CHAR_MAX + 1, + FROM_UNIT_OPTION, + TO_OPTION, + TO_UNIT_OPTION, + ROUND_OPTION, + SUFFIX_OPTION, + GROUPING_OPTION, + PADDING_OPTION, + FIELD_OPTION, + DEBUG_OPTION, + DEV_DEBUG_OPTION, + HEADER_OPTION, + FORMAT_OPTION, + INVALID_OPTION +}; + +enum scale_type +{ + scale_none, /* the default: no scaling. */ + scale_auto, /* --from only. */ + scale_SI, + scale_IEC, + scale_IEC_I /* 'i' suffix is required. */ +}; + +static char const *const scale_from_args[] = +{ + "none", "auto", "si", "iec", "iec-i", NULL +}; + +static enum scale_type const scale_from_types[] = +{ + scale_none, scale_auto, scale_SI, scale_IEC, scale_IEC_I +}; + +static char const *const scale_to_args[] = +{ + "none", "si", "iec", "iec-i", NULL +}; + +static enum scale_type const scale_to_types[] = +{ + scale_none, scale_SI, scale_IEC, scale_IEC_I +}; + + +enum round_type +{ + round_ceiling, + round_floor, + round_from_zero, + round_to_zero, + round_nearest, +}; + +static char const *const round_args[] = +{ + "up", "down", "from-zero", "towards-zero", "nearest", NULL +}; + +static enum round_type const round_types[] = +{ + round_ceiling, round_floor, round_from_zero, round_to_zero, round_nearest +}; + + +enum inval_type +{ + inval_abort, + inval_fail, + inval_warn, + inval_ignore +}; + +static char const *const inval_args[] = +{ + "abort", "fail", "warn", "ignore", NULL +}; + +static enum inval_type const inval_types[] = +{ + inval_abort, inval_fail, inval_warn, inval_ignore +}; + +static struct option const longopts[] = +{ + {"from", required_argument, NULL, FROM_OPTION}, + {"from-unit", required_argument, NULL, FROM_UNIT_OPTION}, + {"to", required_argument, NULL, TO_OPTION}, + {"to-unit", required_argument, NULL, TO_UNIT_OPTION}, + {"round", required_argument, NULL, ROUND_OPTION}, + {"padding", required_argument, NULL, PADDING_OPTION}, + {"suffix", required_argument, NULL, SUFFIX_OPTION}, + {"grouping", no_argument, NULL, GROUPING_OPTION}, + {"delimiter", required_argument, NULL, 'd'}, + {"field", required_argument, NULL, FIELD_OPTION}, + {"debug", no_argument, NULL, DEBUG_OPTION}, + {"-devdebug", no_argument, NULL, DEV_DEBUG_OPTION}, + {"header", optional_argument, NULL, HEADER_OPTION}, + {"format", required_argument, NULL, FORMAT_OPTION}, + {"invalid", required_argument, NULL, INVALID_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {NULL, 0, NULL, 0} +}; + +/* If delimiter has this value, blanks separate fields. */ +enum { DELIMITER_DEFAULT = CHAR_MAX + 1 }; + +/* Maximum number of digits we can safely handle + without precision loss, if scaling is 'none'. */ +enum { MAX_UNSCALED_DIGITS = 18 }; + +/* Maximum number of digits we can work with. + This is equivalent to 999Y. + NOTE: 'long double' can handle more than that, but there's + no official suffix assigned beyond Yotta (1000^8). */ +enum { MAX_ACCEPTABLE_DIGITS = 27 }; + +static enum scale_type scale_from = scale_none; +static enum scale_type scale_to = scale_none; +static enum round_type _round = round_from_zero; +static enum inval_type _invalid = inval_abort; +static const char *suffix = NULL; +static uintmax_t from_unit_size = 1; +static uintmax_t to_unit_size = 1; +static int grouping = 0; +static char *padding_buffer = NULL; +static size_t padding_buffer_size = 0; +static long int padding_width = 0; +static const char *format_str = NULL; +static char *format_str_prefix = NULL; +static char *format_str_suffix = NULL; + +/* By default, any conversion error will terminate the program. */ +static int conv_exit_code = EXIT_CONVERSION_WARNINGS; + + +/* auto-pad each line based on skipped whitespace. */ +static int auto_padding = 0; +static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT; +static long int field = 1; +static int delimiter = DELIMITER_DEFAULT; + +/* if non-zero, the first 'header' lines from STDIN are skipped. */ +static uintmax_t header = 0; + +/* Debug for users: print warnings to STDERR about possible + error (similar to sort's debug). */ +static int debug = 0; + +/* debugging for developers - to be removed in final version? */ +static int dev_debug = 0; + +/* will be set according to the current locale. */ +static const char *decimal_point; +static int decimal_point_length; + + +static inline int +default_scale_base (enum scale_type scale) +{ + switch (scale) + { + case scale_IEC: + case scale_IEC_I: + return 1024; + + case scale_none: + case scale_auto: + case scale_SI: + default: + return 1000; + } +} + +static inline int +valid_suffix (const char suf) +{ + static const char *valid_suffixes = "KMGTPEZY"; + return (strchr (valid_suffixes, suf) != NULL); +} + +static inline int +suffix_power (const char suf) +{ + switch (suf) + { + case 'K': /* kilo or kibi. */ + return 1; + + case 'M': /* mega or mebi. */ + return 2; + + case 'G': /* giga or gibi. */ + return 3; + + case 'T': /* tera or tebi. */ + return 4; + + case 'P': /* peta or pebi. */ + return 5; + + case 'E': /* exa or exbi. */ + return 6; + + case 'Z': /* zetta or 2**70. */ + return 7; + + case 'Y': /* yotta or 2**80. */ + return 8; + + default: /* should never happen. assert? */ + return 0; + } +} + +static inline const char * +suffix_power_character (unsigned int power) +{ + switch (power) + { + case 0: + return ""; + + case 1: + return "K"; + + case 2: + return "M"; + + case 3: + return "G"; + + case 4: + return "T"; + + case 5: + return "P"; + + case 6: + return "E"; + + case 7: + return "Z"; + + case 8: + return "Y"; + + default: + return "(error)"; + } +} + +/* Similar to 'powl(3)' but without requiring 'libm'. */ +static long double +powerld (long double base, unsigned int x) +{ + long double result = base; + if (x == 0) + return 1; /* note for test coverage: this is never + reached, as 'powerld' won't be called if + there's no suffix, hence, no "power". */ + + /* TODO: check for overflow, inf? */ + while (--x) + result *= base; + return result; +} + +/* Similar to 'fabs(3)' but without requiring 'libm'. */ +static inline long double +absld (long double val) +{ + return val < 0 ? -val : val; +} + +/* Scale down 'val', returns 'updated val' and 'x', such that + val*base^X = original val + Similar to "frexpl(3)" but without requiring 'libm', + allowing only integer scale, limited functionality and error checking. */ +static long double +expld (long double val, unsigned int base, unsigned int /*output */ *x) +{ + unsigned int power = 0; + + if (val >= -LDBL_MAX && val <= LDBL_MAX) + { + while (absld (val) >= base) + { + ++power; + val /= base; + } + } + if (x) + *x = power; + return val; +} + +/* EXTREMELY limited 'ceil' - without 'libm'. + Assumes values that fit in intmax_t. */ +static inline intmax_t +simple_round_ceiling (long double val) +{ + intmax_t intval = val; + if (intval < val) + intval++; + return intval; +} + +/* EXTREMELY limited 'floor' - without 'libm'. + Assumes values that fit in intmax_t. */ +static inline intmax_t +simple_round_floor (long double val) +{ + return -simple_round_ceiling (-val); +} + +/* EXTREMELY limited 'round away from zero'. + Assumes values that fit in intmax_t. */ +static inline intmax_t +simple_round_from_zero (long double val) +{ + return val < 0 ? simple_round_floor (val) : simple_round_ceiling (val); +} + +/* EXTREMELY limited 'round away to zero'. + Assumes values that fit in intmax_t. */ +static inline intmax_t +simple_round_to_zero (long double val) +{ + return val; +} + +/* EXTREMELY limited 'round' - without 'libm'. + Assumes values that fit in intmax_t. */ +static inline intmax_t +simple_round_nearest (long double val) +{ + return val < 0 ? val - 0.5 : val + 0.5; +} + +static inline intmax_t +simple_round (long double val, enum round_type t) +{ + switch (t) + { + case round_ceiling: + return simple_round_ceiling (val); + + case round_floor: + return simple_round_floor (val); + + case round_from_zero: + return simple_round_from_zero (val); + + case round_to_zero: + return simple_round_to_zero (val); + + case round_nearest: + return simple_round_nearest (val); + + default: + /* to silence the compiler - this should never happen. */ + return 0; + } +} + +enum simple_strtod_error +{ + SSE_OK = 0, + SSE_OK_PRECISION_LOSS, + SSE_OVERFLOW, + SSE_INVALID_NUMBER, + + /* the following are returned by 'simple_strtod_human'. */ + SSE_VALID_BUT_FORBIDDEN_SUFFIX, + SSE_INVALID_SUFFIX, + SSE_MISSING_I_SUFFIX +}; + +/* Read an *integer* INPUT_STR, + but return the integer value in a 'long double' VALUE + hence, no UINTMAX_MAX limitation. + NEGATIVE is updated, and is stored separately from the VALUE + so that signbit() isn't required to determine the sign of -0.. + ENDPTR is required (unlike strtod) and is used to store a pointer + to the character after the last character used in the conversion. + + Note locale'd grouping is not supported, + nor is skipping of white-space supported. + + Returns: + SSE_OK - valid number. + SSE_OK_PRECISION_LOSS - if more than 18 digits were used. + SSE_OVERFLOW - if more than 27 digits (999Y) were used. + SSE_INVALID_NUMBER - if no digits were found. */ +static enum simple_strtod_error +simple_strtod_int (const char *input_str, + char **endptr, long double *value, bool *negative) +{ + enum simple_strtod_error e = SSE_OK; + + long double val = 0; + unsigned int digits = 0; + + if (*input_str == '-') + { + input_str++; + *negative = true; + } + else + *negative = false; + + *endptr = (char *) input_str; + while (*endptr && isdigit (**endptr)) + { + int digit = (**endptr) - '0'; + + /* can this happen in some strange locale? */ + if (digit < 0 || digit > 9) + return SSE_INVALID_NUMBER; + + if (digits > MAX_UNSCALED_DIGITS) + e = SSE_OK_PRECISION_LOSS; + + ++digits; + if (digits > MAX_ACCEPTABLE_DIGITS) + return SSE_OVERFLOW; + + val *= 10; + val += digit; + + ++(*endptr); + } + if (digits == 0) + return SSE_INVALID_NUMBER; + if (*negative) + val = -val; + + if (value) + *value = val; + + return e; +} + +/* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]", + and return the value in a 'long double' VALUE. + ENDPTR is required (unlike strtod) and is used to store a pointer + to the character after the last character used in the conversion. + PRECISION is optional and used to indicate fractions are present. + + Note locale'd grouping is not supported, + nor is skipping of white-space supported. + + Returns: + SSE_OK - valid number. + SSE_OK_PRECISION_LOSS - if more than 18 digits were used. + SSE_OVERFLOW - if more than 27 digits (999Y) were used. + SSE_INVALID_NUMBER - if no digits were found. */ +static enum simple_strtod_error +simple_strtod_float (const char *input_str, + char **endptr, + long double *value, + size_t *precision) +{ + bool negative; + enum simple_strtod_error e = SSE_OK; + + if (precision) + *precision = 0; + + /* TODO: accept locale'd grouped values for the integral part. */ + e = simple_strtod_int (input_str, endptr, value, &negative); + if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS) + return e; + + + /* optional decimal point + fraction. */ + if (STREQ_LEN (*endptr, decimal_point, decimal_point_length)) + { + char *ptr2; + long double val_frac = 0; + bool neg_frac; + + (*endptr) += decimal_point_length; + enum simple_strtod_error e2 = + simple_strtod_int (*endptr, &ptr2, &val_frac, &neg_frac); + if (e2 != SSE_OK && e2 != SSE_OK_PRECISION_LOSS) + return e2; + if (e2 == SSE_OK_PRECISION_LOSS) + e = e2; /* propagate warning. */ + if (neg_frac) + return SSE_INVALID_NUMBER; + + /* number of digits in the fractions. */ + size_t exponent = ptr2 - *endptr; + + val_frac = ((long double) val_frac) / powerld (10, exponent); + + if (value) + { + if (negative) + *value -= val_frac; + else + *value += val_frac; + } + + if (precision) + *precision = exponent; + + *endptr = ptr2; + } + return e; +} + +/* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix", + and return the value in a 'long double' VALUE, + with the precision of the input returned in PRECISION. + ENDPTR is required (unlike strtod) and is used to store a pointer + to the character after the last character used in the conversion. + ALLOWED_SCALING determines the scaling supported. + + TODO: + support locale'd grouping + accept scentific and hex floats (probably use strtold directly) + + Returns: + SSE_OK - valid number. + SSE_OK_PRECISION_LOSS - if more than 18 digits were used. + SSE_OVERFLOW - if more than 27 digits (999Y) were used. + SSE_INVALID_NUMBER - if no digits were found. + SSE_VALID_BUT_FORBIDDEN_SUFFIX + SSE_INVALID_SUFFIX + SSE_MISSING_I_SUFFIX */ +static enum simple_strtod_error +simple_strtod_human (const char *input_str, + char **endptr, long double *value, size_t *precision, + enum scale_type allowed_scaling) +{ + int power = 0; + /* 'scale_auto' is checked below. */ + int scale_base = default_scale_base (allowed_scaling); + + if (dev_debug) + error (0, 0, _("simple_strtod_human:\n input string: '%s'\n " + "locale decimal-point: '%s'\n"), input_str, decimal_point); + + enum simple_strtod_error e = + simple_strtod_float (input_str, endptr, value, precision); + if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS) + return e; + + if (dev_debug) + error (0, 0, _(" parsed numeric value: %Lf\n" + " input precision = %d\n"), *value, (int)*precision); + + if (**endptr != '\0') + { + /* process suffix. */ + + /* Skip any blanks between the number and suffix. */ + while (isblank (**endptr)) + (*endptr)++; + + if (!valid_suffix (**endptr)) + return SSE_INVALID_SUFFIX; + + if (allowed_scaling == scale_none) + return SSE_VALID_BUT_FORBIDDEN_SUFFIX; + + power = suffix_power (**endptr); + (*endptr)++; /* skip first suffix character. */ + + if (allowed_scaling == scale_auto && **endptr == 'i') + { + /* auto-scaling enabled, and the first suffix character + is followed by an 'i' (e.g. Ki, Mi, Gi). */ + scale_base = 1024; + (*endptr)++; /* skip second ('i') suffix character. */ + if (dev_debug) + error (0, 0, _(" Auto-scaling, found 'i', switching to base %d\n"), + scale_base); + } + + *precision = 0; /* Reset, to select precision based on scale. */ + } + + if (allowed_scaling == scale_IEC_I) + { + if (**endptr == 'i') + (*endptr)++; + else + return SSE_MISSING_I_SUFFIX; + } + + long double multiplier = powerld (scale_base, power); + + if (dev_debug) + error (0, 0, _(" suffix power=%d^%d = %Lf\n"), + scale_base, power, multiplier); + + /* TODO: detect loss of precision and overflows. */ + (*value) = (*value) * multiplier; + + if (dev_debug) + error (0, 0, _(" returning value: %Lf (%LG)\n"), *value, *value); + + return e; +} + + +static void +simple_strtod_fatal (enum simple_strtod_error err, char const *input_str) +{ + char const *msgid = NULL; + + switch (err) + { + case SSE_OK_PRECISION_LOSS: + case SSE_OK: + /* should never happen - this function isn't called when OK. */ + abort (); + + case SSE_OVERFLOW: + msgid = N_("value too large to be converted: '%s'"); + break; + + case SSE_INVALID_NUMBER: + msgid = N_("invalid number: '%s'"); + break; + + case SSE_VALID_BUT_FORBIDDEN_SUFFIX: + msgid = N_("rejecting suffix in input: '%s' (consider using --from)"); + break; + + case SSE_INVALID_SUFFIX: + msgid = N_("invalid suffix in input: '%s'"); + break; + + case SSE_MISSING_I_SUFFIX: + msgid = N_("missing 'i' suffix in input: '%s' (e.g Ki/Mi/Gi)"); + break; + + } + + if (_invalid != inval_ignore) + error (conv_exit_code, 0, gettext (msgid), input_str); +} + +/* Convert VAL to a human format string in BUF. */ +static void +double_to_human (long double val, int precision, + char *buf, size_t buf_size, + enum scale_type scale, int group, enum round_type round) +{ + if (dev_debug) + error (0, 0, _("double_to_human:\n")); + + if (scale == scale_none) + { + val *= powerld (10, precision); + val = simple_round (val, round); + val /= powerld (10, precision); + + if (dev_debug) + error (0, 0, + (group) ? + _(" no scaling, returning (grouped) value: %'.*Lf\n") : + _(" no scaling, returning value: %.*Lf\n"), precision, val); + + int i = snprintf (buf, buf_size, (group) ? "%'.*Lf" : "%.*Lf", + precision, val); + if (i < 0 || i >= (int) buf_size) + error (EXIT_FAILURE, 0, + _("failed to prepare value '%Lf' for printing"), val); + return; + } + + /* Scaling requested by user. */ + double scale_base = default_scale_base (scale); + + /* Normalize val to scale. */ + unsigned int power = 0; + val = expld (val, scale_base, &power); + if (dev_debug) + error (0, 0, _(" scaled value to %Lf * %0.f ^ %d\n"), + val, scale_base, power); + + /* Perform rounding. */ + int ten_or_less = 0; + if (absld (val) < 10) + { + /* for values less than 10, we allow one decimal-point digit, + so adjust before rounding. */ + ten_or_less = 1; + val *= 10; + } + val = simple_round (val, round); + /* two special cases after rounding: + 1. a "999.99" can turn into 1000 - so scale down + 2. a "9.99" can turn into 10 - so don't display decimal-point. */ + if (absld (val) >= scale_base) + { + val /= scale_base; + power++; + } + if (ten_or_less) + val /= 10; + + /* should "7.0" be printed as "7" ? + if removing the ".0" is preferred, enable the fourth condition. */ + int show_decimal_point = (val != 0) && (absld (val) < 10) && (power > 0); + /* && (absld (val) > simple_round_floor (val))) */ + + if (dev_debug) + error (0, 0, _(" after rounding, value=%Lf * %0.f ^ %d\n"), + val, scale_base, power); + + snprintf (buf, buf_size, (show_decimal_point) ? "%.1Lf%s" : "%.0Lf%s", + val, suffix_power_character (power)); + + if (scale == scale_IEC_I && power > 0) + strncat (buf, "i", buf_size - strlen (buf) - 1); + + if (dev_debug) + error (0, 0, _(" returning value: '%s'\n"), buf); + + return; +} + +/* Convert a string of decimal digits, N_STRING, with an optional suffix + to an integral value. Upon successful conversion, return that value. + If it cannot be converted, give a diagnostic and exit. */ +static uintmax_t +unit_to_umax (const char *n_string) +{ + strtol_error s_err; + char *end = NULL; + uintmax_t n; + + s_err = xstrtoumax (n_string, &end, 10, &n, "KMGTPEZY"); + + if (s_err != LONGINT_OK || *end || n == 0) + error (EXIT_FAILURE, 0, _("invalid unit size: '%s'"), n_string); + + return n; +} + + +static void +setup_padding_buffer (size_t min_size) +{ + if (padding_buffer_size > min_size) + return; + + padding_buffer_size = min_size + 1; + padding_buffer = realloc (padding_buffer, padding_buffer_size); + if (!padding_buffer) + error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"), + padding_buffer_size); +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTIONS] [NUMBER]\n\ +"), program_name); + fputs (_("\ +Reformat NUMBER(s) from stdin or command arguments.\n\ +"), stdout); + emit_mandatory_arg_note (); + fputs (_("\ + --from=UNIT auto-scale input numbers to UNITs. Default is 'none'.\n\ + See UNIT below.\n\ + --from-unit=N specify the input unit size (instead of the default 1).\n\ + --to=UNIT auto-scale output numbers to UNITs.\n\ + See UNIT below.\n\ + --to-unit=N the output unit size (instead of the default 1).\n\ + --round=METHOD the rounding method to use when scaling. METHOD can be:\n\ + up, down, from-zero (default), towards-zero, nearest\n\ + --suffix=SUFFIX add SUFFIX to output numbers, and accept optional SUFFIX\n\ + in input numbers.\n\ + --padding=N pad the output to N characters.\n\ + Positive N will right-aligned. Negative N will left-align.\n\ + Note: if the output is wider than N, padding is ignored.\n\ + Default is to automatically pad if whitespace is found.\n\ + --grouping group digits together (e.g. 1,000,000).\n\ + Uses the locale-defined grouping (i.e. have no effect\n\ + in C/POSIX locales).\n\ + --header[=N] print (without converting) the first N header lines.\n\ + N defaults to 1 if not specified.\n\ + --field N replace the number in input field N (default is 1)\n\ + -d, --delimiter=X use X instead of whitespace for field delimiter\n\ + --format=FORMAT use printf style floating-point FORMAT.\n\ + See FORMAT below for details.\n\ + --invalid=MODE failure mode for invalid numbers: MODE can be:\n\ + abort (the default), fail, warn, ignore.\n\ + --debug print warnings about invalid input.\n\ + \n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + + + fputs (_("\ +\n\ +UNIT options:\n\ + none No auto-scaling is done. Suffixes will trigger an error.\n\ + auto Accept optional single-letter/two-letter suffix:\n\ + 1K = 1000\n\ + 1Ki = 1024\n\ + 1G = 1000000\n\ + 1Gi = 1048576\n\ + si Accept optional single letter suffix:\n\ + 1K = 1000\n\ + 1G = 1000000\n\ + ...\n\ + iec Accept optional single letter suffix:\n\ + 1K = 1024\n\ + 1G = 1048576\n\ + ...\n\ + iec-i Accept optional two-letter suffix:\n\ + 1Ki = 1024\n\ + 1Gi = 1048576\n\ + ...\n\ +\n\ +"), stdout); + + fputs (_("\ +\n\ +FORMAT must be suitable for printing one floating-point argument '%f'.\n\ +Optional quote (%'f) will enable --grouping (if supported by current locale).\n\ +Optional width value (%10f) will pad output. Optional negative width values\n\ +(%-10f) will left-pad output.\n\ +\n\ +"), stdout); + + printf (_("\ +\n\ +Exit status is 0 if all input numbers were successfully converted.\n\ +By default, %s will stop at the first conversion error with exit status 2.\n\ +With --invalid='fail' a warning is printed for each conversion error\n\ +and the exit status is 2. With --invalid='warn' each conversion error is\n\ +diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\ +errors are not diagnosed and the exit status is 0.\n\ +\n\ +"), program_name); + + + + printf (_("\ +\n\ +Examples:\n\ + $ %s --to=si 1000\n\ + -> \"1.0K\"\n\ + $ %s --to=iec 2048\n\ + -> \"2.0K\"\n\ + $ %s --to=iec-i 4096\n\ + -> \"4.0Ki\"\n\ + $ echo 1K | %s --from=si\n\ + -> \"1000\"\n\ + $ echo 1K | %s --from=iec\n\ + -> \"1024\"\n\ + $ df | %s --header --field 2 --to=si\n\ + $ ls -l | %s --header --field 5 --to=iec\n\ + $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\ + $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n\ +"), + program_name, program_name, program_name, + program_name, program_name, program_name, + program_name, program_name, program_name); + emit_ancillary_info (); + } + exit (status); +} + +/* Given 'fmt' (a printf(3) compatible format string), extracts the following: + 1. padding (e.g. %20f) + 2. alignment (e.g. %-20f) + 3. grouping (e.g. %'f) + + Only a limited subset of printf(3) syntax is supported. + + TODO: + support .precision + support %e %g etc. rather than just %f + + NOTES: + 1. This function sets the global variables: + padding_width, padding_alignment, grouping, + format_str_prefix, format_str_suffix + 2. The function aborts on any errors. */ +static void +parse_format_string (char const *fmt) +{ + size_t i; + size_t prefix_len = 0; + size_t suffix_pos; + long int pad = 0; + char *endptr = NULL; + + for (i = 0; !(fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1) + { + if (!fmt[i]) + error (EXIT_FAILURE, 0, + _("format %s has no %% directive"), quote (fmt)); + prefix_len++; + } + + i++; + i += strspn (fmt + i, " "); + if (fmt[i] == '\'') + { + grouping = 1; + i++; + } + i += strspn (fmt + i, " "); + errno = 0; + pad = strtol (fmt + i, &endptr, 10); + if (errno != 0) + error (EXIT_FAILURE, 0, + _("invalid format %s (width overflow)"), quote (fmt)); + + if (endptr != (fmt + i) && pad != 0) + { + if (pad < 0) + { + padding_alignment = MBS_ALIGN_LEFT; + padding_width = -pad; + } + else + { + padding_width = pad; + } + } + i = endptr - fmt; + + if (fmt[i] == '\0') + error (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt)); + + if (fmt[i] != 'f') + error (EXIT_FAILURE, 0, _("invalid format %s," + " directive must be %%['][-][N]f"), + quote (fmt)); + i++; + suffix_pos = i; + + for (; fmt[i] != '\0'; i += (fmt[i] == '%') + 1) + if (fmt[i] == '%' && fmt[i + 1] != '%') + error (EXIT_FAILURE, 0, _("format %s has too many %% directives"), + quote (fmt)); + + if (prefix_len) + { + format_str_prefix = xstrndup (fmt, prefix_len); + if (!format_str_prefix) + error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"), + prefix_len + 1); + } + if (fmt[suffix_pos] != '\0') + { + format_str_suffix = strdup (fmt + suffix_pos); + if (!format_str_suffix) + error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"), + strlen (fmt + suffix_pos)); + } + + if (dev_debug) + error (0, 0, _("format String:\n input: %s\n grouping: %s\n" + " padding width: %zu\n alignment: %s\n" + " prefix: '%s'\n suffix: '%s'\n"), + quote (fmt), (grouping) ? "yes" : "no", + padding_width, + (padding_alignment == MBS_ALIGN_LEFT) ? "Left" : "Right", + format_str_prefix, format_str_suffix); +} + +/* Parse a numeric value (with optional suffix) from a string. + Returns a long double value, with input precision. + + If there's an error converting the string to value - exits with + an error. + + If there are any trailing characters after the number + (besides a valid suffix) - exits with an error. */ +static enum simple_strtod_error +parse_human_number (const char *str, long double /*output */ *value, + size_t *precision) +{ + char *ptr = NULL; + + enum simple_strtod_error e = + simple_strtod_human (str, &ptr, value, precision, scale_from); + if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS) + { + simple_strtod_fatal (e, str); + return e; + } + + if (ptr && *ptr != '\0') + { + if (_invalid != inval_ignore) + error (conv_exit_code, 0, _("invalid suffix in input '%s': '%s'"), + str, ptr); + e = SSE_INVALID_SUFFIX; + } + return e; +} + + +/* Print the given VAL, using the requested representation. + The number is printed to STDOUT, with padding and alignment. */ +static int +prepare_padded_number (const long double val, size_t precision) +{ + /* Generate Output. */ + char buf[128]; + + /* Can't reliably print too-large values without auto-scaling. */ + unsigned int x; + expld (val, 10, &x); + if (scale_to == scale_none && x > MAX_UNSCALED_DIGITS) + { + if (_invalid != inval_ignore) + error (conv_exit_code, 0, _("value too large to be printed: '%Lg'" + " (consider using --to)"), val); + return 0; + } + + if (x > MAX_ACCEPTABLE_DIGITS - 1) + { + if (_invalid != inval_ignore) + error (conv_exit_code, 0, _("value too large to be printed: '%Lg'" + " (cannot handle values > 999Y)"), val); + return 0; + } + + double_to_human (val, precision, buf, sizeof (buf), scale_to, grouping, + _round); + if (suffix) + strncat (buf, suffix, sizeof (buf) - strlen (buf) -1); + + if (dev_debug) + error (0, 0, _("formatting output:\n value: %Lf\n humanized: '%s'\n"), + val, buf); + + + if (padding_width && strlen (buf) < padding_width) + { + size_t w = padding_width; + mbsalign (buf, padding_buffer, padding_buffer_size, &w, + padding_alignment, MBA_UNIBYTE_ONLY); + + if (dev_debug) + error (0, 0, _(" After padding: '%s'\n"), padding_buffer); + + } + else + { + setup_padding_buffer (strlen (buf) + 1); + strcpy (padding_buffer, buf); + } + + return 1; +} + +static void +print_padded_number (void) +{ + if (format_str_prefix) + fputs (format_str_prefix, stdout); + + fputs (padding_buffer, stdout); + + if (format_str_suffix) + fputs (format_str_suffix, stdout); +} + +/* Converts the TEXT number string to the requested representation, + and handles automatic suffix addition. */ +static int +process_suffixed_number (char *text, long double *result, size_t *precision) +{ + if (suffix && strlen (text) > strlen (suffix)) + { + char *possible_suffix = text + strlen (text) - strlen (suffix); + + if (STREQ (suffix, possible_suffix)) + { + /* trim suffix, ONLY if it's at the end of the text. */ + *possible_suffix = '\0'; + if (dev_debug) + error (0, 0, _("trimming suffix '%s'\n"), suffix); + } + else + { + if (dev_debug) + error (0, 0, _("no valid suffix found\n")); + } + } + + /* Skip white space - always. */ + char *p = text; + while (*p && isblank (*p)) + ++p; + const unsigned int skip_count = text - p; + + /* setup auto-padding. */ + if (auto_padding) + { + if (skip_count > 0 || field > 1) + { + padding_width = strlen (text); + setup_padding_buffer (padding_width); + } + else + { + padding_width = 0; + } + if (dev_debug) + error (0, 0, _("setting Auto-Padding to %ld characters\n"), + padding_width); + } + + long double val = 0; + enum simple_strtod_error e = parse_human_number (p, &val, precision); + if (e == SSE_OK_PRECISION_LOSS && debug) + error (0, 0, _("large input value '%s': possible precision loss"), p); + + if (from_unit_size != 1 || to_unit_size != 1) + val = (val * from_unit_size) / to_unit_size; + + *result = val; + + return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS); +} + +/* Skip the requested number of fields in the input string. + Returns a pointer to the *delimiter* of the requested field, + or a pointer to NUL (if reached the end of the string). */ +static inline char * +__attribute ((pure)) +skip_fields (char *buf, int fields) +{ + char *ptr = buf; + if (delimiter != DELIMITER_DEFAULT) + { + if (*ptr == delimiter) + fields--; + while (*ptr && fields--) + { + while (*ptr && *ptr == delimiter) + ++ptr; + while (*ptr && *ptr != delimiter) + ++ptr; + } + } + else + while (*ptr && fields--) + { + while (*ptr && isblank (*ptr)) + ++ptr; + while (*ptr && !isblank (*ptr)) + ++ptr; + } + return ptr; +} + +/* Parse a delimited string, and extracts the requested field. + NOTE: the input buffer is modified. + + TODO: + Maybe support multiple fields, though can always pipe output + into another numfmt to process other fields. + Maybe default to processing all fields rather than just first? + + Output: + _PREFIX, _DATA, _SUFFIX will point to the relevant positions + in the input string, or be NULL if such a part doesn't exist. */ +static void +extract_fields (char *line, int _field, + char ** _prefix, char ** _data, char ** _suffix) +{ + char *ptr = line; + *_prefix = NULL; + *_data = NULL; + *_suffix = NULL; + + if (dev_debug) + error (0, 0, _("extracting Fields:\n input: '%s'\n field: %d\n"), + line, _field); + + if (field > 1) + { + /* skip the requested number of fields. */ + *_prefix = line; + ptr = skip_fields (line, field - 1); + if (*ptr == '\0') + { + /* not enough fields in the input - print warning? */ + if (dev_debug) + error (0, 0, _(" TOO FEW FIELDS!\n prefix: '%s'\n"), *_prefix); + return; + } + + *ptr = '\0'; + ++ptr; + } + + *_data = ptr; + *_suffix = skip_fields (*_data, 1); + if (**_suffix) + { + /* there is a suffix (i.e. the field is not the last on the line), + so null-terminate the _data before it. */ + **_suffix = '\0'; + ++(*_suffix); + } + else + *_suffix = NULL; + + if (dev_debug) + error (0, 0, _(" prefix: '%s'\n number: '%s'\n suffix: '%s'\n"), + *_prefix, *_data, *_suffix); +} + + +/* Convert a number in a given line of text. + NEWLINE specifies whether to output a '\n' for this "line". */ +static int +process_line (char *line, bool newline) +{ + char *pre, *num, *suf; + long double val = 0; + size_t precision = 0; + int valid_number = 0; + + extract_fields (line, field, &pre, &num, &suf); + if (!num) + if (_invalid != inval_ignore) + error (conv_exit_code, 0, _("input line is too short, " + "no numbers found to convert in field %ld"), + field); + + if (num) + { + valid_number = process_suffixed_number (num, &val, &precision); + if (valid_number) + valid_number = prepare_padded_number (val, precision); + } + + if (pre) + fputs (pre, stdout); + + if (pre && num) + fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout); + + if (valid_number) + { + print_padded_number (); + } + else + { + if (num) + fputs (num, stdout); + } + + if (suf) + { + fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout); + fputs (suf, stdout); + } + + if (newline) + putchar ('\n'); + + return valid_number; +} + +int +main (int argc, char **argv) +{ + int valid_numbers = 1; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + decimal_point = nl_langinfo (RADIXCHAR); + if (decimal_point == NULL || strlen (decimal_point) == 0) + decimal_point = "."; + decimal_point_length = strlen (decimal_point); + + atexit (close_stdout); + + while (true) + { + int c = getopt_long (argc, argv, "d:", longopts, NULL); + + if (c == -1) + break; + + switch (c) + { + case FROM_OPTION: + scale_from = XARGMATCH ("--from", optarg, + scale_from_args, scale_from_types); + break; + + case FROM_UNIT_OPTION: + from_unit_size = unit_to_umax (optarg); + break; + + case TO_OPTION: + scale_to = + XARGMATCH ("--to", optarg, scale_to_args, scale_to_types); + break; + + case TO_UNIT_OPTION: + to_unit_size = unit_to_umax (optarg); + break; + + case ROUND_OPTION: + _round = XARGMATCH ("--round", optarg, round_args, round_types); + break; + + case GROUPING_OPTION: + grouping = 1; + break; + + case PADDING_OPTION: + if (xstrtol (optarg, NULL, 10, &padding_width, "") != LONGINT_OK + || padding_width == 0) + error (EXIT_FAILURE, 0, _("invalid padding value '%s'"), optarg); + if (padding_width < 0) + { + padding_alignment = MBS_ALIGN_LEFT; + padding_width = -padding_width; + } + /* TODO: We probably want to apply a specific --padding + to --header lines too. */ + break; + + case FIELD_OPTION: + if (xstrtol (optarg, NULL, 10, &field, "") != LONGINT_OK + || field <= 0) + error (EXIT_FAILURE, 0, _("invalid field value '%s'"), optarg); + break; + + case 'd': + /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ + if (optarg[0] != '\0' && optarg[1] != '\0') + error (EXIT_FAILURE, 0, + _("the delimiter must be a single character")); + delimiter = optarg[0]; + break; + + case SUFFIX_OPTION: + suffix = optarg; + break; + + case DEBUG_OPTION: + debug = 1; + break; + + case DEV_DEBUG_OPTION: + dev_debug = 1; + debug = 1; + break; + + case HEADER_OPTION: + if (optarg) + { + if (xstrtoumax (optarg, NULL, 10, &header, "") != LONGINT_OK + || header == 0) + error (EXIT_FAILURE, 0, _("invalid header value '%s'"), + optarg); + } + else + { + header = 1; + } + break; + + case FORMAT_OPTION: + format_str = optarg; + break; + + case INVALID_OPTION: + _invalid = XARGMATCH ("--invalid", optarg, inval_args, inval_types); + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (format_str != NULL && grouping) + error (EXIT_FAILURE, 0, _("--grouping cannot be combined with --format")); + if (format_str != NULL && padding_width > 0) + error (EXIT_FAILURE, 0, _("--padding cannot be combined with --format")); + + /* Warn about no-op. */ + if (debug && scale_from == scale_none && scale_to == scale_none + && !grouping && (padding_width == 0) && (format_str == NULL)) + error (0, 0, _("no conversion option specified")); + + if (format_str) + parse_format_string (format_str); + + if (grouping) + { + if (scale_to != scale_none) + error (EXIT_FAILURE, 0, _("grouping cannot be combined with --to")); + if (debug && (strlen (nl_langinfo (THOUSEP)) == 0)) + error (0, 0, _("grouping has no effect in this locale")); + } + + + setup_padding_buffer (padding_width); + auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT); + + if (_invalid != inval_abort) + conv_exit_code = 0; + + if (argc > optind) + { + if (debug && header) + error (0, 0, _("--header ignored with command-line input")); + + for (; optind < argc; optind++) + valid_numbers &= process_line (argv[optind], true); + } + else + { + char *line = NULL; + size_t line_allocated = 0; + ssize_t len; + + while (header-- && getline (&line, &line_allocated, stdin) > 0) + fputs (line, stdout); + + while ((len = getline (&line, &line_allocated, stdin)) > 0) + { + bool newline = line[len - 1] == '\n'; + if (newline) + line[len - 1] = '\0'; + valid_numbers &= process_line (line, newline); + } + + IF_LINT (free (line)); + + if (ferror (stdin)) + error (0, errno, _("error reading input")); + } + + free (padding_buffer); + free (format_str_prefix); + free (format_str_suffix); + + + if (debug && !valid_numbers) + error (0, 0, _("failed to convert some of the input numbers")); + + int exit_status = EXIT_SUCCESS; + if (!valid_numbers && _invalid != inval_warn && _invalid != inval_ignore) + exit_status = EXIT_CONVERSION_WARNINGS; + + exit (exit_status); +} |