diff options
author | Jim Meyering <jim@meyering.net> | 2002-10-19 13:48:04 +0000 |
---|---|---|
committer | Jim Meyering <jim@meyering.net> | 2002-10-19 13:48:04 +0000 |
commit | dce67bd2619d8df59a622ae6c1570e68d46b2930 (patch) | |
tree | 729922b17b35167eecec8d43f13d2d61892d7abc /lib | |
parent | 4774c63a5eaef8ce08726a2c06db0b8e2aff84fe (diff) | |
download | coreutils-dce67bd2619d8df59a622ae6c1570e68d46b2930.tar.xz |
Rewrite to support locale-specific
notations like thousands separators.
Specify what an includer of human.h must include beforehand.
(human_group_digits, human_suppress_point_zero, human_autoscale,
human_base_1024, human_SI, human_B): New enum values.
(human_readable): Rename from human_readable_inexact; put the
options before the sizes. All uses changed. The old human_readable
function has been removed; use inttostr.h instead.
(human_options): Renamed from human_block_size, with new signature
that allows block sizes up to UINTMAX_MAX. All callers changed.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/human.c | 548 | ||||
-rw-r--r-- | lib/human.h | 92 |
2 files changed, 410 insertions, 230 deletions
diff --git a/lib/human.c b/lib/human.c index a9ccf38c4..b4d8694a1 100644 --- a/lib/human.c +++ b/lib/human.c @@ -1,7 +1,7 @@ /* human.c -- print human readable file size - Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 Free Software - Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free + Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,33 +18,42 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* Originally contributed by lm@sgi.com; - --si, output block size selection, and large file support - added by eggert@twinsun.com. */ + --si, output block size selection, large file support, + and grouping added by eggert@twinsun.com. */ #if HAVE_CONFIG_H # include <config.h> #endif -#include <sys/types.h> -#include <stdio.h> - -#if HAVE_LIMITS_H -# include <limits.h> +#if HAVE_STDBOOL_H +# include <stdbool.h> +#else +typedef enum {false = 0, true = 1} bool; #endif -#if HAVE_STRING_H -# include <string.h> +#if HAVE_INTTYPES_H +# include <inttypes.h> #else -# include <strings.h> +# if HAVE_STDINT_H +# include <stdint.h> +# endif #endif +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif +#ifndef UINTMAX_MAX +# define UINTMAX_MAX ((uintmax_t) -1) +#endif + +#include <limits.h> -#ifndef CHAR_BIT -# define CHAR_BIT 8 +#if HAVE_LOCALE_H && HAVE_LOCALECONV +# include <locale.h> #endif + #if HAVE_STDLIB_H # include <stdlib.h> #endif - #ifndef HAVE_DECL_GETENV "this configure-time declaration test was not run" #endif @@ -52,20 +61,30 @@ char *getenv (); #endif -#if ENABLE_NLS -# include <libintl.h> -# define _(Text) gettext (Text) -#else -# define _(Text) Text +#if HAVE_STRING_H +# include <string.h> #endif +#if HAVE_STRINGS_H +# include <strings.h> +#endif + +#include <stdio.h> +#include <sys/types.h> + +#include <gettext.h> +#define _(text) gettext (text) + #include <argmatch.h> #include <error.h> #include 
<xstrtol.h> #include "human.h" -static const char suffixes[] = +/* The maximum length of a suffix like "KiB". */ +#define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3 + +static const char power_letter[] = { 0, /* not used */ 'K', /* kibi ('k' for kilo is a special case) */ @@ -78,33 +97,18 @@ static const char suffixes[] = 'Y' /* yotta or 2**80 */ }; -/* Generate into P[-1] (and possibly P[-2]) the proper suffix for - POWER and BASE. Return the address of the generated suffix. */ -static char * -generate_suffix_backwards (char *p, int power, int base) -{ - char letter = suffixes[power]; - if (base == 1000) - { - *--p = 'B'; - if (power == 1) - letter = 'k'; - } - - *--p = letter; - return p; -} - -/* If INEXACT_STYLE is not human_round_to_even, and if easily +/* If INEXACT_STYLE is not human_round_to_nearest, and if easily possible, adjust VALUE according to the style. */ -static double -adjust_value (enum human_inexact_style inexact_style, double value) + +static long double +adjust_value (int inexact_style, long double value) { - /* Do not use the floor or ceil functions, as that would mean - linking with the standard math library, which is a porting pain. - So leave the value alone if it is too large to easily round. */ - if (inexact_style != human_round_to_even && value < (uintmax_t) -1) + /* Do not use the floorl or ceill functions, as that would mean + checking for their presence and possibly linking with the + standard math library, which is a porting pain. So leave the + value alone if it is too large to easily round. */ + if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX) { uintmax_t u = value; value = u + (inexact_style == human_ceiling && u != value); @@ -113,50 +117,106 @@ adjust_value (enum human_inexact_style inexact_style, double value) return value; } -/* Like human_readable_inexact, except always round to even. 
*/ -char * -human_readable (uintmax_t n, char *buf, - int from_block_size, int output_block_size) +/* Group the digits of NUMBER according to the grouping rules of the + current locale. NUMBER contains NUMBERLEN digits. Modify the + bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for + each byte inserted. Return the starting address of the modified + number. + + To group the digits, use GROUPING and THOUSANDS_SEP as in `struct + lconv' from <locale.h>. */ + +static char * +group_number (char *number, size_t numberlen, + char const *grouping, char const *thousands_sep) { - return human_readable_inexact (n, buf, from_block_size, output_block_size, - human_round_to_even); + register char *d; + size_t grouplen = SIZE_MAX; + size_t thousands_seplen = strlen (thousands_sep); + size_t i = numberlen; + + /* The maximum possible value for NUMBERLEN is the number of digits + in the square of the largest uintmax_t, so double the size of + uintmax_t before converting to a bound. 302 / 1000 is ceil + (log10 (2.0)). Add 1 for integer division truncation. */ + char buf[2 * sizeof (uintmax_t) * CHAR_BIT * 302 / 1000 + 1]; + + memcpy (buf, number, numberlen); + d = number + numberlen; + + for (;;) + { + unsigned char g = *grouping; + + if (g) + { + grouplen = g < CHAR_MAX ? g : i; + grouping++; + } + + if (i < grouplen) + grouplen = i; + + d -= grouplen; + i -= grouplen; + memcpy (d, buf + i, grouplen); + if (i == 0) + return d; + + d -= thousands_seplen; + memcpy (d, thousands_sep, thousands_seplen); + } } -/* Convert N to a human readable format in BUF. +/* Convert N to a human readable format in BUF, using the options OPTS. N is expressed in units of FROM_BLOCK_SIZE. FROM_BLOCK_SIZE must be nonnegative. - OUTPUT_BLOCK_SIZE must be nonzero. If it is positive, use units of - OUTPUT_BLOCK_SIZE in the output number. - - Use INEXACT_STYLE to determine whether to take the ceiling or floor - of any result that cannot be expressed exactly. 
- - If OUTPUT_BLOCK_SIZE is negative, use a format like "127K" if - possible, using powers of -OUTPUT_BLOCK_SIZE; otherwise, use - ordinary decimal format. Normally -OUTPUT_BLOCK_SIZE is either - 1000 or 1024; it must be at least 2. Most people visually process - strings of 3-4 digits effectively, but longer strings of digits are - more prone to misinterpretation. Hence, converting to an - abbreviated form usually improves readability. Use a suffix - indicating which power is being used. For example, assuming - -OUTPUT_BLOCK_SIZE is 1024, 8500 would be converted to 8.3K, - 133456345 to 127M, 56990456345 to 53G, and so on. Numbers smaller - than -OUTPUT_BLOCK_SIZE aren't modified. If -OUTPUT_BLOCK_SIZE is - 1024, append a "B" after any size letter. */ + Use units of TO_BLOCK_SIZE in the output number. TO_BLOCK_SIZE + must be positive. + + Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling)) + to determine whether to take the ceiling or floor of any result + that cannot be expressed exactly. + + If (OPTS & human_group_digits), group the thousands digits + according to the locale, e.g., `1,000,000' in an American English + locale. + + If (OPTS & human_autoscale), deduce the output block size + automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the + output. Use powers of 1024 if (OPTS & human_base_1024), and powers + of 1000 otherwise. For example, assuming powers of 1024, 8500 + would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and + so on. Numbers smaller than the power aren't modified. + human_autoscale is normally used together with human_SI. + + If (OPTS & human_SI), append an SI prefix indicating which power is + being used. If in addition (OPTS & human_B), append "B" (if base + 1000) or "iB" (if base 1024) to the SI prefix. When ((OPTS & + human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a + power of 1024 or of 1000, depending on (OPTS & + human_base_1024). 
*/ char * -human_readable_inexact (uintmax_t n, char *buf, - int from_block_size, int output_block_size, - enum human_inexact_style inexact_style) +human_readable (uintmax_t n, char *buf, int opts, + uintmax_t from_block_size, uintmax_t to_block_size) { + int inexact_style = + opts & (human_round_to_nearest | human_floor | human_ceiling); + unsigned int base = opts & human_base_1024 ? 1024 : 1000; uintmax_t amt; - int base; - int to_block_size; + uintmax_t multiplier; + uintmax_t divisor; + uintmax_t r2; + uintmax_t r10; int tenths = 0; - int power; + int exponent = -1; + int exponent_max = sizeof power_letter - 1; char *p; + char *psuffix; + char const *integerlim; /* 0 means adjusted N == AMT.TENTHS; 1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05; @@ -164,148 +224,203 @@ human_readable_inexact (uintmax_t n, char *buf, 3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1. */ int rounding = 0; - if (output_block_size < 0) + char const *decimal_point = "."; + size_t decimal_pointlen = 1; + char const *grouping = ""; + char const *thousands_sep = ""; +#if HAVE_LOCALE_H && HAVE_LOCALECONV + struct lconv const *l = localeconv (); + size_t pointlen = strlen (l->decimal_point); + if (0 < pointlen && pointlen <= MB_LEN_MAX) { - base = -output_block_size; - to_block_size = 1; + decimal_point = l->decimal_point; + decimal_pointlen = pointlen; } - else - { - base = 0; - to_block_size = output_block_size; - } - - p = buf + LONGEST_HUMAN_READABLE; - *p = '\0'; - -#ifdef lint - /* Suppress `used before initialized' warning. */ - power = 0; + grouping = l->grouping; + if (strlen (l->thousands_sep) <= MB_LEN_MAX) + thousands_sep = l->thousands_sep; #endif + psuffix = buf + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX; + p = psuffix; + /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE units. */ - { - int multiplier; - int divisor; - int r2; - int r10; - if (to_block_size <= from_block_size - ? 
(from_block_size % to_block_size != 0 - || (multiplier = from_block_size / to_block_size, - (amt = n * multiplier) / multiplier != n)) - : (from_block_size == 0 - || to_block_size % from_block_size != 0 - || (divisor = to_block_size / from_block_size, - r10 = (n % divisor) * 10, - r2 = (r10 % divisor) * 2, - amt = n / divisor, - tenths = r10 / divisor, - rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2), - 0))) - { - /* Either the result cannot be computed easily using uintmax_t, - or from_block_size is zero. Fall back on floating point. - FIXME: This can yield answers that are slightly off. */ - - double damt = n * (from_block_size / (double) to_block_size); - - if (! base) - sprintf (buf, "%.0f", adjust_value (inexact_style, damt)); - else - { - char suffix[3]; - char const *psuffix; - double e = 1; - power = 0; - - do - { - e *= base; - power++; - } - while (e * base <= damt && power < sizeof suffixes - 1); + if (to_block_size <= from_block_size + ? (from_block_size % to_block_size != 0 + || (multiplier = from_block_size / to_block_size, + (amt = n * multiplier) / multiplier != n)) + : (from_block_size == 0 + || to_block_size % from_block_size != 0 + || (divisor = to_block_size / from_block_size, + r10 = (n % divisor) * 10, + r2 = (r10 % divisor) * 2, + amt = n / divisor, + tenths = r10 / divisor, + rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2), + 0))) + { + /* Either the result cannot be computed easily using uintmax_t, + or from_block_size is zero. Fall back on floating point. + FIXME: This can yield answers that are slightly off. 
*/ - damt /= e; + long double dto_block_size = to_block_size; + long double damt = n * (from_block_size / dto_block_size); + size_t buflen; + size_t nonintegerlen; - suffix[2] = '\0'; - psuffix = generate_suffix_backwards (suffix + 2, power, base); - sprintf (buf, "%.1f%s", - adjust_value (inexact_style, damt), psuffix); - if (4 + (base == 1000) < strlen (buf)) - sprintf (buf, "%.0f%s", - adjust_value (inexact_style, damt * 10) / 10, psuffix); - } + if (! (opts & human_autoscale)) + { + sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt)); + buflen = strlen (buf); + nonintegerlen = 0; + } + else + { + size_t buflen; + long double e = 1; + exponent = 0; - return buf; - } - } + do + { + e *= base; + exponent++; + } + while (e * base <= damt && exponent < exponent_max); - /* Use power of BASE notation if adjusted AMT is large enough. */ + damt /= e; - if (base && base <= amt) - { - power = 0; + sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt)); + buflen = strlen (buf); + nonintegerlen = decimal_pointlen + 1; - do - { - int r10 = (amt % base) * 10 + tenths; - int r2 = (r10 % base) * 2 + (rounding >> 1); - amt /= base; - tenths = r10 / base; - rounding = (r2 < base - ? 0 < r2 + rounding - : 2 + (base < r2 + rounding)); - power++; + if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen + || ((opts & human_suppress_point_zero) + && buf[buflen - 1] == '0')) + { + sprintf (buf, "%.0Lf", + adjust_value (inexact_style, damt * 10) / 10); + buflen = strlen (buf); + nonintegerlen = 0; + } } - while (base <= amt && power < sizeof suffixes - 1); - p = generate_suffix_backwards (p, power, base); + p = psuffix - buflen; + memmove (p, buf, buflen); + integerlim = p + buflen - nonintegerlen; + } + else + { + /* Use power of BASE notation if requested and if adjusted AMT + is large enough. 
*/ - if (amt < 10) + if (opts & human_autoscale) { - if (2 * (1 - (int) inexact_style) - < rounding + (tenths & (inexact_style == human_round_to_even))) + exponent = 0; + + if (base <= amt) { - tenths++; - rounding = 0; + do + { + unsigned r10 = (amt % base) * 10 + tenths; + unsigned r2 = (r10 % base) * 2 + (rounding >> 1); + amt /= base; + tenths = r10 / base; + rounding = (r2 < base + ? (r2 + rounding) != 0 + : 2 + (base < r2 + rounding)); + exponent++; + } + while (base <= amt && exponent < exponent_max); - if (tenths == 10) + if (amt < 10) { - amt++; - tenths = 0; + if (inexact_style == human_round_to_nearest + ? 2 < rounding + (tenths & 1) + : inexact_style == human_ceiling && 0 < rounding) + { + tenths++; + rounding = 0; + + if (tenths == 10) + { + amt++; + tenths = 0; + } + } + + if (amt < 10 + && (tenths || ! (opts & human_suppress_point_zero))) + { + *--p = '0' + tenths; + p -= decimal_pointlen; + memcpy (p, decimal_point, decimal_pointlen); + tenths = rounding = 0; + } } } + } + + if (inexact_style == human_ceiling + ? 0 < tenths + rounding + : inexact_style == human_round_to_nearest + ? 5 < tenths + (2 < rounding + (amt & 1)) + : /* inexact_style == human_floor */ 0) + { + amt++; - if (amt < 10) + if ((opts & human_autoscale) + && amt == base && exponent < exponent_max) { - *--p = '0' + tenths; - *--p = '.'; - tenths = rounding = 0; + exponent++; + if (! (opts & human_suppress_point_zero)) + { + *--p = '0'; + p -= decimal_pointlen; + memcpy (p, decimal_point, decimal_pointlen); + } + amt = 1; } } + + integerlim = p; + + do + { + int digit = amt % 10; + *--p = digit + '0'; + } + while ((amt /= 10) != 0); } - if (inexact_style == human_ceiling - ? 0 < tenths + rounding - : inexact_style == human_round_to_even - ? 
5 < tenths + (2 < rounding + (amt & 1)) - : /* inexact_style == human_floor */ 0) + if (opts & human_group_digits) + p = group_number (p, integerlim - p, grouping, thousands_sep); + + if (opts & human_SI) { - amt++; + if (exponent < 0) + { + uintmax_t power; + exponent = 0; + for (power = 1; power < to_block_size; power *= base) + if (++exponent == exponent_max) + break; + } + + if (exponent) + *psuffix++ = (! (opts & human_base_1024) && exponent == 1 + ? 'k' + : power_letter[exponent]); - if (amt == base && power < sizeof suffixes - 1) + if (opts & human_B) { - *p = suffixes[power + 1]; - *--p = '0'; - *--p = '.'; - amt = 1; + if ((opts & human_base_1024) && exponent) + *psuffix++ = 'i'; + *psuffix++ = 'B'; } } - do - *--p = '0' + (int) (amt % 10); - while ((amt /= 10) != 0); + *psuffix = '\0'; return p; } @@ -318,44 +433,70 @@ human_readable_inexact (uintmax_t n, char *buf, #endif static char const *const block_size_args[] = { "human-readable", "si", 0 }; -static int const block_size_types[] = { -1024, -1000 }; +static int const block_size_opts[] = + { + human_autoscale + human_SI + human_base_1024, + human_autoscale + human_SI + }; -static int +static uintmax_t default_block_size (void) { return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE; } static strtol_error -humblock (char const *spec, int *block_size) +humblock (char const *spec, uintmax_t *block_size, int *options) { int i; + int opts = 0; if (! spec && ! 
(spec = getenv ("BLOCK_SIZE"))) *block_size = default_block_size (); - else if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_types))) - *block_size = block_size_types[i]; else { - char *ptr; - unsigned long val; - strtol_error e = xstrtoul (spec, &ptr, 0, &val, "eEgGkKmMpPtTyYzZ0"); - if (e != LONGINT_OK) - return e; - if (*ptr) - return LONGINT_INVALID_SUFFIX_CHAR; - if ((int) val < 0 || val != (int) val) - return LONGINT_OVERFLOW; - *block_size = (int) val; + if (*spec == '\'') + { + opts |= human_group_digits; + spec++; + } + + if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts))) + { + opts |= block_size_opts[i]; + *block_size = 1; + } + else + { + char *ptr; + strtol_error e = xstrtoumax (spec, &ptr, 0, block_size, + "eEgGkKmMpPtTyYzZ0"); + if (e != LONGINT_OK) + return e; + if (*ptr) + return LONGINT_INVALID_SUFFIX_CHAR; + for (; ! ('0' <= *spec && *spec <= '9'); spec++) + if (spec == ptr) + { + opts |= human_SI; + if (ptr[-1] == 'B') + opts |= human_B; + if (ptr[-1] != 'B' || ptr[-2] == 'i') + opts |= human_base_1024; + break; + } + } } + *options = opts; return LONGINT_OK; } -void -human_block_size (char const *spec, int report_errors, int *block_size) +int +human_options (char const *spec, bool report_errors, uintmax_t *block_size) { - strtol_error e = humblock (spec, block_size); + int opts; + strtol_error e = humblock (spec, block_size, &opts); if (*block_size == 0) { *block_size = default_block_size (); @@ -363,4 +504,5 @@ human_block_size (char const *spec, int report_errors, int *block_size) } if (e != LONGINT_OK && report_errors) STRTOL_FATAL_ERROR (spec, _("block size"), e); + return opts; } diff --git a/lib/human.h b/lib/human.h index 4ec9f0da8..343fff7e8 100644 --- a/lib/human.h +++ b/lib/human.h @@ -1,39 +1,77 @@ #ifndef HUMAN_H_ # define HUMAN_H_ 1 -# if HAVE_CONFIG_H -# include <config.h> -# endif +/* Before including this file, you need something like the following: -# if HAVE_INTTYPES_H -# include <inttypes.h> -# endif + 
#if HAVE_CONFIG_H + # include <config.h> + #endif + + #if HAVE_STDBOOL_H + # include <stdbool.h> + #else + typedef enum {false = 0, true = 1} bool; + #endif + + #if HAVE_INTTYPES_H + # include <inttypes.h> + #else + # if HAVE_STDINT_H + # include <stdint.h> + # endif + #endif + + #include <limits.h> + + so that the proper identifiers are all declared. */ /* A conservative bound on the maximum length of a human-readable string. - The output can be the product of the largest uintmax_t and the largest int, - so add their sizes before converting to a bound on digits. */ -# define LONGEST_HUMAN_READABLE ((sizeof (uintmax_t) + sizeof (int)) \ - * CHAR_BIT / 3) - -# ifndef PARAMS -# if defined PROTOTYPES || (defined __STDC__ && __STDC__) -# define PARAMS(Args) Args -# else -# define PARAMS(Args) () -# endif -# endif - -enum human_inexact_style + The output can be the square of the largest uintmax_t, so double + its size before converting to a bound. + 302 / 1000 is ceil (log10 (2.0)). Add 1 for integer division truncation. + Also, the output can have a thousands separator between every digit, + so multiply by MB_LEN_MAX + 1 and then subtract MB_LEN_MAX. + Finally, append 3, the maximum length of a suffix. */ +# define LONGEST_HUMAN_READABLE \ + ((2 * sizeof (uintmax_t) * CHAR_BIT * 302 / 1000 + 1) * (MB_LEN_MAX + 1) \ + - MB_LEN_MAX + 3) + +/* Options for human_readable. */ +enum { - human_floor = -1, - human_round_to_even = 0, - human_ceiling = 1 + /* Unless otherwise specified these options may be ORed together. */ + + /* The following three options are mutually exclusive. */ + /* Round to plus infinity (default). */ + human_ceiling = 0, + /* Round to nearest, ties to even. */ + human_round_to_nearest = 1, + /* Round to minus infinity. */ + human_floor = 2, + + /* Group digits together, e.g. `1,000,000'. This uses the + locale-defined grouping; the traditional C locale does not group, + so this has effect only if some other locale is in use. 
*/ + human_group_digits = 4, + + /* When autoscaling, suppress ".0" at end. */ + human_suppress_point_zero = 8, + + /* Scale output and use SI-style units, ignoring the output block size. */ + human_autoscale = 16, + + /* Prefer base 1024 to base 1000. */ + human_base_1024 = 32, + + /* Append SI prefix, e.g. "k" or "M". */ + human_SI = 64, + + /* Append "B" (if base 1000) or "iB" (if base 1024) to SI prefix. */ + human_B = 128 }; -char *human_readable PARAMS ((uintmax_t, char *, int, int)); -char *human_readable_inexact PARAMS ((uintmax_t, char *, int, int, - enum human_inexact_style)); +char *human_readable (uintmax_t, char *, int, uintmax_t, uintmax_t); -void human_block_size PARAMS ((char const *, int, int *)); +int human_options (char const *, bool, uintmax_t *); #endif /* HUMAN_H_ */ |