summary | refs | log | tree | commit | diff
path: root/lib/human.c
diff options
context:
space:
mode:
author Jim Meyering <jim@meyering.net> 2002-10-19 13:48:04 +0000
committer Jim Meyering <jim@meyering.net> 2002-10-19 13:48:04 +0000
commit dce67bd2619d8df59a622ae6c1570e68d46b2930 (patch)
tree 729922b17b35167eecec8d43f13d2d61892d7abc /lib/human.c
parent 4774c63a5eaef8ce08726a2c06db0b8e2aff84fe (diff)
download coreutils-dce67bd2619d8df59a622ae6c1570e68d46b2930.tar.xz
Rewrite to support locale-specific notations like thousands separators.
Specify what includer of include.h must include beforehand.
(human_group_digits, human_suppress_point_zero, human_autoscale, human_base_1024, human_SI, human_B): New enum values.
(human_readable): Rename from human_readable_inexact; put the options before the sizes. All uses changed. The old human_readable function has been removed; use inttostr.h instead.
(human_options): Rename from human_block_size, with new signature that allows block sizes up to UINTMAX_MAX. All callers changed.
Diffstat (limited to 'lib/human.c')
-rw-r--r-- lib/human.c 548
1 files changed, 345 insertions, 203 deletions
diff --git a/lib/human.c b/lib/human.c
index a9ccf38c4..b4d8694a1 100644
--- a/lib/human.c
+++ b/lib/human.c
@@ -1,7 +1,7 @@
/* human.c -- print human readable file size
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 Free Software
- Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free
+ Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,33 +18,42 @@
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Originally contributed by lm@sgi.com;
- --si, output block size selection, and large file support
- added by eggert@twinsun.com. */
+ --si, output block size selection, large file support,
+ and grouping added by eggert@twinsun.com. */
#if HAVE_CONFIG_H
# include <config.h>
#endif
-#include <sys/types.h>
-#include <stdio.h>
-
-#if HAVE_LIMITS_H
-# include <limits.h>
+#if HAVE_STDBOOL_H
+# include <stdbool.h>
+#else
+typedef enum {false = 0, true = 1} bool;
#endif
-#if HAVE_STRING_H
-# include <string.h>
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
#else
-# include <strings.h>
+# if HAVE_STDINT_H
+# include <stdint.h>
+# endif
#endif
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+#ifndef UINTMAX_MAX
+# define UINTMAX_MAX ((uintmax_t) -1)
+#endif
+
+#include <limits.h>
-#ifndef CHAR_BIT
-# define CHAR_BIT 8
+#if HAVE_LOCALE_H && HAVE_LOCALECONV
+# include <locale.h>
#endif
+
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif
-
#ifndef HAVE_DECL_GETENV
"this configure-time declaration test was not run"
#endif
@@ -52,20 +61,30 @@
char *getenv ();
#endif
-#if ENABLE_NLS
-# include <libintl.h>
-# define _(Text) gettext (Text)
-#else
-# define _(Text) Text
+#if HAVE_STRING_H
+# include <string.h>
#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#include <gettext.h>
+#define _(text) gettext (text)
+
#include <argmatch.h>
#include <error.h>
#include <xstrtol.h>
#include "human.h"
-static const char suffixes[] =
+/* The maximum length of a suffix like "KiB". */
+#define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3
+
+static const char power_letter[] =
{
0, /* not used */
'K', /* kibi ('k' for kilo is a special case) */
@@ -78,33 +97,18 @@ static const char suffixes[] =
'Y' /* yotta or 2**80 */
};
-/* Generate into P[-1] (and possibly P[-2]) the proper suffix for
- POWER and BASE. Return the address of the generated suffix. */
-static char *
-generate_suffix_backwards (char *p, int power, int base)
-{
- char letter = suffixes[power];
- if (base == 1000)
- {
- *--p = 'B';
- if (power == 1)
- letter = 'k';
- }
-
- *--p = letter;
- return p;
-}
-
-/* If INEXACT_STYLE is not human_round_to_even, and if easily
+/* If INEXACT_STYLE is not human_round_to_nearest, and if easily
possible, adjust VALUE according to the style. */
-static double
-adjust_value (enum human_inexact_style inexact_style, double value)
+
+static long double
+adjust_value (int inexact_style, long double value)
{
- /* Do not use the floor or ceil functions, as that would mean
- linking with the standard math library, which is a porting pain.
- So leave the value alone if it is too large to easily round. */
- if (inexact_style != human_round_to_even && value < (uintmax_t) -1)
+ /* Do not use the floorl or ceill functions, as that would mean
+ checking for their presence and possibly linking with the
+ standard math library, which is a porting pain. So leave the
+ value alone if it is too large to easily round. */
+ if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
{
uintmax_t u = value;
value = u + (inexact_style == human_ceiling && u != value);
@@ -113,50 +117,106 @@ adjust_value (enum human_inexact_style inexact_style, double value)
return value;
}
-/* Like human_readable_inexact, except always round to even. */
-char *
-human_readable (uintmax_t n, char *buf,
- int from_block_size, int output_block_size)
+/* Group the digits of NUMBER according to the grouping rules of the
+ current locale. NUMBER contains NUMBERLEN digits. Modify the
+ bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for
+ each byte inserted. Return the starting address of the modified
+ number.
+
+ To group the digits, use GROUPING and THOUSANDS_SEP as in `struct
+ lconv' from <locale.h>. */
+
+static char *
+group_number (char *number, size_t numberlen,
+ char const *grouping, char const *thousands_sep)
{
- return human_readable_inexact (n, buf, from_block_size, output_block_size,
- human_round_to_even);
+ register char *d;
+ size_t grouplen = SIZE_MAX;
+ size_t thousands_seplen = strlen (thousands_sep);
+ size_t i = numberlen;
+
+ /* The maximum possible value for NUMBERLEN is the number of digits
+ in the square of the largest uintmax_t, so double the size of
+ uintmax_t before converting to a bound. 302 / 1000 is ceil
+ (log10 (2.0)). Add 1 for integer division truncation. */
+ char buf[2 * sizeof (uintmax_t) * CHAR_BIT * 302 / 1000 + 1];
+
+ memcpy (buf, number, numberlen);
+ d = number + numberlen;
+
+ for (;;)
+ {
+ unsigned char g = *grouping;
+
+ if (g)
+ {
+ grouplen = g < CHAR_MAX ? g : i;
+ grouping++;
+ }
+
+ if (i < grouplen)
+ grouplen = i;
+
+ d -= grouplen;
+ i -= grouplen;
+ memcpy (d, buf + i, grouplen);
+ if (i == 0)
+ return d;
+
+ d -= thousands_seplen;
+ memcpy (d, thousands_sep, thousands_seplen);
+ }
}
-/* Convert N to a human readable format in BUF.
+/* Convert N to a human readable format in BUF, using the options OPTS.
N is expressed in units of FROM_BLOCK_SIZE. FROM_BLOCK_SIZE must
be nonnegative.
- OUTPUT_BLOCK_SIZE must be nonzero. If it is positive, use units of
- OUTPUT_BLOCK_SIZE in the output number.
-
- Use INEXACT_STYLE to determine whether to take the ceiling or floor
- of any result that cannot be expressed exactly.
-
- If OUTPUT_BLOCK_SIZE is negative, use a format like "127K" if
- possible, using powers of -OUTPUT_BLOCK_SIZE; otherwise, use
- ordinary decimal format. Normally -OUTPUT_BLOCK_SIZE is either
- 1000 or 1024; it must be at least 2. Most people visually process
- strings of 3-4 digits effectively, but longer strings of digits are
- more prone to misinterpretation. Hence, converting to an
- abbreviated form usually improves readability. Use a suffix
- indicating which power is being used. For example, assuming
- -OUTPUT_BLOCK_SIZE is 1024, 8500 would be converted to 8.3K,
- 133456345 to 127M, 56990456345 to 53G, and so on. Numbers smaller
- than -OUTPUT_BLOCK_SIZE aren't modified. If -OUTPUT_BLOCK_SIZE is
- 1024, append a "B" after any size letter. */
+ Use units of TO_BLOCK_SIZE in the output number. TO_BLOCK_SIZE
+ must be positive.
+
+ Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
+ to determine whether to take the ceiling or floor of any result
+ that cannot be expressed exactly.
+
+ If (OPTS & human_group_digits), group the thousands digits
+ according to the locale, e.g., `1,000,000' in an American English
+ locale.
+
+ If (OPTS & human_autoscale), deduce the output block size
+ automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
+ output. Use powers of 1024 if (OPTS & human_base_1024), and powers
+ of 1000 otherwise. For example, assuming powers of 1024, 8500
+ would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
+ so on. Numbers smaller than the power aren't modified.
+ human_autoscale is normally used together with human_SI.
+
+ If (OPTS & human_SI), append an SI prefix indicating which power is
+ being used. If in addition (OPTS & human_B), append "B" (if base
+ 1000) or "iB" (if base 1024) to the SI prefix. When ((OPTS &
+ human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
+ power of 1024 or of 1000, depending on (OPTS &
+ human_base_1024). */
char *
-human_readable_inexact (uintmax_t n, char *buf,
- int from_block_size, int output_block_size,
- enum human_inexact_style inexact_style)
+human_readable (uintmax_t n, char *buf, int opts,
+ uintmax_t from_block_size, uintmax_t to_block_size)
{
+ int inexact_style =
+ opts & (human_round_to_nearest | human_floor | human_ceiling);
+ unsigned int base = opts & human_base_1024 ? 1024 : 1000;
uintmax_t amt;
- int base;
- int to_block_size;
+ uintmax_t multiplier;
+ uintmax_t divisor;
+ uintmax_t r2;
+ uintmax_t r10;
int tenths = 0;
- int power;
+ int exponent = -1;
+ int exponent_max = sizeof power_letter - 1;
char *p;
+ char *psuffix;
+ char const *integerlim;
/* 0 means adjusted N == AMT.TENTHS;
1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
@@ -164,148 +224,203 @@ human_readable_inexact (uintmax_t n, char *buf,
3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1. */
int rounding = 0;
- if (output_block_size < 0)
+ char const *decimal_point = ".";
+ size_t decimal_pointlen = 1;
+ char const *grouping = "";
+ char const *thousands_sep = "";
+#if HAVE_LOCALE_H && HAVE_LOCALECONV
+ struct lconv const *l = localeconv ();
+ size_t pointlen = strlen (l->decimal_point);
+ if (0 < pointlen && pointlen <= MB_LEN_MAX)
{
- base = -output_block_size;
- to_block_size = 1;
+ decimal_point = l->decimal_point;
+ decimal_pointlen = pointlen;
}
- else
- {
- base = 0;
- to_block_size = output_block_size;
- }
-
- p = buf + LONGEST_HUMAN_READABLE;
- *p = '\0';
-
-#ifdef lint
- /* Suppress `used before initialized' warning. */
- power = 0;
+ grouping = l->grouping;
+ if (strlen (l->thousands_sep) <= MB_LEN_MAX)
+ thousands_sep = l->thousands_sep;
#endif
+ psuffix = buf + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
+ p = psuffix;
+
/* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE units. */
- {
- int multiplier;
- int divisor;
- int r2;
- int r10;
- if (to_block_size <= from_block_size
- ? (from_block_size % to_block_size != 0
- || (multiplier = from_block_size / to_block_size,
- (amt = n * multiplier) / multiplier != n))
- : (from_block_size == 0
- || to_block_size % from_block_size != 0
- || (divisor = to_block_size / from_block_size,
- r10 = (n % divisor) * 10,
- r2 = (r10 % divisor) * 2,
- amt = n / divisor,
- tenths = r10 / divisor,
- rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2),
- 0)))
- {
- /* Either the result cannot be computed easily using uintmax_t,
- or from_block_size is zero. Fall back on floating point.
- FIXME: This can yield answers that are slightly off. */
-
- double damt = n * (from_block_size / (double) to_block_size);
-
- if (! base)
- sprintf (buf, "%.0f", adjust_value (inexact_style, damt));
- else
- {
- char suffix[3];
- char const *psuffix;
- double e = 1;
- power = 0;
-
- do
- {
- e *= base;
- power++;
- }
- while (e * base <= damt && power < sizeof suffixes - 1);
+ if (to_block_size <= from_block_size
+ ? (from_block_size % to_block_size != 0
+ || (multiplier = from_block_size / to_block_size,
+ (amt = n * multiplier) / multiplier != n))
+ : (from_block_size == 0
+ || to_block_size % from_block_size != 0
+ || (divisor = to_block_size / from_block_size,
+ r10 = (n % divisor) * 10,
+ r2 = (r10 % divisor) * 2,
+ amt = n / divisor,
+ tenths = r10 / divisor,
+ rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2),
+ 0)))
+ {
+ /* Either the result cannot be computed easily using uintmax_t,
+ or from_block_size is zero. Fall back on floating point.
+ FIXME: This can yield answers that are slightly off. */
- damt /= e;
+ long double dto_block_size = to_block_size;
+ long double damt = n * (from_block_size / dto_block_size);
+ size_t buflen;
+ size_t nonintegerlen;
- suffix[2] = '\0';
- psuffix = generate_suffix_backwards (suffix + 2, power, base);
- sprintf (buf, "%.1f%s",
- adjust_value (inexact_style, damt), psuffix);
- if (4 + (base == 1000) < strlen (buf))
- sprintf (buf, "%.0f%s",
- adjust_value (inexact_style, damt * 10) / 10, psuffix);
- }
+ if (! (opts & human_autoscale))
+ {
+ sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
+ buflen = strlen (buf);
+ nonintegerlen = 0;
+ }
+ else
+ {
+ size_t buflen;
+ long double e = 1;
+ exponent = 0;
- return buf;
- }
- }
+ do
+ {
+ e *= base;
+ exponent++;
+ }
+ while (e * base <= damt && exponent < exponent_max);
- /* Use power of BASE notation if adjusted AMT is large enough. */
+ damt /= e;
- if (base && base <= amt)
- {
- power = 0;
+ sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
+ buflen = strlen (buf);
+ nonintegerlen = decimal_pointlen + 1;
- do
- {
- int r10 = (amt % base) * 10 + tenths;
- int r2 = (r10 % base) * 2 + (rounding >> 1);
- amt /= base;
- tenths = r10 / base;
- rounding = (r2 < base
- ? 0 < r2 + rounding
- : 2 + (base < r2 + rounding));
- power++;
+ if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
+ || ((opts & human_suppress_point_zero)
+ && buf[buflen - 1] == '0'))
+ {
+ sprintf (buf, "%.0Lf",
+ adjust_value (inexact_style, damt * 10) / 10);
+ buflen = strlen (buf);
+ nonintegerlen = 0;
+ }
}
- while (base <= amt && power < sizeof suffixes - 1);
- p = generate_suffix_backwards (p, power, base);
+ p = psuffix - buflen;
+ memmove (p, buf, buflen);
+ integerlim = p + buflen - nonintegerlen;
+ }
+ else
+ {
+ /* Use power of BASE notation if requested and if adjusted AMT
+ is large enough. */
- if (amt < 10)
+ if (opts & human_autoscale)
{
- if (2 * (1 - (int) inexact_style)
- < rounding + (tenths & (inexact_style == human_round_to_even)))
+ exponent = 0;
+
+ if (base <= amt)
{
- tenths++;
- rounding = 0;
+ do
+ {
+ unsigned r10 = (amt % base) * 10 + tenths;
+ unsigned r2 = (r10 % base) * 2 + (rounding >> 1);
+ amt /= base;
+ tenths = r10 / base;
+ rounding = (r2 < base
+ ? (r2 + rounding) != 0
+ : 2 + (base < r2 + rounding));
+ exponent++;
+ }
+ while (base <= amt && exponent < exponent_max);
- if (tenths == 10)
+ if (amt < 10)
{
- amt++;
- tenths = 0;
+ if (inexact_style == human_round_to_nearest
+ ? 2 < rounding + (tenths & 1)
+ : inexact_style == human_ceiling && 0 < rounding)
+ {
+ tenths++;
+ rounding = 0;
+
+ if (tenths == 10)
+ {
+ amt++;
+ tenths = 0;
+ }
+ }
+
+ if (amt < 10
+ && (tenths || ! (opts & human_suppress_point_zero)))
+ {
+ *--p = '0' + tenths;
+ p -= decimal_pointlen;
+ memcpy (p, decimal_point, decimal_pointlen);
+ tenths = rounding = 0;
+ }
}
}
+ }
+
+ if (inexact_style == human_ceiling
+ ? 0 < tenths + rounding
+ : inexact_style == human_round_to_nearest
+ ? 5 < tenths + (2 < rounding + (amt & 1))
+ : /* inexact_style == human_floor */ 0)
+ {
+ amt++;
- if (amt < 10)
+ if ((opts & human_autoscale)
+ && amt == base && exponent < exponent_max)
{
- *--p = '0' + tenths;
- *--p = '.';
- tenths = rounding = 0;
+ exponent++;
+ if (! (opts & human_suppress_point_zero))
+ {
+ *--p = '0';
+ p -= decimal_pointlen;
+ memcpy (p, decimal_point, decimal_pointlen);
+ }
+ amt = 1;
}
}
+
+ integerlim = p;
+
+ do
+ {
+ int digit = amt % 10;
+ *--p = digit + '0';
+ }
+ while ((amt /= 10) != 0);
}
- if (inexact_style == human_ceiling
- ? 0 < tenths + rounding
- : inexact_style == human_round_to_even
- ? 5 < tenths + (2 < rounding + (amt & 1))
- : /* inexact_style == human_floor */ 0)
+ if (opts & human_group_digits)
+ p = group_number (p, integerlim - p, grouping, thousands_sep);
+
+ if (opts & human_SI)
{
- amt++;
+ if (exponent < 0)
+ {
+ uintmax_t power;
+ exponent = 0;
+ for (power = 1; power < to_block_size; power *= base)
+ if (++exponent == exponent_max)
+ break;
+ }
+
+ if (exponent)
+ *psuffix++ = (! (opts & human_base_1024) && exponent == 1
+ ? 'k'
+ : power_letter[exponent]);
- if (amt == base && power < sizeof suffixes - 1)
+ if (opts & human_B)
{
- *p = suffixes[power + 1];
- *--p = '0';
- *--p = '.';
- amt = 1;
+ if ((opts & human_base_1024) && exponent)
+ *psuffix++ = 'i';
+ *psuffix++ = 'B';
}
}
- do
- *--p = '0' + (int) (amt % 10);
- while ((amt /= 10) != 0);
+ *psuffix = '\0';
return p;
}
@@ -318,44 +433,70 @@ human_readable_inexact (uintmax_t n, char *buf,
#endif
static char const *const block_size_args[] = { "human-readable", "si", 0 };
-static int const block_size_types[] = { -1024, -1000 };
+static int const block_size_opts[] =
+ {
+ human_autoscale + human_SI + human_base_1024,
+ human_autoscale + human_SI
+ };
-static int
+static uintmax_t
default_block_size (void)
{
return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
}
static strtol_error
-humblock (char const *spec, int *block_size)
+humblock (char const *spec, uintmax_t *block_size, int *options)
{
int i;
+ int opts = 0;
if (! spec && ! (spec = getenv ("BLOCK_SIZE")))
*block_size = default_block_size ();
- else if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_types)))
- *block_size = block_size_types[i];
else
{
- char *ptr;
- unsigned long val;
- strtol_error e = xstrtoul (spec, &ptr, 0, &val, "eEgGkKmMpPtTyYzZ0");
- if (e != LONGINT_OK)
- return e;
- if (*ptr)
- return LONGINT_INVALID_SUFFIX_CHAR;
- if ((int) val < 0 || val != (int) val)
- return LONGINT_OVERFLOW;
- *block_size = (int) val;
+ if (*spec == '\'')
+ {
+ opts |= human_group_digits;
+ spec++;
+ }
+
+ if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
+ {
+ opts |= block_size_opts[i];
+ *block_size = 1;
+ }
+ else
+ {
+ char *ptr;
+ strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
+ "eEgGkKmMpPtTyYzZ0");
+ if (e != LONGINT_OK)
+ return e;
+ if (*ptr)
+ return LONGINT_INVALID_SUFFIX_CHAR;
+ for (; ! ('0' <= *spec && *spec <= '9'); spec++)
+ if (spec == ptr)
+ {
+ opts |= human_SI;
+ if (ptr[-1] == 'B')
+ opts |= human_B;
+ if (ptr[-1] != 'B' || ptr[-2] == 'i')
+ opts |= human_base_1024;
+ break;
+ }
+ }
}
+ *options = opts;
return LONGINT_OK;
}
-void
-human_block_size (char const *spec, int report_errors, int *block_size)
+int
+human_options (char const *spec, bool report_errors, uintmax_t *block_size)
{
- strtol_error e = humblock (spec, block_size);
+ int opts;
+ strtol_error e = humblock (spec, block_size, &opts);
if (*block_size == 0)
{
*block_size = default_block_size ();
@@ -363,4 +504,5 @@ human_block_size (char const *spec, int report_errors, int *block_size)
}
if (e != LONGINT_OK && report_errors)
STRTOL_FATAL_ERROR (spec, _("block size"), e);
+ return opts;
}