diff options
author | Pádraig Brady <P@draigBrady.com> | 2013-11-11 02:51:17 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2013-11-11 23:30:39 +0000 |
commit | 91208453756bf0e86e25c2db35e481ac178f1255 (patch) | |
tree | 9da172f4244cfbd84950cd05eb36aac24463281b /src | |
parent | 90181a5926e96f34b5ffff21b81a2874a846ff6f (diff) | |
download | coreutils-91208453756bf0e86e25c2db35e481ac178f1255.tar.xz |
base64: improve encoding I/O efficiency
Since the I/O overhead is significant to the relatively
simple processing done by this utility, use fputc() rather
than fputs() to output '\n'.
Time to process a 100MiB file was measured to
decrease from 0.417s to 0.383s, i.e. an 8% improvement.
Related to these changes is a processing improvement in
gnulib, which increases throughput by 60% when processing
full buffers, which improves processing of a 100MiB file
with standard wrapped output, down to 0.256s.
http://git.sv.gnu.org/gitweb/?p=gnulib.git;a=commit;h=43fd1e7b
Also increase the encoding buffer size from 3 to 30KiB.
This was seen to give a further 8% improvement, taking
processing time down to 0.235s in the wrapped output case.
The decoding size buffer is not adjusted,
due to the noted caveat with --ignore-garbage.
* src/base64.c (BLOCKSIZE): Split into ENC_ and DEC_ variants,
with the former increased from 3KiB to 30KiB.
(wrap_write): Use the simpler fputc() rather than fputs()
to output the '\n' character. Also check against EOF
rather than < 0 for errors.
(do_encode): Likewise.
* NEWS: Mention the large increase in performance, which
with the I/O improvements in coreutils and the processing
improvement in gnulib, amount to about a 60% throughput increase.
Diffstat (limited to 'src')
-rw-r--r-- | src/base64.c | 39 |
1 files changed, 20 insertions, 19 deletions
diff --git a/src/base64.c b/src/base64.c index 0a4006802..ee07dd41f 100644 --- a/src/base64.c +++ b/src/base64.c @@ -89,15 +89,16 @@ from any other non-alphabet bytes in the encoded stream.\n"), exit (status); } +#define ENC_BLOCKSIZE (1024*3*10) +#define ENC_B64BLOCKSIZE BASE64_LENGTH (ENC_BLOCKSIZE) /* Note that increasing this may decrease performance if --ignore-garbage - is used, because of the memmove operation below. */ -#define BLOCKSIZE 3072 -#define B64BLOCKSIZE BASE64_LENGTH (BLOCKSIZE) + is used, because of the memmove operation below. */ +#define DEC_BLOCKSIZE (1024*3) +#define DEC_B64BLOCKSIZE BASE64_LENGTH (DEC_BLOCKSIZE) /* Ensure that BLOCKSIZE is a multiple of 3 and 4. */ -#if BLOCKSIZE % 12 != 0 -# error "invalid BLOCKSIZE" -#endif +verify (ENC_BLOCKSIZE % 12 == 0); +verify (DEC_BLOCKSIZE % 12 == 0); static void wrap_write (const char *buffer, size_t len, @@ -120,7 +121,7 @@ wrap_write (const char *buffer, size_t len, if (to_write == 0) { - if (fputs ("\n", out) < 0) + if (fputc ('\n', out) == EOF) error (EXIT_FAILURE, errno, _("write error")); *current_column = 0; } @@ -138,8 +139,8 @@ static void do_encode (FILE *in, FILE *out, uintmax_t wrap_column) { size_t current_column = 0; - char inbuf[BLOCKSIZE]; - char outbuf[B64BLOCKSIZE]; + char inbuf[ENC_BLOCKSIZE]; + char outbuf[ENC_B64BLOCKSIZE]; size_t sum; do @@ -149,14 +150,14 @@ do_encode (FILE *in, FILE *out, uintmax_t wrap_column) sum = 0; do { - n = fread (inbuf + sum, 1, BLOCKSIZE - sum, in); + n = fread (inbuf + sum, 1, ENC_BLOCKSIZE - sum, in); sum += n; } - while (!feof (in) && !ferror (in) && sum < BLOCKSIZE); + while (!feof (in) && !ferror (in) && sum < ENC_BLOCKSIZE); if (sum > 0) { - /* Process input one block at a time. Note that BLOCKSIZE % + /* Process input one block at a time. Note that ENC_BLOCKSIZE % 3 == 0, so that no base64 pads will appear in output. 
*/ base64_encode (inbuf, sum, outbuf, BASE64_LENGTH (sum)); @@ -164,10 +165,10 @@ do_encode (FILE *in, FILE *out, uintmax_t wrap_column) &current_column, out); } } - while (!feof (in) && !ferror (in) && sum == BLOCKSIZE); + while (!feof (in) && !ferror (in) && sum == ENC_BLOCKSIZE); /* When wrapping, terminate last line. */ - if (wrap_column && current_column > 0 && fputs ("\n", out) < 0) + if (wrap_column && current_column > 0 && fputc ('\n', out) == EOF) error (EXIT_FAILURE, errno, _("write error")); if (ferror (in)) @@ -177,8 +178,8 @@ do_encode (FILE *in, FILE *out, uintmax_t wrap_column) static void do_decode (FILE *in, FILE *out, bool ignore_garbage) { - char inbuf[B64BLOCKSIZE]; - char outbuf[BLOCKSIZE]; + char inbuf[DEC_B64BLOCKSIZE]; + char outbuf[DEC_BLOCKSIZE]; size_t sum; struct base64_decode_context ctx; @@ -193,7 +194,7 @@ do_decode (FILE *in, FILE *out, bool ignore_garbage) sum = 0; do { - n = fread (inbuf + sum, 1, B64BLOCKSIZE - sum, in); + n = fread (inbuf + sum, 1, DEC_B64BLOCKSIZE - sum, in); if (ignore_garbage) { @@ -210,7 +211,7 @@ do_decode (FILE *in, FILE *out, bool ignore_garbage) if (ferror (in)) error (EXIT_FAILURE, errno, _("read error")); } - while (sum < B64BLOCKSIZE && !feof (in)); + while (sum < DEC_B64BLOCKSIZE && !feof (in)); /* The following "loop" is usually iterated just once. However, when it processes the final input buffer, we want @@ -220,7 +221,7 @@ do_decode (FILE *in, FILE *out, bool ignore_garbage) { if (k == 1 && ctx.i == 0) break; - n = BLOCKSIZE; + n = DEC_BLOCKSIZE; ok = base64_decode_ctx (&ctx, inbuf, (k == 0 ? sum : 0), outbuf, &n); if (fwrite (outbuf, 1, n, out) < n) |