diff options
author | Pádraig Brady <P@draigBrady.com> | 2013-04-20 08:46:43 +0100 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2013-05-23 11:13:24 +0100 |
commit | fec363cebf581ef27f6d01686dec5a0499aa6818 (patch) | |
tree | 59d03cfaae216cc0dc4f9ec221f375fe7b605b14 /src | |
parent | 478dade09a4288f73e963b7f185ef9f73b681b42 (diff) | |
download | coreutils-fec363cebf581ef27f6d01686dec5a0499aa6818.tar.xz |
split: with --line-bytes only allocate memory as needed
* src/split.c (line_bytes_split): Rewrite to only buffer
when necessary. I.E. only increase the buffer when we've
already lines output in a split and we encounter a line
larger than the input buffer size, in which case a hold
buffer will be increased in increments of the input buffer size.
(lines_rr): Use the more abstract xalloc_die() just like
we did in line_bytes_split(), rather than explicitly
printing the "memory exhausted" message and exiting.
* tests/split/line-bytes.sh: Add a new test for this
function which previously had no test coverage.
* tests/local.mk: Reference the new test.
* NEWS: Mention the improvement.
Fixes http://bugs.gnu.org/13537
Diffstat (limited to 'src')
-rw-r--r-- | src/split.c | 139 |
1 files changed, 96 insertions, 43 deletions
diff --git a/src/split.c b/src/split.c index e5e75f0f5..feabe39b1 100644 --- a/src/split.c +++ b/src/split.c @@ -616,62 +616,115 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize) } while (n_read == bufsize); } - + /* Split into pieces that are as large as possible while still not more than N_BYTES bytes, and are split on line boundaries except - where lines longer than N_BYTES bytes occur. - FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a - buffer of size N_BYTES, in case N_BYTES is very large. */ + where lines longer than N_BYTES bytes occur. */ static void -line_bytes_split (size_t n_bytes) +line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize) { - char *bp; - bool eof = false; - size_t n_buffered = 0; - char *buf = xmalloc (n_bytes); + size_t n_read; + uintmax_t n_out = 0; /* for each split. */ + size_t n_hold = 0; + char *hold = NULL; /* for lines > bufsize. */ + size_t hold_size = 0; + bool split_line = false; /* Whether a \n was output in a split. */ do { - /* Fill up the full buffer size from the input file. */ - - size_t to_read = n_bytes - n_buffered; - size_t n_read = full_read (STDIN_FILENO, buf + n_buffered, to_read); - if (n_read < to_read && errno) + n_read = full_read (STDIN_FILENO, buf, bufsize); + if (n_read < bufsize && errno) error (EXIT_FAILURE, errno, "%s", infile); - - n_buffered += n_read; - if (n_buffered != n_bytes) + size_t n_left = n_read; + char *sob = buf; + while (n_left) { - if (n_buffered == 0) - break; - eof = true; - } + size_t split_rest = 0; + char *eoc = NULL; + char *eol; - /* Find where to end this chunk. */ - bp = buf + n_buffered; - if (n_buffered == n_bytes) - { - while (bp > buf && bp[-1] != '\n') - bp--; - } + /* Determine End Of Chunk and/or End of Line, + which are used below to select what to write or buffer. */ + if (n_bytes - n_out - n_hold <= n_left) + { + /* Have enough for split. */ + split_rest = n_bytes - n_out - n_hold; + eoc = sob + split_rest - 1; + eol = memrchr (sob, '\n', split_rest); + } + else + eol = memrchr (sob, '\n', n_left); - /* If chunk has no newlines, use all the chunk. */ - if (bp == buf) - bp = buf + n_buffered; + /* Output hold space if possible. */ + if (n_hold && !(!eol && n_out)) + { + cwrite (n_out == 0, hold, n_hold); + n_out += n_hold; + if (n_hold > bufsize) + hold = xrealloc (hold, bufsize); + n_hold = 0; + hold_size = bufsize; + } - /* Output the chars as one output file. */ - cwrite (true, buf, bp - buf); + /* Output to eol if present. */ + if (eol) + { + split_line = true; + size_t n_write = eol - sob + 1; + cwrite (n_out == 0, sob, n_write); + n_out += n_write; + n_left -= n_write; + sob += n_write; + if (eoc) + split_rest -= n_write; + } + + /* Output to eoc or eob if possible. */ + if (n_left && !split_line) + { + size_t n_write = eoc ? split_rest : n_left; + cwrite (n_out == 0, sob, n_write); + n_out += n_write; + n_left -= n_write; + sob += n_write; + if (eoc) + split_rest -= n_write; + } + + /* Update hold if needed. */ + if ((eoc && split_rest) || (!eoc && n_left)) + { + size_t n_buf = eoc ? split_rest : n_left; + if (hold_size - n_hold < n_buf) + { + if (hold_size <= SIZE_MAX - bufsize) + hold_size += bufsize; + else + xalloc_die (); + hold = xrealloc (hold, hold_size); + } + memcpy (hold + n_hold, sob, n_buf); + n_hold += n_buf; + n_left -= n_buf; + sob += n_buf; + } - /* Discard the chars we just output; move rest of chunk - down to be the start of the next chunk. Source and - destination probably overlap. */ - n_buffered -= bp - buf; - if (n_buffered > 0) - memmove (buf, bp, n_buffered); + /* Reset for new split. */ + if (eoc) + { + n_out = 0; + split_line = false; + } + } } - while (!eof); - free (buf); + while (n_read == bufsize); + + /* Handle no eol at end of file. */ + if (n_hold) + cwrite (n_out == 0, hold, n_hold); + + free (hold); } /* -n l/[K/]N: Write lines to files of approximately file size / N. @@ -926,7 +979,7 @@ lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize) else { if (SIZE_MAX < n) - error (exit_failure, 0, "%s", _("memory exhausted")); + xalloc_die (); files = xnmalloc (n, sizeof *files); /* Generate output file names. */ @@ -1408,7 +1461,7 @@ main (int argc, char **argv) break; case type_byteslines: - line_bytes_split (n_units); + line_bytes_split (n_units, buf, in_blk_size); break; case type_chunk_bytes: |