summaryrefslogtreecommitdiff
path: root/src/split.c
diff options
context:
space:
mode:
authorPádraig Brady <P@draigBrady.com>2013-04-20 08:46:43 +0100
committerPádraig Brady <P@draigBrady.com>2013-05-23 11:13:24 +0100
commitfec363cebf581ef27f6d01686dec5a0499aa6818 (patch)
tree59d03cfaae216cc0dc4f9ec221f375fe7b605b14 /src/split.c
parent478dade09a4288f73e963b7f185ef9f73b681b42 (diff)
downloadcoreutils-fec363cebf581ef27f6d01686dec5a0499aa6818.tar.xz
split: with --line-bytes only allocate memory as needed
* src/split.c (line_bytes_split): Rewrite to only buffer when necessary. I.E. only increase the buffer when we've already lines output in a split and we encounter a line larger than the input buffer size, in which case a hold buffer will be increased in increments of the input buffer size. (lines_rr): Use the more abstract xalloc_die() just like we did in line_bytes_split(), rather than explicitly printing the "memory exhausted" message and exiting. * tests/split/line-bytes.sh: Add a new test for this function which previously had no test coverage. * tests/local.mk: Reference the new test. * NEWS: Mention the improvement. Fixes http://bugs.gnu.org/13537
Diffstat (limited to 'src/split.c')
-rw-r--r--src/split.c139
1 files changed, 96 insertions, 43 deletions
diff --git a/src/split.c b/src/split.c
index e5e75f0f5..feabe39b1 100644
--- a/src/split.c
+++ b/src/split.c
@@ -616,62 +616,115 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
}
while (n_read == bufsize);
}
-
+
/* Split into pieces that are as large as possible while still not more
than N_BYTES bytes, and are split on line boundaries except
- where lines longer than N_BYTES bytes occur.
- FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
- buffer of size N_BYTES, in case N_BYTES is very large. */
+ where lines longer than N_BYTES bytes occur. */
static void
-line_bytes_split (size_t n_bytes)
+line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
{
- char *bp;
- bool eof = false;
- size_t n_buffered = 0;
- char *buf = xmalloc (n_bytes);
+ size_t n_read;
+ uintmax_t n_out = 0; /* for each split. */
+ size_t n_hold = 0;
+ char *hold = NULL; /* for lines > bufsize. */
+ size_t hold_size = 0;
+ bool split_line = false; /* Whether a \n was output in a split. */
do
{
- /* Fill up the full buffer size from the input file. */
-
- size_t to_read = n_bytes - n_buffered;
- size_t n_read = full_read (STDIN_FILENO, buf + n_buffered, to_read);
- if (n_read < to_read && errno)
+ n_read = full_read (STDIN_FILENO, buf, bufsize);
+ if (n_read < bufsize && errno)
error (EXIT_FAILURE, errno, "%s", infile);
-
- n_buffered += n_read;
- if (n_buffered != n_bytes)
+ size_t n_left = n_read;
+ char *sob = buf;
+ while (n_left)
{
- if (n_buffered == 0)
- break;
- eof = true;
- }
+ size_t split_rest = 0;
+ char *eoc = NULL;
+ char *eol;
- /* Find where to end this chunk. */
- bp = buf + n_buffered;
- if (n_buffered == n_bytes)
- {
- while (bp > buf && bp[-1] != '\n')
- bp--;
- }
+ /* Determine End Of Chunk and/or End of Line,
+ which are used below to select what to write or buffer. */
+ if (n_bytes - n_out - n_hold <= n_left)
+ {
+ /* Have enough for split. */
+ split_rest = n_bytes - n_out - n_hold;
+ eoc = sob + split_rest - 1;
+ eol = memrchr (sob, '\n', split_rest);
+ }
+ else
+ eol = memrchr (sob, '\n', n_left);
- /* If chunk has no newlines, use all the chunk. */
- if (bp == buf)
- bp = buf + n_buffered;
+ /* Output hold space if possible. */
+ if (n_hold && !(!eol && n_out))
+ {
+ cwrite (n_out == 0, hold, n_hold);
+ n_out += n_hold;
+ if (n_hold > bufsize)
+ hold = xrealloc (hold, bufsize);
+ n_hold = 0;
+ hold_size = bufsize;
+ }
- /* Output the chars as one output file. */
- cwrite (true, buf, bp - buf);
+ /* Output to eol if present. */
+ if (eol)
+ {
+ split_line = true;
+ size_t n_write = eol - sob + 1;
+ cwrite (n_out == 0, sob, n_write);
+ n_out += n_write;
+ n_left -= n_write;
+ sob += n_write;
+ if (eoc)
+ split_rest -= n_write;
+ }
+
+ /* Output to eoc or eob if possible. */
+ if (n_left && !split_line)
+ {
+ size_t n_write = eoc ? split_rest : n_left;
+ cwrite (n_out == 0, sob, n_write);
+ n_out += n_write;
+ n_left -= n_write;
+ sob += n_write;
+ if (eoc)
+ split_rest -= n_write;
+ }
+
+ /* Update hold if needed. */
+ if ((eoc && split_rest) || (!eoc && n_left))
+ {
+ size_t n_buf = eoc ? split_rest : n_left;
+ if (hold_size - n_hold < n_buf)
+ {
+ if (hold_size <= SIZE_MAX - bufsize)
+ hold_size += bufsize;
+ else
+ xalloc_die ();
+ hold = xrealloc (hold, hold_size);
+ }
+ memcpy (hold + n_hold, sob, n_buf);
+ n_hold += n_buf;
+ n_left -= n_buf;
+ sob += n_buf;
+ }
- /* Discard the chars we just output; move rest of chunk
- down to be the start of the next chunk. Source and
- destination probably overlap. */
- n_buffered -= bp - buf;
- if (n_buffered > 0)
- memmove (buf, bp, n_buffered);
+ /* Reset for new split. */
+ if (eoc)
+ {
+ n_out = 0;
+ split_line = false;
+ }
+ }
}
- while (!eof);
- free (buf);
+ while (n_read == bufsize);
+
+ /* Handle no eol at end of file. */
+ if (n_hold)
+ cwrite (n_out == 0, hold, n_hold);
+
+ free (hold);
}
/* -n l/[K/]N: Write lines to files of approximately file size / N.
@@ -926,7 +979,7 @@ lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
else
{
if (SIZE_MAX < n)
- error (exit_failure, 0, "%s", _("memory exhausted"));
+ xalloc_die ();
files = xnmalloc (n, sizeof *files);
/* Generate output file names. */
@@ -1408,7 +1461,7 @@ main (int argc, char **argv)
break;
case type_byteslines:
- line_bytes_split (n_units);
+ line_bytes_split (n_units, buf, in_blk_size);
break;
case type_chunk_bytes: