diff options
author | Pádraig Brady <P@draigBrady.com> | 2010-12-08 08:33:15 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2010-12-08 23:36:17 +0000 |
commit | 195c455d0ae3884ab2d9680ac3043aa36e9c8c3d (patch) | |
tree | 5b201f869b06aeb2c3e8820292090b6a2c7e4d14 | |
parent | a1629ba1ecc227a26be6adae596d17defc3ca324 (diff) | |
download | coreutils-195c455d0ae3884ab2d9680ac3043aa36e9c8c3d.tar.xz |
split: fix a case where --elide-empty causes invalid chunking
When -n l/N is used and long lines are present that both
span partitions and multiple buffers, one would get
inconsistent chunk sizes.
* src/split.c (main): Add a new undocumented ---io-blksize option
to support full testing with varied buffer sizes.
(cwrite): Refactor most handling of --elide-empty to here.
(bytes_split): Remove handling of --elide-empty.
(lines_chunk_split): Likewise. The specific issue here
was that the first handling of elide_empty_files interfered
with the replenishing of the input buffer.
* tests/misc/split-lchunk: Add -e and the new ---io-blksize
combinations to the test.
-rw-r--r-- | src/split.c | 30 | ||||
-rwxr-xr-x | tests/misc/split-lchunk | 65 |
2 files changed, 59 insertions, 36 deletions
diff --git a/src/split.c b/src/split.c index 49a7a1c23..ae98bc76a 100644 --- a/src/split.c +++ b/src/split.c @@ -82,7 +82,8 @@ static bool unbuffered; non-character as a pseudo short option, starting with CHAR_MAX + 1. */ enum { - VERBOSE_OPTION = CHAR_MAX + 1 + VERBOSE_OPTION = CHAR_MAX + 1, + IO_BLKSIZE_OPTION }; static struct option const longopts[] = @@ -96,6 +97,8 @@ static struct option const longopts[] = {"suffix-length", required_argument, NULL, 'a'}, {"numeric-suffixes", no_argument, NULL, 'd'}, {"verbose", no_argument, NULL, VERBOSE_OPTION}, + {"-io-blksize", required_argument, NULL, + IO_BLKSIZE_OPTION}, /* do not document */ {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} @@ -255,6 +258,8 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes) { if (new_file_flag) { + if (!bp && bytes == 0 && elide_empty_files) + return; if (output_desc >= 0 && close (output_desc) < 0) error (EXIT_FAILURE, errno, "%s", outfile); next_file_name (); @@ -315,7 +320,7 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files) /* Ensure NUMBER files are created, which truncates any existing files or notifies any consumers on fifos. FIXME: Should we do this before EXIT_FAILURE? */ - while (!elide_empty_files && opened++ < max_files) + while (opened++ < max_files) cwrite (true, NULL, 0); } @@ -506,7 +511,7 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, chunk_end = file_size - 1; /* >= chunk_size. */ else chunk_end += chunk_size; - if (!elide_empty_files && chunk_end <= n_written - 1) + if (chunk_end <= n_written - 1) cwrite (true, NULL, 0); else next = false; @@ -517,7 +522,7 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, /* Ensure NUMBER files are created, which truncates any existing files or notifies any consumers on fifos. FIXME: Should we do this before EXIT_FAILURE? 
*/ - while (!k && !elide_empty_files && chunk_no++ <= n) + while (!k && chunk_no++ <= n) cwrite (true, NULL, 0); } @@ -780,7 +785,7 @@ main (int argc, char **argv) type_undef, type_bytes, type_byteslines, type_lines, type_digits, type_chunk_bytes, type_chunk_lines, type_rr } split_type = type_undef; - size_t in_blk_size; /* optimal block size of input file device */ + size_t in_blk_size = 0; /* optimal block size of input file device */ char *buf; /* file i/o buffer */ size_t page_size = getpagesize (); uintmax_t k_units = 0; @@ -941,6 +946,18 @@ main (int argc, char **argv) elide_empty_files = true; break; + case IO_BLKSIZE_OPTION: + { + uintmax_t tmp_blk_size; + if (xstrtoumax (optarg, NULL, 10, &tmp_blk_size, + multipliers) != LONGINT_OK + || tmp_blk_size == 0 || SIZE_MAX - page_size < tmp_blk_size) + error (0, 0, _("%s: invalid IO block size"), optarg); + else + in_blk_size = tmp_blk_size; + } + break; + case VERBOSE_OPTION: verbose = true; break; @@ -997,7 +1014,8 @@ main (int argc, char **argv) if (fstat (STDIN_FILENO, &stat_buf) != 0) error (EXIT_FAILURE, errno, "%s", infile); - in_blk_size = io_blksize (stat_buf); + if (in_blk_size == 0) + in_blk_size = io_blksize (stat_buf); file_size = stat_buf.st_size; if (split_type == type_chunk_bytes || split_type == type_chunk_lines) diff --git a/tests/misc/split-lchunk b/tests/misc/split-lchunk index 4c7c20e0e..7c55092d1 100755 --- a/tests/misc/split-lchunk +++ b/tests/misc/split-lchunk @@ -48,7 +48,6 @@ echo 'split: 16: invalid chunk number' > exp split -n l/16/15 in 2>err && fail=1 compare err exp || fail=1 -: > out printf '%s' "\ 14 16 09 15 16 10 14 08 08 10 14 08 08 10 @@ -57,42 +56,48 @@ printf '%s' "\ 06 00 08 00 02 06 00 02 06 00 08 00 01 07 00 02 06 00 08 00 02 16 " > exp || framework_failure -# Note for full testing one should set bufsize to 1,2,5,10,80,100 -# at the start of lines_chunk_split(), for each run of this test. 
+sed 's/00 *//g' exp > exp.elide_empty || framework_failure DEBUGGING= test "$DEBUGGING" && test "$VERBOSE" && set +x -for N in 6 8 12 15 22; do - rm -f x* - split -n l/$N in - echo $(stat -c "%02s" x*) >> out - - if test "$DEBUGGING"; then - # Output partition pattern - size=$(printf "%s" "$lines" | wc -c) - chunk_size=$(($size/$N)) - end_size=$(($chunk_size + ($size % $N))) - { - yes "$(printf %${chunk_size}s ])" | head -n$(($N-1)) - printf %${end_size}s ] - } | tr -d '\n' | sed "s/\\(^.\\{1,$size\\}\\).*/\\1/" - echo - - # Output pattern generated for comparison - for s in $(stat -c "%s" x*); do - #s=0 transitions are not shown - test "$m" = "_" && m=- || m=_ - printf "%${s}s" '' | tr ' ' $m +for ELIDE_EMPTY in '' '-e'; do + for IO_BLKSIZE in 1 2 5 10 80 100; do + : > out + test "$DEBUGGING" && printf "\n---io-blk-size=$IO_BLKSIZE $ELIDE_EMPTY\n" + for N in 6 8 12 15 22; do + rm -f x* + split ---io-blksize=$IO_BLKSIZE $ELIDE_EMPTY -n l/$N in + echo $(stat -c "%02s" x*) >> out + + if test "$DEBUGGING"; then + # Output partition pattern + size=$(printf "%s" "$lines" | wc -c) + chunk_size=$(($size/$N)) + end_size=$(($chunk_size + ($size % $N))) + { + yes "$(printf %${chunk_size}s ])" | head -n$(($N-1)) + printf %${end_size}s ] + } | tr -d '\n' | sed "s/\\(^.\\{1,$size\\}\\).*/\\1/" + echo + + # Output pattern generated for comparison + for s in $(stat -c "%s" x*); do + #s=0 transitions are not shown + test "$m" = "_" && m=- || m=_ + printf "%${s}s" '' | tr ' ' $m + done + echo + + # Output lines for reference + echo "$lines" + fi done - echo - - # Output lines for reference - echo "$lines" - fi + test "$ELIDE_EMPTY" && EXP=exp.elide_empty || EXP=exp + compare out $EXP || fail=1 + done done test "$DEBUGGING" && test "$VERBOSE" && set -x -compare out exp || fail=1 # Check extraction of particular chunks : > out |