From 195c455d0ae3884ab2d9680ac3043aa36e9c8c3d Mon Sep 17 00:00:00 2001 From: Pádraig Brady Date: Wed, 8 Dec 2010 08:33:15 +0000 Subject: split: fix a case where --elide-empty causes invalid chunking When -n l/N is used and long lines are present that both span partitions and multiple buffers, one would get inconsistent chunk sizes. * src/split.c (main): Add a new undocumented ---io-blksize option to support full testing with varied buffer sizes. (cwrite): Refactor most handling of --elide-empty to here. (bytes_split): Remove handling of --elide-empty. (lines_chunk_split): Likewise. The specific issue here was that the first handling of elide_empty_files interfered with the replenishing of the input buffer. * tests/misc/split-lchunk: Add -e and the new ---io-blksize combinations to the test. --- tests/misc/split-lchunk | 65 ++++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 30 deletions(-) (limited to 'tests') diff --git a/tests/misc/split-lchunk b/tests/misc/split-lchunk index 4c7c20e0e..7c55092d1 100755 --- a/tests/misc/split-lchunk +++ b/tests/misc/split-lchunk @@ -48,7 +48,6 @@ echo 'split: 16: invalid chunk number' > exp split -n l/16/15 in 2>err && fail=1 compare err exp || fail=1 -: > out printf '%s' "\ 14 16 09 15 16 10 14 08 08 10 14 08 08 10 @@ -57,42 +56,48 @@ printf '%s' "\ 06 00 08 00 02 06 00 02 06 00 08 00 01 07 00 02 06 00 08 00 02 16 " > exp || framework_failure -# Note for full testing one should set bufsize to 1,2,5,10,80,100 -# at the start of lines_chunk_split(), for each run of this test. 
+sed 's/00 *//g' exp > exp.elide_empty || framework_failure DEBUGGING= test "$DEBUGGING" && test "$VERBOSE" && set +x -for N in 6 8 12 15 22; do - rm -f x* - split -n l/$N in - echo $(stat -c "%02s" x*) >> out - - if test "$DEBUGGING"; then - # Output partition pattern - size=$(printf "%s" "$lines" | wc -c) - chunk_size=$(($size/$N)) - end_size=$(($chunk_size + ($size % $N))) - { - yes "$(printf %${chunk_size}s ])" | head -n$(($N-1)) - printf %${end_size}s ] - } | tr -d '\n' | sed "s/\\(^.\\{1,$size\\}\\).*/\\1/" - echo - - # Output pattern generated for comparison - for s in $(stat -c "%s" x*); do - #s=0 transitions are not shown - test "$m" = "_" && m=- || m=_ - printf "%${s}s" '' | tr ' ' $m +for ELIDE_EMPTY in '' '-e'; do + for IO_BLKSIZE in 1 2 5 10 80 100; do + : > out + test "$DEBUGGING" && printf "\n---io-blk-size=$IO_BLKSIZE $ELIDE_EMPTY\n" + for N in 6 8 12 15 22; do + rm -f x* + split ---io-blksize=$IO_BLKSIZE $ELIDE_EMPTY -n l/$N in + echo $(stat -c "%02s" x*) >> out + + if test "$DEBUGGING"; then + # Output partition pattern + size=$(printf "%s" "$lines" | wc -c) + chunk_size=$(($size/$N)) + end_size=$(($chunk_size + ($size % $N))) + { + yes "$(printf %${chunk_size}s ])" | head -n$(($N-1)) + printf %${end_size}s ] + } | tr -d '\n' | sed "s/\\(^.\\{1,$size\\}\\).*/\\1/" + echo + + # Output pattern generated for comparison + for s in $(stat -c "%s" x*); do + #s=0 transitions are not shown + test "$m" = "_" && m=- || m=_ + printf "%${s}s" '' | tr ' ' $m + done + echo + + # Output lines for reference + echo "$lines" + fi done - echo - - # Output lines for reference - echo "$lines" - fi + test "$ELIDE_EMPTY" && EXP=exp.elide_empty || EXP=exp + compare out $EXP || fail=1 + done done test "$DEBUGGING" && test "$VERBOSE" && set -x -compare out exp || fail=1 # Check extraction of particular chunks : > out -- cgit v1.2.3-54-g00ecf