summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com> 2010-12-08 08:33:15 +0000
committer: Pádraig Brady <P@draigBrady.com> 2010-12-08 23:36:17 +0000
commit: 195c455d0ae3884ab2d9680ac3043aa36e9c8c3d (patch)
tree: 5b201f869b06aeb2c3e8820292090b6a2c7e4d14
parent: a1629ba1ecc227a26be6adae596d17defc3ca324 (diff)
download: coreutils-195c455d0ae3884ab2d9680ac3043aa36e9c8c3d.tar.xz
split: fix a case where --elide-empty causes invalid chunking
When -n l/N is used and long lines are present that both span partitions and multiple buffers, one would get inconsistent chunk sizes.

* src/split.c (main): Add a new undocumented ---io-blksize option to support full testing with varied buffer sizes.
(cwrite): Refactor most handling of --elide-empty to here.
(bytes_split): Remove handling of --elide-empty.
(lines_chunk_split): Likewise. The specific issue here was that the first handling of elide_empty_files interfered with the replenishing of the input buffer.
* tests/misc/split-lchunk: Add -e and the new ---io-blksize combinations to the test.
-rw-r--r-- src/split.c 30
-rwxr-xr-x tests/misc/split-lchunk 65
2 files changed, 59 insertions, 36 deletions
diff --git a/src/split.c b/src/split.c
index 49a7a1c23..ae98bc76a 100644
--- a/src/split.c
+++ b/src/split.c
@@ -82,7 +82,8 @@ static bool unbuffered;
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
enum
{
- VERBOSE_OPTION = CHAR_MAX + 1
+ VERBOSE_OPTION = CHAR_MAX + 1,
+ IO_BLKSIZE_OPTION
};
static struct option const longopts[] =
@@ -96,6 +97,8 @@ static struct option const longopts[] =
{"suffix-length", required_argument, NULL, 'a'},
{"numeric-suffixes", no_argument, NULL, 'd'},
{"verbose", no_argument, NULL, VERBOSE_OPTION},
+ {"-io-blksize", required_argument, NULL,
+ IO_BLKSIZE_OPTION}, /* do not document */
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
@@ -255,6 +258,8 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes)
{
if (new_file_flag)
{
+ if (!bp && bytes == 0 && elide_empty_files)
+ return;
if (output_desc >= 0 && close (output_desc) < 0)
error (EXIT_FAILURE, errno, "%s", outfile);
next_file_name ();
@@ -315,7 +320,7 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
/* Ensure NUMBER files are created, which truncates
any existing files or notifies any consumers on fifos.
FIXME: Should we do this before EXIT_FAILURE? */
- while (!elide_empty_files && opened++ < max_files)
+ while (opened++ < max_files)
cwrite (true, NULL, 0);
}
@@ -506,7 +511,7 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
chunk_end = file_size - 1; /* >= chunk_size. */
else
chunk_end += chunk_size;
- if (!elide_empty_files && chunk_end <= n_written - 1)
+ if (chunk_end <= n_written - 1)
cwrite (true, NULL, 0);
else
next = false;
@@ -517,7 +522,7 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
/* Ensure NUMBER files are created, which truncates
any existing files or notifies any consumers on fifos.
FIXME: Should we do this before EXIT_FAILURE? */
- while (!k && !elide_empty_files && chunk_no++ <= n)
+ while (!k && chunk_no++ <= n)
cwrite (true, NULL, 0);
}
@@ -780,7 +785,7 @@ main (int argc, char **argv)
type_undef, type_bytes, type_byteslines, type_lines, type_digits,
type_chunk_bytes, type_chunk_lines, type_rr
} split_type = type_undef;
- size_t in_blk_size; /* optimal block size of input file device */
+ size_t in_blk_size = 0; /* optimal block size of input file device */
char *buf; /* file i/o buffer */
size_t page_size = getpagesize ();
uintmax_t k_units = 0;
@@ -941,6 +946,18 @@ main (int argc, char **argv)
elide_empty_files = true;
break;
+ case IO_BLKSIZE_OPTION:
+ {
+ uintmax_t tmp_blk_size;
+ if (xstrtoumax (optarg, NULL, 10, &tmp_blk_size,
+ multipliers) != LONGINT_OK
+ || tmp_blk_size == 0 || SIZE_MAX - page_size < tmp_blk_size)
+ error (0, 0, _("%s: invalid IO block size"), optarg);
+ else
+ in_blk_size = tmp_blk_size;
+ }
+ break;
+
case VERBOSE_OPTION:
verbose = true;
break;
@@ -997,7 +1014,8 @@ main (int argc, char **argv)
if (fstat (STDIN_FILENO, &stat_buf) != 0)
error (EXIT_FAILURE, errno, "%s", infile);
- in_blk_size = io_blksize (stat_buf);
+ if (in_blk_size == 0)
+ in_blk_size = io_blksize (stat_buf);
file_size = stat_buf.st_size;
if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
diff --git a/tests/misc/split-lchunk b/tests/misc/split-lchunk
index 4c7c20e0e..7c55092d1 100755
--- a/tests/misc/split-lchunk
+++ b/tests/misc/split-lchunk
@@ -48,7 +48,6 @@ echo 'split: 16: invalid chunk number' > exp
split -n l/16/15 in 2>err && fail=1
compare err exp || fail=1
-: > out
printf '%s' "\
14 16 09 15 16 10
14 08 08 10 14 08 08 10
@@ -57,42 +56,48 @@ printf '%s' "\
06 00 08 00 02 06 00 02 06 00 08 00 01 07 00 02 06 00 08 00 02 16
" > exp || framework_failure
-# Note for full testing one should set bufsize to 1,2,5,10,80,100
-# at the start of lines_chunk_split(), for each run of this test.
+sed 's/00 *//g' exp > exp.elide_empty || framework_failure
DEBUGGING=
test "$DEBUGGING" && test "$VERBOSE" && set +x
-for N in 6 8 12 15 22; do
- rm -f x*
- split -n l/$N in
- echo $(stat -c "%02s" x*) >> out
-
- if test "$DEBUGGING"; then
- # Output partition pattern
- size=$(printf "%s" "$lines" | wc -c)
- chunk_size=$(($size/$N))
- end_size=$(($chunk_size + ($size % $N)))
- {
- yes "$(printf %${chunk_size}s ])" | head -n$(($N-1))
- printf %${end_size}s ]
- } | tr -d '\n' | sed "s/\\(^.\\{1,$size\\}\\).*/\\1/"
- echo
-
- # Output pattern generated for comparison
- for s in $(stat -c "%s" x*); do
- #s=0 transitions are not shown
- test "$m" = "_" && m=- || m=_
- printf "%${s}s" '' | tr ' ' $m
+for ELIDE_EMPTY in '' '-e'; do
+ for IO_BLKSIZE in 1 2 5 10 80 100; do
+ : > out
+ test "$DEBUGGING" && printf "\n---io-blk-size=$IO_BLKSIZE $ELIDE_EMPTY\n"
+ for N in 6 8 12 15 22; do
+ rm -f x*
+ split ---io-blksize=$IO_BLKSIZE $ELIDE_EMPTY -n l/$N in
+ echo $(stat -c "%02s" x*) >> out
+
+ if test "$DEBUGGING"; then
+ # Output partition pattern
+ size=$(printf "%s" "$lines" | wc -c)
+ chunk_size=$(($size/$N))
+ end_size=$(($chunk_size + ($size % $N)))
+ {
+ yes "$(printf %${chunk_size}s ])" | head -n$(($N-1))
+ printf %${end_size}s ]
+ } | tr -d '\n' | sed "s/\\(^.\\{1,$size\\}\\).*/\\1/"
+ echo
+
+ # Output pattern generated for comparison
+ for s in $(stat -c "%s" x*); do
+ #s=0 transitions are not shown
+ test "$m" = "_" && m=- || m=_
+ printf "%${s}s" '' | tr ' ' $m
+ done
+ echo
+
+ # Output lines for reference
+ echo "$lines"
+ fi
done
- echo
-
- # Output lines for reference
- echo "$lines"
- fi
+ test "$ELIDE_EMPTY" && EXP=exp.elide_empty || EXP=exp
+ compare out $EXP || fail=1
+ done
done
test "$DEBUGGING" && test "$VERBOSE" && set -x
-compare out exp || fail=1
# Check extraction of particular chunks
: > out