From be107398e56e9f6ada8cd558b3f43bb1ed70fb84 Mon Sep 17 00:00:00 2001 From: Chen Guo Date: Fri, 8 Jan 2010 03:42:27 -0800 Subject: split: add --number to generate a particular number of files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * src/split.c (usage, long_options, main): New options --number, --unbuffered, --elide-empty-files. (set_suffix_length): New function to auto increase suffix length to handle a specified number of files. (create): New function. Refactored from cwrite() and ofile_open(). (bytes_split): Add max_files argument to support byte chunking. (lines_chunk_split): New function. Split file into chunks of lines. (bytes_chunk_extract): New function. Extract a chunk of file. (of_info): New struct. Used by functions lines_rr and ofile_open to keep track of file descriptors associated with output files. (ofile_open): New function. Shuffle file descriptors when there are more output files than available file descriptors. (lines_rr): New function to distribute lines round-robin to files. (chunk_parse): New function. Parses K/N syntax. * tests/misc/split-bchunk: New test for byte chunking. * tests/misc/split-lchunk: New test for line delimited chunking. * tests/misc/split-rchunk: New test for round-robin chunking. * tests/Makefile.am: Reference new tests. * tests/misc/split-fail: Add failure scenarios for new options. * tests/misc/split-l: Fix a typo. s/ln/split/. * doc/coreutils.texi (split invocation): Document --number. * NEWS: Mention the new feature. * .mailmap: Map new email address for shortlog. 
Signed-off-by: Pádraig Brady --- tests/Makefile.am | 3 ++ tests/misc/split-bchunk | 50 +++++++++++++++++++++ tests/misc/split-fail | 10 +++++ tests/misc/split-l | 2 +- tests/misc/split-lchunk | 117 ++++++++++++++++++++++++++++++++++++++++++++++++ tests/misc/split-rchunk | 66 +++++++++++++++++++++++++++ 6 files changed, 247 insertions(+), 1 deletion(-) create mode 100755 tests/misc/split-bchunk create mode 100755 tests/misc/split-lchunk create mode 100755 tests/misc/split-rchunk (limited to 'tests') diff --git a/tests/Makefile.am b/tests/Makefile.am index 971f427ce..2e33063bd 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -240,8 +240,11 @@ TESTS = \ misc/sort-unique \ misc/sort-version \ misc/split-a \ + misc/split-bchunk \ misc/split-fail \ misc/split-l \ + misc/split-lchunk \ + misc/split-rchunk \ misc/stat-birthtime \ misc/stat-fmt \ misc/stat-hyphen \ diff --git a/tests/misc/split-bchunk b/tests/misc/split-bchunk new file mode 100755 index 000000000..aef450bb7 --- /dev/null +++ b/tests/misc/split-bchunk @@ -0,0 +1,50 @@ +#!/bin/sh +# test splitting into 3 chunks + +# Copyright (C) 2010 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. 
"${srcdir=.}/init.sh"; path_prepend_ ../src +print_ver_ split + +# N can be greater than the file size +# in which case no data is extracted, or empty files are written +split -n 10 /dev/null || fail=1 +test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "10x0" || fail=1 +rm x?? + +# Ensure --elide-empty-files is honored +split -e -n 10 /dev/null || fail=1 +stat x?? 2>/dev/null && fail=1 + +printf '1\n2\n3\n4\n5\n' > in || framework_failure + +split -n 3 in > out || fail=1 +split -n 1/3 in > b1 || fail=1 +split -n 2/3 in > b2 || fail=1 +split -n 3/3 in > b3 || fail=1 +printf '1\n2' > exp-1 +printf '\n3\n' > exp-2 +printf '4\n5\n' > exp-3 + +compare xaa exp-1 || fail=1 +compare xab exp-2 || fail=1 +compare xac exp-3 || fail=1 +compare b1 exp-1 || fail=1 +compare b2 exp-2 || fail=1 +compare b3 exp-3 || fail=1 +test -f xad && fail=1 + +Exit $fail diff --git a/tests/misc/split-fail b/tests/misc/split-fail index b0b22e4c4..cf0edc900 100755 --- a/tests/misc/split-fail +++ b/tests/misc/split-fail @@ -27,6 +27,10 @@ split -a 0 in 2> /dev/null || fail=1 split -b 0 in 2> /dev/null && fail=1 split -C 0 in 2> /dev/null && fail=1 split -l 0 in 2> /dev/null && fail=1 +split -n 0 in 2> /dev/null && fail=1 +split -n 1/0 in 2> /dev/null && fail=1 +split -n 0/1 in 2> /dev/null && fail=1 +split -n 2/1 in 2> /dev/null && fail=1 # Make sure -C doesn't create empty files. rm -f x?? || fail=1 @@ -44,6 +48,12 @@ split --lines=$UINTMAX_MAX in || fail=1 split --bytes=$OFF_T_MAX in || fail=1 split --line-bytes=$OFF_T_OFLOW 2> /dev/null in && fail=1 split --line-bytes=$SIZE_OFLOW 2> /dev/null in && fail=1 +if truncate -s$SIZE_OFLOW large; then + # Ensure we can split chunks of a large file on 32 bit hosts + split --number=$SIZE_OFLOW/$SIZE_OFLOW large >/dev/null || fail=1 +fi +split --number=r/$UINTMAX_MAX/$UINTMAX_MAX /dev/null || fail=1 +split --number=r/$UINTMAX_OFLOW /dev/null && fail=1 # Make sure that a huge obsolete option evokes the right failure. 
split -99999999999999999991 2> out && fail=1 diff --git a/tests/misc/split-l b/tests/misc/split-l index 8967439e8..428e2018c 100755 --- a/tests/misc/split-l +++ b/tests/misc/split-l @@ -17,7 +17,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. . "${srcdir=.}/init.sh"; path_prepend_ ../src -print_ver_ ln +print_ver_ split printf '1\n2\n3\n4\n5\n' > in || framework_failure diff --git a/tests/misc/split-lchunk b/tests/misc/split-lchunk new file mode 100755 index 000000000..4c7c20e0e --- /dev/null +++ b/tests/misc/split-lchunk @@ -0,0 +1,117 @@ +#!/bin/sh +# test splitting into newline delineated chunks (-n l/...) + +# Copyright (C) 2010 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src +print_ver_ split + +# invalid number of chunks +echo 'split: 1o: invalid number of chunks' > exp +split -n l/1o 2>err && fail=1 +compare err exp || fail=1 + +echo "split: \`-': cannot determine file size" > exp +echo | split -n l/1 2>err && fail=1 +compare err exp || fail=1 + +# N can be greater than the file size +# in which case no data is extracted, or empty files are written +split -n l/10 /dev/null || fail=1 +test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "10x0" || fail=1 +rm x?? + +# Ensure --elide-empty-files is honored +split -e -n l/10 /dev/null || fail=1 +stat x?? 2>/dev/null && fail=1 + +# 80 bytes. 
~ transformed to \n below +lines=\ +12345~1~12345~1~12345~1~12345~1~12345~~~12345~1~12345~1~12345~1~12345~1~12345~1~ + +printf "%s" "$lines" | tr '~' '\n' > in || framework_failure + +echo 'split: 16: invalid chunk number' > exp +split -n l/16/15 in 2>err && fail=1 +compare err exp || fail=1 + +: > out +printf '%s' "\ +14 16 09 15 16 10 +14 08 08 10 14 08 08 10 +06 08 08 02 06 08 08 02 06 08 08 10 +06 08 02 06 08 00 08 02 06 08 02 06 08 00 10 +06 00 08 00 02 06 00 02 06 00 08 00 01 07 00 02 06 00 08 00 02 16 +" > exp || framework_failure + +# Note for full testing one should set bufsize to 1,2,5,10,80,100 +# at the start of lines_chunk_split(), for each run of this test. + +DEBUGGING= +test "$DEBUGGING" && test "$VERBOSE" && set +x +for N in 6 8 12 15 22; do + rm -f x* + split -n l/$N in + echo $(stat -c "%02s" x*) >> out + + if test "$DEBUGGING"; then + # Output partition pattern + size=$(printf "%s" "$lines" | wc -c) + chunk_size=$(($size/$N)) + end_size=$(($chunk_size + ($size % $N))) + { + yes "$(printf %${chunk_size}s ])" | head -n$(($N-1)) + printf %${end_size}s ] + } | tr -d '\n' | sed "s/\\(^.\\{1,$size\\}\\).*/\\1/" + echo + + # Output pattern generated for comparison + for s in $(stat -c "%s" x*); do + #s=0 transitions are not shown + test "$m" = "_" && m=- || m=_ + printf "%${s}s" '' | tr ' ' $m + done + echo + + # Output lines for reference + echo "$lines" + fi +done +test "$DEBUGGING" && test "$VERBOSE" && set -x + +compare out exp || fail=1 + +# Check extraction of particular chunks +: > out +printf '1\n12345\n' > exp +split -n l/13/15 in > out +compare out exp || fail=1 +: > out +printf '' > exp +split -n l/14/15 in > out +compare out exp || fail=1 +: > out +printf '1\n12345\n1\n' > exp +split -n l/15/15 in > out +compare out exp || fail=1 + +# test input with no \n at end +printf '12\n34\n5' > in +printf '5' > exp +split -n l/7/7 in > out +compare out exp || fail=1 + +Exit $fail diff --git a/tests/misc/split-rchunk b/tests/misc/split-rchunk new file 
mode 100755 index 000000000..3957f73cc --- /dev/null +++ b/tests/misc/split-rchunk @@ -0,0 +1,66 @@ +#!/bin/sh +# test splitting into round-robin chunks + +# Copyright (C) 2010 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src +print_ver_ split + +require_ulimit_ + +# N can be greater than the file size +# in which case no data is extracted, or empty files are written +split -n r/10 /dev/null || fail=1 +test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "10x0" || fail=1 +rm x?? + +# Ensure --elide-empty-files is honored +split -e -n r/10 /dev/null || fail=1 +stat x?? 2>/dev/null && fail=1 + +printf '1\n2\n3\n4\n5\n' > in || framework_failure + +split -n r/3 in > out || fail=1 +test -s out && fail=1 + +split -n r/1/3 in > r1 || fail=1 +split -n r/2/3 in > r2 || fail=1 +split -n r/3/3 in > r3 || fail=1 + +printf '1\n4\n' > exp-1 +printf '2\n5\n' > exp-2 +printf '3\n' > exp-3 + +compare xaa exp-1 || fail=1 +compare xab exp-2 || fail=1 +compare xac exp-3 || fail=1 +compare r1 exp-1 || fail=1 +compare r2 exp-2 || fail=1 +compare r3 exp-3 || fail=1 +test -f xad && fail=1 + +# Test input without trailing \n +printf '1\n2\n3\n4\n5' | split -n r/2/3 > out +printf '2\n5' > exp +compare out exp || fail=1 + +# Ensure we fall back to appending to a file at a time +# if we hit the limit for the number of open files. 
+rm x* +(ulimit -n 20 && yes | head -n90 | split -n r/30 ) || fail=1 +test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "30x6" || fail=1 + +Exit $fail -- cgit v1.2.3-54-g00ecf