From 465f9512b710ee2fe03c3caf65bfdccdce3544ae Mon Sep 17 00:00:00 2001
From: Cojocaru Alexandru
Date: Tue, 7 May 2013 13:47:15 +0100
Subject: cut: improve performance, especially with --output-delimiter

Use a sentinel value that's checked implicitly, rather than
a bit array, to determine if an item should be output.

Benchmark results for this change are:

  $ yes abcdfeg | head -n1MB > big-file

  $ for c in orig sentinel; do
      src/cut-$c 2>/dev/null
      echo -ne "\n== $c =="
      time src/cut-$c -b1,3 big-file > /dev/null
    done

  == orig ==
  real 0m0.049s
  user 0m0.044s
  sys 0m0.005s

  == sentinel ==
  real 0m0.035s
  user 0m0.032s
  sys 0m0.002s

  ## Again with --output-delimiter ##
  $ for c in orig sentinel; do
      src/cut-$c 2>/dev/null
      echo -ne "\n== $c =="
      time src/cut-$c -b1,3 --output-delimiter=: big-file > /dev/null
    done

  == orig ==
  real 0m0.106s
  user 0m0.103s
  sys 0m0.002s

  == sentinel ==
  real 0m0.055s
  user 0m0.052s
  sys 0m0.003s

eol_range_start: Removed. 'n-' is no longer treated specially,
and instead SIZE_MAX is set for the 'hi' limit, and tested implicitly.
complement_rp: Used to complement 'rp' when '--complement' is specified.
ADD_RANGE_PAIR: Macro renamed to 'add_range_pair' function.
* tests/misc/cut-huge-range.sh: Adjust to the SENTINEL value.
Also remove the overlapping range test as this is no longer
dependent on large ranges and also is already handled with the
EOL-subsumed-3 test in cut.pl.
---
 tests/misc/cut-huge-range.sh | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'tests')

diff --git a/tests/misc/cut-huge-range.sh b/tests/misc/cut-huge-range.sh
index 9905cd758..e9190a2c2 100755
--- a/tests/misc/cut-huge-range.sh
+++ b/tests/misc/cut-huge-range.sh
@@ -21,19 +21,22 @@
 print_ver_ cut
 require_ulimit_v_
 getlimits_
 
+# Ensure we can cut up to our sentinel value.
+# This is currently SIZE_MAX, but could be raised to UINTMAX_MAX
+# if we didn't allocate memory for each line as a unit.
+CUT_MAX=$(expr $SIZE_MAX - 1)
+
 # From coreutils-8.10 through 8.20, this would make cut try to allocate
 # a 256MiB bit vector. With a 20MB limit on VM, the following would fail.
-(ulimit -v 20000; : | cut -b$INT_MAX- > err 2>&1) || fail=1
+(ulimit -v 20000; : | cut -b$CUT_MAX- > err 2>&1) || fail=1
 
 # Up to and including coreutils-8.21, cut would allocate possibly needed
-# memory upfront. Subsequently memory is allocated as required.
-(ulimit -v 20000; : | cut -b1-$INT_MAX >> err 2>&1) || fail=1
-
-# Ensure ranges are merged correctly when large range logic is in effect
-echo 1 > exp
-(dd bs=1MB if=/dev/zero count=1; echo '1') |
-cut -b1-1000000,2-3,4-5,1000001 2>>err | tail -c2 > out || fail=1
-compare exp out || fail=1
+# memory upfront. Subsequently extra memory is no longer needed.
+(ulimit -v 20000; : | cut -b1-$CUT_MAX >> err 2>&1) || fail=1
+
+# Explicitly disallow values above CUT_MAX
+(ulimit -v 20000; : | cut -b$SIZE_MAX 2>/dev/null) && fail=1
+(ulimit -v 20000; : | cut -b$SIZE_OFLOW 2>/dev/null) && fail=1
 
 compare /dev/null err || fail=1
-- 
cgit v1.2.3-54-g00ecf