cut: make memory allocation independent of range width

The current implementation of cut uses a bit array, an array of
`struct range_pair's, and (when --output-delimiter is specified)
a hash_table.  The new implementation will use only an array of
`struct range_pair's.

The old implementation is memory inefficient because:
 1. When -b with a big num is specified, it allocates a lot of
    memory for `printable_field'.
 2. When --output-delimiter is specified, it will allocate 31 buckets,
    even if only a few ranges are specified.

Note that CPU overhead is increased to determine if an item is to be
printed, as shown by:

  $ yes abcdfeg | head -n1MB > big-file
  $ for c in with-bitarray without-bitarray; do
      src/cut-$c 2>/dev/null
      echo -ne "\n== $c =="
      time src/cut-$c -b1,3 big-file > /dev/null
    done

  == with-bitarray ==
  real    0m0.084s
  user    0m0.078s
  sys     0m0.006s

  == without-bitarray ==
  real    0m0.111s
  user    0m0.108s
  sys     0m0.002s

Subsequent patches will reduce this overhead.

* src/cut.c (set_fields): Set and initialize RP instead of printable_field.
* src/cut.c (is_range_start_index): Use CURRENT_RP rather than a hash.
* tests/misc/cut.pl: Check if `eol_range_start' is set correctly.
* tests/misc/cut-huge-range.sh: Rename from cut-huge-to-eol-range.sh,
and add a test to verify large amounts of mem aren't allocated.

Fixes http://bugs.gnu.org/13127
author: Cojocaru Alexandru <xojoc@gmx.com> 2012-12-09 10:43:10 +0100
committer: Pádraig Brady <P@draigBrady.com> 2013-04-29 17:54:27 +0100
commit: 3e466ad05181d95057e6612ff11059c91396cd0e (patch)
tree: 2110ad15ceb663c914eb61edb50d0df5408f4866 /tests
parent: e414ff4c4c3fe029a9702c9909bf4eccbef68c21 (diff)
download: coreutils-3e466ad05181d95057e6612ff11059c91396cd0e.tar.xz
3 files changed, 7 insertions, 1 deletion
diff --git a/tests/local.mk b/tests/local.mk
index f47da8d3a..fb5cc63b6 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -245,7 +245,7 @@ all_tests =					\
   tests/misc/pwd-option.sh			\
   tests/misc/chcon-fail.sh			\
   tests/misc/cut.pl				\
-  tests/misc/cut-huge-to-eol-range.sh		\
+  tests/misc/cut-huge-range.sh			\
   tests/misc/wc.pl				\
   tests/misc/wc-files0-from.pl			\
   tests/misc/wc-files0.sh			\
diff --git a/tests/misc/cut-huge-to-eol-range.sh b/tests/misc/cut-huge-range.sh
index e6abe6ec5..8783e96ad 100755
--- a/tests/misc/cut-huge-to-eol-range.sh
+++ b/tests/misc/cut-huge-range.sh
@@ -25,6 +25,10 @@ getlimits_
 # a 256MiB bit vector.  With a 20MB limit on VM, the following would fail.
 (ulimit -v 20000; : | cut -b$INT_MAX- > err 2>&1) || fail=1
 
+# Up to and including coreutils-8.21, cut would allocate possibly needed
+# memory upfront.  Subsequently memory is allocated as required.
+(ulimit -v 20000; : | cut -b1-$INT_MAX > err 2>&1) || fail=1
+
 compare /dev/null err || fail=1
 
 Exit $fail
diff --git a/tests/misc/cut.pl b/tests/misc/cut.pl
index 41e9e2093..1306722cd 100755
--- a/tests/misc/cut.pl
+++ b/tests/misc/cut.pl
@@ -210,6 +210,8 @@ my @Tests =
                                          {IN=>"123456\n"}, {OUT=>"23456\n"}],
   ['EOL-subsumed-3', '--complement -b3,4-4,5,2-',
                                          {IN=>"123456\n"}, {OUT=>"1\n"}],
+  ['EOL-subsumed-4', '--output-d=: -b1-2,2-3,3-',
+                                        {IN=>"1234\n"}, {OUT=>"1234\n"}],
  );
 
 if ($mb_locale ne 'C')
author	Cojocaru Alexandru <xojoc@gmx.com>	2012-12-09 10:43:10 +0100
committer	Pádraig Brady <P@draigBrady.com>	2013-04-29 17:54:27 +0100
commit	3e466ad05181d95057e6612ff11059c91396cd0e (patch)
tree	2110ad15ceb663c914eb61edb50d0df5408f4866 /tests
parent	e414ff4c4c3fe029a9702c9909bf4eccbef68c21 (diff)
download	coreutils-3e466ad05181d95057e6612ff11059c91396cd0e.tar.xz