From 3e466ad05181d95057e6612ff11059c91396cd0e Mon Sep 17 00:00:00 2001 From: Cojocaru Alexandru Date: Sun, 9 Dec 2012 10:43:10 +0100 Subject: cut: make memory allocation independent of range width The current implementation of cut uses a bit array, an array of `struct range_pair's, and (when --output-delimiter is specified) a hash_table. The new implementation will use only an array of `struct range_pair's. The old implementation is memory-inefficient because: 1. When -b with a big num is specified, it allocates a lot of memory for `printable_field'. 2. When --output-delimiter is specified, it will allocate 31 buckets, even if only a few ranges are specified. Note that CPU overhead is increased to determine if an item is to be printed, as shown by: $ yes abcdfeg | head -n1MB > big-file $ for c in with-bitarray without-bitarray; do src/cut-$c 2>/dev/null echo -ne "\n== $c ==" time src/cut-$c -b1,3 big-file > /dev/null done == with-bitarray == real 0m0.084s user 0m0.078s sys 0m0.006s == without-bitarray == real 0m0.111s user 0m0.108s sys 0m0.002s Subsequent patches will reduce this overhead. * src/cut.c (set_fields): Set and initialize RP instead of printable_field. * src/cut.c (is_range_start_index): Use CURRENT_RP rather than a hash. * tests/misc/cut.pl: Check if `eol_range_start' is set correctly. * tests/misc/cut-huge-range.sh: Rename from cut-huge-to-eol-range.sh, and add a test to verify large amounts of mem aren't allocated. 
Fixes http://bugs.gnu.org/13127 --- tests/local.mk | 2 +- tests/misc/cut-huge-range.sh | 34 ++++++++++++++++++++++++++++++++++ tests/misc/cut-huge-to-eol-range.sh | 30 ------------------------------ tests/misc/cut.pl | 2 ++ 4 files changed, 37 insertions(+), 31 deletions(-) create mode 100755 tests/misc/cut-huge-range.sh delete mode 100755 tests/misc/cut-huge-to-eol-range.sh (limited to 'tests') diff --git a/tests/local.mk b/tests/local.mk index f47da8d3a..fb5cc63b6 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -245,7 +245,7 @@ all_tests = \ tests/misc/pwd-option.sh \ tests/misc/chcon-fail.sh \ tests/misc/cut.pl \ - tests/misc/cut-huge-to-eol-range.sh \ + tests/misc/cut-huge-range.sh \ tests/misc/wc.pl \ tests/misc/wc-files0-from.pl \ tests/misc/wc-files0.sh \ diff --git a/tests/misc/cut-huge-range.sh b/tests/misc/cut-huge-range.sh new file mode 100755 index 000000000..8783e96ad --- /dev/null +++ b/tests/misc/cut-huge-range.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# Ensure that cut does not allocate mem for a range like -b9999999999999- + +# Copyright (C) 2012-2013 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ cut +require_ulimit_v_ +getlimits_ + +# From coreutils-8.10 through 8.20, this would make cut try to allocate +# a 256MiB bit vector. With a 20MB limit on VM, the following would fail. 
+(ulimit -v 20000; : | cut -b$INT_MAX- > err 2>&1) || fail=1 + +# Up to and including coreutils-8.21, cut would allocate possibly needed +# memory upfront. Subsequently memory is allocated as required. +(ulimit -v 20000; : | cut -b1-$INT_MAX > err 2>&1) || fail=1 + +compare /dev/null err || fail=1 + +Exit $fail diff --git a/tests/misc/cut-huge-to-eol-range.sh b/tests/misc/cut-huge-to-eol-range.sh deleted file mode 100755 index e6abe6ec5..000000000 --- a/tests/misc/cut-huge-to-eol-range.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh -# Ensure that cut does not allocate mem for a range like -b9999999999999- - -# Copyright (C) 2012-2013 Free Software Foundation, Inc. - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src -print_ver_ cut -require_ulimit_v_ -getlimits_ - -# From coreutils-8.10 through 8.20, this would make cut try to allocate -# a 256MiB bit vector. With a 20MB limit on VM, the following would fail. 
-(ulimit -v 20000; : | cut -b$INT_MAX- > err 2>&1) || fail=1 - -compare /dev/null err || fail=1 - -Exit $fail diff --git a/tests/misc/cut.pl b/tests/misc/cut.pl index 41e9e2093..1306722cd 100755 --- a/tests/misc/cut.pl +++ b/tests/misc/cut.pl @@ -210,6 +210,8 @@ my @Tests = {IN=>"123456\n"}, {OUT=>"23456\n"}], ['EOL-subsumed-3', '--complement -b3,4-4,5,2-', {IN=>"123456\n"}, {OUT=>"1\n"}], + ['EOL-subsumed-4', '--output-d=: -b1-2,2-3,3-', + {IN=>"1234\n"}, {OUT=>"1234\n"}], ); if ($mb_locale ne 'C') -- cgit v1.2.3-54-g00ecf