diff options
author | Pádraig Brady <P@draigBrady.com> | 2015-02-05 13:10:49 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2015-02-06 10:41:06 +0000 |
commit | 235d52c3eaf2c8f6dd6eadb45ac458e71d3afc75 (patch) | |
tree | da786a26d718ac0342c0e133a01a6b49e21def6e | |
parent | 81609dc1ea66b97e6aaf373538deddc283cbd6c6 (diff) | |
download | coreutils-235d52c3eaf2c8f6dd6eadb45ac458e71d3afc75.tar.xz |
tail: return inotify resources where possible
Each user has a maximum number of inotify watches,
so handle the cases where we exhaust these resources.
* src/tail.c (tail_forever_inotify): Ensure we inotify_rm_watch()
the watch for an inode, when replacing with a new watch for a name.
Return all used inotify resources when reverting to polling.
Revert to polling upon first indication of inotify resource exhaustion.
Revert to polling on any inotify resource exhaustion.
Diagnose resource exhaustion correctly in all cases.
Avoid redundant reinsertion in the hash for unchanged watches
(where only attributes of the file are changed).
* tests/tail-2/retry.sh: Avoid false failure when reverting to polling.
* tests/tail-2/inotify-rotate.sh: Likewise.
* tests/tail-2/symlink.sh: Likewise.
* tests/tail-2/inotify-rotate-resources.sh: New test to check
that we're calling inotify_rm_watch() for replaced files.
* tests/local.mk: Reference the new test.
* NEWS: Mention the bug fix.
* THANKS.in: Thanks for reporting and problem identification.
-rw-r--r-- | NEWS | 4 | ||||
-rw-r--r-- | THANKS.in | 2 | ||||
-rw-r--r-- | src/tail.c | 86 | ||||
-rw-r--r-- | tests/local.mk | 1 | ||||
-rwxr-xr-x | tests/tail-2/inotify-rotate-resources.sh | 96 | ||||
-rwxr-xr-x | tests/tail-2/inotify-rotate.sh | 4 | ||||
-rwxr-xr-x | tests/tail-2/retry.sh | 23 | ||||
-rwxr-xr-x | tests/tail-2/symlink.sh | 13 |
8 files changed, 191 insertions, 38 deletions
@@ -35,6 +35,10 @@ GNU coreutils NEWS -*- outline -*- shuf -i with a single redundant operand, would crash instead of issuing a diagnostic. [bug introduced in coreutils-8.22] + tail releases inotify resources when unused. Previously it could exhaust + resources with many files, or with -F if files were replaced many times. + [bug introduced in coreutils-7.5] + ** New features chroot accepts the new --skip-chdir option to not change the working directory @@ -659,6 +659,8 @@ Xu Zhongxing xu_zhong_xing@163.com Yang Ren ryang@redhat.com Yanko Kaneti yaneti@declera.com Yann Dirson dirson@debian.org +Youngjun Song mastojun@gmail.com +Choi Jongu zoopi01@gmail.com Yutaka Amanai yasai-itame1942@jade.plala.or.jp ;; Local Variables: diff --git a/src/tail.c b/src/tail.c index b512c2a4e..5fdb956c6 100644 --- a/src/tail.c +++ b/src/tail.c @@ -1438,10 +1438,11 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, if (f[i].wd < 0) { - if (errno == ENOSPC) + if (errno == ENOSPC || errno == ENOMEM) { no_inotify_resources = true; error (0, 0, _("inotify resources exhausted")); + break; } else if (errno != f[i].errnum) error (0, errno, _("cannot watch %s"), quote (f[i].name)); @@ -1461,7 +1462,8 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, be currently present or accessible, so revert to polling. */ if (no_inotify_resources || found_unwatchable_dir) { - /* FIXME: release hash and inotify resources allocated above. */ + hash_free (wd_to_name); + errno = 0; return true; } @@ -1555,7 +1557,7 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, ev = void_ev; evbuf_off += sizeof (*ev) + ev->len; - if (ev->len) /* event on ev->name in watched directory */ + if (ev->len) /* event on ev->name in watched directory. */ { size_t j; for (j = 0; j < n_files; j++) @@ -1571,35 +1573,58 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, if (j == n_files) continue; - /* It's fine to add the same file more than once. */ + fspec = &(f[j]); + + /* Adding the same inode again will look up any existing wd. */ int new_wd = inotify_add_watch (wd, f[j].name, inotify_wd_mask); if (new_wd < 0) { - /* Can get ENOENT for a dangling symlink for example. */ - error (0, errno, _("cannot watch %s"), quote (f[j].name)); - continue; + if (errno == ENOSPC || errno == ENOMEM) + { + error (0, 0, _("inotify resources exhausted")); + hash_free (wd_to_name); + errno = 0; + return true; /* revert to polling. */ + } + else + { + /* Can get ENOENT for a dangling symlink for example. */ + error (0, errno, _("cannot watch %s"), quote (f[j].name)); + } + /* We'll continue below after removing the existing watch. */ } - fspec = &(f[j]); - - /* Remove 'fspec' and re-add it using 'new_fd' as its key. */ - hash_delete (wd_to_name, fspec); - fspec->wd = new_wd; + /* This will be false if only attributes of file change. */ + bool new_watch = fspec->wd < 0 || new_wd != fspec->wd; - /* If the file was moved then inotify will use the source file wd for - the destination file. Make sure the key is not present in the - table. */ - struct File_spec *prev = hash_delete (wd_to_name, fspec); - if (prev && prev != fspec) + if (new_watch) { - if (follow_mode == Follow_name) - recheck (prev, false); - prev->wd = -1; - close_fd (prev->fd, pretty_name (prev)); - } + if (0 <= fspec->wd) + { + inotify_rm_watch (wd, fspec->wd); + hash_delete (wd_to_name, fspec); + } - if (hash_insert (wd_to_name, fspec) == NULL) - xalloc_die (); + fspec->wd = new_wd; + + if (new_wd == -1) + continue; + + /* If the file was moved then inotify will use the source file wd + for the destination file. Make sure the key is not present in + the table. */ + struct File_spec *prev = hash_delete (wd_to_name, fspec); + if (prev && prev != fspec) + { + if (follow_mode == Follow_name) + recheck (prev, false); + prev->wd = -1; + close_fd (prev->fd, pretty_name (prev)); + } + + if (hash_insert (wd_to_name, fspec) == NULL) + xalloc_die (); + } if (follow_mode == Follow_name) recheck (fspec, false); @@ -2262,6 +2287,19 @@ main (int argc, char **argv) return EXIT_FAILURE; } error (0, errno, _("inotify cannot be used, reverting to polling")); + + /* Free resources as this process can be long lived, + and we may have exhausted system resources above. */ + + for (i = 0; i < n_files; i++) + { + /* It's OK to remove the same watch multiple times, + ignoring the EINVAL from redundant calls. */ + if (F[i].wd != -1) + inotify_rm_watch (wd, F[i].wd); + if (F[i].parent_wd != -1) + inotify_rm_watch (wd, F[i].parent_wd); + } } #endif disable_inotify = true; diff --git a/tests/local.mk b/tests/local.mk index bfa447c7d..53c7c83e0 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -172,6 +172,7 @@ all_tests = \ tests/tail-2/F-vs-missing.sh \ tests/tail-2/F-vs-rename.sh \ tests/tail-2/inotify-rotate.sh \ + tests/tail-2/inotify-rotate-resources.sh \ tests/chmod/no-x.sh \ tests/chgrp/basic.sh \ tests/rm/dangling-symlink.sh \ diff --git a/tests/tail-2/inotify-rotate-resources.sh b/tests/tail-2/inotify-rotate-resources.sh new file mode 100755 index 000000000..a43f17689 --- /dev/null +++ b/tests/tail-2/inotify-rotate-resources.sh @@ -0,0 +1,96 @@ +#!/bin/sh +# ensure that tail -F doesn't leak inotify resources + +# Copyright (C) 2015 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ tail + +grep '^#define HAVE_INOTIFY 1' "$CONFIG_HEADER" >/dev/null \ + || skip_ 'inotify required' + +require_strace_ inotify_rm_watch + +check_tail_output() +{ + local delay="$1" + grep "$tail_re" out > /dev/null || + { sleep $delay; return 1; } +} + +# Wait up to 25.5 seconds for grep REGEXP 'out' to succeed. +grep_timeout() { tail_re="$1" retry_delay_ check_tail_output .1 8; } + +strace_output() +{ + local delay="$1" + test -s strace.out || + { sleep $delay; return 1; } +} + +cleanup_fail() +{ + cat out + warn_ $1 + fail=1 +} + +# Normally less than a second is required here, but with heavy load +# and a lot of disk activity, even 20 seconds per grep_timeout is insufficient, +# which leads to this timeout (used as a safety net for cleanup) +# killing tail before processing is completed. +touch k || framework_failure_ +timeout 180 strace -e inotify_rm_watch -o strace.out tail -F k >> out 2>&1 & +pid=$! + +reverted_to_polling_=0 +for i in $(seq 2); do + echo $i + + echo 'tailed' > k; + # wait for 'tailed' in (after first iteration; new) file and then in 'out' + grep_timeout 'tailed' || { cleanup_fail 'failed to find "tailed"'; break; } + + mv k k.tmp + # wait for tail to detect the rename + grep_timeout 'inaccessible' || + { cleanup_fail 'failed to detect rename'; break; } + + # Assume this is not because we're leaking. + # The explicit check for inotify_rm_watch should confirm that. + grep -F 'reverting to polling' out >/dev/null && + { reverted_to_polling_=1; break; } + + # Note we strace here rather than consuming all available watches + # to be more efficient, but more importantly avoid depleting resources. + # Note also available resources can currently be tuned with: + # sudo sysctl -w fs.inotify.max_user_watches=$smallish_number + # However that impacts all processes for the current user, and also + # may not be supported in future, instead being auto scaled to RAM + # like the Linux epoll resources were. + if test "$i" -gt 1; then + retry_delay_ strace_output .1 8 || + { cleanup_fail 'failed to find inotify_rm_watch syscall'; break; } + fi + + >out && >strace.out || framework_failure_ 'failed to reset output files' +done + +test "$reverted_to_polling_" = 1 && skip_ 'inotify resources already depleted' +kill $pid +wait + +Exit $fail diff --git a/tests/tail-2/inotify-rotate.sh b/tests/tail-2/inotify-rotate.sh index 913bf992c..64724f9fd 100755 --- a/tests/tail-2/inotify-rotate.sh +++ b/tests/tail-2/inotify-rotate.sh @@ -32,9 +32,6 @@ check_tail_output() # Wait up to 25.5 seconds for grep REGEXP 'out' to succeed. grep_timeout() { tail_re="$1" retry_delay_ check_tail_output .1 8; } -# For details, see -# http://lists.gnu.org/archive/html/bug-coreutils/2009-11/msg00213.html - cleanup_fail() { cat out @@ -45,6 +42,7 @@ cleanup_fail() # Perform at least this many iterations, because on multi-core systems # the offending sequence of events can be surprisingly uncommon. +# See: http://lists.gnu.org/archive/html/bug-coreutils/2009-11/msg00213.html for i in $(seq 50); do echo $i rm -f k x out diff --git a/tests/tail-2/retry.sh b/tests/tail-2/retry.sh index 4f8422520..97bd6d3d4 100755 --- a/tests/tail-2/retry.sh +++ b/tests/tail-2/retry.sh @@ -19,6 +19,13 @@ . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src print_ver_ tail +# Function to count number of lines from tail +# while ignoring transient errors due to resource limits +countlines_ () +{ + grep -Ev 'inotify (resources exhausted|cannot be used)' out | wc -l +} + # Function to check the expected line count in 'out'. # Called via retry_delay_(). Sleep some time - see retry_delay_() - if the # line count is still smaller than expected. @@ -26,20 +33,20 @@ wait4lines_ () { local delay=$1 local elc=$2 # Expected line count. - [ "$( wc -l < out )" -ge "$elc" ] || { sleep $delay; return 1; } + [ "$(countlines_)" -ge "$elc" ] || { sleep $delay; return 1; } } # === Test: # Retry without --follow results in a warning. touch file tail --retry file > out 2>&1 || fail=1 -[ $( wc -l < out ) = 1 ] || fail=1 +[ "$(countlines_)" = 1 ] || fail=1 grep -F 'tail: warning: --retry ignored' out || fail=1 # === Test: # The same with a missing file: expect error message and exit 1. tail --retry missing > out 2>&1 && fail=1 -[ $( wc -l < out ) = 2 ] || fail=1 +[ "$(countlines_)" = 2 ] || fail=1 grep -F 'tail: warning: --retry ignored' out || fail=1 # === Test: @@ -53,7 +60,7 @@ retry_delay_ wait4lines_ .1 6 3 || fail=1 # Wait for the expected output. kill $pid wait $pid # Expect 3 lines in the output file. -[ $( wc -l < out ) = 3 ] || { fail=1; cat out; } +[ "$(countlines_)" = 3 ] || { fail=1; cat out; } grep -F 'cannot open' out || { fail=1; cat out; } grep -F 'has appeared' out || { fail=1; cat out; } grep '^X$' out || { fail=1; cat out; } @@ -69,7 +76,7 @@ retry_delay_ wait4lines_ .1 6 4 || fail=1 # Wait for the expected output. kill $pid wait $pid # Expect 4 lines in the output file. -[ $( wc -l < out ) = 4 ] || { fail=1; cat out; } +[ "$(countlines_)" = 4 ] || { fail=1; cat out; } grep -F 'retry only effective for the initial open' out \ || { fail=1; cat out; } grep -F 'cannot open' out || { fail=1; cat out; } @@ -86,7 +93,7 @@ mkdir missing || fail=1 # Create untailable 'missing'. retry_delay_ wait4lines_ .1 6 4 || fail=1 # Wait for the expected output. wait $pid rc=$? -[ $( wc -l < out ) = 4 ] || { fail=1; cat out; } +[ "$(countlines_)" = 4 ] || { fail=1; cat out; } grep -F 'retry only effective for the initial open' out \ || { fail=1; cat out; } grep -F 'cannot open' out || { fail=1; cat out; } @@ -100,14 +107,14 @@ rm -fd missing out || framework_failure_ # file to appear. Expect 2 lines in the output file ("cannot open" + # "no files remaining") and exit status 1. tail --follow=descriptor missing >out 2>&1 && fail=1 -[ $( wc -l < out ) = 2 ] || { fail=1; cat out; } +[ "$(countlines_)" = 2 ] || { fail=1; cat out; } grep -F 'cannot open' out || { fail=1; cat out; } grep -F 'no files remaining' out || { fail=1; cat out; } # === Test: # Likewise for --follow=name (without --retry). tail --follow=name missing >out 2>&1 && fail=1 -[ $( wc -l < out ) = 2 ] || { fail=1; cat out; } +[ "$(countlines_)" = 2 ] || { fail=1; cat out; } grep -F 'cannot open' out || { fail=1; cat out; } grep -F 'no files remaining' out || { fail=1; cat out; } diff --git a/tests/tail-2/symlink.sh b/tests/tail-2/symlink.sh index 300cb139b..274df9587 100755 --- a/tests/tail-2/symlink.sh +++ b/tests/tail-2/symlink.sh @@ -19,6 +19,13 @@ . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src print_ver_ tail +# Function to count number of lines from tail +# while ignoring transient errors due to resource limits +countlines_ () +{ + grep -Ev 'inotify (resources exhausted|cannot be used)' out | wc -l +} + # Function to check the expected line count in 'out'. # Called via retry_delay_(). Sleep some time - see retry_delay_() - if the # line count is still smaller than expected. @@ -26,7 +33,7 @@ wait4lines_ () { local delay=$1 local elc=$2 # Expected line count. - [ "$( wc -l < out )" -ge "$elc" ] || { sleep $delay; return 1; } + [ "$(countlines_)" -ge "$elc" ] || { sleep $delay; return 1; } } # Ensure changing targets of cli specified symlinks are handled. @@ -41,7 +48,7 @@ retry_delay_ wait4lines_ .1 6 3 || fail=1 # Wait for the expected output. kill $pid wait $pid # Expect 3 lines in the output file. -[ $( wc -l < out ) = 3 ] || { fail=1; cat out; } +[ "$(countlines_)" = 3 ] || { fail=1; cat out; } grep -F 'cannot open' out || { fail=1; cat out; } grep -F 'has appeared' out || { fail=1; cat out; } grep '^X$' out || { fail=1; cat out; } @@ -62,7 +69,7 @@ retry_delay_ wait4lines_ .1 6 4 || fail=1 # Wait for the expected output. kill $pid wait $pid # Expect 4 lines in the output file. -[ $( wc -l < out ) = 4 ] || { fail=1; cat out; } +[ "$(countlines_)" = 4 ] || { fail=1; cat out; } grep -F 'become inacce' out || { fail=1; cat out; } grep -F 'has appeared' out || { fail=1; cat out; } grep '^X1$' out || { fail=1; cat out; } |