From fe08796d7c5e7f67d934ce9e8285433651b0f538 Mon Sep 17 00:00:00 2001 From: Pádraig Brady Date: Fri, 11 Jul 2014 16:11:22 +0100 Subject: sort: avoid undefined operation with destroying locked mutex This didn't seem to cause any invalid operation on GNU/Linux at least, but depending on the implementation, mutex deadlocks could occur. For example this might be the cause of lockups seen on Solaris: http://lists.gnu.org/archive/html/coreutils/2013-03/msg00048.html This was identified with valgrind 3.9.0 with this setup: seq 200000 > file.sort valgrind --tool=drd src/sort file.sort -o file.sort With that, valgrind would _intermittently_ report the following: Destroying locked mutex: mutex 0x5419548, recursion count 1, owner 2. at 0x4C2E3F0: pthread_mutex_destroy(in vgpreload_drd-amd64-linux.so) by 0x409FA2: sortlines (sort.c:3649) by 0x409E26: sortlines (sort.c:3621) by 0x40AA9E: sort (sort.c:3955) by 0x40C5D9: main (sort.c:4739) mutex 0x5419548 was first observed at: at 0x4C2DE82: pthread_mutex_init(in vgpreload_drd-amd64-linux.so) by 0x409266: init_node (sort.c:3276) by 0x4092F4: init_node (sort.c:3286) by 0x4090DD: merge_tree_init (sort.c:3234) by 0x40AA5A: sort (sort.c:3951) by 0x40C5D9: main (sort.c:4739) Thread 2: The object at address 0x5419548 is not a mutex. at 0x4C2F4A4: pthread_mutex_unlock(in vgpreload_drd-amd64-linux.so) by 0x4093CA: unlock_node (sort.c:3323) by 0x409C85: merge_loop (sort.c:3531) by 0x409F8F: sortlines (sort.c:3644) by 0x409CE3: sortlines_thread (sort.c:3574) by 0x4E44F32: start_thread (in /usr/lib64/libpthread-2.18.so) by 0x514EEAC: clone (in /usr/lib64/libc-2.18.so) * src/sort.c (sortlines): Move pthread_mutex_destroy() out to merge_tree_destroy(), so that we don't overlap mutex destruction with threads still operating on the nodes. (sort): Call the destructors only with "lint" defined, as the memory used will be deallocated implicitly at process end. * NEWS: Mention the bug fix. --- src/sort.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/sort.c b/src/sort.c index af95f7110..c24931920 100644 --- a/src/sort.c +++ b/src/sort.c @@ -3237,10 +3237,17 @@ merge_tree_init (size_t nthreads, size_t nlines, struct line *dest) /* Destroy the merge tree. */ static void -merge_tree_destroy (struct merge_node *merge_tree) +merge_tree_destroy (size_t nthreads, struct merge_node *merge_tree) { - struct merge_node *root = merge_tree; - pthread_mutex_destroy (&root->lock); + size_t n_nodes = nthreads * 2; + struct merge_node *node = merge_tree; + + while (n_nodes--) + { + pthread_mutex_destroy (&node->lock); + node++; + } + free (merge_tree); } @@ -3642,8 +3649,6 @@ sortlines (struct line *restrict lines, size_t nthreads, queue_insert (queue, node); merge_loop (queue, total_lines, tfp, temp_output); } - - pthread_mutex_destroy (&node->lock); } /* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are @@ -3947,12 +3952,14 @@ sort (char *const *files, size_t nfiles, char const *output_file, queue_init (&queue, nthreads); struct merge_node *merge_tree = merge_tree_init (nthreads, buf.nlines, line); - struct merge_node *root = merge_tree + 1; - sortlines (line, nthreads, buf.nlines, root, + sortlines (line, nthreads, buf.nlines, merge_tree + 1, &queue, tfp, temp_output); + +#ifdef lint + merge_tree_destroy (nthreads, merge_tree); queue_destroy (&queue); - merge_tree_destroy (merge_tree); +#endif } else write_unique (line - 1, tfp, temp_output); -- cgit v1.2.3-54-g00ecf