From e0245ae0aed72338d5dcb5b71f7a963859a93317 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 10 Feb 2011 08:48:27 +0100 Subject: sort: spawn fewer threads for small inputs * src/sort.c (SUBTHREAD_LINES_HEURISTIC): Do not spawn a new thread for every 4 lines. Increase this from 4 to 128K. 128K lines seems appropriate for a 5-year-old dual-core laptop, but it is too low for some common combinations of short lines and/or newer systems. * NEWS (Bug fixes): Mention it. --- NEWS | 9 ++++++--- src/sort.c | 16 ++++++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index 3157ef201..5770410bf 100644 --- a/NEWS +++ b/NEWS @@ -4,13 +4,16 @@ GNU coreutils NEWS -*- outline -*- ** Bug fixes - du would infloop when given --files0-from=DIR - [bug introduced in coreutils-7.1] - cut could segfault when invoked with a user-specified output delimiter and an unbounded range like "-f1234567890-". [bug introduced in coreutils-5.3.0] + du would infloop when given --files0-from=DIR + [bug introduced in coreutils-7.1] + + sort no longer spawns 7 worker threads to sort 16 lines + [bug introduced in coreutils-8.6] + wc would dereference a NULL pointer upon an early out-of-memory error [bug introduced in coreutils-7.1] diff --git a/src/sort.c b/src/sort.c index 13954cbbc..9b8666afb 100644 --- a/src/sort.c +++ b/src/sort.c @@ -109,12 +109,16 @@ struct rlimit { size_t rlim_cur; }; and is responsible for merging TOTAL lines. */ #define MAX_MERGE(total, level) (((total) >> (2 * ((level) + 1))) + 1) -/* Heuristic value for the number of lines for which it is worth - creating a subthread, during an internal merge sort, on a machine - that has processors galore. Currently this number is just a guess. - This value must be at least 4. We don't know of any machine where - this number has any practical effect. 
*/ -enum { SUBTHREAD_LINES_HEURISTIC = 4 }; +/* Heuristic value for the number of lines for which it is worth creating + a subthread, during an internal merge sort. I.e., it is a small number + of "average" lines for which sorting via two threads is faster than + sorting via one on an "average" system. On a dual-core 2.0 GHz i686 + system with 3GB of RAM and 2MB of L2 cache, a file containing 128K + lines of gensort -a output is sorted slightly faster with --parallel=2 + than with --parallel=1. By contrast, using --parallel=1 is about 10% + faster than using --parallel=2 with a 64K-line input. */ +enum { SUBTHREAD_LINES_HEURISTIC = 128 * 1024 }; +verify (4 <= SUBTHREAD_LINES_HEURISTIC); /* The number of threads after which there are diminishing performance gains. */ -- cgit v1.2.3-54-g00ecf