diff options
author | Chen Guo <chenguo4@yahoo.com> | 2010-07-09 08:03:50 +0100 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2010-07-13 01:44:46 +0100 |
commit | 9face836f36c507f01a7d7a33138c5a303e3b1df (patch) | |
tree | e71865d7bb7882ba3b8d06a8cff2c38ad9511baf /gl | |
parent | f6e8c18f8d2077ea84aef679852622c8bd73789c (diff) | |
download | coreutils-9face836f36c507f01a7d7a33138c5a303e3b1df.tar.xz |
sort: parallelize internal sort
This patch is by Gene Auyeung, Chris Dickens, Chen Guo, and Mike
Nichols, based off of a patch by Paul Eggert, Glen Lenker, et. al.,
with a basic heap implementation based off of the GDSL heap,
originally by Nicolas Darnis.
The number of sorts done in parallel is limited to the number
of available processors by default, or can be further restricted
with the --parallel option.
On a dual-die, 8 core Intel Xeon, results show sorting with
8 threads is almost 4 times faster than using a single thread.
Timings when sorting a 96MB file:
THREADS TIME (s)
1 5.10
2 2.87
4 1.75
8 1.31
Single threaded sorting has also been improved,
especially for cheaper comparison operations:
COMMAND BEFORE (s) AFTER (s)
sort 8.822 8.716
sort -g 10.336 10.222
sort -n 3.077 2.961
LANG=C sort 2.169 2.066
* bootstrap.conf: Add heap, pthread.
* coreutils.texi (sort): Describe the new --parallel option.
* gl/lib/heap.c: New file. Very basic heap implementation.
* gl/lib/heap.h: New file.
* gl/modules/heap: New file.
* src/Makefile.am: Add LIB_PTHREAD.
* src/sort.c: Include heap.h, nproc.h, pthread.h.
(MAX_MERGE): New macro.
(SUBTHREAD_LINES_HEURISTIC, PARALLEL_OPTION): New constants.
(MERGE_END, MERGE_ROOT): New constants.
(struct merge_node): New struct.
(struct merge_node_queue): New struct.
(sortlines temp): Remove declaration.
(usage, long_options, main): New option, --parallel.
(specify_nthreads): New function.
(mergelines): New signature, to emphasize the fact that the HI area
must be part of the destination. All callers changed.
(sequential_sort): New function, renamed from sortlines. Merge in
the functionality of sortlines_temp.
(compare_nodes): New function.
(lock_node, unlock_node): New functions.
(queue_destroy): New function.
(queue_init): New function.
(queue_insert): New function.
(queue_pop): New function.
(write_unique): New function.
(mergelines_node): New function.
(check_insert): New function.
(update_parent): New function.
(merge_loop): New function.
(sortlines): Rewrite to support and use parallelism, with a new
signature. All callers changed.
(struct thread_args): New struct.
(sortlines_thread): New function.
(sortlines_temp): Remove.
(sort): New argument NTHREADS. All uses changed. Output moved to
mergelines_node.
(main): disable threading if we are sorting at random.
* tests/Makefile.am (TESTS): Add misc/sort-benchmark-random.
* tests/misc/sort-benchmark-random: New file.
Signed-off-by: Pádraig Brady <P@draigBrady.com>
Diffstat (limited to 'gl')
-rw-r--r-- | gl/lib/heap.c | 159 | ||||
-rw-r--r-- | gl/lib/heap.h | 34 | ||||
-rw-r--r-- | gl/modules/heap | 24 |
3 files changed, 217 insertions, 0 deletions
diff --git a/gl/lib/heap.c b/gl/lib/heap.c new file mode 100644 index 000000000..a37224fa0 --- /dev/null +++ b/gl/lib/heap.c @@ -0,0 +1,159 @@ +/* Barebones heap implementation supporting only insert and pop. + + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Full implementation: GDSL (http://gna.org/projects/gdsl/) by Nicolas + Darnis <ndarnis@free.fr>. */ + +#include <config.h> + +#include "heap.h" +#include "stdlib--.h" +#include "xalloc.h" + +static int heap_default_compare (const void *, const void *); +static size_t heapify_down (void **, size_t, size_t, + int (*)(const void *, const void *)); +static void heapify_up (void **, size_t, + int (*)(const void *, const void *)); + + +/* Allocate memory for the heap. */ + +struct heap * +heap_alloc (int (*compare)(const void *, const void *), size_t n_reserve) +{ + struct heap *heap; + void *xmalloc_ret = xmalloc (sizeof *heap); + heap = (struct heap *) xmalloc_ret; + if (!heap) + return NULL; + + if (n_reserve <= 0) + n_reserve = 1; + + xmalloc_ret = xmalloc (n_reserve * sizeof *(heap->array)); + heap->array = (void **) xmalloc_ret; + if (!heap->array) + { + free (heap); + return NULL; + } + + heap->array[0] = NULL; + heap->capacity = n_reserve; + heap->count = 0; + heap->compare = compare ? compare : heap_default_compare; + + return heap; +} + + +static int +heap_default_compare (const void *a, const void *b) +{ + return 0; +} + + +void +heap_free (struct heap *heap) +{ + free (heap->array); + free (heap); +} + +/* Insert element into heap. */ + +int +heap_insert (struct heap *heap, void *item) +{ + if (heap->capacity - 1 <= heap->count) + { + size_t new_size = (2 + heap->count) * sizeof *(heap->array); + void *realloc_ret = xrealloc (heap->array, new_size); + heap->array = (void **) realloc_ret; + heap->capacity = (2 + heap->count); + + if (!heap->array) + return -1; + } + + heap->array[++heap->count] = item; + heapify_up (heap->array, heap->count, heap->compare); + + return 0; +} + +/* Pop top element off heap. */ + +void * +heap_remove_top (struct heap *heap) +{ + if (heap->count == 0) + return NULL; + + void *top = heap->array[1]; + heap->array[1] = heap->array[heap->count--]; + heapify_down (heap->array, heap->count, 1, heap->compare); + + return top; +} + +/* Move element down into appropriate position in heap. */ + +static size_t +heapify_down (void **array, size_t count, size_t initial, + int (*compare)(const void *, const void *)) +{ + void *element = array[initial]; + + size_t parent = initial; + while (parent <= count / 2) + { + size_t child = 2 * parent; + + if (child < count && compare (array[child], array[child+1]) < 0) + child++; + + if (compare (array[child], element) <= 0) + break; + + array[parent] = array[child]; + parent = child; + } + + array[parent] = element; + return parent; +} + +/* Move element up into appropriate position in heap. */ + +static void +heapify_up (void **array, size_t count, + int (*compare)(const void *, const void *)) +{ + size_t k = count; + void *new_element = array[k]; + + while (k != 1 && compare (array[k/2], new_element) <= 0) + { + array[k] = array[k/2]; + k /= 2; + } + + array[k] = new_element; +} diff --git a/gl/lib/heap.h b/gl/lib/heap.h new file mode 100644 index 000000000..0ea516a79 --- /dev/null +++ b/gl/lib/heap.h @@ -0,0 +1,34 @@ +/* Barebones heap implementation supporting only insert and pop. + + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Full implementation: GDSL (http://gna.org/projects/gdsl/) by Nicolas + Darnis <ndarnis@free.fr>. Adapted by Gene Auyeung. */ + +#include <stddef.h> + +struct heap +{ + void **array; /* array[0] is not used */ + size_t capacity; /* Array size */ + size_t count; /* Used as index to last element. Also is num of items. */ + int (*compare)(const void *, const void *); +}; + +struct heap *heap_alloc (int (*)(const void *, const void *), size_t); +void heap_free (struct heap *); +int heap_insert (struct heap *heap, void *item); +void *heap_remove_top (struct heap *heap); diff --git a/gl/modules/heap b/gl/modules/heap new file mode 100644 index 000000000..cd97e2965 --- /dev/null +++ b/gl/modules/heap @@ -0,0 +1,24 @@ +Description: +Binary heap with minimal number of methods. Used in sort. + +Files: +lib/heap.c +lib/heap.h + +Depends-on: +stdlib-safer +xalloc + +configure.ac: + +Makefile.am: +lib_SOURCES += heap.c heap.h + +Include: +"heap.h" + +License +GPL + +Maintainer: +Gene Auyeung |