Big performance improvement when sorting many small files,

building on a suggestion by Charles Randall.
(fillbuf): Skip memmove if it would be a no-op, as many memmove implementations are slow in that case. Don't examine leftover bytes for eolchar, since they may be left over from a previous file, and we want to read from this file.
(sort): At end of file, if there is more input and buffer room, concatenate the next input file.
author: Jim Meyering <jim@meyering.net> 2000-03-03 08:18:48 +0000
committer: Jim Meyering <jim@meyering.net> 2000-03-03 08:18:48 +0000
commit: 8e1e6d357a8d32a62bfcc5c3ce02dfa7529247f5 (patch)
tree: 6a343fbabf3e0d9e599dc179f8ca30edb774b749
parent: 8b8685ef313c2e3112690345dbf6be98f18435dd (diff)
download: coreutils-8e1e6d357a8d32a62bfcc5c3ce02dfa7529247f5.tar.xz
1 files changed, 17 insertions, 3 deletions
diff --git a/src/sort.c b/src/sort.c
index 1d0741944..e96f0db19 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -547,10 +547,13 @@ fillbuf (struct buffer *buf, FILE *fp)
 {
   int cc;
 
-  memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
-  cc = buf->used = buf->left;
+  if (buf->used != buf->left)
+    {
+      memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
+      buf->used = buf->left;
+    }
 
-  while (!feof (fp) && !memchr (buf->buf + buf->used - cc, eolchar, cc))
+  while (!feof (fp))
     {
       if (buf->used == buf->alloc)
 	{
@@ -565,6 +568,8 @@ fillbuf (struct buffer *buf, FILE *fp)
 	  exit (SORT_FAILURE);
 	}
       buf->used += cc;
+      if (memchr (buf->buf + buf->used - cc, eolchar, cc))
+	break;
     }
 
   if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
@@ -1678,6 +1683,15 @@ sort (char **files, int nfiles, FILE *ofp, const char *output_file)
       fp = xfopen (*files++, "r");
       while (fillbuf (&buf, fp))
 	{
+	  if (nfiles && buf.used != buf.alloc && feof (fp))
+	    {
+	      /* End of file, but there is more input and buffer room.
+		 Concatenate the next input file; this is faster in
+		 the usual case.  */
+	      buf.left = buf.used;
+	      break;
+	    }
+
 	  findlines (&buf, &lines);
 	  if (ntmp < lines.used)
 	    {
author	Jim Meyering <jim@meyering.net>	2000-03-03 08:18:48 +0000
committer	Jim Meyering <jim@meyering.net>	2000-03-03 08:18:48 +0000
commit	8e1e6d357a8d32a62bfcc5c3ce02dfa7529247f5 (patch)
tree	6a343fbabf3e0d9e599dc179f8ca30edb774b749
parent	8b8685ef313c2e3112690345dbf6be98f18435dd (diff)
download	coreutils-8e1e6d357a8d32a62bfcc5c3ce02dfa7529247f5.tar.xz