wc: don't miscount /sys and similar file systems

Fix similar problems in head, od, split, tac, and tail.
Reported by George Shuklin in: http://bugs.gnu.org/18621
* NEWS: Document this.
* src/head.c (elseek): Move up.
(elide_tail_bytes_pipe, elide_tail_lines_pipe): New arg CURRENT_POS.
All uses changed.
(elide_tail_bytes_file, elide_tail_lines_file): New arg ST and
remove arg SIZE. All uses changed.
* src/head.c (elide_tail_bytes_file):
* src/od.c (skip): Avoid optimization for /sys files, where
st_size is bogus and st_size == st_blksize.
Don't report error at EOF when not optimizing.
* src/head.c, src/od.c, src/tail.c: Include "stat-size.h".
* src/split.c (input_file_size): New function.
(bytes_split, lines_chunk_split, bytes_chunk_extract):
New arg INITIAL_READ. All uses changed.
Use it to double-check st_size.
* src/tac.c (tac_seekable): New arg FILE_POS. All uses changed.
(copy_to_temp): Return size of temp file. All uses changed.
* src/tac.c (tac_seekable):
* src/tail.c (tail_bytes):
* src/wc.c (wc): Don't trust st_size; double-check by reading.
* src/wc.c (wc): New arg CURRENT_POS. All uses changed.
* tests/local.mk (all_tests): Add tests/misc/wc-proc.sh,
tests/misc/od-j.sh, tests/tail-2/tail-c.sh.
* tests/misc/head-c.sh:
* tests/misc/tac-2-nonseekable.sh:
* tests/split/b-chunk.sh: Add tests for problems with /proc and /sys files.
* tests/misc/od-j.sh, tests/misc/wc-proc.sh, tests/tail-2/tail-c.sh:
New files.
author: Paul Eggert <eggert@cs.ucla.edu> 2014-10-07 16:46:08 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2014-10-07 16:47:37 -0700
commit: 2662702b9e8643f62c670bbf2fa94b1be1ccf9af (patch)
tree: c82775c20abc304fa4f187218830dc3aa2f7e481
parent: b020002b4bfae55d5bbcf66bd7ce787a4e6da689 (diff)
download: coreutils-2662702b9e8643f62c670bbf2fa94b1be1ccf9af.tar.xz
14 files changed, 463 insertions, 188 deletions
diff --git a/NEWS b/NEWS
index 1811ae473..a323b0c6e 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   file types, a warning is issued for source directories with duplicate names,
   or with -H the directory is copied again using the symlink name.
 
+  head, od, split, tac, tail, and wc no longer mishandle input from files in
+  /proc and /sys file systems that report somewhat-incorrect file sizes.
+
 ** New features
 
   chroot accepts the new --skip-chdir option to not change the working directory
diff --git a/src/head.c b/src/head.c
index d2f1fce60..2782f8e8c 100644
--- a/src/head.c
+++ b/src/head.c
@@ -36,6 +36,7 @@
 #include "quote.h"
 #include "quotearg.h"
 #include "safe-read.h"
+#include "stat-size.h"
 #include "xfreopen.h"
 #include "xstrtol.h"
 
@@ -206,13 +207,42 @@ copy_fd (int src_fd, uintmax_t n_bytes)
   return COPY_FD_OK;
 }
 
-/* Print all but the last N_ELIDE bytes from the input available via
-   the non-seekable file descriptor FD.  Return true upon success.
+/* Call lseek (FD, OFFSET, WHENCE), where file descriptor FD
+   corresponds to the file FILENAME.  WHENCE must be SEEK_SET or
+   SEEK_CUR.  Return the resulting offset.  Give a diagnostic and
+   return -1 if lseek fails.  */
+
+static off_t
+elseek (int fd, off_t offset, int whence, char const *filename)
+{
+  off_t new_offset = lseek (fd, offset, whence);
+  char buf[INT_BUFSIZE_BOUND (offset)];
+
+  if (new_offset < 0)
+    error (0, errno,
+           _(whence == SEEK_SET
+             ? N_("%s: cannot seek to offset %s")
+             : N_("%s: cannot seek to relative offset %s")),
+           quotearg_colon (filename),
+           offtostr (offset, buf));
+
+  return new_offset;
+}
+
+/* For an input file with name FILENAME and descriptor FD,
+   output all but the last N_ELIDE_0 bytes.
+   If CURRENT_POS is nonnegative, assume that the input file is
+   positioned at CURRENT_POS and that it should be repositioned to
+   just before the elided bytes before returning.
+   Return true upon success.
    Give a diagnostic and return false upon error.  */
 static bool
-elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
+elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0,
+                       off_t current_pos)
 {
   size_t n_elide = n_elide_0;
+  uintmax_t desired_pos = current_pos;
+  bool ok = true;
 
 #ifndef HEAD_TAIL_PIPE_READ_BUFSIZE
 # define HEAD_TAIL_PIPE_READ_BUFSIZE BUFSIZ
@@ -251,7 +281,6 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
 
   if (n_elide <= HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD)
     {
-      bool ok = true;
       bool first = true;
       bool eof = false;
       size_t n_to_read = READ_BUFSIZE + n_elide;
@@ -293,22 +322,26 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
           /* Output any (but maybe just part of the) elided data from
              the previous round.  */
           if (! first)
-            xwrite_stdout (b[!i] + READ_BUFSIZE, n_elide - delta);
+            {
+              desired_pos += n_elide - delta;
+              xwrite_stdout (b[!i] + READ_BUFSIZE, n_elide - delta);
+            }
           first = false;
 
           if (n_elide < n_read)
-            xwrite_stdout (b[i], n_read - n_elide);
+            {
+              desired_pos += n_read - n_elide;
+              xwrite_stdout (b[i], n_read - n_elide);
+            }
         }
 
       free (b[0]);
-      return ok;
     }
   else
     {
       /* Read blocks of size READ_BUFSIZE, until we've read at least n_elide
          bytes.  Then, for each new buffer we read, also write an old one.  */
 
-      bool ok = true;
       bool eof = false;
       size_t n_read;
       bool buffered_enough;
@@ -357,7 +390,10 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
             buffered_enough = true;
 
           if (buffered_enough)
-            xwrite_stdout (b[i_next], n_read);
+            {
+              desired_pos += n_read;
+              xwrite_stdout (b[i_next], n_read);
+            }
         }
 
       /* Output any remainder: rem bytes from b[i] + n_read.  */
@@ -366,6 +402,7 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
           if (buffered_enough)
             {
               size_t n_bytes_left_in_b_i = READ_BUFSIZE - n_read;
+              desired_pos += rem;
               if (rem < n_bytes_left_in_b_i)
                 {
                   xwrite_stdout (b[i] + n_read, rem);
@@ -392,6 +429,7 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
                */
               size_t y = READ_BUFSIZE - rem;
               size_t x = n_read - y;
+              desired_pos += x;
               xwrite_stdout (b[i_next], x);
             }
         }
@@ -400,36 +438,16 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
       for (i = 0; i < n_alloc; i++)
         free (b[i]);
       free (b);
-
-      return ok;
     }
-}
-
-/* Call lseek (FD, OFFSET, WHENCE), where file descriptor FD
-   corresponds to the file FILENAME.  WHENCE must be SEEK_SET or
-   SEEK_CUR.  Return the resulting offset.  Give a diagnostic and
-   return -1 if lseek fails.  */
-
-static off_t
-elseek (int fd, off_t offset, int whence, char const *filename)
-{
-  off_t new_offset = lseek (fd, offset, whence);
-  char buf[INT_BUFSIZE_BOUND (offset)];
 
-  if (new_offset < 0)
-    error (0, errno,
-           _(whence == SEEK_SET
-             ? N_("%s: cannot seek to offset %s")
-             : N_("%s: cannot seek to relative offset %s")),
-           quotearg_colon (filename),
-           offtostr (offset, buf));
-
-  return new_offset;
+  if (0 <= current_pos && elseek (fd, desired_pos, SEEK_SET, filename) < 0)
+    ok = false;
+  return ok;
 }
 
 /* For the file FILENAME with descriptor FD, output all but the last N_ELIDE
    bytes.  If SIZE is nonnegative, this is a regular file positioned
-   at START_POS with SIZE bytes.  Return true on success.
+   at CURRENT_POS with SIZE bytes.  Return true on success.
    Give a diagnostic and return false upon error.  */
 
 /* NOTE: if the input file shrinks by more than N_ELIDE bytes between
@@ -437,10 +455,11 @@ elseek (int fd, off_t offset, int whence, char const *filename)
 
 static bool
 elide_tail_bytes_file (const char *filename, int fd, uintmax_t n_elide,
-                       off_t current_pos, off_t size)
+                       struct stat const *st, off_t current_pos)
 {
-  if (size < 0)
-    return elide_tail_bytes_pipe (filename, fd, n_elide);
+  off_t size = st->st_size;
+  if (size <= ST_BLKSIZE (*st))
+    return elide_tail_bytes_pipe (filename, fd, n_elide, current_pos);
   else
     {
       /* Be careful here.  The current position may actually be
@@ -460,13 +479,16 @@ elide_tail_bytes_file (const char *filename, int fd, uintmax_t n_elide,
     }
 }
 
-/* Print all but the last N_ELIDE lines from the input stream
-   open for reading via file descriptor FD.
+/* For an input file with name FILENAME and descriptor FD,
+   output all but the last N_ELIDE lines.
+   If CURRENT_POS is nonnegative, the input file is positioned there
+   and should be repositioned to just before the elided lines.
    Buffer the specified number of lines as a linked list of LBUFFERs,
    adding them as needed.  Return true if successful.  */
 
 static bool
-elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
+elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
+                       off_t current_pos)
 {
   struct linebuffer
   {
@@ -475,6 +497,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
     size_t nlines;
     struct linebuffer *next;
   };
+  uintmax_t desired_pos = current_pos;
   typedef struct linebuffer LBUFFER;
   LBUFFER *first, *last, *tmp;
   size_t total_lines = 0;	/* Total number of newlines in all buffers.  */
@@ -497,6 +520,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
 
       if (! n_elide)
         {
+          desired_pos += n_read;
           xwrite_stdout (tmp->buffer, n_read);
           continue;
         }
@@ -536,6 +560,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
           last = last->next = tmp;
           if (n_elide < total_lines - first->nlines)
             {
+              desired_pos += first->nbytes;
               xwrite_stdout (first->buffer, first->nbytes);
               tmp = first;
               total_lines -= first->nlines;
@@ -565,6 +590,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
 
   for (tmp = first; n_elide < total_lines - tmp->nlines; tmp = tmp->next)
     {
+      desired_pos += tmp->nbytes;
       xwrite_stdout (tmp->buffer, tmp->nbytes);
       total_lines -= tmp->nlines;
     }
@@ -581,6 +607,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
           ++tmp->nlines;
           --n;
         }
+      desired_pos += p - tmp->buffer;
       xwrite_stdout (tmp->buffer, p - tmp->buffer);
     }
 
@@ -591,6 +618,9 @@ free_lbuffers:
       free (first);
       first = tmp;
     }
+
+  if (0 <= current_pos && elseek (fd, desired_pos, SEEK_SET, filename) < 0)
+    ok = false;
   return ok;
 }
 
@@ -714,10 +744,11 @@ elide_tail_lines_seekable (const char *pretty_filename, int fd,
 
 static bool
 elide_tail_lines_file (const char *filename, int fd, uintmax_t n_elide,
-                       off_t current_pos, off_t size)
+                       struct stat const *st, off_t current_pos)
 {
-  if (size < 0)
-    return elide_tail_lines_pipe (filename, fd, n_elide);
+  off_t size = st->st_size;
+  if (size <= ST_BLKSIZE (*st))
+    return elide_tail_lines_pipe (filename, fd, n_elide, current_pos);
   else
     {
       /* Find the offset, OFF, of the Nth newline from the end,
@@ -802,28 +833,24 @@ head (const char *filename, int fd, uintmax_t n_units, bool count_lines,
 
   if (elide_from_end)
     {
-      off_t current_pos = -1, size = -1;
-      if (! presume_input_pipe)
+      off_t current_pos = -1;
+      struct stat st;
+      if (fstat (fd, &st) != 0)
         {
-          struct stat st;
-          if (fstat (fd, &st) != 0)
-            {
-              error (0, errno, _("cannot fstat %s"),
-                     quotearg_colon (filename));
-              return false;
-            }
-          if (S_ISREG (st.st_mode))
-            {
-              size = st.st_size;
-              current_pos = elseek (fd, 0, SEEK_CUR, filename);
-              if (current_pos < 0)
-                return false;
-            }
+          error (0, errno, _("cannot fstat %s"),
+                 quotearg_colon (filename));
+          return false;
+        }
+      if (! presume_input_pipe && usable_st_size (&st))
+        {
+          current_pos = elseek (fd, 0, SEEK_CUR, filename);
+          if (current_pos < 0)
+            return false;
         }
       if (count_lines)
-        return elide_tail_lines_file (filename, fd, n_units, current_pos, size);
+        return elide_tail_lines_file (filename, fd, n_units, &st, current_pos);
       else
-        return elide_tail_bytes_file (filename, fd, n_units, current_pos, size);
+        return elide_tail_bytes_file (filename, fd, n_units, &st, current_pos);
     }
   if (count_lines)
     return head_lines (filename, fd, n_units);
diff --git a/src/od.c b/src/od.c
index 18b16836d..7ac663ad4 100644
--- a/src/od.c
+++ b/src/od.c
@@ -27,6 +27,7 @@
 #include "error.h"
 #include "ftoastr.h"
 #include "quote.h"
+#include "stat-size.h"
 #include "xfreopen.h"
 #include "xprintf.h"
 #include "xstrtol.h"
@@ -1034,9 +1035,11 @@ skip (uintmax_t n_skip)
              If the number of bytes left to skip is larger than
              the size of the current file, we can decrement n_skip
              and go on to the next file.  Skip this optimization also
-             when st_size is 0, because some kernels report that
-             nonempty files in /proc have st_size == 0.  */
-          if (S_ISREG (file_stats.st_mode) && 0 < file_stats.st_size)
+             when st_size is no greater than the block size, because
+             some kernels report nonsense small file sizes for
+             proc-like file systems.  */
+          if (usable_st_size (&file_stats)
+              && ST_BLKSIZE (file_stats) < file_stats.st_size)
             {
               if ((uintmax_t) file_stats.st_size < n_skip)
                 n_skip -= file_stats.st_size;
@@ -1052,6 +1055,7 @@ skip (uintmax_t n_skip)
             }
 
           /* If it's not a regular file with nonnegative size,
+             or if it's so small that it might be in a proc-like file system,
              position the file pointer by reading.  */
 
           else
@@ -1067,10 +1071,15 @@ skip (uintmax_t n_skip)
                   n_skip -= n_bytes_read;
                   if (n_bytes_read != n_bytes_to_read)
                     {
-                      in_errno = errno;
-                      ok = false;
-                      n_skip = 0;
-                      break;
+                      if (ferror (in_stream))
+                        {
+                          in_errno = errno;
+                          ok = false;
+                          n_skip = 0;
+                          break;
+                        }
+                      if (feof (in_stream))
+                        break;
                     }
                 }
             }
diff --git a/src/split.c b/src/split.c
index 9b238e450..ec0da7deb 100644
--- a/src/split.c
+++ b/src/split.c
@@ -246,6 +246,37 @@ r/K/N   likewise but only output Kth of N to stdout\n\
   exit (status);
 }
 
+/* Return the number of bytes that can be read from FD, a file with
+   apparent size SIZE.  Actually read the data into BUF (of size
+   BUFSIZE) if the file appears to be smaller than BUFSIZE, as this
+   works better on proc-like file systems.  If the returned value is
+   less than BUFSIZE, store all the file's data into BUF; otherwise,
+   restore the input file's position so that the file can be reread if
+   needed.  */
+
+static off_t
+input_file_size (int fd, off_t size, char *buf, size_t bufsize)
+{
+  if (size < bufsize)
+    {
+      size = 0;
+      while (true)
+        {
+          size_t save = size < bufsize ? size : 0;
+          size_t n_read = safe_read (fd, buf + save, bufsize - save);
+          if (n_read == 0)
+            break;
+          if (n_read == SAFE_READ_ERROR)
+            error (EXIT_FAILURE, errno, "%s", infile);
+          size += n_read;
+        }
+      if (bufsize <= size && lseek (fd, - size, SEEK_CUR) < 0)
+        error (EXIT_FAILURE, errno, "%s", infile);
+    }
+
+  return size;
+}
+
 /* Compute the next sequential output file name and store it into the
    string 'outfile'.  */
 
@@ -511,10 +542,13 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes)
 }
 
 /* Split into pieces of exactly N_BYTES bytes.
-   Use buffer BUF, whose size is BUFSIZE.  */
+   Use buffer BUF, whose size is BUFSIZE.
+   If INITIAL_READ != SIZE_MAX, the entire input file has already been
+   read into BUF, and BUF contains INITIAL_READ input bytes.  */
 
 static void
-bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
+bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read,
+             uintmax_t max_files)
 {
   size_t n_read;
   bool new_file_flag = true;
@@ -525,9 +559,17 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
 
   do
     {
-      n_read = safe_read (STDIN_FILENO, buf, bufsize);
-      if (n_read == SAFE_READ_ERROR)
-        error (EXIT_FAILURE, errno, "%s", infile);
+      if (initial_read != SIZE_MAX)
+        {
+          n_read = initial_read;
+          initial_read = SIZE_MAX;
+        }
+      else
+        {
+          n_read = safe_read (STDIN_FILENO, buf, bufsize);
+          if (n_read == SAFE_READ_ERROR)
+            error (EXIT_FAILURE, errno, "%s", infile);
+        }
       bp_out = buf;
       to_read = n_read;
       while (true)
@@ -736,7 +778,7 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
 
 static void
 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
-                   off_t file_size)
+                   size_t initial_read, off_t file_size)
 {
   assert (n && k <= n && n <= file_size);
 
@@ -751,7 +793,12 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
     {
       /* Start reading 1 byte before kth chunk of file.  */
       off_t start = (k - 1) * chunk_size - 1;
-      if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
+      if (initial_read != SIZE_MAX)
+        {
+          memmove (buf, buf + start, initial_read - start);
+          initial_read -= start;
+        }
+      else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
         error (EXIT_FAILURE, errno, "%s", infile);
       n_written = start;
       chunk_no = k - 1;
@@ -761,10 +808,19 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
   while (n_written < file_size)
     {
       char *bp = buf, *eob;
-      size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
-      if (n_read == SAFE_READ_ERROR)
-        error (EXIT_FAILURE, errno, "%s", infile);
-      else if (n_read == 0)
+      size_t n_read;
+      if (initial_read != SIZE_MAX)
+        {
+          n_read = initial_read;
+          initial_read = SIZE_MAX;
+        }
+      else
+        {
+          n_read = safe_read (STDIN_FILENO, buf, bufsize);
+          if (n_read == SAFE_READ_ERROR)
+            error (EXIT_FAILURE, errno, "%s", infile);
+        }
+      if (n_read == 0)
         break; /* eof.  */
       n_read = MIN (n_read, file_size - n_written);
       chunk_truncated = false;
@@ -841,7 +897,7 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
 
 static void
 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
-                     off_t file_size)
+                     size_t initial_read, off_t file_size)
 {
   off_t start;
   off_t end;
@@ -851,15 +907,29 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
   start = (k - 1) * (file_size / n);
   end = (k == n) ? file_size : k * (file_size / n);
 
-  if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
+  if (initial_read != SIZE_MAX)
+    {
+      memmove (buf, buf + start, initial_read - start);
+      initial_read -= start;
+    }
+  else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
     error (EXIT_FAILURE, errno, "%s", infile);
 
   while (start < end)
     {
-      size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
-      if (n_read == SAFE_READ_ERROR)
-        error (EXIT_FAILURE, errno, "%s", infile);
-      else if (n_read == 0)
+      size_t n_read;
+      if (initial_read != SIZE_MAX)
+        {
+          n_read = initial_read;
+          initial_read = SIZE_MAX;
+        }
+      else
+        {
+          n_read = safe_read (STDIN_FILENO, buf, bufsize);
+          if (n_read == SAFE_READ_ERROR)
+            error (EXIT_FAILURE, errno, "%s", infile);
+        }
+      if (n_read == 0)
         break; /* eof.  */
       n_read = MIN (n_read, end - start);
       if (full_write (STDOUT_FILENO, buf, n_read) != n_read
@@ -1403,22 +1473,34 @@ main (int argc, char **argv)
   if (in_blk_size == 0)
     in_blk_size = io_blksize (in_stat_buf);
 
+  void *b = xmalloc (in_blk_size + 1 + page_size - 1);
+  char *buf = ptr_align (b, page_size);
+  size_t initial_read = SIZE_MAX;
+
   if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
     {
       off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
-      if (usable_st_size (&in_stat_buf))
-        file_size = in_stat_buf.st_size;
-      else if (0 <= input_offset)
+      if (0 <= input_offset)
         {
-          file_size = lseek (STDIN_FILENO, 0, SEEK_END);
-          input_offset = (file_size < 0
-                          ? file_size
-                          : lseek (STDIN_FILENO, input_offset, SEEK_SET));
+          if (usable_st_size (&in_stat_buf))
+            {
+              file_size = input_file_size (STDIN_FILENO, in_stat_buf.st_size,
+                                           buf, in_blk_size);
+              if (file_size < in_blk_size)
+                initial_read = file_size;
+            }
+          else
+            {
+              file_size = lseek (STDIN_FILENO, 0, SEEK_END);
+              input_offset = (file_size < 0
+                              ? file_size
+                              : lseek (STDIN_FILENO, input_offset, SEEK_SET));
+              file_size -= input_offset;
+            }
         }
       if (input_offset < 0)
         error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
                quote (infile));
-      file_size -= input_offset;
       /* Overflow, and sanity checking.  */
       if (OFF_T_MAX < n_units)
         {
@@ -1431,9 +1513,6 @@ main (int argc, char **argv)
       file_size = MAX (file_size, n_units);
     }
 
-  void *b = xmalloc (in_blk_size + 1 + page_size - 1);
-  char *buf = ptr_align (b, page_size);
-
   /* When filtering, closure of one pipe must not terminate the process,
      as there may still be other streams expecting input from us.  */
   if (filter_command)
@@ -1454,7 +1533,7 @@ main (int argc, char **argv)
       break;
 
     case type_bytes:
-      bytes_split (n_units, buf, in_blk_size, 0);
+      bytes_split (n_units, buf, in_blk_size, SIZE_MAX, 0);
       break;
 
     case type_byteslines:
@@ -1463,13 +1542,16 @@ main (int argc, char **argv)
 
     case type_chunk_bytes:
       if (k_units == 0)
-        bytes_split (file_size / n_units, buf, in_blk_size, n_units);
+        bytes_split (file_size / n_units, buf, in_blk_size, initial_read,
+                     n_units);
       else
-        bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
+        bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
+                             file_size);
       break;
 
     case type_chunk_lines:
-      lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
+      lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
+                         file_size);
       break;
 
     case type_rr:
diff --git a/src/tac.c b/src/tac.c
index 192dbd3be..248afa9d7 100644
--- a/src/tac.c
+++ b/src/tac.c
@@ -187,10 +187,11 @@ output (const char *start, const char *past_end)
 }
 
 /* Print in reverse the file open on descriptor FD for reading FILE.
+   The file is already positioned at FILE_POS, which should be near its end.
    Return true if successful.  */
 
 static bool
-tac_seekable (int input_fd, const char *file)
+tac_seekable (int input_fd, const char *file, off_t file_pos)
 {
   /* Pointer to the location in 'G_buffer' where the search for
      the next separator will begin. */
@@ -203,9 +204,6 @@ tac_seekable (int input_fd, const char *file)
   /* Length of the record growing in 'G_buffer'. */
   size_t saved_record_size;
 
-  /* Offset in the file of the next read. */
-  off_t file_pos;
-
   /* True if 'output' has not been called yet for any file.
      Only used when the separator is attached to the preceding record. */
   bool first_time = true;
@@ -213,27 +211,43 @@ tac_seekable (int input_fd, const char *file)
   char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
   size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
 
-  /* Find the size of the input file. */
-  file_pos = lseek (input_fd, 0, SEEK_END);
-  if (file_pos < 1)
-    return true;			/* It's an empty file. */
-
   /* Arrange for the first read to lop off enough to leave the rest of the
      file a multiple of 'read_size'.  Since 'read_size' can change, this may
      not always hold during the program run, but since it usually will, leave
      it here for i/o efficiency (page/sector boundaries and all that).
      Note: the efficiency gain has not been verified. */
-  saved_record_size = file_pos % read_size;
-  if (saved_record_size == 0)
-    saved_record_size = read_size;
-  file_pos -= saved_record_size;
-  /* 'file_pos' now points to the start of the last (probably partial) block
-     in the input file. */
+  size_t remainder = file_pos % read_size;
+  if (remainder != 0)
+    {
+      file_pos -= remainder;
+      if (lseek (input_fd, file_pos, SEEK_SET) < 0)
+        error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+    }
 
-  if (lseek (input_fd, file_pos, SEEK_SET) < 0)
-    error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+  /* Scan backward, looking for end of file.  This caters to proc-like
+     file systems where the file size is just an estimate.  */
+  while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0
+         && file_pos != 0)
+    {
+      off_t rsize = read_size;
+      if (lseek (input_fd, -rsize, SEEK_CUR) < 0)
+        error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+      file_pos -= read_size;
+    }
 
-  if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
+  /* Now scan forward, looking for end of file.  */
+  while (saved_record_size == read_size)
+    {
+      size_t nread = safe_read (input_fd, G_buffer, read_size);
+      if (nread == 0)
+        break;
+      saved_record_size = nread;
+      if (saved_record_size == SAFE_READ_ERROR)
+        break;
+      file_pos += nread;
+    }
+
+  if (saved_record_size == SAFE_READ_ERROR)
     {
       error (0, errno, _("%s: read error"), quotearg_colon (file));
       return false;
@@ -485,15 +499,16 @@ temp_stream (FILE **fp, char **file_name)
 
 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
    a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
-   and file name.  Return true if successful.  */
+   and file name.  Return the number of bytes copied, or -1 on error.  */
 
-static bool
+static off_t
 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
 {
   FILE *fp;
   char *file_name;
+  off_t bytes_copied = 0;
   if (!temp_stream (&fp, &file_name))
-    return false;
+    return -1;
 
   while (1)
     {
@@ -511,6 +526,8 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
           error (0, errno, _("%s: write error"), quotearg_colon (file_name));
           goto Fail;
         }
+
+      bytes_copied += bytes_read;
     }
 
   if (fflush (fp) != 0)
@@ -521,11 +538,11 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
 
   *g_tmp = fp;
   *g_tempfile = file_name;
-  return true;
+  return bytes_copied;
 
  Fail:
   fclose (fp);
-  return false;
+  return -1;
 }
 
 /* Copy INPUT_FD to a temporary, then tac that file.
@@ -536,10 +553,11 @@ tac_nonseekable (int input_fd, const char *file)
 {
   FILE *tmp_stream;
   char *tmp_file;
-  if (!copy_to_temp (&tmp_stream, &tmp_file, input_fd, file))
+  off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file);
+  if (bytes_copied < 0)
     return false;
 
-  bool ok = tac_seekable (fileno (tmp_stream), tmp_file);
+  bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied);
   return ok;
 }
 
@@ -578,7 +596,7 @@ tac_file (const char *filename)
 
   ok = (file_size < 0 || isatty (fd)
         ? tac_nonseekable (fd, filename)
-        : tac_seekable (fd, filename));
+        : tac_seekable (fd, filename, file_size));
 
   if (!is_stdin && close (fd) != 0)
     {
diff --git a/src/tail.c b/src/tail.c
index f5d258517..4c5f943c7 100644
--- a/src/tail.c
+++ b/src/tail.c
@@ -40,6 +40,7 @@
 #include "posixver.h"
 #include "quote.h"
 #include "safe-read.h"
+#include "stat-size.h"
 #include "stat-time.h"
 #include "xfreopen.h"
 #include "xnanosleep.h"
@@ -1665,40 +1666,30 @@ tail_bytes (const char *pretty_filename, int fd, uintmax_t n_bytes,
           if (t)
             return t < 0;
         }
-      *read_pos += dump_remainder (pretty_filename, fd, COPY_TO_EOF);
+      n_bytes = COPY_TO_EOF;
     }
   else
     {
-      if ( ! presume_input_pipe
-           && S_ISREG (stats.st_mode) && n_bytes <= OFF_T_MAX)
+      off_t end_pos = ((! presume_input_pipe && usable_st_size (&stats)
+                        && n_bytes <= OFF_T_MAX)
+                       ? stats.st_size : -1);
+      if (end_pos <= ST_BLKSIZE (stats))
+        return pipe_bytes (pretty_filename, fd, n_bytes, read_pos);
+      off_t current_pos = xlseek (fd, 0, SEEK_CUR, pretty_filename);
+      if (current_pos < end_pos)
         {
-          off_t current_pos = xlseek (fd, 0, SEEK_CUR, pretty_filename);
-          off_t end_pos = xlseek (fd, 0, SEEK_END, pretty_filename);
-          off_t diff = end_pos - current_pos;
-          /* Be careful here.  The current position may actually be
-             beyond the end of the file.  */
-          off_t bytes_remaining = diff < 0 ? 0 : diff;
-          off_t nb = n_bytes;
-
-          if (bytes_remaining <= nb)
-            {
-              /* From the current position to end of file, there are no
-                 more bytes than have been requested.  So reposition the
-                 file pointer to the incoming current position and print
-                 everything after that.  */
-              *read_pos = xlseek (fd, current_pos, SEEK_SET, pretty_filename);
-            }
-          else
+          off_t bytes_remaining = end_pos - current_pos;
+
+          if (n_bytes < bytes_remaining)
             {
-              /* There are more bytes remaining than were requested.
-                 Back up.  */
-              *read_pos = xlseek (fd, -nb, SEEK_END, pretty_filename);
+              current_pos = end_pos - n_bytes;
+              xlseek (fd, current_pos, SEEK_SET, pretty_filename);
             }
-          *read_pos += dump_remainder (pretty_filename, fd, n_bytes);
         }
-      else
-        return pipe_bytes (pretty_filename, fd, n_bytes, read_pos);
+      *read_pos = current_pos;
     }
+
+  *read_pos += dump_remainder (pretty_filename, fd, n_bytes);
   return true;
 }
 
diff --git a/src/wc.c b/src/wc.c
index 1ff007dcf..24069f7e9 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -184,9 +184,10 @@ write_counts (uintmax_t lines,
 
 /* Count words.  FILE_X is the name of the file (or NULL for standard
    input) that is open on descriptor FD.  *FSTATUS is its status.
+   CURRENT_POS is the current file offset if known, negative if unknown.
    Return true if successful.  */
 static bool
-wc (int fd, char const *file_x, struct fstatus *fstatus)
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
 {
   bool ok = true;
   char buf[BUFFER_SIZE + 1];
@@ -229,32 +230,34 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
 
   if (count_bytes && !count_chars && !print_lines && !count_complicated)
     {
-      off_t current_pos, end_pos;
-
       if (0 < fstatus->failed)
         fstatus->failed = fstat (fd, &fstatus->st);
 
-      if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
-          && (current_pos = lseek (fd, 0, SEEK_CUR)) != -1
-          && (end_pos = lseek (fd, 0, SEEK_END)) != -1)
+      /* For sized files, seek to the last BUFFER_SIZE boundary at or before
+         st_size (an absolute offset, hence SEEK_SET) and read the rest;
+         proc-like file systems report only an approximate size.  */
+      if (! fstatus->failed && usable_st_size (&fstatus->st)
+          && 0 <= fstatus->st.st_size)
         {
-          /* Be careful here.  The current position may actually be
-             beyond the end of the file.  As in the example above.  */
-          bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
+          off_t end_pos = fstatus->st.st_size;
+          off_t hi_pos = end_pos - end_pos % BUFFER_SIZE;
+          if (current_pos < 0)
+            current_pos = lseek (fd, 0, SEEK_CUR);
+          if (0 <= current_pos && current_pos < hi_pos
+              && 0 <= lseek (fd, hi_pos, SEEK_SET))
+            bytes = hi_pos - current_pos;
         }
-      else
+
+      fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+      while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
         {
-          fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
-          while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+          if (bytes_read == SAFE_READ_ERROR)
             {
-              if (bytes_read == SAFE_READ_ERROR)
-                {
-                  error (0, errno, "%s", file);
-                  ok = false;
-                  break;
-                }
-              bytes += bytes_read;
+              error (0, errno, "%s", file);
+              ok = false;
+              break;
             }
+          bytes += bytes_read;
         }
     }
   else if (!count_chars && !count_complicated)
@@ -500,7 +503,7 @@ wc_file (char const *file, struct fstatus *fstatus)
       have_read_stdin = true;
       if (O_BINARY && ! isatty (STDIN_FILENO))
         xfreopen (NULL, "rb", stdin);
-      return wc (STDIN_FILENO, file, fstatus);
+      return wc (STDIN_FILENO, file, fstatus, -1);
     }
   else
     {
@@ -512,7 +515,7 @@ wc_file (char const *file, struct fstatus *fstatus)
         }
       else
         {
-          bool ok = wc (fd, file, fstatus);
+          bool ok = wc (fd, file, fstatus, 0);
           if (close (fd) != 0)
             {
               error (0, errno, "%s", file);
diff --git a/tests/local.mk b/tests/local.mk
index 8498acbbe..e01f4d830 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -256,6 +256,7 @@ all_tests =					\
   tests/misc/wc-files0-from.pl			\
   tests/misc/wc-files0.sh			\
   tests/misc/wc-parallel.sh			\
+  tests/misc/wc-proc.sh				\
   tests/misc/cat-proc.sh			\
   tests/misc/cat-buf.sh				\
   tests/misc/cat-self.sh			\
@@ -295,6 +296,7 @@ all_tests =					\
   tests/misc/nproc-positive.sh			\
   tests/misc/numfmt.pl				\
   tests/misc/od-N.sh				\
+  tests/misc/od-j.sh				\
   tests/misc/od-multiple-t.sh			\
   tests/misc/od-x8.sh				\
   tests/misc/paste.pl				\
@@ -399,6 +401,7 @@ all_tests =					\
   tests/tail-2/wait.sh				\
   tests/tail-2/retry.sh				\
   tests/tail-2/symlink.sh				\
+  tests/tail-2/tail-c.sh			\
   tests/chmod/c-option.sh			\
   tests/chmod/equal-x.sh			\
   tests/chmod/equals.sh				\
diff --git a/tests/misc/head-c.sh b/tests/misc/head-c.sh
index d6433d0dd..807e96544 100755
--- a/tests/misc/head-c.sh
+++ b/tests/misc/head-c.sh
@@ -42,4 +42,16 @@ esac
 # based on the value passed to -c
 (ulimit -v 20000; head --bytes=-$SSIZE_MAX < /dev/null) || fail=1
 
+# Make sure it works on funny files in /proc and /sys.
+# Avoid /proc/cpuinfo: fields such as clock speed can differ between reads.
+for file in /proc/version /sys/kernel/profiling; do
+  if test -r $file; then
+    cp -f $file copy &&
+    head -c -1 copy > exp1 || framework_failure_
+
+    head -c -1 $file > out1 || fail=1
+    compare exp1 out1 || fail=1
+  fi
+done
+
 Exit $fail
diff --git a/tests/misc/od-j.sh b/tests/misc/od-j.sh
new file mode 100755
index 000000000..a40a99ff7
--- /dev/null
+++ b/tests/misc/od-j.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+# Verify that 'od -j N' skips N bytes of input.
+
+# Copyright 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ od
+
+for file in ${srcdir=.}/tests/init.sh /proc/version /sys/kernel/profiling; do
+  test -r $file || continue
+  # Take the byte count from a regular-file copy of the input.
+  cp -f $file copy &&
+  bytes=$(wc -c < copy) || framework_failure_
+  # Skipping exactly the first operand must leave just the second one.
+  od -An $file > exp || fail=1
+  od -An -j $bytes $file $file > out || fail=1
+  compare out exp || fail=1
+  # A 4096-byte skip must give the same output, errors and status as the copy.
+  od -An -j 4096 copy copy > exp1 2> experr1; expstatus=$?
+  od -An -j 4096 $file $file > out1 2> err1; status=$?
+  test $status -eq $expstatus || fail=1
+  compare out1 exp1 || fail=1
+  compare err1 experr1 || fail=1
+done
+
+Exit $fail
diff --git a/tests/misc/tac-2-nonseekable.sh b/tests/misc/tac-2-nonseekable.sh
index c27694c17..a4a35ab9d 100755
--- a/tests/misc/tac-2-nonseekable.sh
+++ b/tests/misc/tac-2-nonseekable.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
-# ensure that tac works with two or more non-seekable inputs
+# ensure that tac works with non-seekable or quasi-seekable inputs
 
 # Copyright (C) 2011-2014 Free Software Foundation, Inc.
 
@@ -24,4 +24,16 @@ echo x > exp || fail=1
 compare exp out || fail=1
 compare /dev/null err || fail=1
 
+# Make sure it works on funny files in /proc and /sys.
+
+for file in /proc/version /sys/kernel/profiling; do
+  if test -r $file; then
+    cp -f $file copy &&
+    tac copy > exp1 || framework_failure_
+
+    tac $file > out1 || fail=1
+    compare exp1 out1 || fail=1
+  fi
+done
+
 Exit $fail
diff --git a/tests/misc/wc-proc.sh b/tests/misc/wc-proc.sh
new file mode 100755
index 000000000..828160d4f
--- /dev/null
+++ b/tests/misc/wc-proc.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# Test wc on /proc and /sys files.
+
+# Copyright 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ wc
+# 'wc -c' must report the same count for each special file as for a copy.
+for file in /proc/version /sys/kernel/profiling; do
+  if test -r $file; then
+    cp -f $file copy &&
+    wc -c < copy > exp1 || framework_failure_
+
+    wc -c < $file > out1 || fail=1
+    compare exp1 out1 || fail=1
+  fi
+done
+
+Exit $fail
diff --git a/tests/split/b-chunk.sh b/tests/split/b-chunk.sh
index 86f95a057..fefa0900d 100755
--- a/tests/split/b-chunk.sh
+++ b/tests/split/b-chunk.sh
@@ -31,20 +31,29 @@ stat x?? 2>/dev/null && fail=1
 
 printf '1\n2\n3\n4\n5\n' > in || framework_failure_
 
-split -n 3 in > out || fail=1
-split -n 1/3 in > b1 || fail=1
-split -n 2/3 in > b2 || fail=1
-split -n 3/3 in > b3 || fail=1
-printf '1\n2' > exp-1
-printf '\n3\n' > exp-2
-printf '4\n5\n' > exp-3
-
-compare exp-1 xaa || fail=1
-compare exp-2 xab || fail=1
-compare exp-3 xac || fail=1
-compare exp-1 b1 || fail=1
-compare exp-2 b2 || fail=1
-compare exp-3 b3 || fail=1
-test -f xad && fail=1
+for file in in /proc/version /sys/kernel/profiling; do
+  # /proc and /sys are not available everywhere; skip what cannot be read.
+  test -r $file || continue
+  split -n 3 $file > out || fail=1
+  split -n 1/3 $file > b1 || fail=1
+  split -n 2/3 $file > b2 || fail=1
+  split -n 3/3 $file > b3 || fail=1
+  case $file in
+    in)
+      printf '1\n2' > exp-1
+      printf '\n3\n' > exp-2
+      printf '4\n5\n' > exp-3
+      compare exp-1 xaa || fail=1
+      compare exp-2 xab || fail=1
+      compare exp-3 xac || fail=1
+      ;;
+  esac
+
+  compare xaa b1 || fail=1
+  compare xab b2 || fail=1
+  compare xac b3 || fail=1
+  cat xaa xab xac | compare - $file || fail=1
+  test -f xad && fail=1
+done
 
 Exit $fail
diff --git a/tests/tail-2/tail-c.sh b/tests/tail-2/tail-c.sh
new file mode 100755
index 000000000..cdbaa46a9
--- /dev/null
+++ b/tests/tail-2/tail-c.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# exercise tail -c
+
+# Copyright 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ tail
+
+# Make sure it works on funny files in /proc and /sys, where st_size
+# cannot be trusted; a regular-file copy supplies the expected output.
+
+for file in /proc/version /sys/kernel/profiling; do
+  if test -r $file; then
+    cp -f $file copy &&
+    tail -c -1 copy > exp1 || framework_failure_
+
+    tail -c -1 $file > out1 || fail=1
+    compare exp1 out1 || fail=1
+  fi
+done
+
+Exit $fail
author	Paul Eggert <eggert@cs.ucla.edu>	2014-10-07 16:46:08 -0700
committer	Paul Eggert <eggert@cs.ucla.edu>	2014-10-07 16:47:37 -0700
commit	2662702b9e8643f62c670bbf2fa94b1be1ccf9af (patch)
tree	c82775c20abc304fa4f187218830dc3aa2f7e481
parent	b020002b4bfae55d5bbcf66bd7ce787a4e6da689 (diff)
download	coreutils-2662702b9e8643f62c670bbf2fa94b1be1ccf9af.tar.xz