Extend head to accept --lines=-N (--bytes=-N) and to print all but the N lines (bytes) at the end of the file.

Include full-write.h, full-read.h, inttostr.h, quote.h. Use quote() in diagnostics, rather than literal `' marks. (copy_fd, elide_tail_bytes_pipe, elide_tail_bytes_file): New functions. (elide_tail_lines_pipe, elide_tail_lines_file): New functions. (head_file): Reorganize so as to call head from only one place. (main): Likewise, for head_file. Handle new, undocumented option, --presume-input-pipe. Handle negative line and byte counts.
author: Jim Meyering <jim@meyering.net> 2003-05-03 15:10:13 +0000
committer: Jim Meyering <jim@meyering.net> 2003-05-03 15:10:13 +0000
commit: 57c1158f0256f7f2e2204acc44dbfa72c494dbbe (patch)
tree: fa2c56c7555805b006209f5ebf98af51a2917775 /src/head.c
parent: 61d5480f0085b792f1f31c50dd5bb405452be8d0 (diff)
download: coreutils-57c1158f0256f7f2e2204acc44dbfa72c494dbbe.tar.xz
1 files changed, 430 insertions, 22 deletions
diff --git a/src/head.c b/src/head.c
index 0c972e283..cbdffaab2 100644
--- a/src/head.c
+++ b/src/head.c
@@ -1,5 +1,5 @@
 /* head -- output first part of file(s)
-   Copyright (C) 89, 90, 91, 1995-2002 Free Software Foundation, Inc.
+   Copyright (C) 89, 90, 91, 1995-2003 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -29,12 +29,18 @@
 #include <stdio.h>
 #include <getopt.h>
 #include <sys/types.h>
+
 #include "system.h"
+
 #include "closeout.h"
 #include "error.h"
+#include "full-write.h"
+#include "full-read.h"
+#include "inttostr.h"
 #include "posixver.h"
-#include "xstrtol.h"
+#include "quote.h"
 #include "safe-read.h"
+#include "xstrtol.h"
 
 /* The official name of this program (e.g., no `g' prefix).  */
 #define PROGRAM_NAME "head"
@@ -47,6 +53,12 @@
 /* Size of atomic reads. */
 #define BUFSIZE (512 * 8)
 
+/* Useful only when eliding tail bytes or lines.
+   If nonzero, skip the is-regular-file test used to determine whether
+   to use the lseek optimization.  Instead, use the more general (and
+   more expensive) code unconditionally. Intended solely for testing.  */
+static int presume_input_pipe;
+
 /* If nonzero, print filename headers. */
 static int print_headers;
 
@@ -65,10 +77,19 @@ char *program_name;
 /* Have we ever read standard input?  */
 static int have_read_stdin;
 
+/* For long options that have no equivalent short option, use a
+   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
+enum
+{
+  PRESUME_INPUT_PIPE_OPTION = CHAR_MAX + 1
+};
+
 static struct option const long_options[] =
 {
   {"bytes", required_argument, NULL, 'c'},
   {"lines", required_argument, NULL, 'n'},
+  {"presume-input-pipe", no_argument, NULL,
+   PRESUME_INPUT_PIPE_OPTION}, /* do not document */
   {"quiet", no_argument, NULL, 'q'},
   {"silent", no_argument, NULL, 'q'},
   {"verbose", no_argument, NULL, 'v'},
@@ -126,6 +147,346 @@ write_header (const char *filename)
   first_file = 0;
 }
 
+enum Copy_fd_status	/* Result codes returned by copy_fd.  */
+  {
+    COPY_FD_OK = 0,		/* All requested bytes were copied.  */
+    COPY_FD_READ_ERROR,		/* safe_read returned SAFE_READ_ERROR.  */
+    COPY_FD_WRITE_ERROR,	/* fwrite wrote fewer bytes than asked.  */
+    COPY_FD_UNEXPECTED_EOF	/* Input ended before N_BYTES were read.  */
+  };
+
+/* Transfer up to N_BYTES from descriptor SRC_FD onto stream O_STREAM,
+   one BUFSIZE-sized chunk at a time.  The result says whether the
+   whole amount was copied or which step (read, write, early EOF) failed.  */
+
+static enum Copy_fd_status
+copy_fd (int src_fd, FILE *o_stream, uintmax_t n_bytes)
+{
+  char chunk[BUFSIZE];
+
+  for (;;)
+    {
+      size_t want;
+      size_t got;
+
+      if (n_bytes == 0)
+	return COPY_FD_OK;
+
+      want = MIN (sizeof chunk, n_bytes);
+      got = safe_read (src_fd, chunk, want);
+      if (got == SAFE_READ_ERROR)
+	return COPY_FD_READ_ERROR;
+      if (got == 0)		/* n_bytes is still nonzero here.  */
+	return COPY_FD_UNEXPECTED_EOF;
+
+      if (fwrite (chunk, 1, got, o_stream) < got)
+	return COPY_FD_WRITE_ERROR;
+      n_bytes -= got;
+    }
+}
+
+/* Copy FD to standard output, omitting the last N_ELIDE_0 bytes, without
+   seeking.  FILENAME is used only in diagnostics.  Input is held back in
+   memory until enough later data has been read to show that it is not
+   part of the elided tail.  Return 0 on success, 1 after reporting a read
+   error or a detected write error; exit if N_ELIDE_0 is too large.  */
+
+static int
+elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
+{
+  size_t n_elide = n_elide_0;
+
+#ifndef HEAD_TAIL_PIPE_READ_BUFSIZE
+# define HEAD_TAIL_PIPE_READ_BUFSIZE BUFSIZE
+#endif
+#define READ_BUFSIZE HEAD_TAIL_PIPE_READ_BUFSIZE
+
+  /* If we're eliding no more than this many bytes, then it's ok to allocate
+     more memory in order to use a more time-efficient algorithm.
+     FIXME: use a fraction of available memory instead, as in sort.  */
+#ifndef HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD
+# define HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD (1024 * 1024)
+#endif
+
+#if HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD < 2 * READ_BUFSIZE
+  "HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD must be at least 2 * READ_BUFSIZE"
+#endif
+
+  /* Phrase the bound as a subtraction so the test itself cannot wrap.  */
+  if (SIZE_MAX - READ_BUFSIZE < n_elide_0)
+    {
+      char umax_buf[INT_BUFSIZE_BOUND (uintmax_t)];
+      error (EXIT_FAILURE, 0, _("%s: number of bytes is too large"),
+	     umaxtostr (n_elide_0, umax_buf));
+    }
+
+  /* Two cases to consider...
+     1) n_elide is small enough that we can afford to double-buffer:
+        allocate 2 * (READ_BUFSIZE + n_elide) bytes
+     2) n_elide is too big for that, so we allocate only
+        (READ_BUFSIZE + n_elide) bytes
+
+     CAUTION: do not fail (out of memory) when asked to elide
+     a ridiculous amount, but when given only a small input.  */
+
+  if (n_elide <= HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD)
+    {
+      int fail = 0;
+      bool first = true;
+      bool eof = false;
+      size_t n_to_read = READ_BUFSIZE + n_elide;
+      unsigned int i;
+      char *b[2];
+      b[0] = xmalloc (2 * n_to_read);
+      b[1] = b[0] + n_to_read;
+
+      for (i = 0; ! eof ; i = !i)
+	{
+	  size_t n_read = full_read (fd, b[i], n_to_read);
+	  size_t delta = 0;
+	  if (n_read < n_to_read)
+	    {
+	      if (errno != 0)
+		{
+		  error (0, errno, _("error reading %s"), quote (filename));
+		  fail = 1;
+		  break;
+		}
+
+	      /* Reached EOF.  If this final block is no longer than n_elide,
+		 the elided tail reaches back DELTA bytes into the data held
+		 over from the previous round.  On the very first round
+		 nothing is held over: the whole input is no larger than
+		 n_elide, so there is nothing to output.  */
+	      if (n_read <= n_elide && ! first)
+		delta = n_elide - n_read;
+	      eof = true;
+	    }
+
+	  /* Output any (but maybe just part of the) elided data from
+	     the previous round.  */
+	  if ( ! first)
+	    {
+	      /* Don't bother checking for errors here.
+		 If there's a failure, the test of the following
+		 fwrite or in close_stdout will catch it.  */
+	      fwrite (b[!i] + READ_BUFSIZE, 1, n_elide - delta, stdout);
+	    }
+	  first = false;
+
+	  if (n_elide < n_read
+	      && fwrite (b[i], 1, n_read - n_elide, stdout) < n_read - n_elide)
+	    {
+	      error (0, errno, _("write error"));
+	      fail = 1;
+	      break;
+	    }
+	}
+
+      free (b[0]);
+      return fail;
+    }
+  else
+    {
+      /* Read blocks of size READ_BUFSIZE, until we've read at least n_elide
+	 bytes.  Then, for each new buffer we read, also write an old one.  */
+
+      int fail = 0;
+      bool eof = false;
+      size_t n_read;
+      bool buffered_enough;
+      size_t i, i_next;
+      char **b;
+      /* Round n_elide up to a multiple of READ_BUFSIZE.  */
+      size_t rem = READ_BUFSIZE - (n_elide % READ_BUFSIZE);
+      size_t n_elide_round = n_elide + rem;
+      size_t n_bufs = n_elide_round / READ_BUFSIZE + 1;
+      b = xcalloc (n_bufs, sizeof *b);
+
+      buffered_enough = false;
+      for (i = 0, i_next = 1; !eof; i = i_next, i_next = (i_next + 1) % n_bufs)
+	{
+	  if (b[i] == NULL)
+	    b[i] = xmalloc (READ_BUFSIZE);
+	  n_read = full_read (fd, b[i], READ_BUFSIZE);
+	  if (n_read < READ_BUFSIZE)
+	    {
+	      if (errno != 0)
+		{
+		  error (0, errno, _("error reading %s"), quote (filename));
+		  fail = 1;
+		  goto free_mem;
+		}
+	      eof = true;
+	    }
+
+	  if (i + 1 == n_bufs)
+	    buffered_enough = true;
+
+	  if (buffered_enough)
+	    {
+	      if (fwrite (b[i_next], 1, n_read, stdout) < n_read)
+		{
+		  error (0, errno, _("write error"));
+		  fail = 1;
+		  goto free_mem;
+		}
+	    }
+	}
+
+      /* Output any remainder: rem bytes from b[i] + n_read.  */
+      if (rem)
+	{
+	  if (buffered_enough)
+	    {
+	      size_t n_bytes_left_in_b_i = READ_BUFSIZE - n_read;
+	      if (rem < n_bytes_left_in_b_i)
+		{
+		  fwrite (b[i] + n_read, 1, rem, stdout);
+		}
+	      else
+		{
+		  fwrite (b[i] + n_read, 1, n_bytes_left_in_b_i, stdout);
+		  fwrite (b[i_next], 1, rem - n_bytes_left_in_b_i, stdout);
+		}
+	    }
+	  else if (i + 1 == n_bufs)
+	    {
+	      /* This happens when n_elide < file_size < n_elide_round.
+
+		 |READ_BUF.|
+		 |                      |  rem |
+		 |---------!---------!---------!---------|
+		 |---- n_elide ---------|
+		 |                      | x |
+		 |                   |y |
+		 |---- file size -----------|
+		 |                   |n_read|
+		 |---- n_elide_round ----------|
+	       */
+	      size_t y = READ_BUFSIZE - rem;
+	      size_t x = n_read - y;
+	      fwrite (b[i_next], 1, x, stdout);
+	    }
+	}
+
+    free_mem:;
+      /* Unused slots are still NULL from xcalloc; free (NULL) is a no-op.  */
+      for (i = 0; i < n_bufs; i++)
+	free (b[i]);
+      free (b);
+
+      return fail;
+    }
+}
+
+/* Write to stdout all but the last N_ELIDE bytes remaining on FD;
+   FILENAME appears only in diagnostics.  A regular file is measured
+   with lseek and copied by copy_fd; other input, or any input when
+   presume_input_pipe is set, goes to elide_tail_bytes_pipe.  Return 0
+   on success, 1 after diagnosing a failure.
+   NOTE: Getting the length by seeking is not robust, due to a race:
+   the file may grow or shrink before it is actually read.  */
+
+static int
+elide_tail_bytes_file (const char *filename, int fd, uintmax_t n_elide)
+{
+  struct stat stats;
+
+  /* We need binary input, since `tail' relies on `lseek' and byte counts,
+     while binary output will preserve the style (Unix/DOS) of text file.  */
+  SET_BINARY2 (fd, STDOUT_FILENO);
+
+  if (presume_input_pipe || fstat (fd, &stats) || ! S_ISREG (stats.st_mode))
+    return elide_tail_bytes_pipe (filename, fd, n_elide);
+  else
+    {
+      off_t current_pos, end_pos;
+      uintmax_t bytes_remaining;
+      off_t diff;
+      enum Copy_fd_status err;
+
+      if ((current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) == -1
+	  || (end_pos = lseek (fd, (off_t) 0, SEEK_END)) == -1)
+	{
+	  error (0, errno, _("cannot lseek %s"), quote (filename));
+	  return 1;
+	}
+
+      /* Be careful here.  The current position may actually be
+	 beyond the end of the file.  */
+      bytes_remaining = (diff = end_pos - current_pos) < 0 ? 0 : diff;
+
+      if (bytes_remaining <= n_elide)
+	return 0;
+
+      /* Seek back to the original position (an absolute offset, hence
+	 SEEK_SET), then copy the required number of bytes from fd.  */
+      if (lseek (fd, current_pos, SEEK_SET) == -1)
+	{
+	  error (0, errno, _("%s: cannot lseek back to original position"),
+		 quote (filename));
+	  return 1;
+	}
+
+      err = copy_fd (fd, stdout, bytes_remaining - n_elide);
+      switch (err)
+	{
+	case COPY_FD_OK:
+	  return 0;
+	case COPY_FD_READ_ERROR:
+	  error (0, errno, _("error reading %s"), quote (filename));
+	  break;
+	case COPY_FD_WRITE_ERROR:
+	  error (0, errno, _("write error"));
+	  break;
+	case COPY_FD_UNEXPECTED_EOF:
+	  /* Not a system-call failure, so errno carries no information.  */
+	  error (0, 0, _("%s: file has shrunk too much"), quote (filename));
+	  break;
+	default:
+	  abort ();
+	}
+      return 1;
+    }
+}
+
+/* FIXME: unimplemented stub; it ignores its arguments and aborts.  */
+
+static int
+elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
+{
+  /* FIXME: working here */
+  abort ();
+}
+
+/* Line-counting entry point for eliding the tail of FD.  Input that
+   is not a regular file (or any input when presume_input_pipe is set)
+   is passed to elide_tail_lines_pipe.  FIXME: the regular-file case
+   is not written yet and simply aborts.  */
+
+static int
+elide_tail_lines_file (const char *filename, int fd, uintmax_t n_elide)
+{
+  struct stat st;
+  int use_lseek;
+
+  /* Binary input keeps `lseek' offsets and byte counts accurate;
+     binary output preserves the text file's style (Unix/DOS).  */
+  SET_BINARY2 (fd, STDOUT_FILENO);
+
+  use_lseek = (! presume_input_pipe
+	       && fstat (fd, &st) == 0
+	       && S_ISREG (st.st_mode));
+  if (! use_lseek)
+    return elide_tail_lines_pipe (filename, fd, n_elide);
+
+  /* Intended algorithm: find the offset, OFF, of the Nth newline from
+     the end, not counting the last byte of the file.  If found, write
+     from the current position to OFF, inclusive; otherwise return 0.  */
+  abort ();	/* FIXME: working here */
+}
+
 static int
 head_bytes (const char *filename, int fd, uintmax_t bytes_to_write)
 {
@@ -143,12 +504,12 @@ head_bytes (const char *filename, int fd, uintmax_t bytes_to_write)
       bytes_read = safe_read (fd, buffer, bytes_to_read);
       if (bytes_read == SAFE_READ_ERROR)
 	{
-	  error (0, errno, "%s", filename);
+	  error (0, errno, _("error reading %s"), quote (filename));
 	  return 1;
 	}
       if (bytes_read == 0)
 	break;
-      if (fwrite (buffer, 1, bytes_read, stdout) == 0)
+      if (fwrite (buffer, 1, bytes_read, stdout) < bytes_read)
 	error (EXIT_FAILURE, errno, _("write error"));
       bytes_to_write -= bytes_read;
     }
@@ -161,6 +522,7 @@ head_lines (const char *filename, int fd, uintmax_t lines_to_write)
   char buffer[BUFSIZE];
 
   /* Need BINARY I/O for the byte counts to be accurate.  */
+  /* FIXME: do we really need this when counting *lines*?  */
   SET_BINARY2 (fd, fileno (stdout));
 
   while (lines_to_write)
@@ -170,7 +532,7 @@ head_lines (const char *filename, int fd, uintmax_t lines_to_write)
 
       if (bytes_read == SAFE_READ_ERROR)
 	{
-	  error (0, errno, "%s", filename);
+	  error (0, errno, _("error reading %s"), quote (filename));
 	  return 1;
 	}
       if (bytes_read == 0)
@@ -188,22 +550,34 @@ head_lines (const char *filename, int fd, uintmax_t lines_to_write)
 		struct stat st;
 		if (fstat (fd, &st) != 0 || S_ISREG (st.st_mode))
 		  error (0, e, _("cannot reposition file pointer for %s"),
-			 filename);
+			 quote (filename));
 	      }
 	    break;
 	  }
-      if (fwrite (buffer, 1, bytes_to_write, stdout) == 0)
+      if (fwrite (buffer, 1, bytes_to_write, stdout) < bytes_to_write)
 	error (EXIT_FAILURE, errno, _("write error"));
     }
   return 0;
 }
 
 static int
-head (const char *filename, int fd, uintmax_t n_units, int count_lines)
+head (const char *filename, int fd, uintmax_t n_units, int count_lines,
+      int elide_from_end)
 {
   if (print_headers)
     write_header (filename);
 
+  if (elide_from_end)
+    {
+      if (count_lines)
+	{
+	  return elide_tail_lines_file (filename, fd, n_units);
+	}
+      else
+	{
+	  return elide_tail_bytes_file (filename, fd, n_units);
+	}
+    }
   if (count_lines)
     return head_lines (filename, fd, n_units);
   else
@@ -211,29 +585,35 @@ head (const char *filename, int fd, uintmax_t n_units, int count_lines)
 }
 
 static int
-head_file (const char *filename, uintmax_t n_units, int count_lines)
+head_file (const char *filename, uintmax_t n_units, int count_lines,
+	   int elide_from_end)
 {
   int fd;
+  int fail;
 
   if (STREQ (filename, "-"))
     {
       have_read_stdin = 1;
-      return head (_("standard input"), STDIN_FILENO, n_units, count_lines);
+      fd = STDIN_FILENO;
+      filename = _("standard input");
     }
   else
     {
       fd = open (filename, O_RDONLY);
-      if (fd >= 0)
+      if (fd < 0)
 	{
-	  int errors;
-
-	  errors = head (filename, fd, n_units, count_lines);
-	  if (close (fd) == 0)
-	    return errors;
+	  error (0, errno, "cannot open %s for reading", quote (filename));
+	  return 1;
 	}
-      error (0, errno, "%s", filename);
-      return 1;
     }
+
+  fail = head (filename, fd, n_units, count_lines, elide_from_end);
+  if (fd != STDIN_FILENO && close (fd) == -1)
+    {
+      error (0, errno, "closing %s", quote (filename));
+      fail = 1;
+    }
+  return fail;
 }
 
 /* Convert a string of decimal digits, N_STRING, with a single, optional suffix
@@ -274,6 +654,7 @@ main (int argc, char **argv)
   enum header_mode header_mode = multiple_files;
   int exit_status = 0;
   int c;
+  size_t i;
 
   /* Number of items to print. */
   uintmax_t n_units = DEFAULT_NUMBER;
@@ -282,6 +663,15 @@ main (int argc, char **argv)
      Otherwise, interpret it as the number of bytes.  */
   int count_lines = 1;
 
+  /* Elide the specified number of lines or bytes, counting from
+     the end of the file.  */
+  int elide_from_end = 0;
+
+  /* Initializer for file_list if no file-arguments
+     were specified on the command line.  */
+  static char const *const default_file_list[] = {"-", NULL};
+  char const *const *file_list;
+
   program_name = argv[0];
   setlocale (LC_ALL, "");
   bindtextdomain (PACKAGE, LOCALEDIR);
@@ -377,13 +767,23 @@ main (int argc, char **argv)
 	case 0:
 	  break;
 
+	case PRESUME_INPUT_PIPE_OPTION:
+	  presume_input_pipe = 1;
+	  break;
+
 	case 'c':
 	  count_lines = 0;
+	  elide_from_end = (*optarg == '-');
+	  if (elide_from_end)
+	    ++optarg;
 	  n_units = string_to_integer (count_lines, optarg);
 	  break;
 
 	case 'n':
 	  count_lines = 1;
+	  elide_from_end = (*optarg == '-');
+	  if (elide_from_end)
+	    ++optarg;
 	  n_units = string_to_integer (count_lines, optarg);
 	  break;
 
@@ -408,11 +808,19 @@ main (int argc, char **argv)
       || (header_mode == multiple_files && optind < argc - 1))
     print_headers = 1;
 
-  if (optind == argc)
-    exit_status |= head_file ("-", n_units, count_lines);
+  if ( ! count_lines && elide_from_end && OFF_T_MAX < n_units)
+    {
+      char buf[INT_BUFSIZE_BOUND (uintmax_t)];
+      error (EXIT_FAILURE, 0, _("%s: number of bytes is too large"),
+	     umaxtostr (n_units, buf));
+    }
+
+  file_list = (argc - optind == 0
+	       ? default_file_list
+	       : (char const *const *) &argv[optind]);
 
-  for (; optind < argc; ++optind)
-    exit_status |= head_file (argv[optind], n_units, count_lines);
+  for (i = 0; file_list[i]; ++i)
+    exit_status |= head_file (file_list[i], n_units, count_lines, elide_from_end);
 
   if (have_read_stdin && close (STDIN_FILENO) < 0)
     error (EXIT_FAILURE, errno, "-");
author	Jim Meyering <jim@meyering.net>	2003-05-03 15:10:13 +0000
committer	Jim Meyering <jim@meyering.net>	2003-05-03 15:10:13 +0000
commit	57c1158f0256f7f2e2204acc44dbfa72c494dbbe (patch)
tree	fa2c56c7555805b006209f5ebf98af51a2917775 /src/head.c
parent	61d5480f0085b792f1f31c50dd5bb405452be8d0 (diff)
download	coreutils-57c1158f0256f7f2e2204acc44dbfa72c494dbbe.tar.xz