summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJim Meyering <jim@meyering.net>1992-11-08 02:50:43 +0000
committerJim Meyering <jim@meyering.net>1992-11-08 02:50:43 +0000
commitb25038ce9a234ea0906ddcbd8a0012e917e6c661 (patch)
treea4360f1b307910d9266f65fc851479c218219009 /src
parentf33e06711c51330972e2adf07d21a4e69c8f44f6 (diff)
downloadcoreutils-b25038ce9a234ea0906ddcbd8a0012e917e6c661.tar.xz
Initial revision
Diffstat (limited to 'src')
-rw-r--r--src/cat.c660
-rw-r--r--src/cksum.c274
-rw-r--r--src/comm.c221
-rw-r--r--src/csplit.c1308
-rw-r--r--src/cut.c586
-rw-r--r--src/expand.c377
-rw-r--r--src/fold.c250
-rw-r--r--src/head.c380
-rw-r--r--src/join.c690
-rw-r--r--src/nl.c546
-rw-r--r--src/od.c1697
-rw-r--r--src/paste.c458
-rw-r--r--src/pr.c1844
-rw-r--r--src/sort.c1746
-rw-r--r--src/split.c532
-rw-r--r--src/sum.c217
-rw-r--r--src/tac.c628
-rw-r--r--src/tail.c858
-rw-r--r--src/tr.c1813
-rw-r--r--src/unexpand.c432
-rw-r--r--src/uniq.c321
-rw-r--r--src/wc.c231
22 files changed, 16069 insertions, 0 deletions
diff --git a/src/cat.c b/src/cat.c
new file mode 100644
index 000000000..34c438491
--- /dev/null
+++ b/src/cat.c
@@ -0,0 +1,660 @@
+/* cat -- concatenate files and print on the standard output.
+ Copyright (C) 1988, 1990, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Differences from the Unix cat:
+ * Always unbuffered, -u is ignored.
+ * 100 times faster with -v -u.
+ * 20 times faster with -v.
+
+ By tege@sics.se, Torbjorn Granlund, advised by rms, Richard Stallman. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#ifndef _POSIX_SOURCE
+#include <sys/ioctl.h>
+#endif
+#include "system.h"
+
+#define max(h,i) ((h) > (i) ? (h) : (i))
+
+char *stpcpy ();
+char *xmalloc ();
+void cat ();
+void error ();
+void next_line_num ();
+void simple_cat ();
+
+/* Name under which this program was invoked. */
+char *program_name;
+
+/* Name of input file. May be "-". */
+char *infile;
+
+/* Descriptor on which input file is open. */
+int input_desc;
+
+/* Descriptor on which output file is open. Always is 1. */
+int output_desc;
+
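+/* Buffer for the current line number: decimal digits, right-justified
+ and padded on the left with spaces, followed by a tab and a
+ terminating NUL. */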
+char line_buf[13] =
+{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0', '\t', '\0'};
+
+/* Position in `line_buf' where printing starts. This will not change
+ unless the number of lines is more than 999999. */
+char *line_num_print = line_buf + 5;
+
+/* Position of the first digit in `line_buf'. */
+char *line_num_start = line_buf + 10;
+
+/* Position of the last digit in `line_buf'. */
+char *line_num_end = line_buf + 10;
+
+/* Preserves the `cat' function's local `newlines' between invocations. */
+int newlines2 = 0;
+
+/* Exit status: set to 1 when any non-fatal error condition occurs. */
+int exit_stat = 0;
+
+/* Print REASON, if it is non-null, prefixed by the program name, then
+   print a usage summary.  Both go to standard error.  Exit with
+   status 2.  */
+
+void
+usage (reason)
+     char *reason;
+{
+  if (reason)
+    fprintf (stderr, "%s: %s\n", program_name, reason);
+
+  fprintf (stderr, "\
+Usage: %s [-benstuvAET] [--number] [--number-nonblank] [--squeeze-blank]\n\
+ [--show-nonprinting] [--show-ends] [--show-tabs] [--show-all]\n\
+ [file...]\n",
+           program_name);
+
+  exit (2);
+}
+
+
+/* Parse the command line options, then copy each file named on the
+ command line (or standard input when no file is named, or when the
+ name is "-") to standard output. `simple_cat' is used when no
+ option other than -u was given, `cat' otherwise. Exits with
+ `exit_stat', which is set to 1 whenever an input file could not be
+ opened, examined, read or closed, or is the same file as the output. */
+
+void
+main (argc, argv)
+ int argc;
+ char *argv[];
+{
+ /* Optimal size of i/o operations of output. */
+ int outsize;
+
+ /* Optimal size of i/o operations of input. */
+ int insize;
+
+ /* Pointer to the input buffer. */
+ unsigned char *inbuf;
+
+ /* Pointer to the output buffer. */
+ unsigned char *outbuf;
+
+ int c;
+
+ /* Index in argv to processed argument. */
+ int argind;
+
+ /* Device number of the output (file or whatever). Only set, and only
+ examined, when `check_redirection' is nonzero. */
+ int out_dev;
+
+ /* I-node number of the output. Only set, and only examined, when
+ `check_redirection' is nonzero. */
+ int out_ino;
+
+ /* Nonzero if the output file should not be the same as any input file. */
+ int check_redirection = 1;
+
+ /* Nonzero if we have ever read standard input. */
+ int have_read_stdin = 0;
+
+ struct stat stat_buf;
+
+ /* Variables that are set according to the specified options. */
+ int numbers = 0;
+ int numbers_at_empty_lines = 1;
+ int squeeze_empty_lines = 0;
+ int mark_line_ends = 0;
+ int quote = 0;
+ int output_tabs = 1;
+
+ /* Number of options seen, not counting -u. Zero selects `simple_cat'. */
+ int options = 0;
+
+ static struct option long_options[] =
+ {
+ {"number-nonblank", 0, NULL, 'b'},
+ {"number", 0, NULL, 'n'},
+ {"squeeze-blank", 0, NULL, 's'},
+ {"show-nonprinting", 0, NULL, 'v'},
+ {"show-ends", 0, NULL, 'E'},
+ {"show-tabs", 0, NULL, 'T'},
+ {"show-all", 0, NULL, 'A'},
+ {NULL, 0, NULL, 0}
+ };
+
+ program_name = argv[0];
+
+ /* Parse command line options. */
+
+ while ((c = getopt_long (argc, argv, "benstuvAET", long_options, (int *) 0))
+ != EOF)
+ {
+ options++;
+ switch (c)
+ {
+ case 'b':
+ numbers = 1;
+ numbers_at_empty_lines = 0;
+ break;
+
+ case 'e':
+ mark_line_ends = 1;
+ quote = 1;
+ break;
+
+ case 'n':
+ numbers = 1;
+ break;
+
+ case 's':
+ squeeze_empty_lines = 1;
+ break;
+
+ case 't':
+ output_tabs = 0;
+ quote = 1;
+ break;
+
+ case 'u':
+ /* We provide the -u feature unconditionally. Undo the increment
+ above so that -u alone still selects `simple_cat'. */
+ options--;
+ break;
+
+ case 'v':
+ quote = 1;
+ break;
+
+ case 'A':
+ quote = 1;
+ mark_line_ends = 1;
+ output_tabs = 0;
+ break;
+
+ case 'E':
+ mark_line_ends = 1;
+ break;
+
+ case 'T':
+ output_tabs = 0;
+ break;
+
+ default:
+ usage ((char *) 0);
+ }
+ }
+
+ output_desc = 1;
+
+ /* Get device, i-node number, and optimal blocksize of output. */
+
+ if (fstat (output_desc, &stat_buf) < 0)
+ error (1, errno, "standard output");
+
+ outsize = ST_BLKSIZE (stat_buf);
+ /* Input file can be output file for non-regular files.
+ fstat on pipes returns S_IFSOCK on some systems, S_IFIFO
+ on others, so the checking should not be done for those types,
+ and to allow things like cat < /dev/tty > /dev/tty, checking
+ is not done for device files either. */
+
+ if (S_ISREG (stat_buf.st_mode))
+ {
+ out_dev = stat_buf.st_dev;
+ out_ino = stat_buf.st_ino;
+ }
+ else
+ check_redirection = 0;
+
+ /* Each input file is compared against the output file inside the
+ main loop below. */
+
+ /* Main loop. With no file arguments the body runs exactly once with
+ `infile' left at its default of "-". */
+
+ infile = "-";
+ argind = optind;
+
+ do
+ {
+ if (argind < argc)
+ infile = argv[argind];
+
+ if (infile[0] == '-' && infile[1] == 0)
+ {
+ have_read_stdin = 1;
+ input_desc = 0;
+ }
+ else
+ {
+ input_desc = open (infile, O_RDONLY);
+ if (input_desc < 0)
+ {
+ error (0, errno, "%s", infile);
+ exit_stat = 1;
+ /* Nothing was opened, so skip the close at `contin'. In a
+ do-while loop `continue' jumps to the loop condition,
+ which advances `argind'. */
+ continue;
+ }
+ }
+
+ if (fstat (input_desc, &stat_buf) < 0)
+ {
+ error (0, errno, "%s", infile);
+ exit_stat = 1;
+ goto contin;
+ }
+ insize = ST_BLKSIZE (stat_buf);
+
+ /* Compare the device and i-node numbers of this input file with
+ the corresponding values of the (output file associated with)
+ stdout, and skip this input file if they coincide. Input
+ files cannot be redirected to themselves. */
+
+ if (check_redirection
+ && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino)
+ {
+ error (0, 0, "%s: input file is output file", infile);
+ exit_stat = 1;
+ goto contin;
+ }
+
+ /* Select which version of `cat' to use. If any options (more than -u)
+ were specified, use `cat', otherwise use `simple_cat'. */
+
+ if (options == 0)
+ {
+ /* One buffer serves both the reads and the writes. */
+ insize = max (insize, outsize);
+ inbuf = (unsigned char *) xmalloc (insize);
+
+ simple_cat (inbuf, insize);
+ }
+ else
+ {
+ /* One extra byte holds the newline sentinel that `cat' stores
+ after the data it has read. */
+ inbuf = (unsigned char *) xmalloc (insize + 1);
+
+ /* Why are (OUTSIZE - 1 + INSIZE * 4 + 13) bytes allocated for
+ the output buffer?
+
+ A test whether output needs to be written is done when the input
+ buffer empties or when a newline appears in the input. After
+ output is written, at most (OUTSIZE - 1) bytes will remain in the
+ buffer. Now INSIZE bytes of input is read. Each input character
+ may grow by a factor of 4 (by the prepending of M-^). If all
+ characters do, and no newlines appear in this block of input, we
+ will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer.
+ If the last character in the preceding block of input was a
+ newline, a line number may be written (according to the given
+ options) as the first thing in the output buffer. (Done after the
+ new input is read, but before processing of the input begins.) A
+ line number seldom requires more than 13 positions. */
+
+ outbuf = (unsigned char *) xmalloc (outsize - 1 + insize * 4 + 13);
+
+ cat (inbuf, insize, outbuf, outsize, quote,
+ output_tabs, numbers, numbers_at_empty_lines, mark_line_ends,
+ squeeze_empty_lines);
+
+ free (outbuf);
+ }
+
+ free (inbuf);
+
+ contin:
+ /* Standard input is not closed here; it is closed once, after the
+ loop, if it was ever read. */
+ if (strcmp (infile, "-") && close (input_desc) < 0)
+ {
+ error (0, errno, "%s", infile);
+ exit_stat = 1;
+ }
+ }
+ while (++argind < argc);
+
+ if (have_read_stdin && close (0) < 0)
+ error (1, errno, "-");
+ if (close (1) < 0)
+ error (1, errno, "write error");
+
+ exit (exit_stat);
+}
+
+/* Plain cat.  Copy the file behind `input_desc' to the file behind
+   `output_desc' without any transformation.
+
+   BUF is the buffer used by both the reads and the writes; BUFSIZE is
+   the number of bytes requested from each read call.
+
+   A read error is reported, recorded in `exit_stat', and ends the copy
+   of this file.  A write error is fatal.  */
+
+void
+simple_cat (buf, bufsize)
+     unsigned char *buf;
+     int bufsize;
+{
+  /* Number of bytes obtained by the last read call.  */
+  int n_read;
+
+  /* Number of bytes accepted by the last write call.  */
+  int n_written;
+
+  /* Number of bytes of the current block still to be written.  */
+  int n_left;
+
+  /* Next byte of the current block to be written.  */
+  unsigned char *wp;
+
+  /* Loop until the end of the file.  */
+
+  for (;;)
+    {
+      /* Read a block of input.  */
+
+      n_read = read (input_desc, buf, bufsize);
+      if (n_read < 0)
+        {
+          error (0, errno, "%s", infile);
+          exit_stat = 1;
+          return;
+        }
+
+      /* End of this file?  */
+
+      if (n_read == 0)
+        break;
+
+      /* Write this block out.  A successful write may transfer fewer
+         bytes than requested, so keep writing the remainder instead of
+         treating a short count as an error.  A real failure then shows
+         up as a negative return with a meaningful errno.  */
+
+      wp = buf;
+      n_left = n_read;
+      while (n_left > 0)
+        {
+          n_written = write (output_desc, wp, n_left);
+
+          /* A zero return would loop forever; errno is not meaningful
+             in that case, so do not report it.  */
+          if (n_written <= 0)
+            error (1, n_written < 0 ? errno : 0, "write error");
+
+          wp += n_written;
+          n_left -= n_written;
+        }
+    }
+}
+
+/* Cat the file behind INPUT_DESC to the file behind OUTPUT_DESC.
+ Called if any option more than -u was specified.
+
+ A newline character is always put at the end of the buffer, to make
+ an explicit test for buffer end unnecessary.
+
+ The newline state is loaded from `newlines2' on entry and stored back
+ into it before every return, so that it carries over from one input
+ file to the next. Read and ioctl errors are reported, recorded in
+ `exit_stat', and end the processing of this file; write errors are
+ fatal. */
+
+void
+cat (inbuf, insize, outbuf, outsize, quote,
+ output_tabs, numbers, numbers_at_empty_lines,
+ mark_line_ends, squeeze_empty_lines)
+
+ /* Pointer to the beginning of the input buffer. It must have room for
+ INSIZE + 1 bytes, because a sentinel is stored after the data. */
+ unsigned char *inbuf;
+
+ /* Number of characters read in each read call. */
+ int insize;
+
+ /* Pointer to the beginning of the output buffer. */
+ unsigned char *outbuf;
+
+ /* Number of characters written by each write call. */
+ int outsize;
+
+ /* Variables that have values according to the specified options. */
+ int quote;
+ int output_tabs;
+ int numbers;
+ int numbers_at_empty_lines;
+ int mark_line_ends;
+ int squeeze_empty_lines;
+{
+ /* Last character read from the input buffer. */
+ unsigned char ch;
+
+ /* Pointer to the next character in the input buffer. */
+ unsigned char *bpin;
+
+ /* Pointer to the first non-valid byte in the input buffer, i.e. the
+ current end of the buffer. */
+ unsigned char *eob;
+
+ /* Pointer to the position where the next character shall be written. */
+ unsigned char *bpout;
+
+ /* Number of characters read by the last read call. */
+ int n_read;
+
+ /* Determines how many consecutive newlines there have been in the
+ input. 0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 1,
+ etc. Initially 0 to indicate that we are at the beginning of a
+ new line. The "state" of the procedure is determined by
+ NEWLINES. */
+ int newlines = newlines2;
+
+#ifdef FIONREAD
+ /* If nonzero, use the FIONREAD ioctl, as an optimization.
+ (On Ultrix, it is not supported on NFS filesystems.) */
+ int use_fionread = 1;
+#endif
+
+ /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input
+ is read immediately. */
+
+ eob = inbuf;
+ bpin = eob + 1;
+
+ bpout = outbuf;
+
+ for (;;)
+ {
+ /* This inner loop runs once on entry and then once for every
+ newline character fetched, whether it is a real newline or the
+ sentinel stored at EOB. */
+ do
+ {
+ /* Write if there are at least OUTSIZE bytes in OUTBUF. */
+
+ if (bpout - outbuf >= outsize)
+ {
+ unsigned char *wp = outbuf;
+ do
+ {
+ if (write (output_desc, wp, outsize) != outsize)
+ error (1, errno, "write error");
+ wp += outsize;
+ }
+ while (bpout - wp >= outsize);
+
+ /* Move the remaining bytes to the beginning of the
+ buffer. */
+
+ bcopy (wp, outbuf, bpout - wp);
+ bpout = outbuf + (bpout - wp);
+ }
+
+ /* Is INBUF empty? BPIN has moved past EOB only when the
+ newline just fetched was the sentinel (or on entry). */
+
+ if (bpin > eob)
+ {
+#ifdef FIONREAD
+ int n_to_read = 0;
+
+ /* Is there any input to read immediately?
+ If not, we are about to wait,
+ so write all buffered output before waiting. */
+
+ if (use_fionread
+ && ioctl (input_desc, FIONREAD, &n_to_read) < 0)
+ {
+ /* Ultrix returns EOPNOTSUPP on NFS;
+ HP-UX returns ENOTTY on pipes. */
+ if (errno == EOPNOTSUPP || errno == ENOTTY)
+ use_fionread = 0;
+ else
+ {
+ error (0, errno, "cannot do ioctl on `%s'", infile);
+ exit_stat = 1;
+ newlines2 = newlines;
+ return;
+ }
+ }
+ /* N_TO_READ is still 0 when the ioctl was skipped or is
+ unsupported, so the output is flushed in those cases too. */
+ if (n_to_read == 0)
+#endif
+ {
+ int n_write = bpout - outbuf;
+
+ if (write (output_desc, outbuf, n_write) != n_write)
+ error (1, errno, "write error");
+ bpout = outbuf;
+ }
+
+ /* Read more input into INBUF. */
+
+ n_read = read (input_desc, inbuf, insize);
+ if (n_read < 0)
+ {
+ error (0, errno, "%s", infile);
+ exit_stat = 1;
+ newlines2 = newlines;
+ return;
+ }
+ if (n_read == 0)
+ {
+ /* End of file. Save the state for the next file. */
+ newlines2 = newlines;
+ return;
+ }
+
+ /* Update the pointers and insert a sentinel at the buffer
+ end. */
+
+ bpin = inbuf;
+ eob = bpin + n_read;
+ *eob = '\n';
+ }
+ else
+ {
+ /* It was a real (not a sentinel) newline. */
+
+ /* Was the last line empty?
+ (i.e. have two or more consecutive newlines been read?) */
+
+ if (++newlines > 0)
+ {
+ /* Are multiple adjacent empty lines to be substituted by
+ single ditto (-s), and this was the second empty line? */
+
+ if (squeeze_empty_lines && newlines >= 2)
+ {
+ /* Drop this newline. `continue' goes to the test of
+ the enclosing do-while, so CH must be fetched here. */
+ ch = *bpin++;
+ continue;
+ }
+
+ /* Are line numbers to be written at empty lines (-n)? */
+
+ if (numbers && numbers_at_empty_lines)
+ {
+ next_line_num ();
+ bpout = (unsigned char *) stpcpy (bpout, line_num_print);
+ }
+ }
+
+ /* Output a `$' end-of-line marker if requested (-e). */
+
+ if (mark_line_ends)
+ *bpout++ = '$';
+
+ /* Output the newline. */
+
+ *bpout++ = '\n';
+ }
+ ch = *bpin++;
+ }
+ while (ch == '\n');
+
+ /* Are we at the beginning of a line, and line numbers are requested? */
+
+ if (newlines >= 0 && numbers)
+ {
+ next_line_num ();
+ bpout = (unsigned char *) stpcpy (bpout, line_num_print);
+ }
+
+ /* Here CH cannot contain a newline character. */
+
+ /* The loops below continue until a newline character is found,
+ which means that the buffer is empty or that a proper newline
+ has been found. */
+
+ /* If quoting, i.e. at least one of -v, -e, or -t specified,
+ scan for chars that need conversion. */
+ if (quote)
+ for (;;)
+ {
+ if (ch >= 32)
+ {
+ if (ch < 127)
+ *bpout++ = ch;
+ else if (ch == 127)
+ *bpout++ = '^',
+ *bpout++ = '?';
+ else
+ {
+ /* Characters 128 and above get an `M-' prefix followed
+ by the representation of the character minus 128. */
+ *bpout++ = 'M',
+ *bpout++ = '-';
+ if (ch >= 128 + 32)
+ if (ch < 128 + 127)
+ *bpout++ = ch - 128;
+ else
+ *bpout++ = '^',
+ *bpout++ = '?';
+ else
+ *bpout++ = '^',
+ *bpout++ = ch - 128 + 64;
+ }
+ }
+ else if (ch == '\t' && output_tabs)
+ *bpout++ = '\t';
+ else if (ch == '\n')
+ {
+ /* End of line or sentinel; return to the outer loop. */
+ newlines = -1;
+ break;
+ }
+ else
+ /* Remaining characters below 32 are written as `^' followed
+ by the character plus 64. */
+ *bpout++ = '^',
+ *bpout++ = ch + 64;
+
+ ch = *bpin++;
+ }
+ else
+ /* Not quoting, neither of -v, -e, or -t specified. */
+ for (;;)
+ {
+ /* Tabs are still written as `^I' when OUTPUT_TABS is zero. */
+ if (ch == '\t' && !output_tabs)
+ *bpout++ = '^',
+ *bpout++ = ch + 64;
+ else if (ch != '\n')
+ *bpout++ = ch;
+ else
+ {
+ /* End of line or sentinel; return to the outer loop. */
+ newlines = -1;
+ break;
+ }
+
+ ch = *bpin++;
+ }
+ }
+}
+
+/* Compute the next line number by incrementing, in place, the decimal
+   number stored in `line_buf' between `line_num_start' and
+   `line_num_end'.
+
+   When every digit was a 9, the number grows by one position to the
+   left, and `line_num_print' moves left with it once the number is
+   wider than the printed field.  If the number already fills
+   `line_buf', there is no room for another digit: the first position
+   is set to `>' to flag the overflow instead of writing before the
+   start of the buffer.  */
+
+void
+next_line_num ()
+{
+  char *endp;
+
+  /* Propagate the carry from the last digit towards the first one.  */
+  for (endp = line_num_end; ; endp--)
+    {
+      if ((*endp)++ < '9')
+        return;
+      *endp = '0';
+
+      /* Stop on the first digit so ENDP never points before it.  */
+      if (endp == line_num_start)
+        break;
+    }
+
+  /* All digits wrapped around to 0; prepend a 1 if there is room.  */
+  if (line_num_start > line_buf)
+    *--line_num_start = '1';
+  else
+    *line_buf = '>';
+
+  if (line_num_start < line_num_print)
+    line_num_print--;
+}
diff --git a/src/cksum.c b/src/cksum.c
new file mode 100644
index 000000000..df9c3130b
--- /dev/null
+++ b/src/cksum.c
@@ -0,0 +1,274 @@
+/* cksum -- calculate and print POSIX.2 checksums and sizes of files
+ Copyright (C) 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Q. Frank Xia, qx@math.columbia.edu.
+ Cosmetic changes and reorganization by David MacKenzie, djm@gnu.ai.mit.edu.
+
+ Usage: cksum [file...]
+
+ The code segment between "#ifdef CRCTAB" and "#else" is the code
+ which calculates the "crctab". It is included for those who want
+ to verify the correctness of the "crctab". To recreate the "crctab",
+ do the following:
+
+ cc -DCRCTAB -o crctab cksum.c
+ crctab > crctab.h
+
+ As Bruce Evans pointed out to me, the crctab in the sample C code
+ in 4.9.10 Rationale of P1003.2/D11.2 is represented in reversed order.
+ Namely, 0x01 is represented as 0x80, 0x02 is represented as 0x40, etc.
+ The generating polynomial is crctab[0x80]=0xedb88320 instead of
+ crctab[1]=0x04C11DB7. But the code works only for a non-reverse order
+ crctab. Therefore, the sample implementation is wrong.
+
+ This software is compatible with neither the System V nor the BSD
+ `sum' program. It is supposed to conform to P1003.2/D11.2,
+ except foreign language interface (4.9.5.3 of P1003.2/D11.2) support.
+ Any inconsistency with the standard except 4.9.5.3 is a bug. */
+
+#ifdef CRCTAB
+
+#include <stdio.h>
+
+#define BIT(x) ( (unsigned long)1 << (x) )
+#define SBIT BIT(31)
+
+/* The generating polynomial is
+
+   G(X) = X^32 + X^26 + X^23 + X^22 + X^16 + X^12 + X^11 + X^10
+          + X^8 + X^7 + X^5 + X^4 + X^2 + X^1 + 1
+
+   Bit i of GEN is set if X^i is a summand of G(X), except for X^32.
+   The expansion deliberately has no trailing semicolon, so that GEN
+   can be used inside any expression.  */
+
+#define GEN (BIT(26)|BIT(23)|BIT(22)|BIT(16)|BIT(12)|BIT(11)|BIT(10)\
+             |BIT(8) |BIT(7) |BIT(5) |BIT(4) |BIT(2) |BIT(1) |BIT(0))
+
+unsigned long r[8];
+
+/* Fill the table `r'.  r[0] is GEN; each following entry is the
+   previous one shifted left by one bit, exclusive-ored with r[0] when
+   bit 31 of the previous entry was set.  */
+
+void
+fill_r ()
+{
+  int k;
+
+  r[0] = GEN;
+  for (k = 1; k < 8; k++)
+    {
+      unsigned long prev = r[k - 1];
+      unsigned long shifted = prev << 1;
+
+      if (prev & SBIT)
+        shifted ^= r[0];
+
+      r[k] = shifted;
+    }
+}
+
+/* Return the exclusive-or of the entries r[i] for every bit i (0..7)
+   that is set in M, reduced to 32 bits.  */
+
+unsigned long
+remainder (m)
+     int m;
+{
+  unsigned long acc = 0;
+  int bit;
+
+  for (bit = 0; bit < 8; bit++)
+    {
+      if (!(BIT (bit) & m))
+        continue;
+
+      acc ^= r[bit];
+    }
+
+  /* Keep only the low 32 bits, so the result is the same on machines
+     where `unsigned long' is wider than that.  */
+  return acc & 0xFFFFFFFF;
+}
+
+/* Table generator, built only with -DCRCTAB.  Print on standard output
+   a C definition of `crctab': entry 0 followed by 51 rows of five
+   entries each (1 + 51 * 5 = 256), every entry being the value of
+   `remainder' for its index.  */
+
+void
+main ()
+{
+  int row;
+
+  fill_r ();
+  printf ("unsigned long crctab[256] = {\n 0x0");
+  for (row = 0; row < 51; row++)
+    {
+      /* `remainder' returns unsigned long, so the conversions need the
+         `l' length modifier.  */
+      printf (",\n 0x%08lX, 0x%08lX, 0x%08lX, 0x%08lX, 0x%08lX",
+              remainder (row * 5 + 1), remainder (row * 5 + 2),
+              remainder (row * 5 + 3), remainder (row * 5 + 4),
+              remainder (row * 5 + 5));
+    }
+  printf ("\n};\n");
+  exit (0);
+}
+
+#else /* !CRCTAB */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include "system.h"
+
+/* Number of bytes to read at once. */
+#define BUFLEN (1 << 16)
+
+/* Lookup table used by `cksum' to fold one input byte at a time into
+ the running CRC. It is indexed by an 8-bit value and has the layout
+ printed by the generator program in the CRCTAB branch of this file:
+ entry 0 first, then five entries per row. */
+unsigned long crctab[256] =
+{
+ 0x0,
+ 0x04C11DB7, 0x09823B6E, 0x0D4326D9, 0x130476DC, 0x17C56B6B,
+ 0x1A864DB2, 0x1E475005, 0x2608EDB8, 0x22C9F00F, 0x2F8AD6D6,
+ 0x2B4BCB61, 0x350C9B64, 0x31CD86D3, 0x3C8EA00A, 0x384FBDBD,
+ 0x4C11DB70, 0x48D0C6C7, 0x4593E01E, 0x4152FDA9, 0x5F15ADAC,
+ 0x5BD4B01B, 0x569796C2, 0x52568B75, 0x6A1936C8, 0x6ED82B7F,
+ 0x639B0DA6, 0x675A1011, 0x791D4014, 0x7DDC5DA3, 0x709F7B7A,
+ 0x745E66CD, 0x9823B6E0, 0x9CE2AB57, 0x91A18D8E, 0x95609039,
+ 0x8B27C03C, 0x8FE6DD8B, 0x82A5FB52, 0x8664E6E5, 0xBE2B5B58,
+ 0xBAEA46EF, 0xB7A96036, 0xB3687D81, 0xAD2F2D84, 0xA9EE3033,
+ 0xA4AD16EA, 0xA06C0B5D, 0xD4326D90, 0xD0F37027, 0xDDB056FE,
+ 0xD9714B49, 0xC7361B4C, 0xC3F706FB, 0xCEB42022, 0xCA753D95,
+ 0xF23A8028, 0xF6FB9D9F, 0xFBB8BB46, 0xFF79A6F1, 0xE13EF6F4,
+ 0xE5FFEB43, 0xE8BCCD9A, 0xEC7DD02D, 0x34867077, 0x30476DC0,
+ 0x3D044B19, 0x39C556AE, 0x278206AB, 0x23431B1C, 0x2E003DC5,
+ 0x2AC12072, 0x128E9DCF, 0x164F8078, 0x1B0CA6A1, 0x1FCDBB16,
+ 0x018AEB13, 0x054BF6A4, 0x0808D07D, 0x0CC9CDCA, 0x7897AB07,
+ 0x7C56B6B0, 0x71159069, 0x75D48DDE, 0x6B93DDDB, 0x6F52C06C,
+ 0x6211E6B5, 0x66D0FB02, 0x5E9F46BF, 0x5A5E5B08, 0x571D7DD1,
+ 0x53DC6066, 0x4D9B3063, 0x495A2DD4, 0x44190B0D, 0x40D816BA,
+ 0xACA5C697, 0xA864DB20, 0xA527FDF9, 0xA1E6E04E, 0xBFA1B04B,
+ 0xBB60ADFC, 0xB6238B25, 0xB2E29692, 0x8AAD2B2F, 0x8E6C3698,
+ 0x832F1041, 0x87EE0DF6, 0x99A95DF3, 0x9D684044, 0x902B669D,
+ 0x94EA7B2A, 0xE0B41DE7, 0xE4750050, 0xE9362689, 0xEDF73B3E,
+ 0xF3B06B3B, 0xF771768C, 0xFA325055, 0xFEF34DE2, 0xC6BCF05F,
+ 0xC27DEDE8, 0xCF3ECB31, 0xCBFFD686, 0xD5B88683, 0xD1799B34,
+ 0xDC3ABDED, 0xD8FBA05A, 0x690CE0EE, 0x6DCDFD59, 0x608EDB80,
+ 0x644FC637, 0x7A089632, 0x7EC98B85, 0x738AAD5C, 0x774BB0EB,
+ 0x4F040D56, 0x4BC510E1, 0x46863638, 0x42472B8F, 0x5C007B8A,
+ 0x58C1663D, 0x558240E4, 0x51435D53, 0x251D3B9E, 0x21DC2629,
+ 0x2C9F00F0, 0x285E1D47, 0x36194D42, 0x32D850F5, 0x3F9B762C,
+ 0x3B5A6B9B, 0x0315D626, 0x07D4CB91, 0x0A97ED48, 0x0E56F0FF,
+ 0x1011A0FA, 0x14D0BD4D, 0x19939B94, 0x1D528623, 0xF12F560E,
+ 0xF5EE4BB9, 0xF8AD6D60, 0xFC6C70D7, 0xE22B20D2, 0xE6EA3D65,
+ 0xEBA91BBC, 0xEF68060B, 0xD727BBB6, 0xD3E6A601, 0xDEA580D8,
+ 0xDA649D6F, 0xC423CD6A, 0xC0E2D0DD, 0xCDA1F604, 0xC960EBB3,
+ 0xBD3E8D7E, 0xB9FF90C9, 0xB4BCB610, 0xB07DABA7, 0xAE3AFBA2,
+ 0xAAFBE615, 0xA7B8C0CC, 0xA379DD7B, 0x9B3660C6, 0x9FF77D71,
+ 0x92B45BA8, 0x9675461F, 0x8832161A, 0x8CF30BAD, 0x81B02D74,
+ 0x857130C3, 0x5D8A9099, 0x594B8D2E, 0x5408ABF7, 0x50C9B640,
+ 0x4E8EE645, 0x4A4FFBF2, 0x470CDD2B, 0x43CDC09C, 0x7B827D21,
+ 0x7F436096, 0x7200464F, 0x76C15BF8, 0x68860BFD, 0x6C47164A,
+ 0x61043093, 0x65C52D24, 0x119B4BE9, 0x155A565E, 0x18197087,
+ 0x1CD86D30, 0x029F3D35, 0x065E2082, 0x0B1D065B, 0x0FDC1BEC,
+ 0x3793A651, 0x3352BBE6, 0x3E119D3F, 0x3AD08088, 0x2497D08D,
+ 0x2056CD3A, 0x2D15EBE3, 0x29D4F654, 0xC5A92679, 0xC1683BCE,
+ 0xCC2B1D17, 0xC8EA00A0, 0xD6AD50A5, 0xD26C4D12, 0xDF2F6BCB,
+ 0xDBEE767C, 0xE3A1CBC1, 0xE760D676, 0xEA23F0AF, 0xEEE2ED18,
+ 0xF0A5BD1D, 0xF464A0AA, 0xF9278673, 0xFDE69BC4, 0x89B8FD09,
+ 0x8D79E0BE, 0x803AC667, 0x84FBDBD0, 0x9ABC8BD5, 0x9E7D9662,
+ 0x933EB0BB, 0x97FFAD0C, 0xAFB010B1, 0xAB710D06, 0xA6322BDF,
+ 0xA2F33668, 0xBCB4666D, 0xB8757BDA, 0xB5365D03, 0xB1F740B4
+};
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Nonzero if any of the files read were the standard input. */
+int have_read_stdin;
+
+/* Compute the checksum and the length in bytes of FILE, where "-"
+   stands for the standard input, and print both on standard output.
+   The name FILE is appended to the line when PRINT_NAME is nonzero.
+   Return 0 on success, or -1 after reporting an open, read or close
+   error.  */
+
+int
+cksum (file, print_name)
+     char *file;
+     int print_name;
+{
+  unsigned char buf[BUFLEN];
+  unsigned long crc = 0;
+  long length = 0;
+  long nbytes;
+  long rest;
+  unsigned char *cp;
+  register FILE *fp;
+  int from_stdin = strcmp (file, "-") == 0;
+
+  if (from_stdin)
+    {
+      have_read_stdin = 1;
+      fp = stdin;
+    }
+  else
+    {
+      fp = fopen (file, "r");
+      if (fp == NULL)
+        {
+          error (0, errno, "%s", file);
+          return -1;
+        }
+    }
+
+  /* Fold the contents of the file into the CRC, one byte at a time.  */
+  for (;;)
+    {
+      nbytes = fread (buf, 1, BUFLEN, fp);
+      if (nbytes <= 0)
+        break;
+
+      length += nbytes;
+      for (cp = buf; nbytes > 0; nbytes--, cp++)
+        crc = (crc << 8) ^ crctab[((crc >> 24) ^ *cp) & 0xFF];
+    }
+
+  if (ferror (fp))
+    {
+      error (0, errno, "%s", file);
+      /* The standard input is closed by the caller, not here.  */
+      if (!from_stdin)
+        fclose (fp);
+      return -1;
+    }
+
+  if (!from_stdin && fclose (fp) == EOF)
+    {
+      error (0, errno, "%s", file);
+      return -1;
+    }
+
+  /* Fold the length into the CRC as well, low-order byte first.  */
+  for (rest = length; rest > 0; rest >>= 8)
+    crc = (crc << 8) ^ crctab[((crc >> 24) ^ rest) & 0xFF];
+
+  crc = ~crc & 0xFFFFFFFF;
+
+  printf ("%10lu %8ld", crc, length);
+  if (print_name)
+    printf (" %s", file);
+  putchar ('\n');
+
+  return 0;
+}
+
+/* Checksum every file named on the command line, or the standard input
+   when there are no arguments.  Names are printed only when files were
+   given explicitly.  Exit with status 1 if any file could not be
+   processed, or if the standard input or the standard output could not
+   be closed cleanly; otherwise exit with status 0.  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int errors = 0;
+
+  program_name = argv[0];
+  have_read_stdin = 0;
+
+  if (argc == 1)
+    {
+      if (cksum ("-", 0) < 0)
+        errors = 1;
+    }
+  else
+    {
+      /* Not named `optind', to avoid confusion with getopt's global.  */
+      int i;
+
+      for (i = 1; i < argc; ++i)
+        if (cksum (argv[i], 1) < 0)
+          errors = 1;
+    }
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    error (1, errno, "-");
+
+  /* The results are written through stdio, so a failed write may only
+     show up here.  Do not exit successfully if output was lost.  */
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, errno, "write error");
+
+  exit (errors);
+}
+
+#endif /* !CRCTAB */
diff --git a/src/comm.c b/src/comm.c
new file mode 100644
index 000000000..4362b6475
--- /dev/null
+++ b/src/comm.c
@@ -0,0 +1,221 @@
+/* comm -- compare two sorted files line by line.
+ Copyright (C) 1986, 1990, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Richard Stallman and David MacKenzie. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+#include "linebuffer.h"
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+/* If nonzero, print lines that are found only in file 1. */
+int only_file_1;
+
+/* If nonzero, print lines that are found only in file 2. */
+int only_file_2;
+
+/* If nonzero, print lines that are found in both files. */
+int both;
+
+/* The name this program was run with. */
+char *program_name;
+
+int compare_files ();
+void error ();
+void writeline ();
+void usage ();
+
+/* Parse the options -1, -2 and -3, each of which suppresses one output
+   column, require exactly two file operands, and exit with the status
+   returned by `compare_files'.  */
+
+void
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  int opt;
+
+  program_name = argv[0];
+
+  /* All three columns are printed unless an option turns one off.  */
+  only_file_1 = 1;
+  only_file_2 = 1;
+  both = 1;
+
+  for (;;)
+    {
+      opt = getopt (argc, argv, "123");
+      if (opt == EOF)
+        break;
+
+      if (opt == '1')
+        only_file_1 = 0;
+      else if (opt == '2')
+        only_file_2 = 0;
+      else if (opt == '3')
+        both = 0;
+      else
+        usage ();
+    }
+
+  if (argc - optind != 2)
+    usage ();
+
+  exit (compare_files (&argv[optind]));
+}
+
+/* Merge the two files named by INFILES[0] and INFILES[1], which are
+   assumed to be sorted, and write each line to standard output in the
+   column it belongs to.  The name "-" selects the standard input.
+   Return 0 if successful, 1 if any errors occur.  */
+
+int
+compare_files (infiles)
+     char **infiles;
+{
+  /* Storage for the current line of each file.  */
+  struct linebuffer storage[2];
+
+  /* cur[f] is the next unprocessed line of file f, or NULL when that
+     file has no lines left.  */
+  struct linebuffer *cur[2];
+
+  /* in[f] is the input stream for file f.  */
+  FILE *in[2];
+
+  int f;
+  int status = 0;
+
+  /* Open both files and read the first line of each.  */
+  for (f = 0; f < 2; f++)
+    {
+      initbuffer (&storage[f]);
+      cur[f] = &storage[f];
+
+      if (strcmp (infiles[f], "-") == 0)
+        in[f] = stdin;
+      else
+        in[f] = fopen (infiles[f], "r");
+
+      if (in[f] == NULL)
+        {
+          error (0, errno, "%s", infiles[f]);
+          return 1;
+        }
+
+      cur[f] = readline (cur[f], in[f]);
+    }
+
+  while (cur[0] != NULL || cur[1] != NULL)
+    {
+      /* Negative if the line of file 0 sorts first, positive if the
+         line of file 1 does, zero if the two lines are equal.  */
+      int cmp;
+
+      if (cur[0] == NULL)
+        cmp = 1;
+      else if (cur[1] == NULL)
+        cmp = -1;
+      else
+        {
+          /* Cannot use bcmp -- it only returns a boolean value.  */
+          cmp = memcmp (cur[0]->buffer, cur[1]->buffer,
+                        min (cur[0]->length, cur[1]->length));
+
+          /* With a common prefix, the shorter line sorts first.  */
+          if (cmp == 0)
+            cmp = cur[0]->length - cur[1]->length;
+        }
+
+      /* Output the lesser line, or the common line when they match.  */
+      if (cmp < 0)
+        writeline (cur[0], stdout, 1);
+      else
+        writeline (cur[1], stdout, cmp > 0 ? 2 : 3);
+
+      /* Advance the file the line came from; both when they match.  */
+      if (cmp >= 0)
+        cur[1] = readline (cur[1], in[1]);
+      if (cmp <= 0)
+        cur[0] = readline (cur[0], in[0]);
+    }
+
+  /* Release the line storage and close the input streams.  */
+  for (f = 0; f < 2; f++)
+    {
+      free (storage[f].buffer);
+      if (ferror (in[f]) || fclose (in[f]) == EOF)
+        {
+          error (0, errno, "%s", infiles[f]);
+          status = 1;
+        }
+    }
+
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    {
+      error (0, errno, "write error");
+      status = 1;
+    }
+
+  return status;
+}
+
+/* Write the line held in LINE to STREAM, followed by a newline, unless
+   the column selected by CLASS has been switched off.
+   CLASS is 1 for a line found only in file 1, 2 for a line found only
+   in file 2, and 3 for a line found in both.  One tab is written
+   before the line for each enabled column that precedes its own.  */
+
+void
+writeline (line, stream, class)
+     struct linebuffer *line;
+     FILE *stream;
+     int class;
+{
+  if (class == 1)
+    {
+      if (!only_file_1)
+        return;
+    }
+  else if (class == 2)
+    {
+      if (!only_file_2)
+        return;
+
+      /* Skip over column 1, if it is being printed.  */
+      if (only_file_1)
+        putc ('\t', stream);
+    }
+  else if (class == 3)
+    {
+      if (!both)
+        return;
+
+      /* Skip over column 1, if it is being printed.  */
+      if (only_file_1)
+        putc ('\t', stream);
+
+      /* Skip over column 2, if it is being printed.  */
+      if (only_file_2)
+        putc ('\t', stream);
+    }
+
+  fwrite (line->buffer, sizeof (char), line->length, stream);
+  putc ('\n', stream);
+}
+
+/* Print a usage summary on standard error and exit with status 1.  */
+
+void
+usage ()
+{
+  fprintf (stderr,
+           "Usage: %s [-123] file1 file2\n",
+           program_name);
+
+  exit (1);
+}
diff --git a/src/csplit.c b/src/csplit.c
new file mode 100644
index 000000000..56bffa385
--- /dev/null
+++ b/src/csplit.c
@@ -0,0 +1,1308 @@
+/* csplit - split a file into sections determined by context lines
+ Copyright (C) 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
+ Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <signal.h>
+#include "regex.h"
+#include "system.h"
+
+#if !defined(USG) && !defined(STDC_HEADERS)
+char *memchr ();
+#endif
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+void error (); /* Diagnostic routine; its definition is not in this file. */
+
+void cleanup (); /* Forward declarations: these five functions are
+   called before their definitions appear further down. */
+void close_output_file ();
+void create_output_file ();
+void save_line_to_file ();
+void usage ();
+
+#ifndef TRUE /* Supply the truth values unless something already defined TRUE. */
+#define FALSE 0
+#define TRUE 1
+#endif
+
+/* Number of control records by which the `controls' array is
+   enlarged each time it fills up (see new_control_record). */
+#define ALLOC_SIZE 20
+
+/* The prefix for output file names that main installs before
+   option parsing; -f/--prefix replaces it. */
+#define DEFAULT_PREFIX "xx"
+
+typedef int boolean; /* Holds TRUE or FALSE. */
+
+/* One pattern argument from the command line.  It is either a
+   regular expression (REGEXPR is non-NULL) or a line number
+   (REGEXPR is NULL and LINES_REQUIRED holds the number). */
+struct control
+{
+ char *regexpr; /* Text of the regular expression; NULL for a line-number pattern. */
+ struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
+ int offset; /* Signed number of lines from the matching line at which to split. */
+ int lines_required; /* Line number given by a numeric pattern. */
+ int repeat; /* Number of additional times to apply this pattern. */
+ int argnum; /* Index in ARGV of the pattern argument. */
+ boolean ignore; /* If true, produce no output (set for `%' regexps). */
+};
+
+/* Buffer sizing parameters.  Exactly one set is defined: the normal
+   values, or -- when DEBUG is defined -- some small values to test the
+   algorithms.  (Defining both sets would redefine each macro with a
+   different value.) */
+#ifndef DEBUG
+
+/* Initial size of data area in buffers. */
+#define START_SIZE 8191
+
+/* Increment size for data area. */
+#define INCR_SIZE 2048
+
+/* Number of lines kept in each node in line list. */
+#define CTRL_SIZE 80
+
+#else /* DEBUG */
+
+#define START_SIZE 200
+#define INCR_SIZE 10
+#define CTRL_SIZE 1
+
+#endif /* DEBUG */
+
+/* A string with a length count.  As filled in by keep_new_line, STR
+   points into a buffer's data area rather than at a private copy,
+   and the code in this file never stores a terminating NUL for it. */
+struct cstring
+{
+ int len; /* Number of chars in the string. */
+ char *str; /* First char of the string. */
+};
+
+/* Pointers to the beginnings of lines in the buffer area.
+   Each record describes up to CTRL_SIZE lines; when one record is
+   full, another is linked after it through NEXT. */
+struct line
+{
+ unsigned used; /* Number of entries of STARTS that are filled in. */
+ unsigned insert_index; /* Index in STARTS of the next entry to fill in. */
+ unsigned retrieve_index; /* Index in STARTS of the next entry to hand out. */
+ struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
+ struct line *next; /* Next in linked list. */
+};
+
+/* The structure to hold the input lines.
+   Contains a pointer to the data area and a list containing
+   pointers to the individual lines.
+   A record is linked through NEXT either into the list that starts
+   at `head' (see save_buffer) or into `free_list' (see free_buffer). */
+struct buffer_record
+{
+ unsigned bytes_alloc; /* Size of the buffer area. */
+ unsigned bytes_used; /* Bytes used in the buffer area. */
+ unsigned start_line; /* First line number in this buffer. */
+ unsigned first_available; /* First line that can be retrieved. */
+ unsigned num_lines; /* Number of complete lines in this buffer. */
+ char *buffer; /* Data area. */
+ struct line *line_start; /* Head of list of pointers to lines. */
+ struct line *curr_line; /* The line start record currently in use. */
+ struct buffer_record *next; /* Next record in the list this one is on. */
+};
+
+/* Input file descriptor.  0 (standard input) unless set_input_file
+   opens a named file. */
+int input_desc = 0;
+
+/* List of available buffers, i.e. those handed to free_buffer and
+   not yet taken back by get_new_buffer. */
+struct buffer_record *free_list = NULL;
+
+/* Start of the list of buffers that hold lines not yet retrieved. */
+struct buffer_record *head = NULL;
+
+/* Partially read line.  This points into an existing buffer; no
+   copy is made (see save_to_hold_area). */
+char *hold_area = NULL;
+
+/* Number of chars in `hold_area'. */
+unsigned hold_count = 0;
+
+/* Number of the last line in the buffers. */
+unsigned last_line_number = 0;
+
+/* Number of the line currently being examined. */
+unsigned current_line = 0;
+
+/* Number of the last line in the input file.  Set by
+   record_line_starts only when the input ends with a line that has
+   no trailing newline. */
+unsigned last_line_in_file = 0;
+
+/* If TRUE, we have read EOF. */
+boolean have_read_eof = FALSE;
+
+/* Space in which make_filename builds the name of an output file. */
+char *filename_space = NULL;
+
+/* Prefix part of output file names. */
+char *prefix = NULL;
+
+/* Number of digits to use in output file names. */
+int digits = 2;
+
+/* Number of files created so far. */
+unsigned files_created = 0;
+
+/* Number of bytes written to current file. */
+unsigned bytes_written;
+
+/* Output file pointer; NULL while no output file is open. */
+FILE *output_stream = NULL;
+
+/* The ARGV that main received, kept so that diagnostics can quote a
+   pattern given only its index.
+   Perhaps it would be cleaner to pass arg values instead of indexes. */
+char **global_argv;
+
+/* If TRUE, do not print the count of bytes in each output file. */
+boolean suppress_count;
+
+/* If TRUE, remove output files on error. */
+boolean remove_files;
+
+/* The compiled pattern arguments, which determine how to split
+   the input file. */
+struct control *controls;
+
+/* Number of elements of `controls' that are in use. */
+unsigned control_used;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Return a pointer to N bytes of newly allocated memory.
+   If the allocation fails, print a diagnostic and call cleanup. */
+
+char *
+xmalloc (n)
+     unsigned n;
+{
+  char *block = malloc (n);
+
+  if (block != NULL)
+    return block;
+
+  error (0, 0, "virtual memory exhausted");
+  cleanup ();
+  return block;
+}
+
+/* Resize the allocated block P to N bytes and return the (possibly
+   moved) block.
+   A NULL P is handed to xmalloc instead.
+   A zero N frees P and yields a null pointer.
+   If the reallocation fails, print a diagnostic and call cleanup. */
+
+char *
+xrealloc (p, n)
+     char *p;
+     unsigned n;
+{
+  char *resized;
+
+  if (!p)
+    return xmalloc (n);
+
+  if (!n)
+    {
+      free (p);
+      return 0;
+    }
+
+  resized = realloc (p, n);
+  if (resized != NULL)
+    return resized;
+
+  error (0, 0, "virtual memory exhausted");
+  cleanup ();
+  return resized;
+}
+
+/* Keep track of NUM chars of a partial line in buffer START.
+   These chars will be retrieved later when another large buffer is read.
+   It is not necessary to create a new buffer for these chars; instead,
+   we keep a pointer to the existing buffer.  This buffer *is* on the
+   free list, and when the next buffer is obtained from this list
+   (even if it is this one), these chars will be placed at the
+   start of the new buffer.
+
+   Only the address and the count are recorded, in the globals
+   `hold_area' and `hold_count'; load_buffer does the copying. */
+
+void
+save_to_hold_area (start, num)
+ char *start;
+ unsigned num;
+{
+ hold_area = start; /* No data is copied here. */
+ hold_count = num;
+}
+
+/* Read at most MAX chars from the input descriptor into DEST and
+   return how many were read.
+   A MAX of zero returns 0 without reading.
+   Reading zero chars sets `have_read_eof'; a read error is reported
+   and followed by cleanup. */
+
+int
+read_input (dest, max)
+     char *dest;
+     unsigned max;
+{
+  int count;
+
+  if (!max)
+    return 0;
+
+  count = read (input_desc, dest, max);
+  if (count < 0)
+    {
+      error (0, errno, "read error");
+      cleanup ();
+    }
+  else if (count == 0)
+    have_read_eof = TRUE;
+
+  return count;
+}
+
+/* Reset the counters of the existing line record P so that it
+   describes no lines.  P's link to the next record is left alone. */
+
+void
+clear_line_control (p)
+     struct line *p;
+{
+  p->retrieve_index = p->insert_index = p->used = 0;
+}
+
+/* Reset every line record on the chain that starts at
+   B->line_start. */
+
+void
+clear_all_line_control (b)
+     struct buffer_record *b;
+{
+  struct line *rec = b->line_start;
+
+  while (rec != NULL)
+    {
+      clear_line_control (rec);
+      rec = rec->next;
+    }
+}
+
+/* Allocate a line record, mark it as describing no lines and as
+   having no successor, and return it. */
+
+struct line *
+new_line_control ()
+{
+  struct line *rec = (struct line *) xmalloc (sizeof (struct line));
+
+  clear_line_control (rec);
+  rec->next = NULL;
+
+  return rec;
+}
+
+/* Record LINE_START, which is the address of the start of a line
+   of length LINE_LEN in the large buffer, in the lines buffer of B.
+   A line record is allocated when B has none yet, and the chain is
+   extended when the current record is full.  Records that are already
+   chained after the current one (left from an earlier use of B, and
+   cleared by get_new_buffer) are reused instead of being replaced. */
+
+void
+keep_new_line (b, line_start, line_len)
+     struct buffer_record *b;
+     char *line_start;
+     int line_len;
+{
+  struct line *l;
+
+  /* If there is no existing area to keep line info, get some. */
+  if (b->line_start == NULL)
+    b->line_start = b->curr_line = new_line_control ();
+
+  /* If existing area for lines is full, move on to the next record.
+     Allocate one only when the chain ends here: storing a new record
+     over a non-NULL `next' would lose every record after this one. */
+  if (b->curr_line->used == CTRL_SIZE)
+    {
+      if (b->curr_line->next == NULL)
+        b->curr_line->next = new_line_control ();
+      b->curr_line = b->curr_line->next;
+    }
+
+  l = b->curr_line;
+
+  /* Record the start of the line, and update counters. */
+  l->starts[l->insert_index].str = line_start;
+  l->starts[l->insert_index].len = line_len;
+  l->used++;
+  l->insert_index++;
+}
+
+/* Scan the buffer in B for newline characters
+   and record the line start locations and lengths in B.
+   Return the number of lines found in this buffer.
+
+   There may be an incomplete line at the end of the buffer;
+   a pointer is kept to this area, which will be used when
+   the next buffer is filled.  If end of file has already been seen,
+   the incomplete line is instead recorded as the final line.
+
+   Also sets B's num_lines, start_line and first_available, and
+   advances the global `last_line_number'.  Nothing at all is changed
+   when B holds no data. */
+
+unsigned
+record_line_starts (b)
+ struct buffer_record *b;
+{
+ char *line_start; /* Start of current line. */
+ char *line_end; /* End of each line found. */
+ unsigned bytes_left; /* Length of incomplete last line. */
+ unsigned lines; /* Number of lines found. */
+ unsigned line_length; /* Length of each line found. */
+
+ if (b->bytes_used == 0)
+ return 0;
+
+ lines = 0;
+ line_start = b->buffer;
+ bytes_left = b->bytes_used;
+
+ for (;;)
+ {
+ line_end = memchr (line_start, '\n', bytes_left);
+ if (line_end == NULL)
+ break;
+ line_length = line_end - line_start + 1; /* The length includes the newline. */
+ keep_new_line (b, line_start, line_length);
+ bytes_left -= line_length;
+ line_start = line_end + 1;
+ lines++;
+ }
+
+ /* Check for an incomplete last line. */
+ if (bytes_left)
+ {
+ if (have_read_eof)
+ {
+ keep_new_line (b, line_start, bytes_left); /* Final line, with no newline. */
+ lines++;
+ last_line_in_file = last_line_number + lines;
+ }
+ else
+ save_to_hold_area (line_start, bytes_left); /* More input may complete it. */
+ }
+
+ b->num_lines = lines;
+ b->first_available = b->start_line = last_line_number + 1;
+ last_line_number += lines;
+
+ return lines;
+}
+
+/* Allocate and return a buffer record whose data area has room for
+   SIZE bytes, plus an extra byte for safety.
+   The record starts with no line records; only its data area, its
+   size and its two line-record pointers are set here. */
+
+struct buffer_record *
+create_new_buffer (size)
+     unsigned size;
+{
+  struct buffer_record *rec
+    = (struct buffer_record *) xmalloc (sizeof (struct buffer_record));
+
+  rec->buffer = (char *) xmalloc (size + 1);
+  rec->bytes_alloc = size;
+  rec->line_start = NULL;
+  rec->curr_line = NULL;
+
+  return rec;
+}
+
+/* Return a new buffer of at least MIN_SIZE bytes.  If a buffer of at
+   least that size is currently free, use it, otherwise create a new one.
+   A created buffer holds START_SIZE bytes plus as many multiples of
+   INCR_SIZE as are needed to reach MIN_SIZE.
+   The returned buffer is unlinked from the free list, its line records
+   are cleared, and it is marked as empty and as starting at the line
+   after `last_line_number'. */
+
+struct buffer_record *
+get_new_buffer (min_size)
+ unsigned min_size;
+{
+ struct buffer_record *p, *q;
+ struct buffer_record *new_buffer; /* Buffer to return. */
+ unsigned alloc_size; /* Actual size that will be requested. */
+
+ alloc_size = START_SIZE;
+ while (min_size > alloc_size)
+ alloc_size += INCR_SIZE;
+
+ if (free_list == NULL)
+ new_buffer = create_new_buffer (alloc_size);
+ else
+ {
+ /* Use first-fit to find a buffer. */
+ p = new_buffer = NULL; /* P trails Q: it is the record before Q, if any. */
+ q = free_list;
+
+ do
+ {
+ if (q->bytes_alloc >= min_size)
+ {
+ if (p == NULL)
+ free_list = q->next; /* Q was first on the free list. */
+ else
+ p->next = q->next;
+ break;
+ }
+ p = q;
+ q = q->next;
+ }
+ while (q);
+
+ new_buffer = (q ? q : create_new_buffer (alloc_size)); /* Q is NULL if nothing fit. */
+
+ new_buffer->curr_line = new_buffer->line_start;
+ clear_all_line_control (new_buffer);
+ }
+
+ new_buffer->num_lines = 0;
+ new_buffer->bytes_used = 0;
+ new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
+ new_buffer->next = NULL;
+
+ return new_buffer;
+}
+
+/* Add buffer BUF to the list of free buffers.
+   BUF goes on the front of the list.  None of its storage is
+   released, so its data area and line records remain intact. */
+
+void
+free_buffer (buf)
+ struct buffer_record *buf;
+{
+ buf->next = free_list;
+ free_list = buf;
+}
+
+/* Put buffer BUF at the tail of the list, starting at `head', of
+   buffers whose data has yet to be processed.
+   BUF's current line record is reset to its first one. */
+
+void
+save_buffer (buf)
+     struct buffer_record *buf;
+{
+  struct buffer_record **tail;  /* The link that will point at BUF. */
+
+  buf->curr_line = buf->line_start;
+  buf->next = NULL;
+
+  tail = &head;
+  while (*tail != NULL)
+    tail = &(*tail)->next;
+  *tail = buf;
+}
+
+/* Fill a buffer of input.
+
+   Set the initial size of the buffer to a default.
+   Fill the buffer (from the hold area and input stream)
+   and find the individual lines.
+   If no lines are found (the buffer is too small to hold the next line),
+   release the current buffer (whose contents would have been put in the
+   hold area) and repeat the process with another large buffer until at least
+   one entire line has been read.
+
+   Return TRUE if a new buffer was obtained, otherwise false
+   (in which case end-of-file must have been encountered).
+   A buffer in which lines were found is appended to the list that
+   starts at `head'. */
+
+boolean
+load_buffer ()
+{
+ struct buffer_record *b;
+ unsigned bytes_wanted = START_SIZE; /* Minimum buffer size. */
+ unsigned bytes_avail; /* Size of new buffer created. */
+ unsigned lines_found; /* Number of lines in this new buffer. */
+ char *p; /* Place to load into buffer. */
+
+ if (have_read_eof)
+ return FALSE;
+
+ /* We must make the buffer at least as large as the amount of data
+ in the partial line left over from the last call. */
+ if (bytes_wanted < hold_count)
+ bytes_wanted = hold_count;
+
+ do
+ {
+ b = get_new_buffer (bytes_wanted);
+ bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
+ p = b->buffer;
+
+ /* First check the `holding' area for a partial line. */
+ if (hold_count)
+ {
+ if (p != hold_area) /* Already in place if this very buffer was handed back. */
+ bcopy (hold_area, p, hold_count);
+ p += hold_count;
+ b->bytes_used += hold_count;
+ bytes_avail -= hold_count;
+ hold_count = 0;
+ }
+
+ b->bytes_used += (unsigned) read_input (p, bytes_avail);
+
+ lines_found = record_line_starts (b);
+ bytes_wanted = b->bytes_alloc + INCR_SIZE; /* Ask for more room on a retry. */
+ if (!lines_found)
+ free_buffer (b);
+ }
+ while (!lines_found && !have_read_eof);
+
+ if (lines_found)
+ save_buffer (b);
+
+ return lines_found != 0;
+}
+
+/* Return the number of the first line that has not been retrieved
+   yet, loading a buffer of input first when none is pending.
+   Report "input disappeared" through `error' with status 1 if no
+   input can be loaded. */
+
+unsigned
+get_first_line_in_buffer ()
+{
+  if (head == NULL)
+    {
+      if (!load_buffer ())
+        error (1, errno, "input disappeared");
+    }
+
+  return head->first_available;
+}
+
+/* Hand out the logical first line in the buffers and make the line
+   after it the logical first line.
+   Return a pointer to the line's descriptor, or NULL if there is no
+   more input.
+   `current_line' is raised to the number of the line handed out if it
+   was lower.  When the last line of the first buffer is handed out,
+   that buffer is moved to the free list. */
+
+struct cstring *
+remove_line ()
+{
+  struct cstring *line;         /* Return value. */
+  struct line *rec;             /* Record holding the line. */
+  struct buffer_record *done;   /* Buffer that has been used up. */
+
+  if (head == NULL && !load_buffer ())
+    return NULL;
+
+  if (current_line < head->first_available)
+    current_line = head->first_available;
+
+  head->first_available++;
+
+  rec = head->curr_line;
+  line = &rec->starts[rec->retrieve_index];
+
+  /* Advance index to next line. */
+  rec->retrieve_index++;
+  if (rec->retrieve_index != rec->used)
+    return line;
+
+  /* This record is used up; go on to the next line record. */
+  head->curr_line = rec->next;
+  if (head->curr_line != NULL && head->curr_line->used != 0)
+    return line;
+
+  /* This buffer is used up; go on to the next data block. */
+  done = head;
+  head = head->next;
+  free_buffer (done);
+
+  return line;
+}
+
+/* Search the buffers for line LINENUM, reading more input if necessary.
+   Return a pointer to the line, or NULL if it is not found in the file.
+   NULL is also returned when LINENUM is lower than the first line
+   number of the first pending buffer.
+   The line is only located; it stays in the buffers. */
+
+struct cstring *
+find_line (linenum)
+ unsigned linenum;
+{
+ struct buffer_record *b;
+
+ if (head == NULL && !load_buffer ())
+ return NULL;
+
+ if (linenum < head->start_line)
+ return NULL;
+
+ for (b = head;;)
+ {
+ if (linenum < b->start_line + b->num_lines)
+ {
+ /* The line is in this buffer. */
+ struct line *l;
+ unsigned offset; /* How far into the buffer the line is. */
+
+ l = b->line_start;
+ offset = linenum - b->start_line;
+ /* Find the control record. */
+ while (offset >= CTRL_SIZE) /* Each record passed over accounts for CTRL_SIZE lines. */
+ {
+ l = l->next;
+ offset -= CTRL_SIZE;
+ }
+ return &l->starts[offset];
+ }
+ if (b->next == NULL && !load_buffer ()) /* A successful load appends a buffer after B. */
+ return NULL;
+ b = b->next; /* Try the next data block. */
+ }
+}
+
+/* Return TRUE if there is no line after `current_line' left in the
+   input, FALSE if at least one more line is available.
+   More input is read if that is needed to find out. */
+
+boolean
+no_more_lines ()
+{
+ return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
+}
+
+/* Make NAME the input file.  The name "-" selects descriptor 0
+   (standard input); any other name is opened read-only, and a
+   failure to open it is reported through `error' with status 1. */
+
+void
+set_input_file (name)
+     char *name;
+{
+  if (strcmp (name, "-") == 0)
+    {
+      input_desc = 0;
+      return;
+    }
+
+  input_desc = open (name, O_RDONLY);
+  if (input_desc < 0)
+    error (1, errno, "%s", name);
+}
+
+/* Retrieve every line from the first available one up to, but not
+   including, line LAST_LINE, and write each to the current output
+   file unless IGNORE is TRUE.
+   ARGNUM is the index in ARGV of the current pattern; that argument
+   is quoted in the diagnostic given when LAST_LINE precedes the first
+   available line or lies beyond the end of the input. */
+
+void
+write_to_file (last_line, ignore, argnum)
+     unsigned last_line;
+     boolean ignore;
+     int argnum;
+{
+  struct cstring *line;
+  unsigned first_line = get_first_line_in_buffer ();
+  unsigned remaining;           /* Lines still to be retrieved. */
+
+  if (last_line < first_line)
+    {
+      error (0, 0, "%s: line number out of range", global_argv[argnum]);
+      cleanup ();
+    }
+
+  for (remaining = last_line - first_line; remaining != 0; remaining--)
+    {
+      line = remove_line ();
+      if (line == NULL)
+        {
+          error (0, 0, "%s: line number out of range", global_argv[argnum]);
+          cleanup ();
+        }
+      if (ignore)
+        continue;
+      save_line_to_file (line);
+    }
+}
+
+/* Retrieve all the lines that remain in the input and write each of
+   them to the current output file. */
+
+void
+dump_rest_of_file ()
+{
+  struct cstring *line;
+
+  for (;;)
+    {
+      line = remove_line ();
+      if (line == NULL)
+        break;
+      save_line_to_file (line);
+    }
+}
+
+/* Report that the line number in control record P is out of range,
+   mentioning REPETITION when it is nonzero, and then call cleanup. */
+
+void
+handle_line_error (p, repetition)
+     struct control *p;
+     int repetition;
+{
+  fprintf (stderr, "%s: `%d': line number out of range",
+           program_name, p->lines_required);
+
+  if (repetition == 0)
+    fprintf (stderr, "\n");
+  else
+    fprintf (stderr, " on repetition %d\n", repetition);
+
+  cleanup ();
+}
+
+/* Create the next output file and fill it with the lines that come
+   before line P->lines_required * (REPETITION + 1).
+   P is the control record for a line-number pattern.
+   REPETITION is the repetition number; 0 means the first time.
+   It is an error if that line is not beyond the first available
+   line, or if no input line remains once the file has been written. */
+
+void
+process_line_count (p, repetition)
+     struct control *p;
+     int repetition;
+{
+  unsigned next_line;           /* Number of the next line to retrieve. */
+  unsigned stop_line = p->lines_required * (repetition + 1);
+  struct cstring *line;
+
+  create_output_file ();
+
+  /* The line asked for must not have been passed already. */
+  next_line = get_first_line_in_buffer ();
+  if (next_line >= stop_line)
+    handle_line_error (p, repetition);
+
+  for (; next_line < stop_line; next_line++)
+    {
+      line = remove_line ();
+      if (line == NULL)
+        handle_line_error (p, repetition);
+      save_line_to_file (line);
+    }
+
+  close_output_file ();
+
+  /* The line asked for must exist; it is not enough that the line
+     before it does. */
+  if (no_more_lines ())
+    handle_line_error (p, repetition);
+}
+
+/* Report that no line matched the regexp of control record P,
+   mentioning REPETITION when it is nonzero.
+   Unless IGNORE is TRUE, write the rest of the input to the current
+   output file and close it.  Finish by calling cleanup. */
+
+void
+regexp_error (p, repetition, ignore)
+     struct control *p;
+     int repetition;
+     boolean ignore;
+{
+  fprintf (stderr, "%s: `%s': match not found",
+           program_name, global_argv[p->argnum]);
+
+  if (repetition == 0)
+    fprintf (stderr, "\n");
+  else
+    fprintf (stderr, " on repetition %d\n", repetition);
+
+  if (ignore == FALSE)
+    {
+      dump_rest_of_file ();
+      close_output_file ();
+    }
+
+  cleanup ();
+}
+
+/* Read the input until a line matches the regexp in P, outputting
+   it unless P->IGNORE is TRUE.
+   REPETITION is this repeat-count; 0 means the first time.
+
+   The lines before the matching line, adjusted by P->offset, go to a
+   newly created output file.  If no line matches, regexp_error is
+   called.  On return `current_line' is the line at which the split
+   was made. */
+
+void
+process_regexp (p, repetition)
+ struct control *p;
+ int repetition;
+{
+ struct cstring *line; /* From input file. */
+ register unsigned line_len; /* To make "$" in regexps work. */
+ unsigned break_line; /* First line number of next file. */
+ boolean ignore = p->ignore; /* If TRUE, skip this section. */
+ int ret;
+
+ if (!ignore)
+ create_output_file ();
+
+ /* If there is no offset for the regular expression, or
+ it is positive, then it is not necessary to buffer the lines. */
+
+ if (p->offset >= 0)
+ {
+ for (;;)
+ {
+ line = find_line (++current_line);
+ if (line == NULL)
+ regexp_error (p, repetition, ignore);
+ line_len = line->len;
+ if (line->str[line_len - 1] == '\n') /* Search without the trailing newline. */
+ line_len--;
+ ret = re_search (&p->re_compiled, line->str, line_len,
+ 0, line_len, (struct re_registers *) 0);
+ if (ret == -2)
+ {
+ error (0, 0, "error in regular expression search");
+ cleanup ();
+ }
+ if (ret == -1)
+ {
+ line = remove_line (); /* No match here: this line can go out right away. */
+ if (!ignore)
+ save_line_to_file (line);
+ }
+ else
+ break;
+ }
+ }
+ else
+ {
+ /* Buffer the lines. */
+ for (;;)
+ {
+ line = find_line (++current_line); /* Only look; the line stays buffered. */
+ if (line == NULL)
+ regexp_error (p, repetition, ignore);
+ line_len = line->len;
+ if (line->str[line_len - 1] == '\n')
+ line_len--;
+ ret = re_search (&p->re_compiled, line->str, line_len,
+ 0, line_len, (struct re_registers *) 0);
+ if (ret == -2)
+ {
+ error (0, 0, "error in regular expression search");
+ cleanup ();
+ }
+ if (ret >= 0)
+ break;
+ }
+ }
+
+ /* Account for any offset from this regexp. */
+ break_line = current_line + p->offset;
+
+ write_to_file (break_line, ignore, p->argnum); /* Lines before BREAK_LINE still pending. */
+
+ if (!ignore)
+ close_output_file ();
+
+ current_line = break_line;
+}
+
+/* Apply each control record in turn, each one REPEAT + 1 times, to
+   split the input file; then put whatever input remains into one
+   last output file. */
+
+void
+split_file ()
+{
+  register int i, j;
+
+  for (i = 0; i < control_used; i++)
+    {
+      struct control *ctl = &controls[i];
+
+      if (ctl->regexpr)
+        {
+          /* A regular expression pattern. */
+          for (j = 0; j <= ctl->repeat; j++)
+            process_regexp (ctl, j);
+        }
+      else
+        {
+          /* A line-number pattern. */
+          for (j = 0; j <= ctl->repeat; j++)
+            process_line_count (ctl, j);
+        }
+    }
+
+  create_output_file ();
+  dump_rest_of_file ();
+  close_output_file ();
+}
+
+/* Return the name of output file number NUM.
+   The name is PREFIX followed by NUM in decimal, zero-padded to
+   DIGITS digits.  It is built in `filename_space', so each call
+   overwrites the name made by the call before.
+   If NUM has too many digits for the name to fit in `filename_space',
+   print a diagnostic and call cleanup instead of writing past the
+   end of that space. */
+
+char *
+make_filename (num)
+     int num;
+{
+  int width = 1;                /* Decimal digits needed to print NUM. */
+  int rest;
+
+  for (rest = num; rest >= 10; rest /= 10)
+    width++;
+
+  /* `main' allocates strlen (prefix) + digits + 2 bytes, which is
+     room for the prefix, DIGITS + 1 digit characters and the NUL. */
+  if (width > digits + 1)
+    {
+      error (0, 0, "too many output files");
+      cleanup ();
+    }
+
+  sprintf (filename_space, "%s%0*d", prefix, digits, num);
+  return filename_space;
+}
+
+/* Open the next output file for writing, make it the current output
+   stream, and start its byte count from zero.
+   The file is named after the number of files created so far, which
+   is then incremented.  If the file cannot be opened, report the
+   error and call cleanup. */
+
+void
+create_output_file ()
+{
+  char *name = make_filename (files_created);
+
+  output_stream = fopen (name, "w");
+  if (!output_stream)
+    {
+      error (0, errno, "%s", name);
+      cleanup ();
+    }
+
+  bytes_written = 0;
+  files_created++;
+}
+
+/* Unlink every output file created so far, file 0 first.
+   A file that cannot be unlinked is reported, and the remaining
+   files are still attempted. */
+
+void
+delete_all_files ()
+{
+  int i = 0;
+  char *name;
+
+  while (i < files_created)
+    {
+      name = make_filename (i++);
+      if (unlink (name) != 0)
+        error (0, errno, "%s", name);
+    }
+}
+
+/* Close the current output file and print the count
+   of characters in this file.
+   Do nothing if no output file is open.
+   The count goes to standard output, unless `suppress_count' is set.
+   If closing fails, report a write error and call cleanup. */
+
+void
+close_output_file ()
+{
+  FILE *stream = output_stream;
+
+  if (stream == NULL)
+    return;
+
+  /* Forget the stream before closing it.  cleanup closes any output
+     file that is still recorded as open, so leaving `output_stream'
+     set across a failed fclose would close the same stream twice. */
+  output_stream = NULL;
+
+  if (fclose (stream) == EOF)
+    {
+      error (0, errno, "write error");
+      cleanup ();
+    }
+
+  /* `bytes_written' is unsigned, hence %u. */
+  if (!suppress_count)
+    fprintf (stdout, "%u\n", bytes_written);
+}
+
+/* Give up: close the output file if one is open, delete the files
+   created so far if `remove_files' is set, and exit with status 1.
+   Called when an error has been detected. */
+
+void
+cleanup ()
+{
+  /* This does nothing when no output file is open. */
+  close_output_file ();
+
+  if (remove_files)
+    delete_all_files ();
+
+  exit (1);
+}
+
+/* Save line LINE to the output file and
+   increment the character count for the current file.
+   The count grows by LINE's full length; the value returned by
+   fwrite is not examined. */
+
+void
+save_line_to_file (line)
+ struct cstring *line;
+{
+ fwrite (line->str, sizeof (char), line->len, output_stream);
+ bytes_written += line->len;
+}
+
+/* Return a new, initialized control record.
+   The record is the next unused element of `controls', which is
+   enlarged by ALLOC_SIZE elements whenever it is full.  Every field
+   except the compiled pattern is given a default, so that a field
+   the caller does not fill in never holds an indeterminate value. */
+
+struct control *
+new_control_record ()
+{
+  static unsigned control_allocated = 0; /* Total space allocated. */
+  register struct control *p;
+
+  if (control_allocated == 0)
+    {
+      control_allocated = ALLOC_SIZE;
+      controls = (struct control *)
+        xmalloc (sizeof (struct control) * control_allocated);
+    }
+  else if (control_used == control_allocated)
+    {
+      control_allocated += ALLOC_SIZE;
+      controls = (struct control *)
+        xrealloc (controls, sizeof (struct control) * control_allocated);
+    }
+  p = &controls[control_used++];
+  p->regexpr = NULL;
+  p->repeat = 0;
+  p->lines_required = 0;
+  p->offset = 0;
+  p->argnum = 0;
+  p->ignore = FALSE;
+  return p;
+}
+
+/* Convert string NUM to an integer and put the value in *RESULT.
+   Return TRUE if NUM is a nonempty string that consists entirely of
+   decimal digits and whose value fits in an `int'.
+   Otherwise return FALSE and leave *RESULT unchanged. */
+
+boolean
+string_to_number (result, num)
+     int *result;
+     char *num;
+{
+  register unsigned char ch;    /* Unsigned, so isdigit never sees a
+                                   negative value. */
+  register unsigned val = 0;
+  unsigned next;
+  unsigned int_max = (unsigned) ~0 >> 1; /* Largest value an `int' holds. */
+
+  if (*num == '\0')
+    return FALSE;
+
+  while ((ch = *num++) != '\0')
+    {
+      if (!isdigit (ch))
+        return FALSE;
+
+      /* Unsigned arithmetic wraps around instead of overflowing, and
+         a result that wrapped no longer divides back to VAL. */
+      next = val * 10 + (ch - '0');
+      if (next / 10 != val || next > int_max)
+        return FALSE;
+      val = next;
+    }
+
+  *result = (int) val;
+  return TRUE;
+}
+
+/* Parse the numeric offset that follows a regular expression and
+   store it, with its sign, in P->offset.
+   STR is the entire command line argument; it is quoted in
+   diagnostics.
+   ARGNUM is the index in ARGV of STR.
+   P is the control record for this regular expression.
+   NUM is the part of STR after the closing delimiter: a `+' or a
+   `-' followed by digits. */
+
+void
+check_for_offset (argnum, p, str, num)
+     int argnum;
+     struct control *p;
+     char *str;
+     char *num;
+{
+  char sign = *num;
+
+  if (sign != '-' && sign != '+')
+    error (1, 0, "%s: `+' or `-' expected after delimeter", str);
+
+  if (!string_to_number (&p->offset, num + 1))
+    error (1, 0, "%s: integer expected after `%c'", str, sign);
+
+  if (sign == '-')
+    p->offset = -p->offset;
+}
+
+/* STR is a command line argument whose first character is '{'.
+   Check that the rest of it is a number followed by '}' and store
+   the number in P->repeat.
+   ARGNUM is the ARGV index of STR.
+   The closing brace is replaced by a NUL while the number is
+   converted and is put back afterwards. */
+
+void
+parse_repeat_count (argnum, p, str)
+     int argnum;
+     struct control *p;
+     char *str;
+{
+  char *last = str + strlen (str) - 1;  /* Final character of STR. */
+  boolean valid;
+
+  if (*last != '}')
+    error (1, 0, "%s: `}' is required in repeat count", str);
+
+  *last = '\0';
+  valid = string_to_number (&p->repeat, str + 1);
+  if (!valid)
+    error (1, 0, "%s}: integer required between `{' and `}'",
+           global_argv[argnum]);
+  *last = '}';
+}
+
+/* Extract the regular expression from STR and check for a numeric offset.
+   STR should start with the regexp delimiter character; the regexp
+   runs up to the last occurrence of that character in STR.
+   Return a new control record for the regular expression, holding
+   both its text (NUL-terminated) and its compiled form.
+   ARGNUM is the ARGV index of STR.
+   Unless IGNORE is TRUE, mark these lines for output. */
+
+struct control *
+extract_regexp (argnum, ignore, str)
+     int argnum;
+     boolean ignore;
+     char *str;
+{
+  int len;                      /* Number of chars in this regexp. */
+  char delim = *str;
+  char *closing_delim;
+  struct control *p;
+  char *err;
+
+  closing_delim = rindex (str + 1, delim);
+  if (closing_delim == NULL)
+    error (1, 0, "%s: closing delimeter `%c' missing", str, delim);
+
+  len = closing_delim - str - 1;
+  p = new_control_record ();
+  p->argnum = argnum;
+  p->ignore = ignore;
+
+  /* strncpy copies exactly LEN chars here and so adds no NUL itself. */
+  p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
+  strncpy (p->regexpr, str + 1, len);
+  p->regexpr[len] = '\0';
+
+  /* Never ask for an empty pattern buffer: malloc (0) may return
+     NULL, which xmalloc reports as memory exhaustion. */
+  p->re_compiled.allocated = len > 0 ? len * 2 : 1;
+  p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
+  p->re_compiled.fastmap = xmalloc (256);
+  p->re_compiled.translate = 0;
+  err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
+  if (err)
+    {
+      error (0, 0, "%s: invalid regular expression: %s", str, err);
+      cleanup ();
+    }
+
+  /* Anything after the closing delimiter must be an offset. */
+  if (closing_delim[1])
+    check_for_offset (argnum, p, str, closing_delim + 1);
+
+  return p;
+}
+
+/* Build a control record for each pattern among args START through
+   ARGC - 1 of ARGV.
+   An argument that begins with `/' or `%' is a regular expression;
+   any other argument must be a line number.
+   If the argument after a pattern begins with `{', it is consumed as
+   that pattern's repeat count. */
+
+void
+parse_patterns (argc, start, argv)
+     int argc;
+     int start;
+     char **argv;
+{
+  int i;                        /* Index into ARGV. */
+  struct control *p;            /* New control record created. */
+
+  for (i = start; i < argc; i++)
+    {
+      char *arg = argv[i];
+
+      if (*arg == '/' || *arg == '%')
+        p = extract_regexp (i, *arg == '%', arg);
+      else
+        {
+          p = new_control_record ();
+          p->argnum = i;
+          if (!string_to_number (&p->lines_required, arg))
+            error (1, 0, "%s: invalid pattern", arg);
+        }
+
+      /* Is the pattern followed by a repeat count? */
+      if (i + 1 < argc && *argv[i + 1] == '{')
+        {
+          i++;
+          parse_repeat_count (i, p, argv[i]);
+        }
+    }
+}
+
+void
+interrupt_handler ()
+{ /* Signal handler that main installs for SIGHUP, SIGINT, SIGQUIT and SIGTERM. */
+ error (0, 0, "interrupted");
+ cleanup (); /* Closes the output file, optionally deletes the files made, and exits. */
+}
+
+struct option longopts[] = /* Long options handed to getopt_long; each maps to a short option letter. */
+{
+ {"digits", 1, NULL, 'n'}, /* Takes an argument, like -n. */
+ {"quiet", 0, NULL, 's'},
+ {"silent", 0, NULL, 's'}, /* Same letter as "quiet". */
+ {"keep-files", 0, NULL, 'k'},
+ {"prefix", 1, NULL, 'f'}, /* Takes an argument, like -f. */
+ {NULL, 0, NULL, 0} /* Marks the end of the table. */
+};
+
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{ /* Set defaults, install signal handlers, parse the options, open
+     the input file, parse the patterns, and split the input. */
+ int optc;
+#ifdef _POSIX_VERSION
+ struct sigaction oldact, newact;
+#endif /* _POSIX_VERSION */
+
+ program_name = argv[0];
+ global_argv = argv;
+ controls = NULL;
+ control_used = 0;
+ suppress_count = FALSE;
+ remove_files = TRUE; /* Turned off by -k. */
+ prefix = DEFAULT_PREFIX;
+
+#ifdef _POSIX_VERSION /* In either variant, a signal that is being ignored is left ignored. */
+ newact.sa_handler = interrupt_handler;
+ sigemptyset (&newact.sa_mask);
+ newact.sa_flags = 0;
+
+ sigaction (SIGHUP, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGHUP, &newact, NULL);
+
+ sigaction (SIGINT, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGINT, &newact, NULL);
+
+ sigaction (SIGQUIT, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGQUIT, &newact, NULL);
+
+ sigaction (SIGTERM, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGTERM, &newact, NULL);
+#else /* !_POSIX_VERSION */
+ if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
+ signal (SIGHUP, interrupt_handler);
+ if (signal (SIGINT, SIG_IGN) != SIG_IGN)
+ signal (SIGINT, interrupt_handler);
+ if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
+ signal (SIGQUIT, interrupt_handler);
+ if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
+ signal (SIGTERM, interrupt_handler);
+#endif
+
+ while ((optc = getopt_long (argc, argv, "f:kn:s", longopts, (int *) 0))
+ != EOF)
+ switch (optc)
+ {
+ case 'f':
+ prefix = optarg;
+ break;
+
+ case 'k':
+ remove_files = FALSE;
+ break;
+
+ case 'n':
+ if (!string_to_number (&digits, optarg))
+ error (1, 0, "%s: invalid number", optarg);
+ break;
+
+ case 's':
+ suppress_count = TRUE;
+ break;
+
+ default:
+ usage ();
+ }
+
+ if (optind >= argc - 1) /* Need a file name and at least one pattern. */
+ usage ();
+
+ filename_space = (char *) xmalloc (strlen (prefix) + digits + 2); /* Where make_filename builds names. */
+
+ set_input_file (argv[optind++]);
+
+ parse_patterns (argc, optind, argv);
+
+ split_file ();
+
+ if (close (input_desc) < 0)
+ {
+ error (0, errno, "read error");
+ cleanup ();
+ }
+
+ exit (0);
+}
+
+void
+usage ()
+{ /* Print a synopsis of the options and arguments on stderr, then exit. */
+ fprintf (stderr, "\
+Usage: %s [-sk] [-f prefix] [-n digits] [--prefix=prefix]\n\
+ [--digits=digits] [--quiet] [--silent] [--keep-files] file pattern...\n",
+ program_name);
+ exit (1); /* Exit status 1; this function does not return. */
+}
diff --git a/src/cut.c b/src/cut.c
new file mode 100644
index 000000000..93808b063
--- /dev/null
+++ b/src/cut.c
@@ -0,0 +1,586 @@
+/* cut - remove parts of lines of files
+ Copyright (C) 1984 by David M. Ihnat
+
+ This program is a total rewrite of the Bell Laboratories Unix(Tm)
+ command of the same name, as of System V. It contains no proprietary
+ code, and therefore may be used without violation of any proprietary
+ agreements whatsoever. However, you will notice that the program is
+ copyrighted by me. This is to assure the program does *not* fall
+ into the public domain. Thus, I may specify just what I am now:
+ This program may be freely copied and distributed, provided this notice
+ remains; it may not be sold for profit without express written consent of
+ the author.
+ Please note that I recreated the behavior of the Unix(Tm) 'cut' command
+ as faithfully as possible; however, I haven't run a full set of regression
+ tests. Thus, the user of this program accepts full responsibility for any
+ effects or loss; in particular, the author is not responsible for any losses,
+ explicit or incidental, that may be incurred through use of this program.
+
+ I ask that any bugs (and, if possible, fixes) be reported to me when
+ possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us
+
+ POSIX changes, bug fixes, long-named options, and cleanup
+ by David MacKenzie <djm@ai.mit.edu>.
+
+ Options:
+ --bytes=byte-list
+ -b byte-list Print only the bytes in positions listed
+ in BYTE-LIST.
+ Tabs and backspaces are treated like any
+ other character; they take up 1 byte.
+
+ --characters=character-list
+ -c character-list Print only characters in positions listed
+ in CHARACTER-LIST.
+ The same as -b for now, but
+ internationalization will change that.
+ Tabs and backspaces are treated like any
+ other character; they take up 1 character.
+
+ --fields=field-list
+ -f field-list Print only the fields listed in FIELD-LIST.
+ Fields are separated by a TAB by default.
+
+ --delimiter=delim
+ -d delim For -f, fields are separated by the first
+ character in DELIM instead of TAB.
+
+ -n Do not split multibyte chars (no-op for now).
+
+ --only-delimited
+ -s For -f, do not print lines that do not contain
+ the field separator character.
+
+ The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers
+ or ranges separated by commas. The first byte, character, and field
+ are numbered 1.
+
+ A FILE of `-' means standard input. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
+#else
+#define ISDIGIT(c) (isdigit ((c)))
+#endif
+
+char *xmalloc ();
+char *xrealloc ();
+int set_fields ();
+int cut_file ();
+void cut_stream ();
+void cut_bytes ();
+void cut_fields ();
+void enlarge_line ();
+void error ();
+void invalid_list ();
+void usage ();
+
+/* The number of elements allocated for the input line
+ and the byte or field number.
+ Enlarged as necessary. */
+int line_size;
+
+/* Processed output buffer. */
+char *outbuf;
+
+/* Where to save next char to output. */
+char *outbufptr;
+
+/* Raw line buffer for field mode. */
+char *inbuf;
+
+/* Where to save next input char. */
+char *inbufptr;
+
+/* What can be done about a byte or field.
+ Each element of `fields' holds one of these values. */
+enum field_action
+{
+ field_omit, /* Leave this byte or field out of the output. */
+ field_output /* Copy this byte or field to the output. */
+};
+
+/* In byte mode, which bytes to output.
+ In field mode, which `delim'-separated fields to output.
+ Both bytes and fields are numbered starting with 1,
+ so the first element of `fields' is unused. */
+enum field_action *fields;
+
+enum operating_mode
+{
+ undefined_mode, /* No -b, -c, or -f option has been seen yet. */
+
+ /* Output characters that are in the given bytes. */
+ byte_mode,
+
+ /* Output the given delimiter-separated fields. */
+ field_mode
+};
+
+enum operating_mode operating_mode;
+
+/* If nonzero,
+ for field mode, do not output lines containing no delimiter characters. */
+int delimited_lines_only;
+
+/* The delimiter character for field mode. */
+unsigned char delim;
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+/* The name this program was run with. */
+char *program_name;
+
+struct option longopts[] =
+{
+ {"bytes", 1, 0, 'b'},
+ {"characters", 1, 0, 'c'},
+ {"fields", 1, 0, 'f'},
+ {"delimiter", 1, 0, 'd'},
+ {"only-delimited", 0, 0, 's'},
+ {0, 0, 0, 0}
+};
+
+/* Entry point for cut: parse the options, fill in the selection
+   table, then cut each named file, or standard input when no file
+   is named.  The exit status is the OR of the cut_file results,
+   forced to 1 if closing standard input fails. */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int opt;			/* Option character from getopt_long. */
+  int i;
+  int status = 0;
+
+  program_name = argv[0];
+
+  /* Every setting starts out unset; a -d option is recognized later
+     by `delim' no longer being NUL. */
+  have_read_stdin = 0;
+  delim = '\0';
+  delimited_lines_only = 0;
+  operating_mode = undefined_mode;
+  line_size = 512;
+
+  fields = (enum field_action *)
+    xmalloc (line_size * sizeof (enum field_action));
+  outbuf = (char *) xmalloc (line_size);
+  inbuf = (char *) xmalloc (line_size);
+
+  /* Nothing is selected until a list option says otherwise. */
+  i = 0;
+  while (i < line_size)
+    fields[i++] = field_omit;
+
+  for (;;)
+    {
+      opt = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0);
+      if (opt == EOF)
+        break;
+
+      switch (opt)
+        {
+        case 'b':
+        case 'c':
+        case 'f':
+          /* Only one list option is allowed per run. */
+          if (operating_mode != undefined_mode)
+            usage ();
+          operating_mode = (opt == 'f') ? field_mode : byte_mode;
+          if (set_fields (optarg) == 0)
+            error (2, 0, "no fields given");
+          break;
+
+        case 'd':
+          /* The delimiter must be exactly one character long. */
+          if (optarg[0] == '\0')
+            error (2, 0, "no delimiter given");
+          if (optarg[1] != '\0')
+            error (2, 0, "delimiter must be a single character");
+          delim = optarg[0];
+          break;
+
+        case 'n':
+          /* Accepted and ignored. */
+          break;
+
+        case 's':
+          delimited_lines_only++;
+          break;
+
+        default:
+          usage ();
+        }
+    }
+
+  if (operating_mode == undefined_mode)
+    usage ();
+
+  /* -s and -d are only accepted together with -f. */
+  if (operating_mode != field_mode
+      && (delimited_lines_only || delim != '\0'))
+    usage ();
+
+  if (delim == '\0')
+    delim = '\t';
+
+  if (optind == argc)
+    status |= cut_file ("-");
+  else
+    {
+      for (; optind < argc; optind++)
+        status |= cut_file (argv[optind]);
+    }
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    {
+      error (0, errno, "-");
+      status = 1;
+    }
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, 0, "write error");
+
+  exit (status);
+}
+
+/* Select for printing the positions in `fields' that are listed in
+   byte or field specification FIELDSTR.  FIELDSTR should be
+   composed of one or more numbers or ranges of numbers, separated by
+   blanks or commas.  Incomplete ranges may be given: `-m' means
+   `1-m'; `n-' means `n' through end of line or last field.
+   When several `n-' ranges are given, the one with the smallest
+   start is kept, since it contains all the others.
+
+   Calls invalid_list if FIELDSTR is malformed.
+   Return the number of fields selected. */
+
+int
+set_fields (fieldstr)
+     char *fieldstr;
+{
+  int initial = 1;		/* Value of first number in a range. */
+  int dash_found = 0;		/* Nonzero if a '-' is found in this field. */
+  int value = 0;		/* If nonzero, a number being accumulated. */
+  int fields_selected = 0;	/* Number of fields selected so far. */
+  /* If nonzero, index of first field in a range that goes to end of line. */
+  int eol_range_start = 0;
+
+  for (;;)
+    {
+      if (*fieldstr == '-')
+        {
+          /* Starting a range. */
+          if (dash_found)
+            invalid_list ();
+          dash_found++;
+          fieldstr++;
+
+          if (value)
+            {
+              /* Make sure `fields' reaches the start of the range, so
+                 that an open-ended range can be filled in at the end. */
+              if (value >= line_size)
+                enlarge_line (value);
+              initial = value;
+              value = 0;
+            }
+          else
+            initial = 1;
+        }
+      else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0')
+        {
+          /* Ending the string, or this field/byte sublist. */
+          if (dash_found)
+            {
+              dash_found = 0;
+
+              /* A range.  Possibilities: -n, m-n, n-.
+                 In any case, `initial' contains the start of the range. */
+              if (value == 0)
+                {
+                  /* `n-'.  From `initial' to end of line.  Keep the
+                     smallest start seen so far, so that a later `n-'
+                     with a larger N cannot shrink an earlier
+                     open-ended range. */
+                  if (eol_range_start == 0 || initial < eol_range_start)
+                    eol_range_start = initial;
+                  fields_selected++;
+                }
+              else
+                {
+                  /* `m-n' or `-n' (1-n). */
+                  if (value < initial)
+                    invalid_list ();
+
+                  if (value >= line_size)
+                    enlarge_line (value);
+
+                  /* Is there already a range going to end of line? */
+                  if (eol_range_start != 0)
+                    {
+                      /* Yes.  Is the new sequence already contained
+                         in the old one?  If so, no processing is
+                         necessary. */
+                      if (initial < eol_range_start)
+                        {
+                          /* No, the new sequence starts before the
+                             old.  Does the old range going to end of line
+                             extend into the new range?  */
+                          if (eol_range_start < value)
+                            /* Yes.  Simply move the end of line marker. */
+                            eol_range_start = initial;
+                          else
+                            {
+                              /* No.  A simple range, before and disjoint from
+                                 the range going to end of line.  Fill it. */
+                              for (; initial <= value; initial++)
+                                fields[initial] = field_output;
+                            }
+
+                          /* In any case, some fields were selected. */
+                          fields_selected++;
+                        }
+                    }
+                  else
+                    {
+                      /* There is no range going to end of line. */
+                      for (; initial <= value; initial++)
+                        fields[initial] = field_output;
+                      fields_selected++;
+                    }
+                  value = 0;
+                }
+            }
+          else if (value != 0)
+            {
+              /* A simple field number, not a range. */
+              if (value >= line_size)
+                enlarge_line (value);
+
+              fields[value] = field_output;
+              value = 0;
+              fields_selected++;
+            }
+
+          if (*fieldstr == '\0')
+            {
+              /* If there was a range going to end of line, fill the
+                 array from the end of line point. */
+              if (eol_range_start)
+                for (initial = eol_range_start; initial < line_size; initial++)
+                  fields[initial] = field_output;
+
+              return fields_selected;
+            }
+
+          fieldstr++;
+        }
+      else if (ISDIGIT (*fieldstr))
+        {
+          value = 10 * value + *fieldstr - '0';
+          fieldstr++;
+        }
+      else
+        invalid_list ();
+    }
+}
+
+/* Process file FILE to standard output; a FILE of "-" means
+   standard input, which is left open (only its error and EOF
+   indicators are cleared) so that it can be named again.
+   Return 0 if successful, 1 if not. */
+
+int
+cut_file (file)
+     char *file;
+{
+  FILE *stream;
+  int is_stdin = !strcmp (file, "-");
+
+  if (is_stdin)
+    {
+      have_read_stdin = 1;
+      stream = stdin;
+    }
+  else
+    {
+      stream = fopen (file, "r");
+      if (stream == NULL)
+        {
+          error (0, errno, "%s", file);
+          return 1;
+        }
+    }
+
+  cut_stream (stream);
+
+  if (ferror (stream))
+    {
+      error (0, errno, "%s", file);
+      /* Do not leak the stream we opened ourselves. */
+      if (!is_stdin)
+        fclose (stream);
+      return 1;
+    }
+  if (is_stdin)
+    clearerr (stream);		/* Also clear EOF. */
+  else if (fclose (stream) == EOF)
+    {
+      error (0, errno, "%s", file);
+      return 1;
+    }
+  return 0;
+}
+
+/* Cut STREAM with the routine that matches `operating_mode':
+   cut_bytes for byte mode, cut_fields for anything else. */
+
+void
+cut_stream (stream)
+     FILE *stream;
+{
+  if (operating_mode != byte_mode)
+    {
+      cut_fields (stream);
+      return;
+    }
+  cut_bytes (stream);
+}
+
+/* Copy STREAM to standard output one line at a time, keeping only
+   the bytes whose position is marked `field_output' in `fields'.
+   A newline is always kept. */
+
+void
+cut_bytes (stream)
+     FILE *stream;
+{
+  int at_eof = 0;		/* Nonzero once EOF has been read. */
+
+  while (!at_eof)
+    {
+      int position = 0;		/* 1-based position of the last byte read. */
+
+      /* Each line is assembled from the start of the output buffer. */
+      outbufptr = outbuf;
+
+      for (;;)
+        {
+          int ch = getc (stream);
+
+          if (ch == EOF)
+            {
+              at_eof = 1;
+              break;
+            }
+
+          /* Grow the tables before `position' can run off their end. */
+          position++;
+          if (position == line_size - 1)
+            enlarge_line (position);
+
+          if (ch == '\n' || fields[position] == field_output)
+            *outbufptr++ = ch;
+
+          if (ch == '\n')
+            break;
+        }
+
+      /* Nothing is written for the empty read that ends the file. */
+      if (position != 0)
+        fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
+    }
+}
+
+/* Print the file open for reading on stream STREAM
+   with the fields marked `field_omit' in `fields' removed from each line.
+   All characters are initially stowed in the raw input buffer, until
+   at least one field has been found.
+
+   A line that contains no delimiter is passed through intact, unless
+   `delimited_lines_only' is set, in which case it is dropped.
+   A line that does contain a delimiter prints only its selected
+   fields, which may leave just the newline. */
+
+void
+cut_fields (stream)
+     FILE *stream;
+{
+  register int c;		/* Each character from the file. */
+  int doneflag = 0;		/* Nonzero if EOF reached. */
+  int char_count;		/* Number of chars stowed in `inbuf'. */
+  int fieldfound;		/* Nonzero if any fields to print found. */
+  int curr_field;		/* Current index in `fields'. */
+  int delim_seen;		/* Nonzero if the line contains a delimiter. */
+
+  while (doneflag == 0)
+    {
+      char_count = 0;
+      fieldfound = 0;
+      curr_field = 1;
+      delim_seen = 0;
+      outbufptr = outbuf;
+      inbufptr = inbuf;
+
+      do
+        {
+          c = getc (stream);
+          if (c == EOF)
+            {
+              doneflag++;
+              break;
+            }
+
+          if (fields[curr_field] == field_output && c != '\n')
+            {
+              /* Working on a field.  It, and its terminating
+                 delimiter, go only into the processed buffer. */
+              fieldfound = 1;
+              if (outbufptr - outbuf == line_size - 2)
+                enlarge_line (outbufptr - outbuf);
+              *outbufptr++ = c;
+            }
+          else if (fieldfound == 0)
+            {
+              if (++char_count == line_size - 1)
+                enlarge_line (char_count);
+              *inbufptr++ = c;
+            }
+
+          if (c == delim)
+            {
+              delim_seen = 1;
+              if (++curr_field == line_size - 1)
+                enlarge_line (curr_field);
+            }
+        }
+      while (c != '\n');
+
+      if (delim_seen)
+        {
+          /* A delimited line: print exactly the selected fields,
+             even if none of them is present on this line. */
+          if (fieldfound && outbufptr[-1] == delim)
+            --outbufptr;	/* Suppress trailing delimiter. */
+
+          fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
+          if (c == '\n')
+            putc (c, stdout);
+        }
+      else if (!delimited_lines_only)
+        {
+          /* No delimiters and no suppression: print the line whole.
+             Its text is in `outbuf' if field 1 is selected (the
+             newline is never stored there), otherwise in `inbuf'. */
+          if (fieldfound)
+            {
+              fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
+              if (c == '\n')
+                putc (c, stdout);
+            }
+          else if (char_count)
+            fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout);
+        }
+    }
+}
+
+/* Extend the buffers to accommodate at least NEW_SIZE characters.
+   `fields', `outbuf' and `inbuf' are all reallocated; the newly added
+   elements of `fields' are marked `field_omit', and `outbufptr' and
+   `inbufptr' keep their offsets within the moved buffers. */
+
+void
+enlarge_line (new_size)
+     int new_size;
+{
+  /* Offsets of the cursors within their buffers, or -1 while a cursor
+     has not been set yet.  They are recorded before reallocating,
+     because the old buffer addresses must not be used afterwards. */
+  int out_used = -1;
+  int in_used = -1;
+  int i;
+
+  if (outbufptr)
+    out_used = outbufptr - outbuf;
+  if (inbufptr)
+    in_used = inbufptr - inbuf;
+
+  new_size += 256;		/* Leave some room to grow. */
+
+  fields = (enum field_action *)
+    xrealloc (fields, new_size * sizeof (enum field_action));
+
+  outbuf = (char *) xrealloc (outbuf, new_size);
+  if (out_used >= 0)
+    outbufptr = outbuf + out_used;
+
+  inbuf = (char *) xrealloc (inbuf, new_size);
+  if (in_used >= 0)
+    inbufptr = inbuf + in_used;
+
+  for (i = line_size; i < new_size; i++)
+    fields[i] = field_omit;
+  line_size = new_size;
+}
+
+void
+invalid_list ()
+{
+ error (2, 0, "invalid byte or field list");
+}
+
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n\
+ %s {-c character-list,--characters=character-list} [file...]\n\
+ %s {-f field-list,--fields=field-list} [-d delim] [-s]\n\
+ [--delimiter=delim] [--only-delimited] [file...]\n",
+ program_name, program_name, program_name);
+ exit (2);
+}
diff --git a/src/expand.c b/src/expand.c
new file mode 100644
index 000000000..8e471379e
--- /dev/null
+++ b/src/expand.c
@@ -0,0 +1,377 @@
+/* expand - convert tabs to spaces
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* By default, convert all tabs to spaces.
+ Preserves backspace characters in the output; they decrement the
+ column count for tab calculations.
+ The default action is equivalent to -8.
+
+ Options:
+ --tabs=tab1[,tab2[,...]]
+ -t tab1[,tab2[,...]]
+ -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
+ spaces apart instead of the default 8. Otherwise,
+ set the tabs at columns tab1, tab2, etc. (numbered from
+ 0); replace any tabs beyond the tabstops given with
+ single spaces.
+ --initial
+ -i Only convert initial tabs on each line to spaces.
+
+ David MacKenzie <djm@ai.mit.edu> */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
+#else
+#define ISDIGIT(c) (isdigit ((c)))
+#endif
+
+/* The number of bytes added at a time to the amount of memory
+ allocated for the output line. */
+#define OUTPUT_BLOCK 256
+
+/* The number of bytes added at a time to the amount of memory
+ allocated for the list of tabstops. */
+#define TABLIST_BLOCK 256
+
+char *xmalloc ();
+char *xrealloc ();
+void error ();
+
+FILE *next_file ();
+void add_tabstop ();
+void expand ();
+void parse_tabstops ();
+void usage ();
+void validate_tabstops ();
+
+/* If nonzero, convert blanks even after nonblank characters have been
+ read on the line. */
+int convert_entire_line;
+
+/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
+int tab_size;
+
+/* Array of the explicit column numbers of the tab stops;
+ after `tab_list' is exhausted, each additional tab is replaced
+ by a space. The first column is column 0. */
+int *tab_list;
+
+/* The index of the first invalid element of `tab_list',
+ where the next element can be added. */
+int first_free_tab;
+
+/* Null-terminated array of input filenames. */
+char **file_list;
+
+/* Default for `file_list' if no files are given on the command line. */
+char *stdin_argv[] =
+{
+ "-", NULL
+};
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+/* Status to return to the system. */
+int exit_status;
+
+/* The name this program was run with. */
+char *program_name;
+
+struct option longopts[] =
+{
+ {"tabs", 1, NULL, 't'},
+ {"initial", 0, NULL, 'i'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Entry point for expand: collect tab stops from -t, --tabs and the
+   old-style digit options, choose the input files, expand them,
+   and exit with `exit_status'. */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int opt;			/* Option character. */
+  int pending = -1;		/* Tab stop being read from digit options,
+				   or -1 if none. */
+
+  program_name = argv[0];
+  first_free_tab = 0;
+  tab_list = NULL;
+  convert_entire_line = 1;
+  exit_status = 0;
+  have_read_stdin = 0;
+
+  for (;;)
+    {
+      opt = getopt_long (argc, argv, "it:,0123456789", longopts, (int *) 0);
+      if (opt == EOF)
+        break;
+
+      if (opt == '?')
+        usage ();		/* Does not return. */
+      else if (opt == 'i')
+        convert_entire_line = 0;
+      else if (opt == 't')
+        parse_tabstops (optarg);
+      else if (opt == ',')
+        {
+          /* A comma ends the old-style tab stop being read. */
+          add_tabstop (pending);
+          pending = -1;
+        }
+      else
+        /* The only other option characters are the digits. */
+        pending = (pending == -1 ? 0 : pending) * 10 + opt - '0';
+    }
+
+  /* Store a tab stop still pending from the digit options. */
+  add_tabstop (pending);
+
+  validate_tabstops (tab_list, first_free_tab);
+
+  /* No stops: every 8 columns.  One stop: that many columns apart.
+     Several stops: use `tab_list' itself. */
+  switch (first_free_tab)
+    {
+    case 0:
+      tab_size = 8;
+      break;
+    case 1:
+      tab_size = tab_list[0];
+      break;
+    default:
+      tab_size = 0;
+      break;
+    }
+
+  file_list = (optind == argc) ? stdin_argv : &argv[optind];
+
+  expand ();
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    error (1, errno, "-");
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, 0, "write error");
+
+  exit (exit_status);
+}
+
+/* Read tab stops from STOPS, a list of decimal numbers separated by
+   commas or blanks, and append each one to the tab stop list.
+   Any other character is reported through error with status 1. */
+
+void
+parse_tabstops (stops)
+     char *stops;
+{
+  char *p = stops;
+  int pending = -1;		/* Number being read, or -1 if none. */
+
+  while (*p != '\0')
+    {
+      char ch = *p++;
+
+      if (ISDIGIT (ch))
+        pending = (pending == -1 ? 0 : pending) * 10 + ch - '0';
+      else if (ch == ',' || isblank (ch))
+        {
+          /* A separator ends the current number, if there is one. */
+          add_tabstop (pending);
+          pending = -1;
+        }
+      else
+        error (1, 0, "tab size contains an invalid character");
+    }
+
+  /* The last number has no separator after it. */
+  add_tabstop (pending);
+}
+
+/* Add tab stop TABVAL to the end of `tab_list', except
+   if TABVAL is -1, do nothing.
+   `tab_list' grows by TABLIST_BLOCK entries whenever it is full. */
+
+void
+add_tabstop (tabval)
+     int tabval;
+{
+  if (tabval == -1)
+    return;
+  /* The list is full each time the entry count reaches a multiple of
+     TABLIST_BLOCK, so room for that many more entries is needed.
+     The size passed to xrealloc is in bytes, hence the sizeof. */
+  if (first_free_tab % TABLIST_BLOCK == 0)
+    tab_list = (int *) xrealloc (tab_list,
+                                 (first_free_tab + TABLIST_BLOCK)
+                                 * sizeof (int));
+  tab_list[first_free_tab++] = tabval;
+}
+
+/* Verify that the ENTRIES tab stops in TABS are all nonzero and
+   strictly increasing; each violation is reported through error
+   with status 1. */
+
+void
+validate_tabstops (tabs, entries)
+     int *tabs;
+     int entries;
+{
+  int last = 0;			/* Previous tab stop, 0 before the first. */
+  int n = 0;
+
+  while (n < entries)
+    {
+      int stop = tabs[n++];
+
+      if (stop == 0)
+        error (1, 0, "tab size cannot be 0");
+      if (stop <= last)
+        error (1, 0, "tab sizes must be ascending");
+      last = stop;
+    }
+}
+
+/* Change tabs to spaces, writing to stdout.
+   Read each file in `file_list', in order.
+   Return at once if none of the files can be opened. */
+
+void
+expand ()
+{
+  FILE *fp;			/* Input stream. */
+  int c;			/* Each input character. */
+  int tab_index = 0;		/* Index in `tab_list' of next tabstop. */
+  int column = 0;		/* Column on screen of the next char. */
+  int next_tab_column;		/* Column the next tab stop is on. */
+  int convert = 1;		/* If nonzero, perform translations. */
+
+  fp = next_file ((FILE *) NULL);
+  /* next_file returns NULL when no input file could be opened;
+     there is then nothing to read. */
+  if (fp == NULL)
+    return;
+
+  for (;;)
+    {
+      c = getc (fp);
+      if (c == EOF)
+        {
+          fp = next_file (fp);
+          if (fp == NULL)
+            break;		/* No more files. */
+          else
+            continue;
+        }
+
+      if (c == '\n')
+        {
+          putchar (c);
+          tab_index = 0;
+          column = 0;
+          convert = 1;
+        }
+      else if (c == '\t' && convert)
+        {
+          if (tab_size == 0)
+            {
+              /* Do not let tab_index == first_free_tab;
+                 stop when it is 1 less. */
+              while (tab_index < first_free_tab - 1
+                     && column >= tab_list[tab_index])
+                tab_index++;
+              next_tab_column = tab_list[tab_index];
+              if (tab_index < first_free_tab - 1)
+                tab_index++;
+              if (column >= next_tab_column)
+                next_tab_column = column + 1;	/* Ran out of tab stops. */
+            }
+          else
+            {
+              next_tab_column = column + tab_size - column % tab_size;
+            }
+          while (column < next_tab_column)
+            {
+              putchar (' ');
+              ++column;
+            }
+        }
+      else
+        {
+          if (convert)
+            {
+              if (c == '\b')
+                {
+                  if (column > 0)
+                    --column;
+                }
+              else
+                {
+                  ++column;
+                  /* With --initial, the first character that is
+                     neither a tab nor a backspace ends conversion
+                     for the rest of the line. */
+                  if (convert_entire_line == 0)
+                    convert = 0;
+                }
+            }
+          putchar (c);
+        }
+    }
+}
+
+/* Finish with stream FP, if it is non-NULL, and return a stream open
+   for reading the next file named in `file_list'.  A name of `-'
+   stands for standard input, which is never closed here, only reset.
+   Names that cannot be opened are reported and skipped, and set
+   `exit_status' to 1.  Return NULL when the list is exhausted. */
+
+FILE *
+next_file (fp)
+     FILE *fp;
+{
+  static char *prev_file;	/* Name of the file FP was opened on. */
+  char *name;
+  FILE *stream;
+
+  if (fp != NULL)
+    {
+      if (ferror (fp))
+        {
+          error (0, errno, "%s", prev_file);
+          exit_status = 1;
+        }
+      if (fp == stdin)
+        clearerr (fp);		/* Also clear EOF. */
+      else if (fclose (fp) == EOF)
+        {
+          error (0, errno, "%s", prev_file);
+          exit_status = 1;
+        }
+    }
+
+  for (;;)
+    {
+      name = *file_list++;
+      if (name == NULL)
+        return NULL;
+
+      if (name[0] == '-' && name[1] == '\0')
+        {
+          have_read_stdin = 1;
+          stream = stdin;
+        }
+      else
+        stream = fopen (name, "r");
+
+      if (stream != NULL)
+        {
+          prev_file = name;
+          return stream;
+        }
+
+      error (0, errno, "%s", name);
+      exit_status = 1;
+    }
+}
+
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-tab1[,tab2[,...]]] [-t tab1[,tab2[,...]]] [-i]\n\
+ [--tabs=tab1[,tab2[,...]]] [--initial] [file...]\n",
+ program_name);
+ exit (1);
+}
diff --git a/src/fold.c b/src/fold.c
new file mode 100644
index 000000000..d5d4ae3fe
--- /dev/null
+++ b/src/fold.c
@@ -0,0 +1,250 @@
+/* fold -- wrap each input line to fit in specified width.
+ Copyright (C) 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by David MacKenzie. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+char *xrealloc ();
+int adjust_column ();
+int fold_file ();
+void error ();
+
+/* If nonzero, try to break on whitespace. */
+int break_spaces;
+
+/* If nonzero, count bytes, not column positions. */
+int count_bytes;
+
+/* If nonzero, at least one of the files we read was standard input. */
+int have_read_stdin;
+
+/* The name this program was run with. */
+char *program_name;
+
+struct option longopts[] =
+{
+ {"bytes", 0, NULL, 'b'},
+ {"spaces", 0, NULL, 's'},
+ {"width", 1, NULL, 'w'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Entry point for fold: parse the options, then fold each named
+   file, or standard input when no file is named.  The exit status
+   is the OR of the fold_file results. */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int status = 0;
+  int width = 80;		/* Maximum line length. */
+  int opt;
+  int arg;
+
+  program_name = argv[0];
+  have_read_stdin = 0;
+  count_bytes = 0;
+  break_spaces = 0;
+
+  for (;;)
+    {
+      opt = getopt_long (argc, argv, "bsw:", longopts, (int *) 0);
+      if (opt == EOF)
+        break;
+
+      if (opt == 'b')
+        /* Count bytes rather than columns. */
+        count_bytes = 1;
+      else if (opt == 's')
+        /* Break at word boundaries. */
+        break_spaces = 1;
+      else if (opt == 'w')
+        {
+          /* Line width. */
+          width = atoi (optarg);
+          if (width < 1)
+            error (1, 0, "%s: invalid line width", optarg);
+        }
+      else
+        {
+          fprintf (stderr, "\
+Usage: %s [-bs] [-w width] [--bytes] [--spaces] [--width=width] [file...]\n",
+                   argv[0]);
+          exit (1);
+        }
+    }
+
+  if (argc == optind)
+    status |= fold_file ("-", width);
+  else
+    {
+      for (arg = optind; arg < argc; arg++)
+        status |= fold_file (argv[arg], width);
+    }
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    error (1, errno, "-");
+  if (fclose (stdout) == EOF)
+    error (1, errno, "write error");
+
+  exit (status);
+}
+
+/* Fold file FILENAME, or standard input if FILENAME is "-",
+   to stdout, with maximum line length WIDTH.
+   A character that is by itself wider than WIDTH (such as a tab when
+   WIDTH is less than 8) is put on a line of its own.
+   Return 0 if successful, 1 if an error occurs. */
+
+int
+fold_file (filename, width)
+     char *filename;
+     int width;
+{
+  FILE *istream;
+  register int c;
+  int column = 0;		/* Screen column where next char will go. */
+  int offset_out = 0;		/* Index in `line_out' for next char. */
+  /* The output line buffer is kept between calls and only grows. */
+  static char *line_out = NULL;
+  static size_t allocated_out = 0;
+
+  if (!strcmp (filename, "-"))
+    {
+      istream = stdin;
+      have_read_stdin = 1;
+    }
+  else
+    istream = fopen (filename, "r");
+
+  if (istream == NULL)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  while ((c = getc (istream)) != EOF)
+    {
+      /* Keep room for this character plus a newline after it. */
+      if (offset_out + 1 >= allocated_out)
+        {
+          allocated_out += 1024;
+          line_out = xrealloc (line_out, allocated_out);
+        }
+
+      if (c == '\n')
+        {
+          line_out[offset_out++] = c;
+          fwrite (line_out, sizeof (char), offset_out, stdout);
+          column = offset_out = 0;
+          continue;
+        }
+
+    rescan:
+      column = adjust_column (column, c);
+
+      if (column > width)
+        {
+          /* This character would make the line too long.
+             Print the line plus a newline, and make this character
+             start the next line. */
+          if (break_spaces)
+            {
+              /* Look for the last blank. */
+              int logical_end;
+
+              for (logical_end = offset_out - 1; logical_end >= 0;
+                   logical_end--)
+                if (isblank (line_out[logical_end]))
+                  break;
+              if (logical_end >= 0)
+                {
+                  int i;
+
+                  /* Found a blank.  Don't output the part after it. */
+                  logical_end++;
+                  fwrite (line_out, sizeof (char), logical_end, stdout);
+                  putchar ('\n');
+                  /* Move the remainder to the beginning of the next line.
+                     The areas being copied here might overlap. */
+                  bcopy (line_out + logical_end, line_out,
+                         offset_out - logical_end);
+                  offset_out -= logical_end;
+                  for (column = i = 0; i < offset_out; i++)
+                    column = adjust_column (column, line_out[i]);
+                  goto rescan;
+                }
+            }
+
+          /* The line is still empty, so this one character is wider
+             than WIDTH.  Breaking the line here would never make it
+             fit and would rescan forever; keep the character. */
+          if (offset_out == 0)
+            {
+              line_out[offset_out++] = c;
+              continue;
+            }
+
+          line_out[offset_out++] = '\n';
+          fwrite (line_out, sizeof (char), offset_out, stdout);
+          column = offset_out = 0;
+          goto rescan;
+        }
+
+      line_out[offset_out++] = c;
+    }
+
+  /* Flush a last line that did not end in a newline. */
+  if (offset_out)
+    fwrite (line_out, sizeof (char), offset_out, stdout);
+
+  if (ferror (istream))
+    {
+      error (0, errno, "%s", filename);
+      if (strcmp (filename, "-"))
+        fclose (istream);
+      return 1;
+    }
+  if (strcmp (filename, "-") && fclose (istream) == EOF)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (ferror (stdout))
+    {
+      error (0, errno, "write error");
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Return the column the cursor is in after printing C, given that
+   it was in column COLUMN before.  The first column is 0.
+   When `count_bytes' is set every character advances one column;
+   otherwise backspace, carriage return and tab move the cursor the
+   way a terminal with tab stops every 8 columns would. */
+
+int
+adjust_column (column, c)
+     int column;
+     char c;
+{
+  if (count_bytes)
+    return column + 1;
+
+  switch (c)
+    {
+    case '\b':
+      /* Backspace never moves left of the first column. */
+      return column > 0 ? column - 1 : column;
+
+    case '\r':
+      return 0;
+
+    case '\t':
+      return column + 8 - column % 8;
+
+    default:
+      return column + 1;
+    }
+}
diff --git a/src/head.c b/src/head.c
new file mode 100644
index 000000000..0302b60ac
--- /dev/null
+++ b/src/head.c
@@ -0,0 +1,380 @@
+/* head -- output first part of file(s)
+ Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Options:
+ -b Print first N 512-byte blocks.
+ -c, --bytes=N[bkm] Print first N bytes
+ [or 512-byte blocks, kilobytes, or megabytes].
+ -k Print first N kilobytes.
+ -N, -l, -n, --lines=N Print first N lines.
+ -m Print first N megabytes.
+ -q, --quiet, --silent Never print filename headers.
+ -v, --verbose Always print filename headers.
+
+ Reads from standard input if no files are given or when a filename of
+ ``-'' is encountered.
+ By default, filename headers are printed only if more than one file
+ is given.
+ By default, prints the first 10 lines (head -n 10).
+
+ David MacKenzie <djm@ai.mit.edu> */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
+#else
+#define ISDIGIT(c) (isdigit ((c)))
+#endif
+
+/* Number of lines/chars/blocks to head. */
+#define DEFAULT_NUMBER 10
+
+/* Size of atomic reads. */
+#define BUFSIZE (512 * 8)
+
+/* Number of bytes per item we are printing.
+ If 0, head in lines. */
+int unit_size;
+
+/* If nonzero, print filename headers. */
+int print_headers;
+
+/* When to print the filename banners. */
+enum header_mode
+{
+ multiple_files, always, never
+};
+
+int head ();
+int head_bytes ();
+int head_file ();
+int head_lines ();
+long atou ();
+void error ();
+void parse_unit ();
+void usage ();
+void write_header ();
+void xwrite ();
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Have we ever read standard input? */
+int have_read_stdin;
+
+struct option long_options[] =
+{
+ {"bytes", 1, NULL, 'c'},
+ {"lines", 1, NULL, 'n'},
+ {"quiet", 0, NULL, 'q'},
+ {"silent", 0, NULL, 'q'},
+ {"verbose", 0, NULL, 'v'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Entry point for head: accept the old `-N[bcklmqv]' form as the
+   first argument, then the getopt options, and print the start of
+   each named file, or of standard input when no file is named.
+   The exit status is the OR of the head_file results. */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  enum header_mode mode = multiple_files;
+  long count = -1;		/* Number of items to print (-1 if unset). */
+  int status = 0;
+  int opt;			/* Option character. */
+
+  program_name = argv[0];
+  print_headers = 0;
+  unit_size = 0;
+  have_read_stdin = 0;
+
+  if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
+    {
+      /* Old option syntax: a dash, one or more digits, then any
+         number of option letters. */
+      char *p = argv[1] + 1;
+
+      count = 0;
+      while (ISDIGIT (*p))
+        {
+          count = count * 10 + *p - '0';
+          p++;
+        }
+
+      /* Parse any appended option letters. */
+      for (; *p != '\0'; p++)
+        switch (*p)
+          {
+          case 'b':
+            unit_size = 512;
+            break;
+
+          case 'c':
+            unit_size = 1;
+            break;
+
+          case 'k':
+            unit_size = 1024;
+            break;
+
+          case 'l':
+            unit_size = 0;
+            break;
+
+          case 'm':
+            unit_size = 1048576;
+            break;
+
+          case 'q':
+            mode = never;
+            break;
+
+          case 'v':
+            mode = always;
+            break;
+
+          default:
+            error (0, 0, "unrecognized option `-%c'", *p);
+            usage ();
+          }
+
+      /* Make the options we just parsed invisible to getopt. */
+      argv[1] = argv[0];
+      argv++;
+      argc--;
+    }
+
+  for (;;)
+    {
+      opt = getopt_long (argc, argv, "c:n:qv", long_options, (int *) 0);
+      if (opt == EOF)
+        break;
+
+      switch (opt)
+        {
+        case 'c':
+        case 'n':
+          /* -c counts bytes, possibly scaled by a unit suffix;
+             -n counts lines. */
+          if (opt == 'c')
+            {
+              unit_size = 1;
+              parse_unit (optarg);
+            }
+          else
+            unit_size = 0;
+          count = atou (optarg);
+          if (count == -1)
+            error (1, 0, "invalid number `%s'", optarg);
+          break;
+
+        case 'q':
+          mode = never;
+          break;
+
+        case 'v':
+          mode = always;
+          break;
+
+        default:
+          usage ();
+        }
+    }
+
+  if (count == -1)
+    count = DEFAULT_NUMBER;
+
+  /* Turn a count of blocks, kilobytes or megabytes into bytes. */
+  if (unit_size > 1)
+    count *= unit_size;
+
+  if (mode == always
+      || (mode == multiple_files && optind < argc - 1))
+    print_headers = 1;
+
+  if (optind == argc)
+    status |= head_file ("-", count);
+
+  for (; optind < argc; ++optind)
+    status |= head_file (argv[optind], count);
+
+  if (have_read_stdin && close (0) < 0)
+    error (1, errno, "-");
+  if (close (1) < 0)
+    error (1, errno, "write error");
+
+  exit (status);
+}
+
+/* Print the first NUMBER items of file FILENAME, preceded by a
+   header when `print_headers' is set.  A FILENAME of "-" means
+   standard input, which is read from descriptor 0 and not closed.
+   Return 0 if successful, 1 if the file cannot be opened, read or
+   closed. */
+
+int
+head_file (filename, number)
+     char *filename;
+     long number;
+{
+  int fd;
+  int status;
+
+  if (strcmp (filename, "-") == 0)
+    {
+      have_read_stdin = 1;
+      filename = "standard input";
+      if (print_headers)
+        write_header (filename);
+      return head (filename, 0, number);
+    }
+
+  fd = open (filename, O_RDONLY);
+  if (fd < 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (print_headers)
+    write_header (filename);
+  status = head (filename, fd, number);
+
+  if (close (fd) != 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+  return status;
+}
+
+/* Write the banner `==> FILENAME <==' to descriptor 1.  Every banner
+   but the first one written is preceded by an empty line. */
+
+void
+write_header (filename)
+     char *filename;
+{
+  static int first_file = 1;	/* Nonzero until a banner is written. */
+  char *lead;
+  int lead_len;
+
+  if (first_file)
+    {
+      lead = "==> ";
+      lead_len = 4;
+      first_file = 0;
+    }
+  else
+    {
+      lead = "\n==> ";
+      lead_len = 5;
+    }
+
+  xwrite (1, lead, lead_len);
+  xwrite (1, filename, strlen (filename));
+  xwrite (1, " <==\n", 5);
+}
+
+/* Print the first NUMBER lines of descriptor FD when `unit_size' is
+   zero, and its first NUMBER bytes otherwise.  FILENAME is passed
+   on for use in error messages.  Return the result of the routine
+   that did the work. */
+
+int
+head (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  if (unit_size == 0)
+    return head_lines (filename, fd, number);
+
+  return head_bytes (filename, fd, number);
+}
+
+/* Copy the first BYTES_TO_WRITE bytes read from descriptor FD to
+   descriptor 1, stopping early at end of file.  FILENAME is used in
+   the message for a read error.
+   Return 0 if successful, 1 if a read fails. */
+
+int
+head_bytes (filename, fd, bytes_to_write)
+     char *filename;
+     int fd;
+     long bytes_to_write;
+{
+  char chunk[BUFSIZE];
+  long remaining = bytes_to_write;
+  int got;			/* Result of the last read. */
+
+  while (remaining != 0)
+    {
+      got = read (fd, chunk, BUFSIZE);
+      if (got == 0)
+        break;
+      if (got == -1)
+        {
+          error (0, errno, "%s", filename);
+          return 1;
+        }
+
+      /* Do not write more than was asked for. */
+      if (got > remaining)
+        got = remaining;
+      xwrite (1, chunk, got);
+      remaining -= got;
+    }
+  return 0;
+}
+
+/* Copy the first LINES_TO_WRITE lines read from descriptor FD to
+   descriptor 1, stopping early at end of file.  FILENAME is used in
+   the message for a read error.
+   Return 0 if successful, 1 if a read fails. */
+
+int
+head_lines (filename, fd, lines_to_write)
+     char *filename;
+     int fd;
+     long lines_to_write;
+{
+  char chunk[BUFSIZE];
+  int got;			/* Result of the last read. */
+  int used;			/* Bytes of `chunk' to write out. */
+
+  while (lines_to_write != 0)
+    {
+      got = read (fd, chunk, BUFSIZE);
+      if (got == 0)
+        break;
+      if (got == -1)
+        {
+          error (0, errno, "%s", filename);
+          return 1;
+        }
+
+      /* Stop just after the newline that ends the last wanted line,
+         or at the end of the chunk, whichever comes first. */
+      for (used = 0; used < got;)
+        {
+          char ch = chunk[used++];
+
+          if (ch == '\n')
+            {
+              lines_to_write--;
+              if (lines_to_write == 0)
+                break;
+            }
+        }
+      xwrite (1, chunk, used);
+    }
+  return 0;
+}
+
+/* If STR ends in one of the unit letters `b', `k' or `m', set
+   `unit_size' to 512, 1024 or 1048576 respectively and remove the
+   letter from STR.  Otherwise leave both unchanged. */
+
+void
+parse_unit (str)
+     char *str;
+{
+  int len = strlen (str);
+  int multiplier = 0;		/* Unit named by the suffix, 0 if none. */
+  char *last;
+
+  if (len == 0)
+    return;
+
+  last = str + len - 1;
+  if (*last == 'b')
+    multiplier = 512;
+  else if (*last == 'k')
+    multiplier = 1024;
+  else if (*last == 'm')
+    multiplier = 1048576;
+
+  if (multiplier != 0)
+    {
+      unit_size = multiplier;
+      *last = '\0';
+    }
+}
+
+/* Convert STR, a string of ASCII digits, into an unsigned integer.
+   Return -1 if STR does not represent a valid unsigned integer,
+   which includes a value too large to be returned as a long. */
+
+long
+atou (str)
+     char *str;
+{
+  /* The largest value of a long, computed from the unsigned type. */
+  long max = (long) (~(unsigned long) 0 >> 1);
+  /* Accumulate in the return type, so that nothing is truncated. */
+  long value = 0;
+
+  for (; ISDIGIT (*str); ++str)
+    {
+      int digit = *str - '0';
+
+      /* Refuse the digit if appending it would exceed `max'. */
+      if (value > (max - digit) / 10)
+        return -1;
+      value = value * 10 + digit;
+    }
+  return *str ? -1 : value;
+}
+
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-c N[bkm]] [-n N] [-qv] [--bytes=N[bkm]] [--lines=N]\n\
+ [--quiet] [--silent] [--verbose] [file...]\n\
+ %s [-Nbcklmqv] [file...]\n", program_name, program_name);
+ exit (1);
+}
diff --git a/src/join.c b/src/join.c
new file mode 100644
index 000000000..9ac82e0fd
--- /dev/null
+++ b/src/join.c
@@ -0,0 +1,690 @@
+/* join - join lines of two files on a common field
+ Copyright (C) 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Written by Mike Haertel, mike@gnu.ai.mit.edu. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <sys/types.h>
+#include <getopt.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISSPACE(c) (isascii(c) && isspace(c))
+#define ISDIGIT(c) (isascii(c) && isdigit(c))
+#else
+#define ISSPACE(c) isspace(c)
+#define ISDIGIT(c) isdigit(c)
+#endif
+
+char *xmalloc ();
+char *xrealloc ();
+void error ();
+static void usage ();
+
+#define min(A, B) ((A) < (B) ? (A) : (B))
+
+/* An element of the list describing the format of each
+ output line. Elements are created by `add_field' and chained in the
+ order given on the command line. */
+struct outlist
+{
+ int file; /* File to take field from (1 or 2). */
+ int field; /* Field number to print, counting from 1. */
+ struct outlist *next; /* Next element, or NULL at the end of the list. */
+};
+
+/* A field of a line. The two pointers delimit the half-open range
+ [beg, lim) inside the text of the line the field belongs to; the
+ field has no storage of its own. */
+struct field
+{
+ char *beg; /* First character in field. */
+ char *lim; /* Character after last character in field. */
+};
+
+/* A line read from an input file. Newlines are not stored.
+ `beg' and `fields' are both heap blocks, released by `freeline'. */
+struct line
+{
+ char *beg; /* First character in line. */
+ char *lim; /* Character after last character in line. */
+ int nfields; /* Number of elements in `fields'. */
+ struct field *fields; /* Field table filled in by `xfields'. */
+};
+
+/* One or more consecutive lines read from a file that all have the
+ same join field value. The array starts with room for one line
+ (`initseq') and is doubled by `getseq' whenever it is full. */
+struct seq
+{
+ int count; /* Elements used in `lines'. */
+ int alloc; /* Elements allocated in `lines'. */
+ struct line *lines; /* Heap array of `alloc' lines. */
+};
+
+/* If nonzero, print unpairable lines in file 1 or 2 (set by -a and -v). */
+static int print_unpairables_1, print_unpairables_2;
+
+/* If nonzero, print pairable lines. Set to 1 at startup and cleared
+ by -v. */
+static int print_pairables;
+
+/* Empty output field filler (-e); NULL means print nothing for a
+ missing or empty field. */
+static char *empty_filler;
+
+/* Field to join on in file 1 and file 2, counting from 0 (the option
+ values, which count from 1, are decremented when stored). */
+static int join_field_1, join_field_2;
+
+/* List of fields to print (-o); NULL selects the default output format. */
+struct outlist *outlist;
+
+/* Last element in `outlist', where a new element can be added. */
+struct outlist *outlist_end;
+
+/* Tab character separating fields (-t); if this is NUL fields are separated
+ by any nonempty string of white space, otherwise by exactly one
+ tab character. */
+static char tab;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Fill in the `fields' structure in LINE.
+
+   The text between LINE->beg and LINE->lim is split either at every
+   occurrence of `tab', or, when `tab' is NUL, at runs of white space.
+   In the white space case leading white space is skipped, so that an
+   indented line does not acquire an empty first field.  The field
+   table is heap allocated and LINE->nfields receives its length.  */
+
+static void
+xfields (line)
+     struct line *line;
+{
+  /* Table size to start from; remembered across calls so that later
+     lines begin with room for the widest line seen so far.  */
+  static int nfields = 2;
+  int i;
+  register char *ptr, *lim;
+
+  line->fields = (struct field *) xmalloc (nfields * sizeof (struct field));
+
+  ptr = line->beg;
+  lim = line->lim;
+
+  /* The ctype macros are only defined for unsigned char values, hence
+     the casts below.  */
+  if (!tab)
+    while (ptr < lim && ISSPACE ((unsigned char) *ptr))
+      ++ptr;
+
+  for (i = 0; ptr < lim; ++i)
+    {
+      if (i == nfields)
+        {
+          nfields *= 2;
+          line->fields = (struct field *)
+            xrealloc ((char *) line->fields, nfields * sizeof (struct field));
+        }
+
+      line->fields[i].beg = ptr;
+      if (tab)
+        {
+          while (ptr < lim && *ptr != tab)
+            ++ptr;
+          line->fields[i].lim = ptr;
+          /* Step over exactly one separator.  */
+          if (ptr < lim)
+            ++ptr;
+        }
+      else
+        {
+          while (ptr < lim && !ISSPACE ((unsigned char) *ptr))
+            ++ptr;
+          line->fields[i].lim = ptr;
+          /* Step over the whole run of separators.  */
+          while (ptr < lim && ISSPACE ((unsigned char) *ptr))
+            ++ptr;
+        }
+    }
+
+  line->nfields = i;
+}
+
+/* Read one line from FP, store its text (without the newline) in LINE
+   and split it into fields.  Return 0 when there is nothing left to
+   read, 1 otherwise.  */
+
+static int
+get_line (fp, line)
+     FILE *fp;
+     struct line *line;
+{
+  static int linesize = 80;     /* Buffer size to start with; only grows.  */
+  char *buf;
+  int len = 0;
+  int c;
+
+  if (feof (fp))
+    return 0;
+
+  buf = xmalloc (linesize);
+
+  while ((c = getc (fp)) != EOF && c != '\n')
+    {
+      if (len == linesize)
+        {
+          linesize *= 2;
+          buf = xrealloc (buf, linesize);
+        }
+      buf[len++] = c;
+    }
+
+  /* Input ended before any character of a new line was seen.  */
+  if (len == 0 && c == EOF)
+    {
+      free (buf);
+      return 0;
+    }
+
+  line->beg = buf;
+  line->lim = buf + len;
+  xfields (line);
+  return 1;
+}
+
+/* Release the two heap blocks belonging to LINE: its text and its
+   field table.  */
+
+static void
+freeline (line)
+     struct line *line;
+{
+  free (line->beg);
+  free ((char *) line->fields);
+}
+
+/* Make SEQ an empty sequence with room for a single line.  */
+
+static void
+initseq (seq)
+     struct seq *seq;
+{
+  seq->lines = (struct line *) xmalloc (sizeof (struct line));
+  seq->alloc = 1;
+  seq->count = 0;
+}
+
+/* Append the next line of FP to SEQ, enlarging SEQ first if it is
+   full.  Return 1 if a line was added, 0 at end of input (SEQ->count
+   is then left unchanged).  */
+
+static int
+getseq (fp, seq)
+     FILE *fp;
+     struct seq *seq;
+{
+  struct line *slot;
+
+  if (seq->alloc == seq->count)
+    {
+      seq->alloc *= 2;
+      seq->lines = (struct line *)
+        xrealloc ((char *) seq->lines, seq->alloc * sizeof (struct line));
+    }
+
+  slot = seq->lines + seq->count;
+  if (!get_line (fp, slot))
+    return 0;
+
+  ++seq->count;
+  return 1;
+}
+
+/* Free the array of lines held by SEQ.  The lines themselves are not
+   freed here.  */
+
+static void
+delseq (seq)
+     struct seq *seq;
+{
+  struct line *lines = seq->lines;
+
+  free ((char *) lines);
+}
+
+/* Compare the join field of LINE1 (field `join_field_1') with that of
+   LINE2 (field `join_field_2').  Return a negative value, zero, or a
+   positive value as the first is less than, equal to, or greater than
+   the second.  A missing field is treated like an empty one, and an
+   empty field sorts before every nonempty field.  */
+
+static int
+keycmp (line1, line2)
+     struct line *line1;
+     struct line *line2;
+{
+  struct field *key1 = NULL;    /* Join field of LINE1, if it has one.  */
+  struct field *key2 = NULL;    /* Join field of LINE2, if it has one.  */
+  int len1 = 0, len2 = 0;       /* Lengths of the two join fields.  */
+  int order;
+
+  if (join_field_1 < line1->nfields)
+    {
+      key1 = &line1->fields[join_field_1];
+      len1 = key1->lim - key1->beg;
+    }
+  if (join_field_2 < line2->nfields)
+    {
+      key2 = &line2->fields[join_field_2];
+      len2 = key2->lim - key2->beg;
+    }
+
+  if (len1 == 0 || len2 == 0)
+    return len1 == 0 ? (len2 == 0 ? 0 : -1) : 1;
+
+  /* Compare the common prefix; if it is the same, the shorter field
+     is the smaller one.  */
+  order = memcmp (key1->beg, key2->beg, min (len1, len2));
+  return order ? order : len1 - len2;
+}
+
+/* Write field N (counting from 0) of LINE to stdout.  If LINE has no
+   such field, or the field is empty, write `empty_filler' instead,
+   provided one has been set.  */
+
+static void
+prfield (n, line)
+     int n;
+     struct line *line;
+{
+  if (n < line->nfields)
+    {
+      struct field *f = &line->fields[n];
+      int len = f->lim - f->beg;
+
+      if (len != 0)
+        {
+          fwrite (f->beg, 1, len, stdout);
+          return;
+        }
+    }
+
+  if (empty_filler)
+    fputs (empty_filler, stdout);
+}
+
+/* Write every field of LINE to stdout, separating the fields with
+   `tab' (or a space when `tab' is NUL) and ending with a newline.
+   A line without fields produces no output at all.  */
+
+static void
+prline (line)
+     struct line *line;
+{
+  int sep = tab ? tab : ' ';
+  int last = line->nfields - 1;
+  int i;
+
+  for (i = 0; i <= last; ++i)
+    {
+      prfield (i, line);
+      putchar (i < last ? sep : '\n');
+    }
+}
+
+/* Write the output line for the pair LINE1 (from file 1) and LINE2
+   (from file 2).  With an `outlist', exactly the listed fields are
+   written.  Otherwise the join field of LINE1 comes first, followed by
+   the other fields of LINE1 and then the other fields of LINE2.
+   Fields are separated by `tab', or by a space when `tab' is NUL.  */
+
+static void
+prjoin (line1, line2)
+     struct line *line1;
+     struct line *line2;
+{
+  int sep = tab ? tab : ' ';
+  int i;
+
+  if (outlist)
+    {
+      struct outlist *o = outlist;
+
+      for (;;)
+        {
+          prfield (o->field - 1, o->file == 1 ? line1 : line2);
+          o = o->next;
+          if (o == NULL)
+            break;
+          putchar (sep);
+        }
+      putchar ('\n');
+      return;
+    }
+
+  prfield (join_field_1, line1);
+
+  /* Everything in LINE1 except its join field.  */
+  for (i = 0; i < line1->nfields; ++i)
+    if (i != join_field_1)
+      {
+        putchar (sep);
+        prfield (i, line1);
+      }
+
+  /* Everything in LINE2 except its join field.  */
+  for (i = 0; i < line2->nfields; ++i)
+    if (i != join_field_2)
+      {
+        putchar (sep);
+        prfield (i, line2);
+      }
+
+  putchar ('\n');
+}
+
+/* Print the join of the files in FP1 and FP2.
+
+ Each of `seq1' and `seq2' holds the run of lines from its file that
+ carry the key currently being examined. While a run is being
+ collected its last slot is a look-ahead: the first line read whose key
+ no longer matches. The loops below therefore treat only the first
+ `count - 1' lines as members of the run. */
+
+static void
+join (fp1, fp2)
+ FILE *fp1;
+ FILE *fp2;
+{
+ struct seq seq1, seq2;
+ struct line line;
+ int diff, i, j, eof1, eof2;
+
+ /* Read the first line of each file. */
+ initseq (&seq1);
+ getseq (fp1, &seq1);
+ initseq (&seq2);
+ getseq (fp2, &seq2);
+
+ while (seq1.count && seq2.count)
+ {
+ diff = keycmp (&seq1.lines[0], &seq2.lines[0]);
+ /* File 1 is behind: its line has no partner. Drop it and read on. */
+ if (diff < 0)
+ {
+ if (print_unpairables_1)
+ prline (&seq1.lines[0]);
+ freeline (&seq1.lines[0]);
+ seq1.count = 0;
+ getseq (fp1, &seq1);
+ continue;
+ }
+ /* File 2 is behind: same treatment on the other side. */
+ if (diff > 0)
+ {
+ if (print_unpairables_2)
+ prline (&seq2.lines[0]);
+ freeline (&seq2.lines[0]);
+ seq2.count = 0;
+ getseq (fp2, &seq2);
+ continue;
+ }
+
+ /* Keep reading lines from file1 as long as they continue to
+ match the current line from file2. */
+ eof1 = 0;
+ do
+ if (!getseq (fp1, &seq1))
+ {
+ /* No look-ahead line exists at EOF; bump the count anyway so
+ that `count - 1' still equals the number of matching lines. */
+ eof1 = 1;
+ ++seq1.count;
+ break;
+ }
+ while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0]));
+
+ /* Keep reading lines from file2 as long as they continue to
+ match the current line from file1. */
+ eof2 = 0;
+ do
+ if (!getseq (fp2, &seq2))
+ {
+ eof2 = 1;
+ ++seq2.count;
+ break;
+ }
+ while (!keycmp (&seq1.lines[0], &seq2.lines[seq2.count - 1]));
+
+ /* Output every pairing of a line from one run with a line from
+ the other. */
+ if (print_pairables)
+ {
+ for (i = 0; i < seq1.count - 1; ++i)
+ for (j = 0; j < seq2.count - 1; ++j)
+ prjoin (&seq1.lines[i], &seq2.lines[j]);
+ }
+
+ /* Discard the run from file 1 and, unless EOF was hit, move the
+ look-ahead line to the front as the start of the next run. */
+ for (i = 0; i < seq1.count - 1; ++i)
+ freeline (&seq1.lines[i]);
+ if (!eof1)
+ {
+ seq1.lines[0] = seq1.lines[seq1.count - 1];
+ seq1.count = 1;
+ }
+ else
+ seq1.count = 0;
+
+ /* Likewise for file 2. */
+ for (i = 0; i < seq2.count - 1; ++i)
+ freeline (&seq2.lines[i]);
+ if (!eof2)
+ {
+ seq2.lines[0] = seq2.lines[seq2.count - 1];
+ seq2.count = 1;
+ }
+ else
+ seq2.count = 0;
+ }
+
+ /* One file is exhausted; whatever remains in the other is unpairable.
+ NOTE(review): when the matching print_unpairables flag is off, the
+ line still held in the sequence is not passed to freeline. */
+ if (print_unpairables_1 && seq1.count)
+ {
+ prline (&seq1.lines[0]);
+ freeline (&seq1.lines[0]);
+ while (get_line (fp1, &line))
+ {
+ prline (&line);
+ freeline (&line);
+ }
+ }
+
+ if (print_unpairables_2 && seq2.count)
+ {
+ prline (&seq2.lines[0]);
+ freeline (&seq2.lines[0]);
+ while (get_line (fp2, &line))
+ {
+ prline (&line);
+ freeline (&line);
+ }
+ }
+
+ delseq (&seq1);
+ delseq (&seq2);
+}
+
+/* Append to `outlist' a request for field FIELD (counting from 1) of
+   file FILE (1 or 2) and return 1.  If either number is out of range,
+   add nothing and return 0.  */
+
+static int
+add_field (file, field)
+     int file;
+     int field;
+{
+  struct outlist *o;
+
+  if (!(file == 1 || file == 2) || field < 1)
+    return 0;
+
+  o = (struct outlist *) xmalloc (sizeof (struct outlist));
+  o->next = NULL;
+  o->field = field;
+  o->file = file;
+
+  /* Link at the tail so that output follows command line order.  */
+  if (outlist != NULL)
+    outlist_end->next = o;
+  else
+    outlist = o;
+  outlist_end = o;
+
+  return 1;
+}
+
+/* Parse STR as a list of `FILE.FIELD' specs separated by commas or
+   blanks and add each valid spec to `outlist'.  Return the number of
+   specs added, or 0 as soon as a character is met that is not a
+   digit, a dot, a comma or a blank.  */
+
+static int
+add_field_list (str)
+     char *str;
+{
+  int nadded = 0;
+  int file = -1, field = -1;
+  int *target = &file;          /* Number that digits currently extend.  */
+  char c;
+
+  while ((c = *str++) != '\0')
+    {
+      if (c == ',' || isblank (c))
+        {
+          /* End of one spec: record it and start afresh.  */
+          nadded += add_field (file, field);
+          file = field = -1;
+          target = &file;
+        }
+      else if (c == '.')
+        target = &field;
+      else if (ISDIGIT (c))
+        {
+          if (*target == -1)
+            *target = 0;
+          *target = *target * 10 + c - '0';
+        }
+      else
+        return 0;
+    }
+
+  /* The last spec has no separator after it.  */
+  return nadded + add_field (file, field);
+}
+
+/* When using getopt_long_only, no long option can start with
+ a character that is a short option.
+ All three entries take an argument. `j1' and `j2' return the same
+ codes as the short options `1' and `2', so `main' handles each pair
+ in one case. */
+static struct option longopts[] =
+{
+ {"j", 1, NULL, 'j'},
+ {"j1", 1, NULL, '1'},
+ {"j2", 1, NULL, '2'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Parse the command line, open the two input files (`-' stands for
+   standard input) and print their join on standard output.  Invalid
+   options or operands are reported through `error' or `usage'; on
+   success the program exits with status 0.  */
+
+void
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  char *names[2];
+  FILE *fp1, *fp2;
+  int optc, prev_optc = 0, nfiles, val;
+
+  program_name = argv[0];
+  nfiles = 0;
+  print_pairables = 1;
+
+  /* The leading `-' in the option string makes getopt hand back each
+     non-option argument as the argument of option code 1, in command
+     line order.  That is what lets `-o' take several arguments.  */
+  while ((optc = getopt_long_only (argc, argv, "-a:e:1:2:o:t:v:", longopts,
+                                   (int *) 0)) != EOF)
+    {
+      switch (optc)
+        {
+        case 'a':
+          val = atoi (optarg);
+          if (val == 1)
+            print_unpairables_1 = 1;
+          else if (val == 2)
+            print_unpairables_2 = 1;
+          else
+            error (2, 0, "invalid file number for `-a'");
+          break;
+
+        case 'e':
+          empty_filler = optarg;
+          break;
+
+        case '1':
+          val = atoi (optarg);
+          if (val <= 0)
+            error (2, 0, "invalid field number for `-1'");
+          join_field_1 = val - 1;
+          break;
+
+        case '2':
+          val = atoi (optarg);
+          if (val <= 0)
+            error (2, 0, "invalid field number for `-2'");
+          join_field_2 = val - 1;
+          break;
+
+        case 'j':
+          val = atoi (optarg);
+          if (val <= 0)
+            error (2, 0, "invalid field number for `-j'");
+          join_field_1 = join_field_2 = val - 1;
+          break;
+
+        case 'o':
+          if (add_field_list (optarg) == 0)
+            error (2, 0, "invalid field list for `-o'");
+          break;
+
+        case 't':
+          tab = *optarg;
+          break;
+
+        case 'v':
+          val = atoi (optarg);
+          if (val == 1)
+            print_unpairables_1 = 1;
+          else if (val == 2)
+            print_unpairables_2 = 1;
+          else
+            error (2, 0, "invalid file number for `-v'");
+          print_pairables = 0;
+          break;
+
+        case 1:                 /* Non-option argument.  */
+          if (prev_optc == 'o')
+            {
+              /* Might be continuation of args to -o.  */
+              if (add_field_list (optarg) > 0)
+                continue;       /* Don't change `prev_optc'.  */
+            }
+
+          if (nfiles > 1)
+            usage ();
+          names[nfiles++] = optarg;
+          break;
+
+        case '?':
+          usage ();
+        }
+      prev_optc = optc;
+    }
+
+  if (nfiles != 2)
+    usage ();
+
+  fp1 = strcmp (names[0], "-") ? fopen (names[0], "r") : stdin;
+  if (!fp1)
+    error (1, errno, "%s", names[0]);
+  fp2 = strcmp (names[1], "-") ? fopen (names[1], "r") : stdin;
+  if (!fp2)
+    error (1, errno, "%s", names[1]);
+  /* No system call has failed here, so pass 0 rather than `errno';
+     otherwise a stale error text would be appended to the message.  */
+  if (fp1 == fp2)
+    error (1, 0, "both files cannot be standard input");
+  join (fp1, fp2);
+
+  if ((fp1 == stdin || fp2 == stdin) && fclose (stdin) == EOF)
+    error (1, errno, "-");
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, 0, "write error");
+
+  exit (0);
+}
+
+/* Print a synopsis of the command line on stderr, naming the program
+ as `program_name', then exit with status 1. Does not return. */
+
+static void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-a 1|2] [-v 1|2] [-e empty-string] [-o field-list...] [-t char]\n\
+ [-j[1|2] field] [-1 field] [-2 field] file1 file2\n",
+ program_name);
+ exit (1);
+}
diff --git a/src/nl.c b/src/nl.c
new file mode 100644
index 000000000..368001adc
--- /dev/null
+++ b/src/nl.c
@@ -0,0 +1,546 @@
+/* nl -- number lines of files
+ Copyright (C) 1989, 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Scott Bartram (nancy!scott@uunet.uu.net)
+ Revised by David MacKenzie (djm@ai.mit.edu) */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <getopt.h>
+#include <regex.h>
+#include "linebuffer.h"
+#include "system.h"
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+/* Line-number formats, selected with -n: `rn', `rz' and `ln'
+ respectively. `build_print_fmt' turns the choice into a printf
+ format. */
+enum number_format
+{
+ FORMAT_RIGHT_NOLZ, /* Right justified, no leading zeroes. */
+ FORMAT_RIGHT_LZ, /* Right justified, leading zeroes. */
+ FORMAT_LEFT /* Left justified, no leading zeroes. */
+};
+
+/* Default section delimiter characters. */
+#define DEFAULT_SECTION_DELIMITERS "\\:"
+
+/* Types of input lines: either one of the section delimiters,
+ or text to output. A line is a Header, Body or Footer delimiter
+ when it consists of exactly three, two or one copies of the
+ section delimiter string; every other line is Text. */
+enum section
+{
+ Header, Body, Footer, Text
+};
+
+/* Format of body lines (-b). Only the first character is examined:
+ `a' numbers all lines, `t' nonempty lines, `n' no lines, and `p'
+ lines matching the regex that follows it. */
+char *body_type = "t";
+
+/* Format of header lines (-h); same encoding as `body_type'. */
+char *header_type = "n";
+
+/* Format of footer lines (-f); same encoding as `body_type'. */
+char *footer_type = "n";
+
+/* Format currently being used (body, header, or footer). */
+char *current_type;
+
+/* Regex for body lines to number (-bp). */
+struct re_pattern_buffer body_regex;
+
+/* Regex for header lines to number (-hp). */
+struct re_pattern_buffer header_regex;
+
+/* Regex for footer lines to number (-fp). */
+struct re_pattern_buffer footer_regex;
+
+/* Pointer to current regex, if any. */
+struct re_pattern_buffer *current_regex = NULL;
+
+/* Separator string to print after line number (-s). */
+char *separator_str = "\t";
+
+/* Input section delimiter string (-d). */
+char *section_del = DEFAULT_SECTION_DELIMITERS;
+
+/* Header delimiter string: three copies of `section_del'. */
+char *header_del = NULL;
+
+/* Header section delimiter length. */
+int header_del_len;
+
+/* Body delimiter string: two copies of `section_del'. */
+char *body_del = NULL;
+
+/* Body section delimiter length. */
+int body_del_len;
+
+/* Footer delimiter string: one copy of `section_del'. */
+char *footer_del = NULL;
+
+/* Footer section delimiter length. */
+int footer_del_len;
+
+/* Input buffer. */
+struct linebuffer line_buf;
+
+/* printf format string for line number, built by `build_print_fmt';
+ it takes the line number as its only argument. */
+char *print_fmt;
+
+/* String of blanks printed in place of the number and separator on
+ unnumbered lines; as wide as `lineno_width' plus the separator. */
+char *print_no_line_fmt = NULL;
+
+/* Starting line number on each page (-v). */
+int page_start = 1;
+
+/* Line number increment (-i). */
+int page_incr = 1;
+
+/* If TRUE, reset line number at start of each page; -p turns this
+ off. */
+int reset_numbers = TRUE;
+
+/* Number of blank lines to consider to be one line for numbering (-l). */
+int blank_join = 1;
+
+/* Width of line numbers (-w). */
+int lineno_width = 6;
+
+/* Line number format (-n). */
+enum number_format lineno_format = FORMAT_RIGHT_NOLZ;
+
+/* Current print line number. */
+int line_no;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+enum section check_section ();
+char *xmalloc ();
+char *xrealloc ();
+int build_type_arg ();
+int nl_file ();
+void usage ();
+void process_file ();
+void proc_header ();
+void proc_body ();
+void proc_footer ();
+void proc_text ();
+void print_lineno ();
+void build_print_fmt ();
+void error ();
+
+/* Long option names. Each one returns the code of the equivalent
+ short option, so `main' needs a single case per option; all take an
+ argument except `no-renumber'. */
+struct option longopts[] =
+{
+ {"header-numbering", 1, NULL, 'h'},
+ {"body-numbering", 1, NULL, 'b'},
+ {"footer-numbering", 1, NULL, 'f'},
+ {"first-page", 1, NULL, 'v'},
+ {"page-increment", 1, NULL, 'i'},
+ {"no-renumber", 0, NULL, 'p'},
+ {"join-blank-lines", 1, NULL, 'l'},
+ {"number-separator", 1, NULL, 's'},
+ {"number-width", 1, NULL, 'w'},
+ {"number-format", 1, NULL, 'n'},
+ {"section-delimiter", 1, NULL, 'd'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Parse the options, derive the delimiter strings and output formats
+   from them, then number each named file (or standard input when no
+   file is named) onto standard output.  The exit status is nonzero
+   if any file could not be processed.  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c, exit_status = 0;
+  int del_len;                  /* Length of one section delimiter.  */
+  int blank_width;              /* Width of the unnumbered-line prefix.  */
+
+  program_name = argv[0];
+  have_read_stdin = 0;
+
+  while ((c = getopt_long (argc, argv, "h:b:f:v:i:pl:s:w:n:d:", longopts,
+                           (int *) 0)) != EOF)
+    {
+      switch (c)
+        {
+        case 'h':
+          if (build_type_arg (&header_type, &header_regex) != TRUE)
+            usage ();
+          break;
+        case 'b':
+          if (build_type_arg (&body_type, &body_regex) != TRUE)
+            usage ();
+          break;
+        case 'f':
+          if (build_type_arg (&footer_type, &footer_regex) != TRUE)
+            usage ();
+          break;
+        case 'v':
+          page_start = atoi (optarg);
+          break;
+        case 'i':
+          page_incr = atoi (optarg);
+          if (page_incr < 1)
+            page_incr = 1;
+          break;
+        case 'p':
+          reset_numbers = FALSE;
+          break;
+        case 'l':
+          blank_join = atoi (optarg);
+          break;
+        case 's':
+          separator_str = optarg;
+          break;
+        case 'w':
+          lineno_width = atoi (optarg);
+          if (lineno_width < 1)
+            lineno_width = 1;
+          break;
+        case 'n':
+          /* Only the first two characters of the argument count.  */
+          if (optarg[0] == 'l' && optarg[1] == 'n')
+            lineno_format = FORMAT_LEFT;
+          else if (optarg[0] == 'r' && optarg[1] == 'n')
+            lineno_format = FORMAT_RIGHT_NOLZ;
+          else if (optarg[0] == 'r' && optarg[1] == 'z')
+            lineno_format = FORMAT_RIGHT_LZ;
+          else
+            usage ();
+          break;
+        case 'd':
+          section_del = optarg;
+          break;
+        default:
+          usage ();
+          break;
+        }
+    }
+
+  /* Initialize the section delimiters: the header, body and footer
+     delimiters are three, two and one copies of `section_del'.  */
+  del_len = strlen (section_del);
+
+  header_del_len = del_len * 3;
+  header_del = xmalloc (header_del_len + 1);
+  sprintf (header_del, "%s%s%s", section_del, section_del, section_del);
+
+  body_del_len = del_len * 2;
+  body_del = xmalloc (body_del_len + 1);
+  sprintf (body_del, "%s%s", section_del, section_del);
+
+  footer_del_len = del_len;
+  footer_del = xmalloc (footer_del_len + 1);
+  strcpy (footer_del, section_del);
+
+  /* Initialize the input buffer.  */
+  initbuffer (&line_buf);
+
+  /* Unnumbered lines get blanks where the number and the separator
+     would have been.  */
+  blank_width = lineno_width + strlen (separator_str);
+  print_no_line_fmt = xmalloc (blank_width + 1);
+  memset (print_no_line_fmt, ' ', blank_width);
+  print_no_line_fmt[blank_width] = '\0';
+
+  line_no = page_start;
+  current_type = body_type;
+  current_regex = &body_regex;
+  build_print_fmt ();
+
+  /* Main processing.  */
+
+  if (optind != argc)
+    for (; optind < argc; optind++)
+      exit_status |= nl_file (argv[optind]);
+  else
+    exit_status |= nl_file ("-");
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    {
+      error (0, errno, "-");
+      exit_status = 1;
+    }
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, 0, "write error");
+
+  exit (exit_status);
+}
+
+/* Process file FILE to standard output; the name `-' means standard
+   input.  Return 0 if successful, 1 if the file could not be opened,
+   read or closed (a diagnostic has then been printed).
+
+   A stream opened here is always closed before returning, including
+   after a read error.  Standard input is left open for the caller.  */
+
+int
+nl_file (file)
+     char *file;
+{
+  FILE *stream;
+  int is_stdin = !strcmp (file, "-");
+
+  if (is_stdin)
+    {
+      have_read_stdin = 1;
+      stream = stdin;
+    }
+  else
+    {
+      stream = fopen (file, "r");
+      if (stream == NULL)
+        {
+          error (0, errno, "%s", file);
+          return 1;
+        }
+    }
+
+  process_file (stream);
+
+  if (ferror (stream))
+    {
+      error (0, errno, "%s", file);
+      /* The failure is already reported; just avoid leaking the
+         stream.  */
+      if (!is_stdin)
+        fclose (stream);
+      return 1;
+    }
+  if (is_stdin)
+    clearerr (stream);          /* Also clear EOF.  */
+  else if (fclose (stream) == EOF)
+    {
+      error (0, errno, "%s", file);
+      return 1;
+    }
+  return 0;
+}
+
+/* Read FP line by line into `line_buf' until `readline' reports that
+   nothing is left, handing each line to the routine for its kind.  */
+
+void
+process_file (fp)
+     FILE *fp;
+{
+  while (readline (&line_buf, fp))
+    {
+      enum section kind = check_section ();
+
+      if (kind == Text)
+        proc_text ();
+      else if (kind == Header)
+        proc_header ();
+      else if (kind == Body)
+        proc_body ();
+      else if (kind == Footer)
+        proc_footer ();
+    }
+}
+
+/* Return the type of line in `line_buf': Header, Body or Footer if the
+   line consists of exactly three, two or one copies of the section
+   delimiter, Text otherwise.  An empty section delimiter matches
+   nothing, so every line is then Text.  */
+
+enum section
+check_section ()
+{
+  /* `footer_del_len' is the length of a single delimiter.  Use it,
+     not a fixed 2, so that `-d' arguments of any length work and the
+     comparison never reads past the end of `section_del'.  */
+  if (footer_del_len == 0)
+    return Text;
+
+  /* Quick rejection: every delimiter line starts with one copy.  */
+  if (line_buf.length < footer_del_len
+      || memcmp (line_buf.buffer, section_del, footer_del_len))
+    return Text;
+
+  if (line_buf.length == header_del_len
+      && !memcmp (line_buf.buffer, header_del, header_del_len))
+    return Header;
+  if (line_buf.length == body_del_len
+      && !memcmp (line_buf.buffer, body_del, body_del_len))
+    return Body;
+  if (line_buf.length == footer_del_len
+      && !memcmp (line_buf.buffer, footer_del, footer_del_len))
+    return Footer;
+  return Text;
+}
+
+/* Handle a header delimiter line: print an empty line in its place,
+   restart the numbering if requested, and make the header style the
+   current one.  */
+
+void
+proc_header ()
+{
+  putchar ('\n');
+  if (reset_numbers)
+    line_no = page_start;
+  current_regex = &header_regex;
+  current_type = header_type;
+}
+
+/* Handle a body delimiter line: print an empty line in its place and
+   make the body style the current one.  */
+
+void
+proc_body ()
+{
+  putchar ('\n');
+  current_regex = &body_regex;
+  current_type = body_type;
+}
+
+/* Handle a footer delimiter line: print an empty line in its place
+   and make the footer style the current one.  */
+
+void
+proc_footer ()
+{
+  putchar ('\n');
+  current_regex = &footer_regex;
+  current_type = footer_type;
+}
+
+/* Process a regular text line in `line_buf': print either its line
+   number or an equally wide run of blanks, as the first character of
+   `current_type' dictates, then the line itself and a newline.
+
+   `print_no_line_fmt' is plain text, so it is written with fputs
+   rather than being interpreted as a printf format.  */
+
+void
+proc_text ()
+{
+  static int blank_lines = 0;   /* Consecutive blank lines so far.  */
+
+  switch (*current_type)
+    {
+    case 'a':
+      if (blank_join > 1)
+        {
+          /* Number every nonblank line, but only each
+             `blank_join'th line of a run of blank ones.  */
+          if (line_buf.length || ++blank_lines == blank_join)
+            {
+              print_lineno ();
+              blank_lines = 0;
+            }
+          else
+            fputs (print_no_line_fmt, stdout);
+        }
+      else
+        print_lineno ();
+      break;
+    case 't':
+      if (line_buf.length)
+        print_lineno ();
+      else
+        fputs (print_no_line_fmt, stdout);
+      break;
+    case 'n':
+      fputs (print_no_line_fmt, stdout);
+      break;
+    case 'p':
+      if (re_search (current_regex, line_buf.buffer, line_buf.length,
+                     0, line_buf.length, (struct re_registers *) 0) < 0)
+        fputs (print_no_line_fmt, stdout);
+      else
+        print_lineno ();
+      break;
+    }
+  fwrite (line_buf.buffer, sizeof (char), line_buf.length, stdout);
+  putchar ('\n');
+}
+
+/* Print the current line number using `print_fmt' and advance the
+   number by `page_incr'.  */
+
+void
+print_lineno ()
+{
+  int current = line_no;
+
+  line_no = current + page_incr;
+  printf (print_fmt, current);
+}
+
+/* Build `print_fmt', the printf format used for numbered lines, from
+   `lineno_format', `lineno_width' and `separator_str'.  The result
+   has the form `%', optional flag, width, `d', followed by the
+   separator text.  Every `%' in the separator is doubled so that
+   printf prints it literally instead of treating it as a conversion.  */
+
+void
+build_print_fmt ()
+{
+  char *flag = "";
+  char *src, *dst;
+
+  /* Space for `%', one flag character, the digits of the width
+     (3 * sizeof (int) is a safe upper bound), `d' and the final NUL,
+     plus the separator with each character possibly doubled.  */
+  print_fmt = xmalloc (2 * strlen (separator_str) + 3 * sizeof (int) + 4);
+
+  switch (lineno_format)
+    {
+    case FORMAT_RIGHT_NOLZ:
+      flag = "";
+      break;
+    case FORMAT_RIGHT_LZ:
+      flag = "0";
+      break;
+    case FORMAT_LEFT:
+      flag = "-";
+      break;
+    }
+  sprintf (print_fmt, "%%%s%dd", flag, lineno_width);
+
+  dst = print_fmt + strlen (print_fmt);
+  for (src = separator_str; *src; ++src)
+    {
+      if (*src == '%')
+        *dst++ = '%';
+      *dst++ = *src;
+    }
+  *dst = '\0';
+}
+
+/* Interpret `optarg' as a numbering style and store it through TYPEP.
+   For the styles `a', `t' and `n' nothing else happens.  For `p' the
+   rest of the argument is compiled as a regular expression into
+   REGEXP, `optarg' is left pointing at that expression, and a
+   compilation error is reported through `error'.  Return TRUE for a
+   recognized style, FALSE otherwise.  */
+
+int
+build_type_arg (typep, regexp)
+     char **typep;
+     struct re_pattern_buffer *regexp;
+{
+  char *errmsg;
+  int optlen;
+
+  if (*optarg == 'a' || *optarg == 't' || *optarg == 'n')
+    {
+      *typep = optarg;
+      return TRUE;
+    }
+  if (*optarg != 'p')
+    return FALSE;
+
+  /* The style keeps pointing at the `p'; the pattern starts after it.  */
+  *typep = optarg;
+  ++optarg;
+
+  optlen = strlen (optarg);
+  regexp->allocated = optlen * 2;
+  regexp->buffer = (unsigned char *) xmalloc (regexp->allocated);
+  regexp->translate = NULL;
+  regexp->fastmap = xmalloc (256);
+  regexp->fastmap_accurate = 0;
+  errmsg = re_compile_pattern (optarg, optlen, regexp);
+  if (errmsg)
+    error (1, 0, "%s", errmsg);
+
+  return TRUE;
+}
+
+/* Print a usage message on stderr, naming the program as
+ `program_name', and quit with exit status 2. Does not return. */
+
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-h header-style] [-b body-style] [-f footer-style] [-p] [-d cc]\n\
+ [-v start-number] [-i increment] [-l lines] [-s line-separator]\n\
+ [-w line-no-width] [-n {ln,rn,rz}] [--header-numbering=style]\n\
+ [--body-numbering=style] [--footer-numbering=style]\n\
+ [--first-page=number] [--page-increment=number] [--no-renumber]\n\
+ [--join-blank-lines=number] [--number-separator=string]\n\
+ [--number-width=number] [--number-format={ln,rn,rz}]\n\
+ [--section-delimiter=cc] [file...]\n",
+ program_name);
+ exit (2);
+}
diff --git a/src/od.c b/src/od.c
new file mode 100644
index 000000000..f13c6b7bc
--- /dev/null
+++ b/src/od.c
@@ -0,0 +1,1697 @@
+/* od -- dump in octal (and other formats) the contents of files
+ Copyright (C) 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Jim Meyering. */
+
+/* AIX requires this to be the first thing in the file. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not HAVE_ALLOCA_H */
+#ifdef _AIX
+ #pragma alloca
+#else /* not _AIX */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#include <stdio.h>
+#include <ctype.h>
+#include <assert.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+#if defined(__GNUC__) || defined(STDC_HEADERS)
+#include <float.h>
+#endif
+
+/* LONG_DOUBLE is the widest floating point type this file formats:
+   `long double' when compiled with GCC, plain `double' otherwise. */
+#ifdef __GNUC__
+typedef long double LONG_DOUBLE;
+#else
+typedef double LONG_DOUBLE;
+#endif
+
+#if HAVE_LIMITS_H
+#include <limits.h>
+#endif
+/* Fallback values for limits that are still undefined at this point
+   (for example because <limits.h> was not included above). */
+#ifndef SCHAR_MAX
+#define SCHAR_MAX 127
+#endif
+#ifndef SHRT_MAX
+#define SHRT_MAX 32767
+#endif
+#ifndef ULONG_MAX
+#define ULONG_MAX ((unsigned long) ~(unsigned long) 0)
+#endif
+
+/* Non-zero when the strings A and B compare equal with strcmp. */
+#define STREQ(a,b) (strcmp((a), (b)) == 0)
+
+/* The larger of A and B.  An argument may be evaluated twice, so do
+   not pass expressions with side effects. */
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+/* The smaller of A and B.  An argument may be evaluated twice, so do
+   not pass expressions with side effects. */
+#ifndef MIN
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+/* The default number of input bytes per output line. */
+#define DEFAULT_BYTES_PER_BLOCK 16
+
+/* The number of decimal digits of precision in a float.
+   This fallback is used only when FLT_DIG is not already defined. */
+#ifndef FLT_DIG
+#define FLT_DIG 7
+#endif
+
+/* The number of decimal digits of precision in a double.
+   This fallback is used only when DBL_DIG is not already defined. */
+#ifndef DBL_DIG
+#define DBL_DIG 15
+#endif
+
+/* The number of decimal digits of precision in a long double.
+   When LDBL_DIG is not already defined, fall back to DBL_DIG. */
+#ifndef LDBL_DIG
+#define LDBL_DIG DBL_DIG
+#endif
+
+/* Old-style (unprototyped) declarations of helper functions that the
+   code below calls. */
+char *xmalloc ();
+char *xrealloc ();
+void error ();
+
+/* The operand sizes a format spec can name.  NO_SIZE marks the absence
+   of a type.  The enumerators are used as indices into width_bytes[],
+   so the two must be kept in the same order. */
+enum size_spec
+{
+ NO_SIZE,
+ CHAR,
+ SHORT,
+ INT,
+ LONG,
+ FP_SINGLE,
+ FP_DOUBLE,
+ FP_LONG_DOUBLE
+};
+
+/* The kinds of output conversion; one of these is stored in the `fmt'
+   member of each struct tspec. */
+enum output_format
+{
+ SIGNED_DECIMAL,
+ UNSIGNED_DECIMAL,
+ OCTAL,
+ HEXADECIMAL,
+ FLOATING_POINT,
+ NAMED_CHARACTER,
+ CHARACTER
+};
+
+/* Result codes returned by my_strtoul and reported by uint_fatal_error:
+   success, no number could be parsed, an unacceptable character follows
+   the number, or the value does not fit in an unsigned long. */
+enum strtoul_error
+{
+ UINT_OK, UINT_INVALID, UINT_INVALID_SUFFIX_CHAR, UINT_OVERFLOW
+};
+typedef enum strtoul_error strtoul_error;
+
+/* Each output format specification (from POSIX `-t spec' or from
+ old-style options) is represented by one of these structures. */
+struct tspec
+{
+ /* The kind of conversion to apply. */
+ enum output_format fmt;
+ /* The size of each operand. */
+ enum size_spec size;
+ /* Writes one output line; write_block calls it as
+    (*print_function) (n_bytes, block, fmt_string). */
+ void (*print_function) ();
+ /* The printf format handed to print_function.  decode_one_format
+    sets it to NULL for the `a' and `c' formats. */
+ char *fmt_string;
+};
+
+/* Convert the number of 8-bit bytes of a binary representation to
+ the number of characters (digits + sign if the type is signed)
+ required to represent the same quantity in the specified base/type.
+ For example, a 32-bit (4-byte) quantity may require a field width
+ as wide as the following for these types:
+ 11 unsigned octal
+ 11 signed decimal
+ 10 unsigned decimal
+ 8 unsigned hexadecimal
+ Each table is indexed by the byte count and has entries for 0
+ through 16 bytes. */
+
+static const unsigned int bytes_to_oct_digits[] =
+{0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
+
+static const unsigned int bytes_to_signed_dec_digits[] =
+{1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40};
+
+static const unsigned int bytes_to_unsigned_dec_digits[] =
+{0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39};
+
+static const unsigned int bytes_to_hex_digits[] =
+{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
+
+/* Convert enum size_spec to the size of the named type.  The entries
+ are in the same order as the enumerators; the leading -1 is the
+ entry for NO_SIZE. */
+static const int width_bytes[] =
+{
+ -1,
+ sizeof (char),
+ sizeof (short int),
+ sizeof (int),
+ sizeof (long int),
+ sizeof (float),
+ sizeof (double),
+ sizeof (LONG_DOUBLE)
+};
+
+/* Names for some non-printing characters.  The table is indexed by
+ character code, 0 through 32 (octal 040); print_named_ascii uses it
+ for codes in that range. */
+static const char *const charname[33] =
+{
+ "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
+ "bs", "ht", "nl", "vt", "ff", "cr", "so", "si",
+ "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
+ "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
+ "sp"
+};
+
+/* A printf control string for printing a file offset.  When it is
+ NULL, format_address returns an empty string and dump omits the
+ final offset line. */
+static const char *output_address_fmt_string;
+
+/* FIXME: make this the number of octal digits in an unsigned long. */
+#define MAX_ADDRESS_LENGTH 13
+/* The buffer format_address formats an offset into. */
+static char address_fmt_buffer[MAX_ADDRESS_LENGTH + 1];
+/* Printed by write_block in place of the offset on the output lines
+ for the second and later format specs. */
+static char address_pad[MAX_ADDRESS_LENGTH + 1];
+
+/* The minimum number of printable characters that dump_strings
+ requires before it treats a run as a string. */
+static unsigned long int string_min;
+/* NOTE(review): presumably non-zero when strings mode was requested;
+ it is not set or read in this part of the file -- confirm in main. */
+static unsigned long int flag_dump_strings;
+
+/* The number of input bytes to skip before formatting and writing.
+ It is also the offset printed for the first output line. */
+static unsigned long int n_bytes_to_skip = 0;
+
+/* When non-zero, MAX_BYTES_TO_FORMAT is the maximum number of bytes
+ to be read and formatted. Otherwise all input is formatted. */
+static int limit_bytes_to_format = 0;
+
+/* The maximum number of bytes that will be formatted. This
+ value is used only when LIMIT_BYTES_TO_FORMAT is non-zero. */
+static unsigned long int max_bytes_to_format;
+
+/* When non-zero and two or more consecutive blocks are equal, format
+ only the first block and output an asterisk alone on the following
+ line to indicate that identical blocks have been elided. */
+static int abbreviate_duplicate_blocks = 1;
+
+/* An array of specs describing how to format each input block.
+ decode_format_string appends to it and grows it as needed. */
+static struct tspec *spec;
+
+/* The number of format specs. */
+static unsigned int n_specs;
+
+/* The allocated length of SPEC. */
+static unsigned int n_specs_allocated;
+
+/* The number of input bytes formatted per output line. It must be
+ a multiple of the least common multiple of the sizes associated with
+ the specified output types. It should be as large as possible, but
+ no larger than 16 -- unless specified with the -w option. */
+static unsigned int bytes_per_block;
+
+/* Human-readable representation of *file_list (for error messages).
+ It differs from *file_list only when *file_list is "-". */
+static char const *input_filename;
+
+/* A NULL-terminated list of the file-arguments from the command line.
+ If no file-arguments were specified, this variable is initialized
+ to { "-", NULL }.  skip, read_char and read_block advance this
+ pointer as input files are used up. */
+static char const *const *file_list;
+
+/* The input stream associated with the current file.  It is opened
+ by skip, read_char and read_block. */
+static FILE *in_stream;
+
+/* The widest integral type this program formats. */
+#define LONGEST_INTEGRAL_TYPE long int
+
+/* Map a size in bytes to the size_spec of the integral type of that
+ size.  main fills the table in; entries for sizes that match no
+ integral type are NO_SIZE. */
+#define MAX_INTEGRAL_TYPE_SIZE sizeof(LONGEST_INTEGRAL_TYPE)
+static enum size_spec integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1];
+
+/* Likewise for the floating point types. */
+#define MAX_FP_TYPE_SIZE sizeof(LONG_DOUBLE)
+static enum size_spec fp_type_size[MAX_FP_TYPE_SIZE + 1];
+
+/* The long options and the short option letter each one maps to.
+ The second member tells whether the option takes an argument:
+ 0 for none, 1 for a required one and 2 for an optional one.  The
+ all-zero entry terminates the table. */
+static struct option long_options[] =
+{
+ /* POSIX options. */
+ {"skip-bytes", 1, NULL, 'j'},
+ {"address-radix", 1, NULL, 'A'},
+ {"read-bytes", 1, NULL, 'N'},
+ {"format", 1, NULL, 't'},
+ {"output-duplicates", 0, NULL, 'v'},
+
+ /* non-POSIX options. */
+ {"strings", 2, NULL, 's'},
+ {"width", 2, NULL, 'w'},
+ {NULL, 0, NULL, 0}
+};
+
+/* The name this program was run with.  main sets it from argv[0]. */
+char *program_name;
+
+/* Print a usage message on standard error, with `program_name'
+ substituted for the %s in the text, then exit with status 1. */
+static void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-abcdfhiloxv] [-s[bytes]] [-w[bytes]] [-A radix] [-j bytes]\n\
+ [-N bytes] [-t type] [--skip-bytes=bytes] [--address-radix=radix]\n\
+ [--read-bytes=bytes] [--format=type] [--output-duplicates]\n\
+ [--strings[=bytes]] [--width[=bytes]] [file...]\n",
+ program_name);
+ exit (1);
+}
+
+/* Return the greatest common divisor of U and V, computed with
+   Euclid's algorithm.  When V is zero the result is U, so the
+   result for two zeros is zero.  */
+
+static unsigned int
+gcd (u, v)
+     unsigned int u;
+     unsigned int v;
+{
+  for (;;)
+    {
+      unsigned int remainder;
+
+      if (v == 0)
+        return u;
+
+      /* Replace (U, V) by (V, U mod V); the divisor is unchanged.  */
+      remainder = u % v;
+      u = v;
+      v = remainder;
+    }
+}
+
+/* Return the least common multiple of U and V, or zero when both are
+   zero.  */
+
+static unsigned int
+lcm (u, v)
+     unsigned int u;
+     unsigned int v;
+{
+  unsigned int divisor = gcd (u, v);
+
+  if (divisor == 0)
+    return 0;
+
+  /* Divide before multiplying.  DIVISOR divides U exactly, so the
+     quotient is exact and no intermediate value is larger than the
+     result; `u * v / divisor' could wrap around in the product.  */
+  return u / divisor * v;
+}
+
+/* Convert the string S to an unsigned long in base BASE (0, or 2
+   through 36, as for strtoul) and store the result in *VAL.
+   When ALLOW_BKM_SUFFIX is zero the number must extend to the end of
+   S.  Otherwise the number may be followed by `b', `k' or `m', which
+   multiply it by 512, 1024 or 1024 * 1024 respectively.
+   Return UINT_OK on success.  Otherwise leave *VAL alone and return
+   UINT_INVALID when S does not start with a number,
+   UINT_INVALID_SUFFIX_CHAR when an unacceptable character follows the
+   number, or UINT_OVERFLOW when the (scaled) value does not fit.  */
+
+static strtoul_error
+my_strtoul (s, base, val, allow_bkm_suffix)
+     const char *s;
+     int base;
+     long unsigned int *val;
+     int allow_bkm_suffix;
+{
+  char *p;
+  unsigned long int tmp;
+
+  assert (0 <= base && base <= 36);
+
+  /* strtoul reports overflow only through errno and never clears it,
+     so clear it first; otherwise a value left over from an earlier
+     library call is mistaken for an overflow.  */
+  errno = 0;
+  tmp = strtoul (s, &p, base);
+  if (errno != 0)
+    return UINT_OVERFLOW;
+  if (p == s)
+    return UINT_INVALID;
+  if (!allow_bkm_suffix)
+    {
+      if (*p == '\0')
+        {
+          *val = tmp;
+          return UINT_OK;
+        }
+      else
+        return UINT_INVALID_SUFFIX_CHAR;
+    }
+
+  switch (*p)
+    {
+    case '\0':
+      break;
+
+/* Multiply X by SCALE_FACTOR, returning UINT_OVERFLOW from the
+   enclosing function if the product would not fit in an unsigned long.
+   SCALE_FACTOR is parenthesized so that an argument such as
+   `1024 * 1024' divides as a unit, and the test is done in integer
+   arithmetic because a double cannot represent every unsigned long.  */
+#define BKM_SCALE(x,scale_factor) \
+      do \
+        { \
+          if ((x) > ULONG_MAX / (scale_factor)) \
+            return UINT_OVERFLOW; \
+          (x) *= (scale_factor); \
+        } \
+      while (0)
+
+    case 'b':
+      BKM_SCALE (tmp, 512);
+      break;
+
+    case 'k':
+      BKM_SCALE (tmp, 1024);
+      break;
+
+    case 'm':
+      BKM_SCALE (tmp, 1024L * 1024L);
+      break;
+
+    default:
+      return UINT_INVALID_SUFFIX_CHAR;
+      break;
+    }
+
+  *val = tmp;
+  return UINT_OK;
+}
+
+/* Report, through error () with status 2, why the conversion of the
+   string STR failed.  ARGUMENT_TYPE_STRING describes what STR was
+   meant to be and ERR is the failure code from the conversion.  ERR
+   must not be UINT_OK; the function aborts if it is.  */
+
+static void
+uint_fatal_error (str, argument_type_string, err)
+     const char *str;
+     const char *argument_type_string;
+     strtoul_error err;
+{
+  if (err == UINT_OK)
+    abort ();
+
+  if (err == UINT_INVALID)
+    error (2, 0, "invalid %s `%s'", argument_type_string, str);
+  else if (err == UINT_INVALID_SUFFIX_CHAR)
+    error (2, 0, "invalid character following %s `%s'",
+           argument_type_string, str);
+  else if (err == UINT_OVERFLOW)
+    error (2, 0, "%s `%s' larger than maximum unsigned long",
+           argument_type_string, str);
+}
+
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   signed char values.  Each value is printed with FMT_STRING, which
+   is given the value and then the character to print after it: a
+   space between values and a newline after the last one.  Report a
+   write failure with error ().  */
+
+static void
+print_s_char (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  int i, err;
+  err = 0;
+  for (i = n_bytes; i > 0; i--)
+    {
+      int tmp = (unsigned) *(unsigned char *) block;
+      /* Map the upper half of the unsigned range onto the negative
+         values by subtracting the size of the range.  The former
+         `SCHAR_MAX - tmp' reversed the order of the negative values
+         (it printed -1 for 128 and -128 for 255).  */
+      if (tmp > SCHAR_MAX)
+        tmp -= 2 * (SCHAR_MAX + 1);
+      assert (tmp <= SCHAR_MAX);
+      err |= (printf (fmt_string, tmp, (i == 1 ? '\n' : ' ')) == EOF);
+      block += sizeof (unsigned char);
+    }
+  if (err)
+    error (2, errno, "standard output");
+}
+
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   unsigned char values.  Each value is printed with FMT_STRING, which
+   is given the value and then the character to print after it: a
+   space between values and a newline after the last one.  Report a
+   write failure with error ().  */
+
+static void
+print_char (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const unsigned char *p = (const unsigned char *) block;
+  int remaining = n_bytes;
+  int failed = 0;
+
+  while (remaining > 0)
+    {
+      unsigned int value = *p++;
+      int delim = (remaining == 1 ? '\n' : ' ');
+
+      if (printf (fmt_string, value, delim) == EOF)
+        failed = 1;
+      remaining--;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   signed short values.  Each value is printed with FMT_STRING, which
+   is given the value and then the character to print after it: a
+   space between values and a newline after the last one.  Report a
+   write failure with error ().  */
+
+static void
+print_s_short (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  int i, err;
+  err = 0;
+  for (i = n_bytes / sizeof (unsigned short); i > 0; i--)
+    {
+      int tmp = (unsigned) *(unsigned short *) block;
+      /* Map the upper half of the unsigned range onto the negative
+         values by subtracting the size of the range.  The former
+         `SHRT_MAX - tmp' reversed the order of the negative values.  */
+      if (tmp > SHRT_MAX)
+        tmp -= 2 * ((long) SHRT_MAX + 1);
+      assert (tmp <= SHRT_MAX);
+      err |= (printf (fmt_string, tmp, (i == 1 ? '\n' : ' ')) == EOF);
+      block += sizeof (unsigned short);
+    }
+  if (err)
+    error (2, errno, "standard output");
+}
+
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   unsigned short values.  Each value is printed with FMT_STRING,
+   which is given the value and then the character to print after it:
+   a space between values and a newline after the last one.  Report a
+   write failure with error ().  */
+
+static void
+print_short (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const unsigned short *p = (const unsigned short *) block;
+  int remaining = n_bytes / sizeof (unsigned short);
+  int failed = 0;
+
+  while (remaining > 0)
+    {
+      unsigned int value = *p++;
+      int delim = (remaining == 1 ? '\n' : ' ');
+
+      if (printf (fmt_string, value, delim) == EOF)
+        failed = 1;
+      remaining--;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   int-sized values.  Each value is printed with FMT_STRING, which is
+   given the value and then the character to print after it: a space
+   between values and a newline after the last one.  Report a write
+   failure with error ().  */
+
+static void
+print_int (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const unsigned int *p = (const unsigned int *) block;
+  int remaining = n_bytes / sizeof (unsigned int);
+  int failed = 0;
+
+  while (remaining > 0)
+    {
+      unsigned int value = *p++;
+      int delim = (remaining == 1 ? '\n' : ' ');
+
+      if (printf (fmt_string, value, delim) == EOF)
+        failed = 1;
+      remaining--;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   long-sized values.  Each value is printed with FMT_STRING, which is
+   given the value and then the character to print after it: a space
+   between values and a newline after the last one.  Report a write
+   failure with error ().  */
+
+static void
+print_long (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const unsigned long *p = (const unsigned long *) block;
+  int remaining = n_bytes / sizeof (unsigned long);
+  int failed = 0;
+
+  while (remaining > 0)
+    {
+      unsigned long value = *p++;
+      int delim = (remaining == 1 ? '\n' : ' ');
+
+      if (printf (fmt_string, value, delim) == EOF)
+        failed = 1;
+      remaining--;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   float values.  Each value is printed with FMT_STRING, which is
+   given the value and then the character to print after it: a space
+   between values and a newline after the last one.  Report a write
+   failure with error ().  */
+
+static void
+print_float (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const float *p = (const float *) block;
+  int remaining = n_bytes / sizeof (float);
+  int failed = 0;
+
+  while (remaining > 0)
+    {
+      float value = *p++;
+      int delim = (remaining == 1 ? '\n' : ' ');
+
+      if (printf (fmt_string, value, delim) == EOF)
+        failed = 1;
+      remaining--;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   double values.  Each value is printed with FMT_STRING, which is
+   given the value and then the character to print after it: a space
+   between values and a newline after the last one.  Report a write
+   failure with error ().  */
+
+static void
+print_double (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const double *p = (const double *) block;
+  int remaining = n_bytes / sizeof (double);
+  int failed = 0;
+
+  while (remaining > 0)
+    {
+      double value = *p++;
+      int delim = (remaining == 1 ? '\n' : ' ');
+
+      if (printf (fmt_string, value, delim) == EOF)
+        failed = 1;
+      remaining--;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+#ifdef __GNUC__
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   LONG_DOUBLE values.  Each value is printed with FMT_STRING, which
+   is given the value and then the character to print after it: a
+   space between values and a newline after the last one.  Report a
+   write failure with error ().  Compiled only with GCC.  */
+static void
+print_long_double (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const LONG_DOUBLE *p = (const LONG_DOUBLE *) block;
+  int remaining = n_bytes / sizeof (LONG_DOUBLE);
+  int failed = 0;
+
+  while (remaining > 0)
+    {
+      LONG_DOUBLE value = *p++;
+      int delim = (remaining == 1 ? '\n' : ' ');
+
+      if (printf (fmt_string, value, delim) == EOF)
+        failed = 1;
+      remaining--;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+#endif
+
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   named characters.  Only the low seven bits of each byte are used:
+   127 prints as "del", values up to 040 print as their name from
+   charname[], and anything else prints as the character itself.
+   Fields are separated by a space and the line ends with a newline.
+   Report a write failure with error ().  UNUSED_FMT_STRING is
+   ignored.  */
+
+static void
+print_named_ascii (n_bytes, block, unused_fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *unused_fmt_string;
+{
+  const unsigned char *p = (const unsigned char *) block;
+  int remaining;
+
+  for (remaining = n_bytes; remaining > 0; remaining--, p++)
+    {
+      unsigned int low7 = 0x7f & (unsigned int) *p;
+      const char *name;
+      char buf[5];
+
+      if (low7 == 127)
+        name = "del";
+      else if (low7 > 040)
+        {
+          sprintf (buf, " %c", low7);
+          name = buf;
+        }
+      else
+        name = charname[low7];
+
+      if (printf ("%3s%c", name, (remaining == 1 ? '\n' : ' ')) == EOF)
+        error (2, errno, "standard output");
+    }
+}
+
+/* Write the N_BYTES bytes at BLOCK to standard output as one line of
+   characters.  NUL, alert, backspace, form feed, newline, carriage
+   return, tab and vertical tab print as backslash escapes; other
+   printable characters print as themselves and the rest as three
+   octal digits.  Fields are separated by a space and the line ends
+   with a newline.  Report a write failure with error ().
+   UNUSED_FMT_STRING is ignored.  */
+
+static void
+print_ascii (n_bytes, block, unused_fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *unused_fmt_string;
+{
+  /* The characters that have a backslash escape, with the text
+     printed for each.  */
+  static const struct
+  {
+    unsigned int code;
+    const char *text;
+  } escapes[] =
+  {
+    {'\0', " \\0"},
+    {'\007', " \\a"},
+    {'\b', " \\b"},
+    {'\f', " \\f"},
+    {'\n', " \\n"},
+    {'\r', " \\r"},
+    {'\t', " \\t"},
+    {'\v', " \\v"}
+  };
+  const unsigned char *p = (const unsigned char *) block;
+  int remaining;
+
+  for (remaining = n_bytes; remaining > 0; remaining--, p++)
+    {
+      unsigned int c = *p;
+      const char *s = NULL;
+      char buf[5];
+      unsigned int k;
+
+      for (k = 0; k < sizeof escapes / sizeof escapes[0]; k++)
+        if (escapes[k].code == c)
+          {
+            s = escapes[k].text;
+            break;
+          }
+
+      if (s == NULL)
+        {
+          sprintf (buf, (isprint (c) ? " %c" : "%03o"), c);
+          s = (const char *) buf;
+        }
+
+      if (printf ("%3s%c", s, (remaining == 1 ? '\n' : ' ')) == EOF)
+        error (2, errno, "standard output");
+    }
+}
+
+/* Convert a null-terminated (possibly zero-length) string S to an
+   unsigned long integer value.  If S points to a non-digit, set *P to
+   S and *VAL to 0, and return 0.  Otherwise, accumulate the integer
+   value of the string of decimal digits.  If the string of digits
+   represents a value larger than ULONG_MAX, don't modify *VAL or *P
+   and return non-zero.  Otherwise, advance *P to the first non-digit
+   after S, set *VAL to the result of the conversion and return zero.  */
+
+static int
+simple_strtoul (s, p, val)
+     const char *s;
+     const char **p;
+     long unsigned int *val;
+{
+  unsigned long int sum;
+
+  sum = 0;
+  /* The argument of isdigit must be representable as an unsigned char
+     (or be EOF); a plain char may be negative, so convert it first.  */
+  while (isdigit ((unsigned char) *s))
+    {
+      unsigned int c = *s++ - '0';
+      /* True exactly when sum * 10 + c would exceed ULONG_MAX.  */
+      if (sum > (ULONG_MAX - c) / 10)
+        return 1;
+      sum = sum * 10 + c;
+    }
+  *p = s;
+  *val = sum;
+  return 0;
+}
+
+/* If S points to a single valid POSIX-style od format string, put a
+   description of that format in *TSPEC, make *NEXT point at the character
+   following the just-decoded format (if NEXT is non-NULL), and return
+   zero.  If S is not valid, don't modify *NEXT or *TSPEC and return
+   non-zero.  For example, if S were "d4afL" *NEXT would be set to "afL"
+   and *TSPEC would be
+     {
+       fmt = SIGNED_DECIMAL;
+       size = INT or LONG; (whichever integral_type_size[4] resolves to)
+       print_function = print_int; (assuming size == INT)
+       fmt_string = "%011d%c";
+     }
+   The fmt_string stored in *TSPEC comes from xmalloc, except for the
+   `a' and `c' formats, for which it is NULL.  */
+
+static int
+decode_one_format (s, next, tspec)
+     const char *s;
+     const char **next;
+     struct tspec *tspec;
+{
+  enum size_spec size_spec;
+  unsigned long int size;
+  enum output_format fmt;
+  const char *pre_fmt_string;
+  char *fmt_string;
+  void (*print_function) ();
+  const char *p;
+  unsigned int c;
+  int precision;
+
+  assert (tspec != NULL);
+
+  switch (*s)
+    {
+    case 'd':
+    case 'o':
+    case 'u':
+    case 'x':
+      c = *s;
+      ++s;
+
+      /* An optional size follows the type letter: one of the letters
+         C, S, I, L or a decimal byte count.  The default is the size
+         of an int.  */
+      switch (*s)
+        {
+        case 'C':
+          ++s;
+          size = sizeof (char);
+          break;
+
+        case 'S':
+          ++s;
+          size = sizeof (short);
+          break;
+
+        case 'I':
+          ++s;
+          size = sizeof (int);
+          break;
+
+        case 'L':
+          ++s;
+          size = sizeof (long int);
+          break;
+
+        default:
+          if (simple_strtoul (s, &p, &size) != 0)
+            return 1;
+          if (p == s)
+            size = sizeof (int);
+          else
+            {
+              if (size > MAX_INTEGRAL_TYPE_SIZE
+                  || integral_type_size[size] == NO_SIZE)
+                return 1;
+              s = p;
+            }
+          break;
+        }
+
+#define FMT_BYTES_ALLOCATED 9
+      fmt_string = xmalloc (FMT_BYTES_ALLOCATED);
+
+      size_spec = integral_type_size[size];
+
+      /* Build a format of the form "%0<width>[l]<conv>%c".  */
+      switch (c)
+        {
+        case 'd':
+          fmt = SIGNED_DECIMAL;
+          sprintf (fmt_string, "%%0%u%sd%%c",
+                   bytes_to_signed_dec_digits[size],
+                   (size_spec == LONG ? "l" : ""));
+          break;
+
+        case 'o':
+          fmt = OCTAL;
+          sprintf (fmt_string, "%%0%u%so%%c",
+                   bytes_to_oct_digits[size],
+                   (size_spec == LONG ? "l" : ""));
+          break;
+
+        case 'u':
+          fmt = UNSIGNED_DECIMAL;
+          sprintf (fmt_string, "%%0%u%su%%c",
+                   bytes_to_unsigned_dec_digits[size],
+                   (size_spec == LONG ? "l" : ""));
+          break;
+
+        case 'x':
+          fmt = HEXADECIMAL;
+          sprintf (fmt_string, "%%0%u%sx%%c",
+                   bytes_to_hex_digits[size],
+                   (size_spec == LONG ? "l" : ""));
+          break;
+
+        default:
+          abort ();
+        }
+
+      assert (strlen (fmt_string) < FMT_BYTES_ALLOCATED);
+
+      switch (size_spec)
+        {
+        case CHAR:
+          print_function = (fmt == SIGNED_DECIMAL
+                            ? print_s_char
+                            : print_char);
+          break;
+
+        case SHORT:
+          print_function = (fmt == SIGNED_DECIMAL
+                            ? print_s_short
+                            : print_short);
+          break;
+
+        case INT:
+          print_function = print_int;
+          break;
+
+        case LONG:
+          print_function = print_long;
+          break;
+
+        default:
+          abort ();
+        }
+      break;
+
+    case 'f':
+      fmt = FLOATING_POINT;
+      ++s;
+
+      /* An optional size follows: one of the letters F, D, L or a
+         decimal byte count.  The default is the size of a double.  */
+      switch (*s)
+        {
+        case 'F':
+          ++s;
+          size = sizeof (float);
+          break;
+
+        case 'D':
+          ++s;
+          size = sizeof (double);
+          break;
+
+        case 'L':
+          ++s;
+          size = sizeof (LONG_DOUBLE);
+          break;
+
+        default:
+          if (simple_strtoul (s, &p, &size) != 0)
+            return 1;
+          if (p == s)
+            size = sizeof (double);
+          else
+            {
+              if (size > MAX_FP_TYPE_SIZE
+                  || fp_type_size[size] == NO_SIZE)
+                return 1;
+              s = p;
+            }
+          break;
+        }
+      size_spec = fp_type_size[size];
+
+      /* In a printf conversion the flags come first, then the width
+         and the precision, then the length modifier, so `#' has to
+         precede the width; it used to follow the precision, which is
+         not a valid conversion.  A long double argument takes the
+         length modifier `L', not `l'.  */
+      switch (size_spec)
+        {
+        case FP_SINGLE:
+          print_function = print_float;
+          pre_fmt_string = "%%#%d.%de%%c";
+          precision = FLT_DIG;
+          break;
+
+        case FP_DOUBLE:
+          print_function = print_double;
+          pre_fmt_string = "%%#%d.%de%%c";
+          precision = DBL_DIG;
+          break;
+
+#ifdef __GNUC__
+        case FP_LONG_DOUBLE:
+          print_function = print_long_double;
+          pre_fmt_string = "%%#%d.%dLe%%c";
+          precision = LDBL_DIG;
+          break;
+#endif
+
+        default:
+          abort ();
+        }
+
+      /* Leave room for the widest possible expansion of the two `%d'
+         directives (3 digits per byte of an int, plus a sign) and for
+         the terminating NUL, which the old size omitted.  */
+      fmt_string = xmalloc (strlen (pre_fmt_string)
+                            + 2 * (3 * sizeof (int) + 1) + 1);
+      sprintf (fmt_string, pre_fmt_string, precision + 8, precision);
+      break;
+
+    case 'a':
+      ++s;
+      fmt = NAMED_CHARACTER;
+      size_spec = CHAR;
+      fmt_string = NULL;
+      print_function = print_named_ascii;
+      break;
+
+    case 'c':
+      ++s;
+      fmt = CHARACTER;
+      size_spec = CHAR;
+      fmt_string = NULL;
+      print_function = print_ascii;
+      break;
+
+    default:
+      return 1;
+    }
+
+  tspec->size = size_spec;
+  tspec->fmt = fmt;
+  tspec->print_function = print_function;
+  tspec->fmt_string = fmt_string;
+
+  if (next != NULL)
+    *next = s;
+
+  return 0;
+}
+
+/* Decode the POSIX-style od format string S, which may hold several
+   format specs one after another.  Append the description of each one
+   to the global array SPEC, enlarging SPEC when it is full.  Return
+   zero if all of S is valid, non-zero as soon as an invalid spec is
+   found; specs decoded before that point stay in SPEC.  */
+
+static int
+decode_format_string (s)
+     const char *s;
+{
+  assert (s != NULL);
+
+  for (; *s != '\0'; )
+    {
+      struct tspec decoded;
+      const char *rest;
+
+      if (decode_one_format (s, &rest, &decoded))
+        return 1;
+
+      /* A successful decode must have consumed something.  */
+      assert (s != rest);
+      s = rest;
+
+      if (n_specs >= n_specs_allocated)
+        {
+          /* Grow by half, and by at least one element.  */
+          n_specs_allocated = 1 + (3 * n_specs_allocated) / 2;
+          spec = (struct tspec *) xrealloc (spec, (n_specs_allocated
+                                                   * sizeof (struct tspec)));
+        }
+
+      spec[n_specs] = decoded;
+      ++n_specs;
+    }
+
+  return 0;
+}
+
+/* Given a list of one or more input filenames FILE_LIST, set the global
+   file pointer IN_STREAM to position N_SKIP in the concatenation of
+   those files.  If any file operation fails or if there are fewer than
+   N_SKIP bytes in the combined input, give an error message and exit.
+   When possible, use seek- rather than read operations to advance
+   IN_STREAM.  A file name of "-" is interpreted as standard input.  */
+
+static void
+skip (n_skip)
+     long unsigned int n_skip;
+{
+  for ( /*empty */ ; *file_list != NULL; ++file_list)
+    {
+      struct stat file_stats;
+
+      if (STREQ (*file_list, "-"))
+        {
+          input_filename = "standard input";
+          in_stream = stdin;
+        }
+      else
+        {
+          input_filename = *file_list;
+          in_stream = fopen (input_filename, "r");
+          if (in_stream == NULL)
+            error (2, errno, "%s", input_filename);
+        }
+
+      if (n_skip == 0)
+        break;
+
+      /* First try using fseek.  For large offsets, all this work is
+         worthwhile.  If the offset is below some threshold it may be
+         more efficient to move the pointer by reading.  There are two
+         issues when trying to use fseek:
+           - the file must be seekable.
+           - before seeking to the specified position, make sure
+             that the new position is in the current file.
+             Try to do that by getting file's size using stat().
+             But that will work only for regular files and dirs.  */
+
+      if (fstat (fileno (in_stream), &file_stats))
+        error (2, errno, "%s", input_filename);
+
+      /* The st_size field is valid only for regular files and
+         directories.  FIXME: is the preceding true?
+         If the number of bytes left to skip is at least as large as
+         the size of the current file, we can decrement
+         n_skip and go on to the next file.  */
+      if (S_ISREG (file_stats.st_mode) || S_ISDIR (file_stats.st_mode))
+        {
+          if (n_skip >= file_stats.st_size)
+            {
+              n_skip -= file_stats.st_size;
+              if (in_stream != stdin)
+                {
+                  if (fclose (in_stream))
+                    error (2, errno, "%s", input_filename);
+                }
+              continue;
+            }
+          else
+            {
+              if (fseek (in_stream, n_skip, SEEK_SET) == 0)
+                {
+                  n_skip = 0;
+                  break;
+                }
+            }
+        }
+
+      /* fseek didn't work or wasn't attempted; do it the slow way.
+         Counting down N_SKIP itself, instead of an int number of
+         BUFSIZ-sized chunks, keeps very large offsets from being
+         truncated.  */
+
+      while (n_skip > 0)
+        {
+          char buf[BUFSIZ];
+          size_t n_bytes_to_read = (n_skip < BUFSIZ ? n_skip : BUFSIZ);
+          size_t n_bytes_read;
+
+          n_bytes_read = fread (buf, 1, n_bytes_to_read, in_stream);
+          n_skip -= n_bytes_read;
+          if (n_bytes_read != n_bytes_to_read)
+            {
+              if (ferror (in_stream))
+                error (2, errno, "%s", input_filename);
+              else
+                break;
+            }
+        }
+
+      if (n_skip == 0)
+        break;
+
+      /* This file ran out before N_SKIP bytes were skipped.  Close it
+         before going on to the next file; it used to be left open.  */
+      if (in_stream != stdin)
+        {
+          if (fclose (in_stream))
+            error (2, errno, "%s", input_filename);
+        }
+    }
+
+  if (n_skip != 0)
+    error (2, 0, "cannot skip past end of combined input");
+}
+
+/* Return ADDRESS formatted according to the global
+   OUTPUT_ADDRESS_FMT_STRING, or an empty string when that format is
+   NULL.  A non-empty result is held in the static buffer
+   ADDRESS_FMT_BUFFER, so the next call overwrites it.  */
+
+static const char *
+format_address (address)
+     long unsigned int address;
+{
+  if (output_address_fmt_string == NULL)
+    return "";
+
+  sprintf (address_fmt_buffer, output_address_fmt_string, address);
+  return address_fmt_buffer;
+}
+
+/* Write N_BYTES bytes from CURR_BLOCK to standard output once for each
+   of the N_SPEC format specs.  CURRENT_OFFSET is the byte address of
+   CURR_BLOCK in the concatenation of input files, and it is printed
+   (optionally) only before the output line associated with the first
+   format spec.  When duplicate blocks are being abbreviated, the output
+   for a sequence of identical input blocks is the output for the first
+   block followed by an asterisk alone on a line.  It is valid to compare
+   the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK.
+   That condition may be false only for the last input block -- and then
+   only when it has not been padded to length BYTES_PER_BLOCK.  */
+
+static void
+write_block (current_offset, n_bytes, prev_block, curr_block)
+     long unsigned int current_offset;
+     long unsigned int n_bytes;
+     const char *prev_block;
+     const char *curr_block;
+{
+  /* Non-zero until the first block has been handled; the first block
+     has no predecessor to compare against.  */
+  static int first = 1;
+  /* Non-zero while inside a run of duplicates whose asterisk has
+     already been printed.  */
+  static int prev_pair_equal = 0;
+  int is_duplicate;
+
+#define EQUAL_BLOCKS(b1, b2) (bcmp ((b1), (b2), bytes_per_block) == 0)
+
+  is_duplicate = (abbreviate_duplicate_blocks
+                  && !first && n_bytes == bytes_per_block
+                  && EQUAL_BLOCKS (prev_block, curr_block));
+
+  if (!is_duplicate)
+    {
+      int k;
+
+      prev_pair_equal = 0;
+      for (k = 0; k < n_specs; k++)
+        {
+          const char *prefix = (k == 0
+                                ? format_address (current_offset)
+                                : address_pad);
+
+          if (printf ("%s ", prefix) == EOF)
+            error (2, errno, "standard output");
+          (*spec[k].print_function) (n_bytes, curr_block, spec[k].fmt_string);
+        }
+    }
+  else if (!prev_pair_equal)
+    {
+      /* The first repeat in a run: mark the elision once.  Later
+         repeats in the same run print nothing.  */
+      printf ("*\n");
+      prev_pair_equal = 1;
+    }
+
+  first = 0;
+}
+
+/* Read and return a single byte from the concatenation of the input
+   files named in the global array FILE_LIST.  On the first call to this
+   function, the global variable IN_STREAM is expected to be an open
+   stream associated with the input file *FILE_LIST.  If IN_STREAM is
+   at end-of-file, close it and update the global variables IN_STREAM,
+   FILE_LIST, and INPUT_FILENAME so they correspond to the next file in
+   the list.  Then try to read a byte from the newly opened file.
+   Repeat if necessary until *FILE_LIST is NULL.  Upon any read-, open-,
+   or close error give a message and exit.  When EOF is reached for the
+   last file in FILE_LIST, return EOF.  Any subsequent calls return EOF.  */
+
+static int
+read_char ()
+{
+  if (*file_list == NULL)
+    return EOF;
+
+  while (1)
+    {
+      int c;
+
+      c = fgetc (in_stream);
+
+      if (c != EOF)
+        return c;
+
+      /* Ask the stream whether EOF means a read error, as read_block
+         does.  errno is not cleared at end of file and may still hold
+         a value from an unrelated earlier call, so testing it here
+         could turn a normal end of file into a fatal error.  */
+      if (ferror (in_stream))
+        error (2, errno, "%s", input_filename);
+
+      if (in_stream != stdin)
+        if (fclose (in_stream) == EOF)
+          error (2, errno, "%s", input_filename);
+
+      ++file_list;
+      if (*file_list == NULL)
+        return EOF;
+
+      if (STREQ (*file_list, "-"))
+        {
+          input_filename = "standard input";
+          in_stream = stdin;
+        }
+      else
+        {
+          input_filename = *file_list;
+          in_stream = fopen (input_filename, "r");
+          if (in_stream == NULL)
+            error (2, errno, "%s", input_filename);
+        }
+    }
+}
+
+/* Read N bytes into BLOCK from the concatenation of the input files
+   named in the global array FILE_LIST.  On the first call to this
+   function, the global variable IN_STREAM is expected to be an open
+   stream associated with the input file *FILE_LIST.  On subsequent
+   calls, if *FILE_LIST is NULL, don't modify BLOCK and return zero.
+   If all N bytes cannot be read from IN_STREAM, close IN_STREAM and
+   update the global variables IN_STREAM, FILE_LIST, and INPUT_FILENAME.
+   Then try to read the remaining bytes from the newly opened file.
+   Repeat if necessary until *FILE_LIST is NULL.  Upon any read-, open-,
+   or close error give a message and exit.  Otherwise, return the number
+   of bytes read.  */
+
+static unsigned long int
+read_block (n, block)
+     size_t n;
+     char *block;
+{
+  unsigned long int n_filled = 0;
+
+  assert (n > 0 && n <= bytes_per_block);
+  if (n == 0)
+    return 0;
+
+  if (*file_list == NULL)
+    return 0;			/* EOF.  */
+
+  for (;;)
+    {
+      size_t n_wanted = n - n_filled;
+      size_t n_got = fread (block + n_filled, 1, n_wanted, in_stream);
+
+      if (ferror (in_stream))
+        error (2, errno, "%s", input_filename);
+
+      if (n_got == n_wanted)
+        return n;
+
+      /* A short read without an error: this file is used up.  */
+      n_filled += n_got;
+
+      if (in_stream != stdin)
+        if (fclose (in_stream) == EOF)
+          error (2, errno, "%s", input_filename);
+
+      ++file_list;
+      if (*file_list == NULL)
+        return n_filled;
+
+      if (!STREQ (*file_list, "-"))
+        {
+          input_filename = *file_list;
+          in_stream = fopen (input_filename, "r");
+          if (in_stream == NULL)
+            error (2, errno, "%s", input_filename);
+        }
+      else
+        {
+          input_filename = "standard input";
+          in_stream = stdin;
+        }
+    }
+}
+
+/* Return the least common multiple of the operand sizes (in bytes)
+   of all the format specs in SPEC, or 1 when there are none.  */
+
+static int
+get_lcm ()
+{
+  int result = 1;
+  int k = 0;
+
+  while (k < n_specs)
+    {
+      result = lcm (result, width_bytes[(int) spec[k].size]);
+      k++;
+    }
+
+  return result;
+}
+
+/* Read chunks of size BYTES_PER_BLOCK from the input files, write the
+   formatted block to standard output, and repeat until the specified
+   maximum number of bytes has been read or until all input has been
+   processed.  If the last block read is smaller than BYTES_PER_BLOCK
+   and its size is not a multiple of the size associated with a format
+   spec, extend the input block with zero bytes until its length is a
+   multiple of all format spec sizes.  Write the final block.  Finally,
+   write on a line by itself the offset of the byte after the last byte
+   read.  */
+
+static void
+dump ()
+{
+  char *block[2];
+  unsigned long int current_offset;
+  int idx = 0;
+  size_t n_bytes_read = 0;
+  size_t end_offset = n_bytes_to_skip + max_bytes_to_format;
+
+  /* Two buffers are used alternately, so that write_block can compare
+     each block with the one before it.  */
+  block[0] = (char *) alloca (bytes_per_block);
+  block[1] = (char *) alloca (bytes_per_block);
+
+  current_offset = n_bytes_to_skip;
+
+  /* Write every full block.  On exit, N_BYTES_READ is the length of a
+     final short block that is still to be written, or zero.  */
+  while (1)
+    {
+      size_t n_needed = bytes_per_block;
+
+      if (limit_bytes_to_format)
+        {
+          if (current_offset >= end_offset)
+            {
+              /* The limit has been reached and everything read so far
+                 has been written.  Clear the count: it used to be left
+                 at the size of the last full block, so the code below
+                 wrote the other, stale buffer as one more line and
+                 advanced the final offset past the limit.  */
+              n_bytes_read = 0;
+              break;
+            }
+          n_needed = MIN (end_offset - current_offset, bytes_per_block);
+        }
+
+      n_bytes_read = read_block (n_needed, block[idx]);
+      if (n_bytes_read < bytes_per_block)
+        break;
+      assert (n_bytes_read == bytes_per_block);
+      write_block (current_offset, n_bytes_read,
+                   block[!idx], block[idx]);
+      current_offset += n_bytes_read;
+      idx = !idx;
+    }
+
+  if (n_bytes_read > 0)
+    {
+      int l_c_m;
+      size_t bytes_to_write;
+
+      l_c_m = get_lcm ();
+
+      /* Make bytes_to_write the smallest multiple of l_c_m that
+         is at least as large as n_bytes_read.  */
+      bytes_to_write = l_c_m * (int) ((n_bytes_read + l_c_m - 1) / l_c_m);
+
+      bzero (block[idx] + n_bytes_read, bytes_to_write - n_bytes_read);
+      write_block (current_offset, bytes_to_write,
+                   block[!idx], block[idx]);
+      current_offset += n_bytes_read;
+    }
+
+  if (output_address_fmt_string != NULL)
+    {
+      if (printf ("%s\n", format_address (current_offset)) == EOF)
+        error (2, errno, "standard output");
+    }
+}
+
+/* STRINGS mode.  Find each "string constant" in the file.
+   A string constant is a run of at least `string_min' ASCII graphic
+   (or formatting) characters terminated by a null.  Based on a
+   function written by Richard Stallman for a pre-POSIX
+   version of od.  Each string found is written on a line of its own,
+   preceded by its offset unless addresses are turned off.  */
+
+static void
+dump_strings ()
+{
+  int bufsize = MAX (100, string_min);
+  /* BUF always has room for BUFSIZE characters plus a terminating
+     null.  Without the extra byte, `buf[i] = 0' below could store one
+     past the end when the byte limit ends a string of exactly BUFSIZE
+     characters.  */
+  char *buf = xmalloc (bufsize + 1);
+  unsigned long address = n_bytes_to_skip;
+
+  while (1)
+    {
+      int i;
+      int c;
+
+      /* See if the next `string_min' chars are all printing chars.  */
+    tryline:
+
+      /* Stop when too few bytes remain before the limit.  The
+         comparison is written as a sum because subtracting
+         `string_min' from the limit wraps around when
+         `max_bytes_to_format' is the smaller of the two.  */
+      if (limit_bytes_to_format
+          && address + string_min >= n_bytes_to_skip + max_bytes_to_format)
+        break;
+
+      for (i = 0; i < string_min; i++)
+        {
+          c = read_char ();
+          address++;
+          if (c < 0)
+            goto done;
+          if (!isprint (c))
+            /* Found a non-printing.  Try again starting with next char.  */
+            goto tryline;
+          buf[i] = c;
+        }
+
+      /* We found a run of `string_min' printable characters.
+         Now see if it is terminated with a null byte.  */
+      while (!limit_bytes_to_format
+             || address < n_bytes_to_skip + max_bytes_to_format)
+        {
+          if (i == bufsize)
+            {
+              bufsize = 1 + 3 * bufsize / 2;
+              buf = xrealloc (buf, bufsize + 1);
+            }
+          c = read_char ();
+          address++;
+          if (c < 0)
+            goto done;
+          if (c == '\0')
+            break;		/* It is; print this string.  */
+          if (!isprint (c))
+            goto tryline;	/* It isn't; give up on this string.  */
+          buf[i++] = c;		/* String continues; store it all.  */
+        }
+
+      /* If we get here, the string is all printable and null-terminated,
+         so print it.  It is all in `buf' and `i' is its length.  */
+      buf[i] = 0;
+      if (output_address_fmt_string != NULL)
+        {
+          if (printf ("%s ", format_address (address - i - 1)) == EOF)
+            error (2, errno, "standard output");
+        }
+      for (i = 0; (c = buf[i]); i++)
+        {
+          int err;
+          switch (c)
+            {
+            case '\007':
+              err = fputs ("\\a", stdout);
+              break;
+
+            case '\b':
+              err = fputs ("\\b", stdout);
+              break;
+
+            case '\f':
+              err = fputs ("\\f", stdout);
+              break;
+
+            case '\n':
+              err = fputs ("\\n", stdout);
+              break;
+
+            case '\r':
+              err = fputs ("\\r", stdout);
+              break;
+
+            case '\t':
+              err = fputs ("\\t", stdout);
+              break;
+
+            case '\v':
+              err = fputs ("\\v", stdout);
+              break;
+
+            default:
+              err = putchar (c);
+            }
+          if (err == EOF)
+            error (2, errno, "standard output");
+        }
+      if (putchar ('\n') == EOF)
+        error (2, errno, "standard output");
+    }
+
+  /* End of input used to return directly and skip the free.  */
+ done:
+  free (buf);
+}
+
+/* Entry point for od.  Fill in the tables that map a size in bytes to
+   an integral or floating point type, parse the command line, choose
+   the number of bytes shown per output block, and then run either the
+   strings dump or the regular dump.  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int n_files;
+  int i;
+  unsigned int l_c_m;
+  unsigned int address_pad_len;
+  unsigned long int desired_width;
+  int width_specified = 0;
+
+  program_name = argv[0];
+
+  for (i = 0; i <= MAX_INTEGRAL_TYPE_SIZE; i++)
+    integral_type_size[i] = NO_SIZE;
+
+  integral_type_size[sizeof (char)] = CHAR;
+  integral_type_size[sizeof (short int)] = SHORT;
+  integral_type_size[sizeof (int)] = INT;
+  integral_type_size[sizeof (long int)] = LONG;
+
+  for (i = 0; i <= MAX_FP_TYPE_SIZE; i++)
+    fp_type_size[i] = NO_SIZE;
+
+  fp_type_size[sizeof (float)] = FP_SINGLE;
+  /* The array entry for `double' is filled in after that for LONG_DOUBLE
+     so that if `long double' is the same type or if long double isn't
+     supported FP_LONG_DOUBLE will never be used.  */
+  fp_type_size[sizeof (LONG_DOUBLE)] = FP_LONG_DOUBLE;
+  fp_type_size[sizeof (double)] = FP_DOUBLE;
+
+  n_specs = 0;
+  n_specs_allocated = 5;
+  spec = (struct tspec *) xmalloc (n_specs_allocated * sizeof (struct tspec));
+
+  /* Defaults: octal addresses, regular (non-strings) dump.  */
+  output_address_fmt_string = "%07o";
+  address_pad_len = 7;
+  flag_dump_strings = 0;
+
+  while ((c = getopt_long (argc, argv, "abcdfhilos::xw::A:j:N:t:v",
+                           long_options, (int *) 0))
+         != EOF)
+    {
+      strtoul_error err;
+
+      switch (c)
+        {
+        case 'A':
+          /* Address radix; `n' suppresses addresses altogether.  */
+          switch (optarg[0])
+            {
+            case 'd':
+              output_address_fmt_string = "%07d";
+              address_pad_len = 7;
+              break;
+            case 'o':
+              output_address_fmt_string = "%07o";
+              address_pad_len = 7;
+              break;
+            case 'x':
+              output_address_fmt_string = "%06x";
+              address_pad_len = 6;
+              break;
+            case 'n':
+              output_address_fmt_string = NULL;
+              address_pad_len = 0;
+              break;
+            default:
+              error (2, 0,
+                     "invalid output address radix `%c'; it must be one character from [doxn]",
+                     optarg[0]);
+              break;
+            }
+          break;
+
+        case 'j':
+          err = my_strtoul (optarg, 0, &n_bytes_to_skip, 1);
+          if (err != UINT_OK)
+            uint_fatal_error (optarg, "skip argument", err);
+          break;
+
+        case 'N':
+          limit_bytes_to_format = 1;
+
+          err = my_strtoul (optarg, 0, &max_bytes_to_format, 1);
+          if (err != UINT_OK)
+            uint_fatal_error (optarg, "limit argument", err);
+          break;
+
+        case 's':
+          /* The minimum string length is optional and defaults to 3.  */
+          if (optarg == NULL)
+            string_min = 3;
+          else
+            {
+              err = my_strtoul (optarg, 0, &string_min, 1);
+              if (err != UINT_OK)
+                uint_fatal_error (optarg, "minimum string length", err);
+            }
+          ++flag_dump_strings;
+          break;
+
+        case 't':
+          if (decode_format_string (optarg))
+            error (2, 0, "invalid type string `%s'", optarg);
+          break;
+
+        case 'v':
+          abbreviate_duplicate_blocks = 0;
+          break;
+
+          /* The next several cases map the old, pre-POSIX format
+             specification options to the corresponding POSIX format
+             specs.  GNU od accepts any combination of old- and
+             new-style options.  If only POSIX format specs are used
+             and more than one is used, they are accumulated.  If only
+             old-style options are used, all but the last are ignored.
+             If both types of specs are used in the same command, the
+             last old-style option and any POSIX specs following it
+             are accumulated.  To illustrate, `od -c -t a' is the same
+             as `od -t ca', but `od -t a -c' is the same as `od -c'.  */
+
+#define CASE_OLD_ARG(old_char,new_string) \
+        case old_char: \
+          { \
+            const char *next; \
+            int tmp; \
+            assert (n_specs_allocated >= 1); \
+            tmp = decode_one_format (new_string, &next, &(spec[0])); \
+            n_specs = 1; \
+            assert (tmp == 0); \
+            assert (*next == '\0'); \
+          } \
+          break
+
+          CASE_OLD_ARG ('a', "a");
+          CASE_OLD_ARG ('b', "oC");
+          CASE_OLD_ARG ('c', "c");
+          CASE_OLD_ARG ('d', "u2");
+          CASE_OLD_ARG ('f', "fF");
+          CASE_OLD_ARG ('h', "x2");
+          CASE_OLD_ARG ('i', "d2");
+          CASE_OLD_ARG ('l', "d4");
+          CASE_OLD_ARG ('o', "o2");
+          CASE_OLD_ARG ('x', "x2");
+
+#undef CASE_OLD_ARG
+
+        case 'w':
+          width_specified = 1;
+          if (optarg == NULL)
+            {
+              desired_width = 32;
+            }
+          else
+            {
+              err = my_strtoul (optarg, 10, &desired_width, 0);
+              if (err != UINT_OK)
+                error (2, 0, "invalid width specification `%s'", optarg);
+            }
+          break;
+
+        default:
+          usage ();
+          break;
+        }
+    }
+
+  if (flag_dump_strings && n_specs > 0)
+    error (2, 0, "no type may be specified when dumping strings");
+
+  /* Build a blank string as wide as a formatted address.  */
+  assert (address_pad_len <= MAX_ADDRESS_LENGTH);
+  for (i = 0; i < address_pad_len; i++)
+    address_pad[i] = ' ';
+  address_pad[address_pad_len] = '\0';
+
+  /* With no format given at all, fall back to "o2".  */
+  if (n_specs == 0)
+    {
+      int err = decode_one_format ("o2", NULL, &(spec[0]));
+
+      assert (err == 0);
+      n_specs = 1;
+    }
+
+  n_files = argc - optind;
+  if (n_files > 0)
+    file_list = (char const *const *) &argv[optind];
+  else
+    {
+      /* If no files were listed on the command line, set up the
+         global array FILE_LIST so that it contains the null-terminated
+         list of one name: "-".  */
+      static char const * const default_file_list[] = {"-", NULL};
+
+      file_list = default_file_list;
+    }
+
+  skip (n_bytes_to_skip);
+
+  /* Compute output block length.  */
+  l_c_m = get_lcm ();
+
+  if (width_specified)
+    {
+      if (desired_width != 0 && desired_width % l_c_m == 0)
+        bytes_per_block = desired_width;
+      else
+        {
+          /* The conversions must match the argument types:
+             `desired_width' is unsigned long and `l_c_m' is unsigned.  */
+          error (0, 0, "warning: invalid width %lu; using %u instead",
+                 desired_width, l_c_m);
+          bytes_per_block = l_c_m;
+        }
+    }
+  else
+    {
+      /* Largest multiple of l_c_m that does not exceed the default,
+         or l_c_m itself when even one multiple is too large.  */
+      if (l_c_m < DEFAULT_BYTES_PER_BLOCK)
+        bytes_per_block = l_c_m * (int) (DEFAULT_BYTES_PER_BLOCK / l_c_m);
+      else
+        bytes_per_block = l_c_m;
+    }
+
+#ifdef DEBUG
+  for (i = 0; i < n_specs; i++)
+    {
+      printf ("%d: fmt=\"%s\" width=%d\n",
+              i, spec[i].fmt_string, width_bytes[spec[i].size]);
+    }
+#endif
+
+  if (flag_dump_strings)
+    {
+      dump_strings ();
+    }
+  else
+    {
+      dump ();
+    }
+
+  exit (0);
+}
diff --git a/src/paste.c b/src/paste.c
new file mode 100644
index 000000000..c7058a63c
--- /dev/null
+++ b/src/paste.c
@@ -0,0 +1,458 @@
+/* paste - merge lines of files
+ Copyright (C) 1984 by David M. Ihnat
+
+ This program is a total rewrite of the Bell Laboratories Unix(Tm)
+ command of the same name, as of System V. It contains no proprietary
+ code, and therefore may be used without violation of any proprietary
+ agreements whatsoever. However, you will notice that the program is
+ copyrighted by me. This is to assure the program does *not* fall
+ into the public domain. Thus, I may specify just what I am now:
+ This program may be freely copied and distributed, provided this notice
+ remains; it may not be sold for profit without express written consent of
+ the author.
+ Please note that I recreated the behavior of the Unix(Tm) 'paste' command
+ as faithfully as possible, with minor exceptions; however,
+ I haven't run a full set of regression tests. Thus, the user of
+ this program accepts full responsibility for any effects or loss;
+ in particular, the author is not responsible for any losses,
+ explicit or incidental, that may be incurred through use of this program.
+
+ I ask that any bugs (and, if possible, fixes) be reported to me when
+ possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us
+
+ The list of valid escape sequences has been expanded over the Unix
+ version, to include \b, \f, \r, and \v.
+
+ POSIX changes, bug fixes, long-named options, and cleanup
+ by David MacKenzie <djm@ai.mit.edu>.
+
+ Options:
+ --serial
+ -s Paste one file at a time rather than
+ one line from each file.
+ --delimiters=delim-list
+ -d delim-list Consecutively use the characters in
+ DELIM-LIST instead of tab to separate
+ merged lines. When DELIM-LIST is exhausted,
+ start again at its beginning.
+ A FILE of `-' means standard input.
+ If no FILEs are given, standard input is used. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+char *collapse_escapes ();
+char *xmalloc ();
+char *xrealloc ();
+int paste_parallel ();
+int paste_serial ();
+void error ();
+void usage ();
+
+/* Indicates that no delimiter should be added in the current position. */
+#define EMPTY_DELIM '\0'
+
+/* Element marking a file that has reached EOF and been closed. */
+#define CLOSED ((FILE *) -1)
+
+/* Element marking end of list of open files. */
+#define ENDLIST ((FILE *) -2)
+
+/* Name this program was run with. */
+char *program_name;
+
+/* If nonzero, we have read standard input at some point. */
+int have_read_stdin;
+
+/* If nonzero, merge subsequent lines of each file rather than
+ corresponding lines from each file in parallel. */
+int serial_merge;
+
+/* The delimiters between lines of input files (used cyclically). */
+char *delims;
+
+/* A pointer to the character after the end of `delims'. */
+char *delim_end;
+
+struct option longopts[] =
+{
+ {"serial", 0, 0, 's'},
+ {"delimiters", 1, 0, 'd'},
+ {0, 0, 0, 0}
+};
+
+/* Entry point for paste.  Parse the options, collapse the escapes in
+   the delimiter list, paste the named files (standard input when none
+   are given) in the selected mode, and exit with the resulting
+   status.  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  char tab_delim[2];		/* Default delimiter list: a single tab.  */
+  char empty_delim_spec[3];	/* Writable copy of the `\0' escape.  */
+  int status;
+  int opt;
+
+  program_name = argv[0];
+  have_read_stdin = 0;
+  serial_merge = 0;
+  strcpy (tab_delim, "\t");
+  strcpy (empty_delim_spec, "\\0");
+  delims = tab_delim;
+
+  for (;;)
+    {
+      opt = getopt_long (argc, argv, "d:s", longopts, (int *) 0);
+      if (opt == EOF)
+        break;
+
+      if (opt == 'd')
+        {
+          /* Delimiter character(s).  An empty list is replaced by the
+             escape that stands for "no delimiter".  */
+          if (optarg[0] == '\0')
+            optarg = empty_delim_spec;
+          delims = optarg;
+        }
+      else if (opt == 's')
+        serial_merge++;
+      else
+        usage ();
+    }
+
+  /* No file operands: read standard input.  */
+  if (optind == argc)
+    argv[argc++] = "-";
+
+  delim_end = collapse_escapes (delims);
+
+  if (serial_merge)
+    status = paste_serial (argc - optind, &argv[optind]);
+  else
+    status = paste_parallel (argc - optind, &argv[optind]);
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    error (1, errno, "-");
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, errno, "write error");
+  exit (status);
+}
+
+/* Replace backslash representations of special characters in
+   STRPTR with their actual values, rewriting the string in place.
+   The set of possible backslash characters has been expanded beyond
+   that recognized by the Unix version: \0 (EMPTY_DELIM), \b, \f, \n,
+   \r, \t and \v are translated, and a backslash before any other
+   character yields that character.  A lone backslash at the very end
+   of STRPTR is kept as a literal backslash.
+
+   No terminating null is written after the result (the result may
+   itself contain EMPTY_DELIM), so callers must rely on the return
+   value to find its end.
+
+   Return a pointer to the character after the new end of STRPTR.  */
+
+char *
+collapse_escapes (strptr)
+     char *strptr;
+{
+  register char *strout;
+
+  strout = strptr;		/* Start at the same place, anyway.  */
+
+  while (*strptr)
+    {
+      if (*strptr != '\\')	/* Is it an escape character?  */
+        *strout++ = *strptr++;	/* No, just transfer it.  */
+      else
+        {
+          switch (*++strptr)
+            {
+            case '\0':
+              /* The backslash was the last character.  Stop here:
+                 stepping over the terminator would make the scan run
+                 past the end of the string.  */
+              *strout++ = '\\';
+              return strout;
+
+            case '0':
+              *strout++ = EMPTY_DELIM;
+              break;
+
+            case 'b':
+              *strout++ = '\b';
+              break;
+
+            case 'f':
+              *strout++ = '\f';
+              break;
+
+            case 'n':
+              *strout++ = '\n';
+              break;
+
+            case 'r':
+              *strout++ = '\r';
+              break;
+
+            case 't':
+              *strout++ = '\t';
+              break;
+
+            case 'v':
+              *strout++ = '\v';
+              break;
+
+            default:
+              *strout++ = *strptr;
+              break;
+            }
+          strptr++;
+        }
+    }
+  return strout;
+}
+
+/* Perform column paste on the NFILES files named in FNAMPTR.
+   Each output line is built from the next line of every file, with the
+   characters of `delims' (used cyclically, restarting on every output
+   line) taking the place of the newlines of all but the last file.
+   Files that have run out contribute an empty field; output stops once
+   every file is exhausted.  A final input line that lacks its newline
+   is treated as if the newline were present.
+
+   A file that cannot be opened is reported through error (1, ...).
+   Return 0 if no errors, 1 if reading or closing some file failed.  */
+
+int
+paste_parallel (nfiles, fnamptr)
+     int nfiles;
+     char **fnamptr;
+{
+  int errors = 0;		/* 1 if read or close errors occur.  */
+  /* Number of files for which space is allocated in `delbuf' and `fileptr'.
+     Enlarged as necessary.  */
+  int file_list_size = 12;
+  int chr;			/* Input character.  */
+  int line_length;		/* Number of chars in line.  */
+  int somedone;			/* 0 if all files empty for this line.  */
+  /* If all files are just ready to be closed, or will be on this
+     round, the string of delimiters must be preserved.
+     delbuf[0] through delbuf[file_list_size]
+     store the delimiters for closed files.  */
+  char *delbuf;
+  int delims_saved;		/* Number of delims saved in `delbuf'.  */
+  register char *delimptr;	/* Cycling pointer into `delims'.  */
+  FILE **fileptr;		/* Streams open to the files to process.  */
+  int files_open;		/* Number of files still open to process.  */
+  int i;			/* Loop index.  */
+  int opened_stdin = 0;		/* Nonzero if any fopen got fd 0.  */
+
+  delbuf = (char *) xmalloc (file_list_size + 2);
+  fileptr = (FILE **) xmalloc ((file_list_size + 1) * sizeof (FILE *));
+
+  /* Attempt to open all files.  This could be expanded to an infinite
+     number of files, but at the (considerable) expense of remembering
+     each file and its current offset, then opening/reading/closing.  */
+
+  for (files_open = 0; files_open < nfiles; ++files_open)
+    {
+      if (files_open == file_list_size - 2)
+        {
+          file_list_size += 12;
+          delbuf = (char *) xrealloc (delbuf, file_list_size + 2);
+          fileptr = (FILE **) xrealloc (fileptr, (file_list_size + 1)
+                                        * sizeof (FILE *));
+        }
+      if (!strcmp (fnamptr[files_open], "-"))
+        {
+          have_read_stdin = 1;
+          fileptr[files_open] = stdin;
+        }
+      else
+        {
+          fileptr[files_open] = fopen (fnamptr[files_open], "r");
+          if (fileptr[files_open] == NULL)
+            error (1, errno, "%s", fnamptr[files_open]);
+          else if (fileno (fileptr[files_open]) == 0)
+            opened_stdin = 1;
+        }
+    }
+
+  fileptr[files_open] = ENDLIST;
+
+  /* A named file landed on descriptor 0 although `-' was also
+     requested, so standard input cannot have been open.  */
+  if (opened_stdin && have_read_stdin)
+    error (1, 0, "standard input is closed");
+
+  /* Read a line from each file and output it to stdout separated by a
+     delimiter, until we go through the loop without successfully
+     reading from any of the files.  */
+
+  while (files_open)
+    {
+      /* Set up for the next line.  */
+      somedone = 0;
+      delimptr = delims;
+      delims_saved = 0;
+
+      for (i = 0; fileptr[i] != ENDLIST && files_open; i++)
+        {
+          line_length = 0;	/* Clear so we can easily detect EOF.  */
+          if (fileptr[i] != CLOSED)
+            {
+              chr = getc (fileptr[i]);
+              /* Delimiters held back for exhausted files are due now
+                 that a later file turns out to have data.  */
+              if (chr != EOF && delims_saved)
+                {
+                  fwrite (delbuf, sizeof (char), delims_saved, stdout);
+                  delims_saved = 0;
+                }
+
+              while (chr != EOF)
+                {
+                  line_length++;
+                  if (chr == '\n')
+                    break;
+                  putc (chr, stdout);
+                  chr = getc (fileptr[i]);
+                }
+            }
+
+          if (line_length == 0)
+            {
+              /* EOF, read error, or closed file.
+                 If an EOF or error, close the file and mark it in the list.  */
+              if (fileptr[i] != CLOSED)
+                {
+                  if (ferror (fileptr[i]))
+                    {
+                      error (0, errno, "%s", fnamptr[i]);
+                      errors = 1;
+                    }
+                  if (fileptr[i] == stdin)
+                    clearerr (fileptr[i]);	/* Also clear EOF.  */
+                  else if (fclose (fileptr[i]) == EOF)
+                    {
+                      error (0, errno, "%s", fnamptr[i]);
+                      errors = 1;
+                    }
+
+                  fileptr[i] = CLOSED;
+                  files_open--;
+                }
+
+              if (fileptr[i + 1] == ENDLIST)
+                {
+                  /* End of this output line.
+                     Is this the end of the whole thing?  */
+                  if (somedone)
+                    {
+                      /* No.  Some files were not closed for this line.  */
+                      if (delims_saved)
+                        {
+                          fwrite (delbuf, sizeof (char), delims_saved, stdout);
+                          delims_saved = 0;
+                        }
+                      putc ('\n', stdout);
+                    }
+                  continue;	/* Next read of files, or exit.  */
+                }
+              else
+                {
+                  /* Closed file; add delimiter to `delbuf'.  */
+                  if (*delimptr != EMPTY_DELIM)
+                    delbuf[delims_saved++] = *delimptr;
+                  if (++delimptr == delim_end)
+                    delimptr = delims;
+                }
+            }
+          else
+            {
+              /* Some data read.  The read loop stopped with `chr' equal
+                 to '\n', or to EOF when the final line has no newline.
+                 EOF is not a character and must never reach putc.  */
+              somedone++;
+
+              /* Except for last file, replace last newline with delim.  */
+              if (fileptr[i + 1] != ENDLIST)
+                {
+                  if (*delimptr != EMPTY_DELIM)
+                    putc (*delimptr, stdout);
+                  if (++delimptr == delim_end)
+                    delimptr = delims;
+                }
+              else
+                /* End the output line, supplying the newline when the
+                   input line lacked one.  */
+                putc ('\n', stdout);
+            }
+        }
+    }
+  return errors;
+}
+
+/* Perform serial paste on the NFILES files named in FNAMPTR: every
+   file becomes one output line, with the newlines inside it replaced
+   by the characters of `delims' (used cyclically, restarting for each
+   file).  A file that cannot be opened is reported and skipped.
+   Return 0 if no errors, 1 if one or more files could not be
+   opened or read.  */
+
+int
+paste_serial (nfiles, fnamptr)
+     int nfiles;
+     char **fnamptr;
+{
+  int errors = 0;		/* Becomes 1 on any open/read/close error.  */
+  register int cur, prev;	/* Lookahead char and the one before it.  */
+  register char *next_delim;	/* Delimiter to use for the next newline.  */
+  register FILE *in;		/* Stream for the file being pasted.  */
+
+  for (; nfiles != 0; nfiles--, fnamptr++)
+    {
+      if (strcmp (*fnamptr, "-") == 0)
+        {
+          in = stdin;
+          have_read_stdin = 1;
+        }
+      else if ((in = fopen (*fnamptr, "r")) == NULL)
+        {
+          error (0, errno, "%s", *fnamptr);
+          errors = 1;
+          continue;
+        }
+
+      next_delim = delims;	/* Each file starts at the first delimiter.  */
+
+      prev = getc (in);
+      if (prev == EOF)
+        /* Nothing to read; an empty file still yields an empty line.  */
+        putc ('\n', stdout);
+      else
+        {
+          /* Run one character behind the input.  `prev' is written only
+             after `cur' shows that more input follows, so the last
+             character of the file is never turned into a delimiter.  */
+          for (cur = getc (in); cur != EOF; prev = cur, cur = getc (in))
+            {
+              if (prev != '\n')
+                {
+                  putc (prev, stdout);
+                  continue;
+                }
+
+              /* An interior newline becomes the next delimiter.  */
+              if (*next_delim != EMPTY_DELIM)
+                putc (*next_delim, stdout);
+              if (++next_delim == delim_end)
+                next_delim = delims;
+            }
+
+          /* Last character of the file: copy it, then finish the
+             output line unless that character already did.  */
+          putc (prev, stdout);
+          if (prev != '\n')
+            putc ('\n', stdout);
+        }
+
+      if (ferror (in))
+        {
+          error (0, errno, "%s", *fnamptr);
+          errors = 1;
+        }
+      if (in == stdin)
+        clearerr (in);		/* Also clear EOF.  */
+      else if (fclose (in) == EOF)
+        {
+          error (0, errno, "%s", *fnamptr);
+          errors = 1;
+        }
+    }
+  return errors;
+}
+
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-s] [-d delim-list] [--serial] [--delimiters=delim-list]\n\
+ [file...]\n",
+ program_name);
+ exit (1);
+}
diff --git a/src/pr.c b/src/pr.c
new file mode 100644
index 000000000..10595ad73
--- /dev/null
+++ b/src/pr.c
@@ -0,0 +1,1844 @@
+/* pr -- convert text files for printing.
+ Copyright (C) 1988, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Author: Pete TerMaat. */
+
+/* Things to watch: Sys V screws up on ...
+ pr -n -3 -s: /usr/dict/words
+ pr -m -o10 -n /usr/dict/words{,,,}
+ pr -6 -a -n -o5 /usr/dict/words
+
+ Ideas:
+
+ Keep a things_to_do list of functions to call when we know we have
+ something to print. Cleaner than current series of checks.
+
+ Improve the printing of control prefixes.
+
+
+ Options:
+
+ +PAGE Begin output at page PAGE of the output.
+
+ -COLUMN Produce output that is COLUMN columns wide and print
+ columns down.
+
+ -a Print columns across rather than down. The input
+ one
+ two
+ three
+ four
+ will be printed as
+ one two three
+ four
+
+ -b Balance columns on the last page.
+
+ -c Print unprintable characters as control prefixes.
+ Control-g is printed as ^G.
+
+ -d Double space the output.
+
+ -e[c[k]] Expand tabs to spaces on input. Optional argument C
+ is the input tab character. (Default is `\t'.) Optional
+ argument K is the input tab character's width. (Default is 8.)
+
+ -F
+ -f Use formfeeds instead of newlines to separate pages.
+
+ -h header Replace the filename in the header with the string HEADER.
+
+ -i[c[k]] Replace spaces with tabs on output. Optional argument
+ C is the output tab character. (Default is `\t'.) Optional
+ argument K is the output tab character's width. (Default
+ is 8.)
+
+ -l lines Set the page length to LINES. Default is 66.
+
+ -m Print files in parallel.
+
+ -n[c[k]] Precede each column with a line number.
+ (With parallel files, precede each line with a line
+ number.) Optional argument C is the character to print
+ after each number. (Default `\t'.) Optional argument
+ K is the number of digits per line number. (Default 5.)
+
+ -o offset Offset each line with a margin OFFSET spaces wide.
+ Total page width is the size of this offset plus the
+ width set with `-w'.
+
+ -r Ignore files that can't be opened.
+
+ -s[c] Separate each line with a character. Optional argument C is
+ the character to be used. Default is `\t'.
+
+ -t Do not print headers or footers.
+
+ -v Print unprintable characters as escape sequences.
+ Control-G becomes \007.
+
+ -w width Set the page width to WIDTH characters. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <time.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISPRINT(c) (isascii (c) && isprint (c))
+#define ISDIGIT(c) (isascii (c) && isdigit (c))
+#else
+#define ISPRINT(c) isprint (c)
+#define ISDIGIT(c) isdigit (c)
+#endif
+
+int char_to_clump ();
+int read_line ();
+int print_page ();
+int print_stored ();
+char *xmalloc ();
+char *xrealloc ();
+int open_file ();
+int skip_to_page ();
+void error ();
+void getoptarg ();
+void usage ();
+void print_files ();
+void init_header ();
+void init_store_cols ();
+void store_columns ();
+void balance ();
+void store_char ();
+void pad_down ();
+void read_rest_of_line ();
+void print_char ();
+void cleanup ();
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+/* Used with start_position in the struct COLUMN described below.
+ If start_position == ANYWHERE, we aren't truncating columns and
+ can begin printing a column anywhere. Otherwise we must pad to
+ the horizontal position start_position. */
+#define ANYWHERE 0
+
+/* Each column has one of these structures allocated for it.
+ If we're only dealing with one file, fp is the same for all
+ columns.
+
+ The general strategy is to spend time setting up these column
+ structures (storing columns if necessary), after which printing
+ is a matter of flitting from column to column and calling
+ print_func.
+
+ Parallel files, single files printing across in multiple
+ columns, and single files printing down in multiple columns all
+ fit the same printing loop.
+
+ print_func Function used to print lines in this column.
+ If we're storing this column it will be
+ print_stored(), Otherwise it will be read_line().
+
+ char_func Function used to process characters in this column.
+ If we're storing this column it will be store_char(),
+ otherwise it will be print_char().
+
+ current_line Index of the current entry in line_vector, which
+ contains the index of the first character of the
+ current line in buff[].
+
+ lines_stored Number of lines in this column which are stored in
+ buff.
+
+ lines_to_print If we're storing this column, lines_to_print is
+ the number of stored_lines which remain to be
+ printed. Otherwise it is the number of lines
+ we can print without exceeding lines_per_body.
+
+ start_position The horizontal position we want to be in before we
+ print the first character in this column.
+
+ numbered True means precede this column with a line number. */
+
+/* State kept for one output column.  */
+struct COLUMN
+{
+  FILE *fp;			/* Input stream for this column. */
+  char *name;			/* File name. */
+  enum
+  {
+    OPEN,
+    ON_HOLD,			/* Hit a form feed. */
+    CLOSED
+  } status;			/* Status of the file pointer. */
+  int (*print_func) ();		/* Func to print lines in this col. */
+  void (*char_func) ();		/* Func to print/store chars in this col. */
+  int current_line;		/* Index of current place in line_vector. */
+  int lines_stored;		/* Number of lines stored in buff. */
+  int lines_to_print;		/* No. lines stored or space left on page. */
+  int start_position;		/* Horizontal position of first char. */
+  int numbered;			/* True means precede this column with a line number. */
+};
+
+typedef struct COLUMN COLUMN;
+
+#define NULLCOL (COLUMN *)0
+
+/* All of the columns to print. */
+COLUMN *column_vector;
+
+/* When printing a single file in multiple downward columns,
+ we store the leftmost columns contiguously in buff.
+ To print a line from buff, get the index of the first char
+ from line_vector[i], and print up to line_vector[i + 1]. */
+char *buff;
+
+/* Index of the position in buff where the next character
+ will be stored. */
+int buff_current;
+
+/* The number of characters in buff.
+ Used for allocation of buff and to detect overflow of buff. */
+int buff_allocated;
+
+/* Array of indices into buff.
+ Each entry is an index of the first character of a line.
+ This is used when storing lines to facilitate shuffling when
+ we do column balancing on the last page. */
+int *line_vector;
+
+/* Array of horizontal positions.
+ For each line in line_vector, end_vector[line] is the horizontal
+ position we are in after printing that line. We keep track of this
+ so that we know how much we need to pad to prepare for the next
+ column. */
+int *end_vector;
+
+/* (-m) True means we're printing multiple files in parallel. */
+int parallel_files = FALSE;
+
+/* (-[0-9]+) True means we're given an option explicitly specifying
+ number of columns. Used to detect when this option is used with -m. */
+int explicit_columns = FALSE;
+
+/* (-t) True means we're printing headers and footers. */
+int extremities = TRUE;
+
+/* True means we need to print a header as soon as we know we've got input
+ to print after it. */
+int print_a_header;
+
+/* (-h) True means we're using the standard header rather than a
+ customized one specified by the -h flag. */
+int standard_header = TRUE;
+
+/* (-f) True means use formfeeds instead of newlines to separate pages. */
+int use_form_feed = FALSE;
+
+/* True means we haven't encountered any filenames in the argument list. */
+int input_is_stdin = TRUE;
+
+/* True means we have read the standard input. */
+int have_read_stdin = FALSE;
+
+/* True means the -a flag has been given. */
+int print_across_flag = FALSE;
+
+/* True means we're printing one file in multiple (>1) downward columns. */
+int storing_columns = TRUE;
+
+/* (-b) True means balance columns on the last page as Sys V does. */
+int balance_columns = FALSE;
+
+/* (-l) Number of lines on a page, including header and footer lines. */
+int lines_per_page = 66;
+
+/* Number of lines in the header and footer can be reset to 0 using
+ the -t flag. */
+int lines_per_header = 5;
+int lines_per_body;
+int lines_per_footer = 5;
+
+/* (-w) Width in characters of the page. Does not include the width of
+ the margin. */
+int chars_per_line = 72;
+
+/* Number of characters in a column. Based on the gutter and page widths. */
+int chars_per_column;
+
+/* (-e) True means convert tabs to spaces on input. */
+int untabify_input = FALSE;
+
+/* (-e) The input tab character. */
+char input_tab_char = '\t';
+
+/* (-e) Tabstops are at chars_per_input_tab, 2*chars_per_input_tab,
+ 3*chars_per_input_tab, ... where the leftmost column is 1. */
+int chars_per_input_tab = 8;
+
+/* (-i) True means convert spaces to tabs on output. */
+int tabify_output = FALSE;
+
+/* (-i) The output tab character. */
+char output_tab_char = '\t';
+
+/* (-i) The width of the output tab. */
+int chars_per_output_tab = 8;
+
+/* Keeps track of pending white space. When we hit a nonspace
+ character after some whitespace, we print whitespace, tabbing
+ if necessary to get to output_position + spaces_not_printed. */
+int spaces_not_printed;
+
+/* Number of spaces between columns (though tabs can be used when possible to
+ use up the equivalent amount of space). Not sure if this is worth making
+ a flag for. BSD uses 0, Sys V uses 1. Sys V looks better. */
+int chars_per_gutter = 1;
+
+/* (-o) Number of spaces in the left margin (tabs used when possible). */
+int chars_per_margin = 0;
+
+/* Position where the next character will fall.
+ Leftmost position is 0 + chars_per_margin.
+ Rightmost position is chars_per_margin + chars_per_line - 1.
+ This is important for converting spaces to tabs on output. */
+int output_position;
+
+/* Horizontal position relative to the current file.
+ (output_position depends on where we are on the page;
+ input_position depends on where we are in the file.)
+ Important for converting tabs to spaces on input. */
+int input_position;
+
+/* Count number of failed opens so we can exit with non-zero
+ status if there were any. */
+int failed_opens = 0;
+
+/* The horizontal position we'll be at after printing a tab character
+   of width c_ from the position h_.  The arguments are evaluated more
+   than once, so they must be free of side effects.  */
+#define pos_after_tab(c_, h_) ((h_) - (h_) % (c_) + (c_))
+
+/* The number of spaces taken up if we print a tab character with width
+   c_ from position h_.  Written as a subtraction so that `%' is never
+   applied to a negated operand.  The arguments are evaluated more than
+   once, so they must be free of side effects.  */
+#define tab_width(c_, h_) ((c_) - (h_) % (c_))
+
+/* (-NNN) Number of columns of text to print. */
+int columns = 1;
+
+/* (+NNN) Page number on which to begin printing. */
+int first_page_number = 1;
+
+/* Number of files open (not closed, not on hold). */
+int files_ready_to_read = 0;
+
+/* Number of columns with either an open file or stored lines. */
+int cols_ready_to_print = 0;
+
+/* Current page number. Displayed in header. */
+int page_number;
+
+/* Current line number. Displayed when -n flag is specified.
+
+ When printing files in parallel (-m flag), line numbering is as follows:
+ 1 foo goo moo
+ 2 hoo too zoo
+
+ When printing files across (-a flag), ...
+ 1 foo 2 moo 3 goo
+ 4 hoo 5 too 6 zoo
+
+ Otherwise, line numbering is as follows:
+ 1 foo 3 goo 5 too
+ 2 moo 4 hoo 6 zoo */
+int line_number;
+
+/* (-n) True means lines should be preceded by numbers. */
+int numbered_lines = FALSE;
+
+/* True means print a number as soon as we know we'll be printing
+ from the current column. */
+int print_a_number;
+
+/* (-n) Character which follows each line number. */
+char number_separator = '\t';
+
+/* (-n) Width in characters of a line number. */
+int chars_per_number = 5;
+
+/* Used when widening the first column to accommodate numbers -- only
+ needed when printing files in parallel. Includes width of both the
+ number and the number_separator. */
+int number_width;
+
+/* Buffer sprintf uses to format a line number. */
+char *number_buff;
+
+/* (-v) True means unprintable characters are printed as escape sequences.
+ control-g becomes \007. */
+int use_esc_sequence = FALSE;
+
+/* (-c) True means unprintable characters are printed as control prefixes.
+ control-g becomes ^G. */
+int use_cntrl_prefix = FALSE;
+
+/* (-d) True means output is double spaced. */
+int double_space = FALSE;
+
+/* Number of files opened initially in init_files. Should be 1
+ unless we're printing multiple files in parallel. */
+int total_files = 0;
+
+/* (-r) True means don't complain if we can't open a file. */
+int ignore_failed_opens = FALSE;
+
+/* (-s) True means we separate columns with a specified character. */
+int use_column_separator = FALSE;
+
+/* Character used to separate columns if the -s flag has been specified. */
+char column_separator = '\t';
+
+/* Number of separator characters waiting to be printed as soon as we
+ know that we have any input remaining to be printed. */
+int separators_not_printed;
+
+/* Position we need to pad to, as soon as we know that we have input
+ remaining to be printed. */
+int padding_not_printed;
+
+/* True means we should pad the end of the page. Remains false until we
+ know we have a page to print. */
+int pad_vertically;
+
+/* (-h) String of characters used in place of the filename in the header. */
+char *custom_header;
+
+/* String containing the date, filename or custom header, and "Page ". */
+char *header;
+
+int *clump_buff;
+
+/* True means we truncate lines longer than chars_per_column. */
+int truncate_lines = FALSE;
+
+/* The name under which this program was invoked. */
+char *program_name;
+
+/* Parse the command line and print the named files.
+
+ Options and file names are processed in the order given: file names
+ are collected in file_name_vector, and as soon as an option (or the
+ end of the arguments) follows one or more file names, the files
+ collected so far are printed with the settings in effect at that
+ point. If input_is_stdin is still set after all arguments have been
+ seen, standard input is printed.
+
+ Exits with status 1 if any file could not be opened, 0 otherwise. */
+
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ int c;
+ char *s;
+ int files = 0; /* Number of file names collected and not yet printed. */
+ char **file_names, **file_name_vector;
+ int accum = 0; /* Value of the -COLUMN digits seen so far. */
+
+ program_name = argv[0];
+
+ file_name_vector = (char **) xmalloc (argc * sizeof (char **));
+ file_names = file_name_vector;
+
+ for (;;)
+ {
+ c = getopt (argc, argv, "-0123456789abcde::fFh:i::l:mn::o:rs::tvw:");
+
+ if (c == 1) /* Non-option argument. */
+ {
+ s = optarg;
+ if (*s == '+')
+ {
+ if (!ISDIGIT (*++s))
+ usage ("`+' requires a numeric argument");
+ first_page_number = atoi (s);
+ }
+ else
+ {
+ *file_names++ = optarg;
+ ++files;
+ }
+ }
+ else if (files > 0)
+ {
+ /* An option or EOF after some file names: print what we have
+ collected before the new option takes effect. */
+ if (parallel_files && explicit_columns)
+ error (1, 0,
+"Cannot specify number of columns when printing in parallel.");
+
+ if (parallel_files && print_across_flag)
+ error (1, 0,
+"Cannot specify both printing across and printing in parallel.");
+
+ if (parallel_files)
+ print_files (files, file_name_vector);
+ else
+ {
+ file_names = file_name_vector;
+ while (files--)
+ print_files (1, file_names++);
+ }
+
+ input_is_stdin = FALSE;
+ file_names = file_name_vector;
+ files = 0;
+ cleanup ();
+ }
+
+ if (ISDIGIT (c))
+ {
+ accum = accum * 10 + c - '0';
+ continue;
+ }
+ else
+ {
+ /* First non-digit after a run of digit options: the run was
+ the -COLUMN count. */
+ if (accum > 0)
+ {
+ columns = accum;
+ explicit_columns = TRUE;
+ }
+ accum = 0;
+ }
+
+ switch (c)
+ {
+ case 'a':
+ print_across_flag = TRUE;
+ storing_columns = FALSE;
+ break;
+ case 'b':
+ balance_columns = TRUE;
+ break;
+ case 'c':
+ use_cntrl_prefix = TRUE;
+ break;
+ case 'd':
+ double_space = TRUE;
+ break;
+ case 'e':
+ if (optarg)
+ getoptarg (optarg, 'e', &input_tab_char,
+ &chars_per_input_tab);
+ /* Could check tab width > 0. */
+ untabify_input = TRUE;
+ break;
+ case 'f':
+ case 'F':
+ use_form_feed = TRUE;
+ break;
+ case 'h':
+ custom_header = optarg;
+ standard_header = FALSE;
+ break;
+ case 'i':
+ if (optarg)
+ getoptarg (optarg, 'i', &output_tab_char,
+ &chars_per_output_tab);
+ /* Could check tab width > 0. */
+ tabify_output = TRUE;
+ break;
+ case 'l':
+ lines_per_page = atoi (optarg);
+ break;
+ case 'm':
+ parallel_files = TRUE;
+ storing_columns = FALSE;
+ break;
+ case 'n':
+ numbered_lines = TRUE;
+ if (optarg)
+ getoptarg (optarg, 'n', &number_separator,
+ &chars_per_number);
+ break;
+ case 'o':
+ chars_per_margin = atoi (optarg);
+ break;
+ case 'r':
+ ignore_failed_opens = TRUE;
+ break;
+ case 's':
+ use_column_separator = TRUE;
+ if (optarg)
+ {
+ s = optarg;
+ column_separator = *s;
+ if (*++s)
+ {
+ fprintf (stderr, "\
+%s: extra characters in the argument to the `-s' option: `%s'\n",
+ program_name, s);
+ usage ((char *) 0);
+ }
+ }
+ break;
+ case 't':
+ extremities = FALSE;
+ break;
+ case 'v':
+ use_esc_sequence = TRUE;
+ break;
+ case 'w':
+ chars_per_line = atoi (optarg);
+ break;
+ case '?':
+ usage ((char *) 0);
+ break;
+ }
+
+ if (c == EOF)
+ break;
+ }
+
+ /* input_is_stdin is cleared above once any named file has been printed. */
+ if (input_is_stdin)
+ print_files (0, (char **) 0);
+
+ if (have_read_stdin && fclose (stdin) == EOF)
+ error (1, errno, "standard input");
+ if (ferror (stdout) || fclose (stdout) == EOF)
+ error (1, errno, "write error");
+ if (failed_opens > 0)
+ exit(1);
+ exit (0);
+}
+
+/* Decode ARG, the text attached to option -SWITCH_CHAR, which has the
+   form [c][NNN].
+
+   If ARG does not start with a digit, its first character is stored in
+   *CHARACTER.  If digits follow, their value is stored in *NUMBER.
+   A part that is absent leaves the corresponding variable untouched.
+   Anything other than a digit after the optional character is reported
+   on stderr and usage () is called.
+
+   Example: -nck, where 'n' is the option, c is the optional number
+   separator, and k is the optional width of the field used when
+   printing a number.  */
+
+void
+getoptarg (arg, switch_char, character, number)
+     char *arg, switch_char, *character;
+     int *number;
+{
+  /* A leading non-digit is the character part.  */
+  if (!ISDIGIT (*arg))
+    *character = *arg++;
+
+  /* Nothing after the optional character: no number was given.  */
+  if (*arg == '\0')
+    return;
+
+  if (!ISDIGIT (*arg))
+    {
+      fprintf (stderr, "\
+%s: extra characters in the argument to the `-%c' option: `%s'\n",
+               program_name, switch_char, arg);
+      usage ((char *) 0);
+    }
+  else
+    *number = atoi (arg);
+}
+
+/* Set parameters related to formatting.
+
+   NUMBER_OF_FILES is the number of files about to be printed together;
+   0 means standard input.  From the option settings this derives
+   lines_per_body, columns (when printing in parallel), number_width
+   and chars_per_column, and (re)allocates number_buff and clump_buff.
+   Exits with "page width too narrow" if a column would be less than
+   one character wide.  */
+
+void
+init_parameters (number_of_files)
+     int number_of_files;
+{
+  int chars_used_by_number = 0;
+  int int_width_bound;
+  int number_buff_size;
+
+  lines_per_body = lines_per_page - lines_per_header - lines_per_footer;
+  if (lines_per_body <= 0)
+    extremities = FALSE;
+  if (extremities == FALSE)
+    lines_per_body = lines_per_page;
+
+  if (double_space)
+    lines_per_body = lines_per_body / 2;
+
+  /* If input is stdin, cannot print parallel files.  BSD dumps core
+     on this.  */
+  if (number_of_files == 0)
+    parallel_files = FALSE;
+
+  if (parallel_files)
+    columns = number_of_files;
+
+  /* Tabification is assumed for multiple columns.  */
+  if (columns > 1)
+    {
+      if (!use_column_separator)
+        truncate_lines = TRUE;
+
+      untabify_input = TRUE;
+      tabify_output = TRUE;
+    }
+  else
+    storing_columns = FALSE;
+
+  if (numbered_lines)
+    {
+      if (number_separator == input_tab_char)
+        {
+          number_width = chars_per_number +
+            tab_width (chars_per_input_tab,
+                       (chars_per_margin + chars_per_number));
+        }
+      else
+        number_width = chars_per_number + 1;
+      /* The number is part of the column width unless we are
+         printing files in parallel.  */
+      if (parallel_files)
+        chars_used_by_number = number_width;
+    }
+
+  chars_per_column = (chars_per_line - chars_used_by_number -
+                      (columns - 1) * chars_per_gutter) / columns;
+
+  if (chars_per_column < 1)
+    error (1, 0, "page width too narrow");
+
+  if (numbered_lines)
+    {
+      /* number () formats with "%*d", which pads to chars_per_number
+         but never truncates: a line number with more digits than the
+         field is written in full.  Size the buffer for whichever is
+         larger, the requested field or any int in decimal (at most
+         three digits per byte plus a sign), plus the terminating
+         null.  */
+      int_width_bound = (int) (3 * sizeof (int)) + 1;
+      number_buff_size = (chars_per_number > int_width_bound
+                          ? chars_per_number : int_width_bound) + 1;
+
+      if (number_buff != (char *) 0)
+        free (number_buff);
+      number_buff = (char *) xmalloc (number_buff_size * sizeof (char));
+    }
+
+  /* Pick the maximum between the tab width and the width of an
+     escape sequence.  */
+  if (clump_buff != (int *) 0)
+    free (clump_buff);
+  clump_buff = (int *) xmalloc ((chars_per_input_tab > 4
+                                 ? chars_per_input_tab : 4) * sizeof (int));
+}
+
+/* Open the necessary files,
+ maintaining a COLUMN structure for each column.
+
+ With multiple files, each column p has a different p->fp.
+ With single files, each column p has the same p->fp.
+ Return 1 if (number_of_files > 0) and no files can be opened,
+ 0 otherwise.
+
+ AV points to NUMBER_OF_FILES file names. Any previous column_vector
+ is freed and a new one with `columns' entries is allocated. On
+ success files_ready_to_read is set to total_files and the page
+ header is initialized. */
+
+int
+init_fps (number_of_files, av)
+ int number_of_files;
+ char **av;
+{
+ int i, files_left;
+ COLUMN *p;
+ FILE *firstfp;
+ char *firstname;
+
+ total_files = 0;
+
+ if (column_vector != NULLCOL)
+ free ((char *) column_vector);
+ column_vector = (COLUMN *) xmalloc (columns * sizeof (COLUMN));
+
+ if (parallel_files)
+ {
+ files_left = number_of_files;
+ for (p = column_vector; files_left--; ++p, ++av)
+ {
+ if (open_file (*av, p) == 0)
+ {
+ /* Reuse this slot for the next file and print one column fewer. */
+ --p;
+ --columns;
+ }
+ }
+ if (columns == 0)
+ return 1;
+ init_header ("", -1);
+ }
+ else
+ {
+ p = column_vector;
+ if (number_of_files > 0)
+ {
+ if (open_file (*av, p) == 0)
+ return 1;
+ init_header (*av, fileno (p->fp));
+ }
+ else
+ {
+ p->name = "standard input";
+ p->fp = stdin;
+ have_read_stdin = TRUE;
+ p->status = OPEN;
+ ++total_files;
+ init_header ("", -1);
+ }
+
+ /* Every remaining column reads from the same stream as the first. */
+ firstname = p->name;
+ firstfp = p->fp;
+ for (i = columns - 1, ++p; i; --i, ++p)
+ {
+ p->name = firstname;
+ p->fp = firstfp;
+ p->status = OPEN;
+ }
+ }
+ files_ready_to_read = total_files;
+ return 0;
+}
+
+/* Determine print_func and char_func, the functions
+ used by each column for printing and/or storing.
+
+ Determine the horizontal position desired when we begin
+ printing a column (p->start_position).
+
+ Also sets p->numbered for each column. */
+
+void
+init_funcs ()
+{
+ int i, h, h_next;
+ COLUMN *p;
+
+ h = chars_per_margin; /* Start position of the column being set up. */
+
+ if (use_column_separator)
+ h_next = ANYWHERE;
+ else
+ {
+ /* When numbering lines of parallel files, we enlarge the
+ first column to accommodate the number. Looks better than
+ the Sys V approach. */
+ if (parallel_files && numbered_lines)
+ h_next = h + chars_per_column + number_width;
+ else
+ h_next = h + chars_per_column;
+ }
+
+ /* This loop takes care of all but the rightmost column. */
+
+ for (p = column_vector, i = 1; i < columns; ++p, ++i)
+ {
+ if (storing_columns) /* One file, multi columns down. */
+ {
+ p->char_func = store_char;
+ p->print_func = print_stored;
+ }
+ else
+ /* One file, multi columns across; or parallel files. */
+ {
+ p->char_func = print_char;
+ p->print_func = read_line;
+ }
+
+ /* Number only the first column when printing files in
+ parallel. */
+ p->numbered = numbered_lines && (!parallel_files || i == 1);
+ p->start_position = h;
+
+ /* If we're using separators, all start_positions are
+ ANYWHERE, except the first column's start_position when
+ using a margin. */
+
+ if (use_column_separator)
+ {
+ h = ANYWHERE;
+ h_next = ANYWHERE;
+ }
+ else
+ {
+ h = h_next + chars_per_gutter;
+ h_next = h + chars_per_column;
+ }
+ }
+
+ /* The rightmost column.
+
+ Doesn't need to be stored unless we intend to balance
+ columns on the last page.
+
+ After the loop above, p and i refer to the last column. */
+ if (storing_columns && balance_columns)
+ {
+ p->char_func = store_char;
+ p->print_func = print_stored;
+ }
+ else
+ {
+ p->char_func = print_char;
+ p->print_func = read_line;
+ }
+
+ p->numbered = numbered_lines && (!parallel_files || i == 1);
+ p->start_position = h;
+}
+
+/* Attach the file called NAME to column P.  The name "-" selects
+   standard input (and records that stdin has been read); any other
+   name is opened for reading.
+
+   On success mark P as OPEN, count the file in total_files and return
+   1.  On failure count it in failed_opens, report the error unless
+   ignore_failed_opens is set, and return 0.  */
+
+int
+open_file (name, p)
+     char *name;
+     COLUMN *p;
+{
+  if (strcmp (name, "-") == 0)
+    {
+      p->name = "standard input";
+      p->fp = stdin;
+      have_read_stdin = 1;
+    }
+  else
+    {
+      p->name = name;
+      p->fp = fopen (name, "r");
+    }
+
+  if (p->fp != NULL)
+    {
+      p->status = OPEN;
+      ++total_files;
+      return 1;
+    }
+
+  ++failed_opens;
+  if (!ignore_failed_opens)
+    error (0, errno, "%s", name);
+  return 0;
+}
+
+/* Close the file belonging to column P; do nothing if P is already
+   CLOSED.  A read error on the stream, or a failing fclose, is fatal.
+   Standard input is never fclose'd here.
+
+   When files are printed in parallel only P is marked CLOSED.
+   Otherwise all columns share the one file, so every column in the
+   list is marked CLOSED, and those with no stored lines get nothing
+   more to print.  */
+
+void
+close_file (p)
+     COLUMN *p;
+{
+  COLUMN *col;
+  int left;
+
+  if (p->status == CLOSED)
+    return;
+  if (ferror (p->fp))
+    error (1, errno, "%s", p->name);
+  if (p->fp != stdin && fclose (p->fp) == EOF)
+    error (1, errno, "%s", p->name);
+
+  if (parallel_files)
+    {
+      p->status = CLOSED;
+      p->lines_to_print = 0;
+    }
+  else
+    {
+      col = column_vector;
+      for (left = columns; left != 0; --left)
+        {
+          col->status = CLOSED;
+          if (col->lines_stored == 0)
+            {
+#if 0
+              if (cols_ready_to_print > 0)
+                --cols_ready_to_print;
+#endif
+              col->lines_to_print = 0;
+            }
+          ++col;
+        }
+    }
+
+  --files_ready_to_read;
+}
+
+/* A form feed was read from P's file: suspend reading from it until
+   the next page starts.
+
+   With parallel files only P goes ON_HOLD; otherwise every column
+   shares the file, so all of them do.  In both cases P has nothing
+   more to print on this page and one fewer file is ready to read.  */
+
+void
+hold_file (p)
+     COLUMN *p;
+{
+  COLUMN *col;
+  int left;
+
+  if (parallel_files)
+    p->status = ON_HOLD;
+  else
+    {
+      col = column_vector;
+      for (left = columns; left != 0; --left)
+        {
+          col->status = ON_HOLD;
+          ++col;
+        }
+    }
+
+  p->lines_to_print = 0;
+  --files_ready_to_read;
+}
+
+/* Reverse the effect of hold_file at the end of a page: every column
+   that is ON_HOLD becomes OPEN again, and files_ready_to_read is
+   bumped once for each such column.  */
+
+void
+reset_status ()
+{
+  COLUMN *col = column_vector;
+  int left;
+
+  for (left = columns; left != 0; --left, ++col)
+    {
+      if (col->status != ON_HOLD)
+        continue;
+
+      col->status = OPEN;
+      files_ready_to_read++;
+    }
+}
+
+/* Print one file, or NUMBER_OF_FILES files side by side, named by AV.
+
+   Steps, in order:
+     compute the formatting parameters;
+     open the file(s) and build the column list (give up quietly if
+       nothing could be opened);
+     allocate the column store when columns are being stored;
+     skip ahead to first_page_number if the user asked for that (give
+       up if the input runs out first);
+     pick the per-column store/print functions;
+     print pages until print_page reports there are no more.  */
+
+void
+print_files (number_of_files, av)
+     int number_of_files;
+     char **av;
+{
+  init_parameters (number_of_files);
+
+  if (init_fps (number_of_files, av))
+    return;
+
+  if (storing_columns)
+    init_store_cols ();
+
+  if (first_page_number <= 1)
+    page_number = 1;
+  else if (skip_to_page (first_page_number))
+    page_number = first_page_number;
+  else
+    return;
+
+  init_funcs ();
+
+  line_number = 1;
+  for (;;)
+    {
+      if (!print_page ())
+        break;
+    }
+}
+
+/* Generous estimate of number of characters taken up by "Jun 7 00:08 " and
+ "Page NNNNN". */
+#define CHARS_FOR_DATE_AND_PAGE 50
+
+/* Initialize header information.
+   If DESC is non-negative, it is a file descriptor open to
+   FILENAME for reading.
+
+   Allocate space for a header string,
+   Determine the time, insert file name or user-specified string.
+
+   The date shown is the file's modification time; the current time is
+   used instead when DESC is negative, when the file is "-", or when
+   fstat fails.  A null FILENAME is treated as the empty string.
+   The result is left in `header' (any previous header is freed) and
+   ends in "Page"; print_header appends the page number.
+
+   It might be nice to have a "blank headers" option, since
+   pr -h "" still prints the date and page number.  */
+
+void
+init_header (filename, desc)
+     char *filename;
+     int desc;
+{
+  int chars_per_header;
+  char *f = filename;
+  char *t, *middle;
+  struct stat st;
+
+  if (filename == 0)
+    f = "";
+
+  /* If parallel files or standard input, use current time.  Test F
+     rather than FILENAME so that a null FILENAME never reaches
+     strcmp.  */
+  if (desc < 0 || !strcmp (f, "-") || fstat (desc, &st))
+    st.st_mtime = time ((time_t *) 0);
+  t = ctime (&st.st_mtime);
+
+  /* ctime yields "Www Mmm dd hh:mm:ss yyyy\n"; cut it into the pieces
+     "Mmm dd hh:mm" (from t[4]) and "yyyy" (from t[20]).  */
+  t[16] = '\0';			/* Mark end of month and time string. */
+  t[24] = '\0';			/* Mark end of year string. */
+
+  middle = standard_header ? f : custom_header;
+
+  chars_per_header = strlen (middle) + CHARS_FOR_DATE_AND_PAGE + 1;
+  if (header != (char *) 0)
+    free (header);
+  header = (char *) xmalloc (chars_per_header * sizeof (char));
+
+  sprintf (header, "%s %s %s Page", &t[4], &t[20], middle);
+}
+
+/* Prepare the column list for printing one page.
+
+   When columns are being stored, first read a page's worth of lines
+   into the store.  Then give every column its lines_to_print:
+     a column fed from the store gets the number of lines stored for
+       it, and counts as ready if that is nonzero;
+     a column read straight from its file gets lines_per_body and
+       counts as ready if the file is OPEN, otherwise it gets 0.
+   The rightmost column is fed from the store only when balancing.
+   cols_ready_to_print ends up holding the number of ready columns,
+   which is how the caller knows when to stop printing.  */
+
+void
+init_page ()
+{
+  int idx;
+  int from_store;		/* Leading columns that are fed from buff.  */
+  COLUMN *col;
+
+  cols_ready_to_print = 0;
+
+  if (!storing_columns)
+    from_store = 0;
+  else
+    {
+      store_columns ();
+      from_store = balance_columns ? columns : columns - 1;
+    }
+
+  for (idx = 0, col = column_vector; idx < columns; ++idx, ++col)
+    {
+      if (idx < from_store)
+        {
+          col->lines_to_print = col->lines_stored;
+          if (col->lines_to_print != 0)
+            ++cols_ready_to_print;
+        }
+      else if (col->status == OPEN)
+        {
+          col->lines_to_print = lines_per_body;
+          ++cols_ready_to_print;
+        }
+      else
+        col->lines_to_print = 0;
+    }
+}
+
+/* Print one page.
+
+ As long as there are lines left on the page and columns ready to print,
+ Scan across the column list
+ if the column has stored lines or the file is open
+ pad to the appropriate spot
+ print the column
+ pad the remainder of the page with \n or \f as requested
+ reset the status of all files -- any files which were on hold because
+ of formfeeds are now put back into the lineup.
+
+ Return FALSE if no column had anything to print (nothing is output in
+ that case), TRUE otherwise. */
+
+int
+print_page ()
+{
+ int j;
+ int lines_left_on_page;
+ COLUMN *p;
+
+ /* Used as an accumulator (with | operator) of successive values of
+ pad_vertically. The trick is to set pad_vertically
+ to zero before each run through the inner loop, then after that
+ loop, it tells us whether a line was actually printed (whether a
+ newline needs to be output -- or two for double spacing). But those
+ values have to be accumulated (in pv) so we can invoke pad_down
+ properly after the outer loop completes. */
+ int pv;
+
+ init_page ();
+
+ if (cols_ready_to_print == 0)
+ return FALSE;
+
+ if (extremities)
+ print_a_header = TRUE;
+
+ /* Don't pad unless we know a page was printed. */
+ pad_vertically = FALSE;
+ pv = FALSE;
+
+ lines_left_on_page = lines_per_body;
+ if (double_space)
+ lines_left_on_page *= 2;
+
+ while (lines_left_on_page > 0 && cols_ready_to_print > 0)
+ {
+ /* Start of an output line: nothing printed or pending yet. */
+ output_position = 0;
+ spaces_not_printed = 0;
+ separators_not_printed = 0;
+ pad_vertically = FALSE;
+
+ for (j = 1, p = column_vector; j <= columns; ++j, ++p)
+ {
+ input_position = 0;
+ if (p->lines_to_print > 0)
+ {
+ padding_not_printed = p->start_position;
+
+ /* A false return means the line was truncated; discard the
+ rest of it. */
+ if (!(p->print_func) (p))
+ read_rest_of_line (p);
+ pv |= pad_vertically;
+
+ if (use_column_separator)
+ ++separators_not_printed;
+
+ if (--p->lines_to_print <= 0 && --cols_ready_to_print <= 0)
+ break;
+ }
+ }
+
+ if (pad_vertically)
+ {
+ putchar ('\n');
+ --lines_left_on_page;
+ }
+
+ if (double_space && pv && extremities)
+ {
+ putchar ('\n');
+ --lines_left_on_page;
+ }
+ }
+
+ pad_vertically = pv;
+
+ if (pad_vertically && extremities)
+ pad_down (lines_left_on_page + lines_per_footer);
+
+ reset_status (); /* Change ON_HOLD to OPEN. */
+
+ return TRUE; /* More pages to go. */
+}
+
+/* Allocate space for storing columns.
+
+   This is necessary when printing multiple columns from a single file.
+   Lines are stored consecutively in buff, separated by '\0'.
+   (We can't use a fixed offset since with the '-s' flag lines aren't
+   truncated.)
+
+   We maintain a list (line_vector) of pointers to the beginnings
+   of lines in buff.  We allocate one more than the number of lines
+   because the last entry tells us the index of the last character,
+   which we need to know in order to print the last line in buff.
+
+   line_vector and end_vector hold int indices, so they are sized in
+   units of int.  Any previously allocated vectors and buffer are
+   freed first.  */
+
+void
+init_store_cols ()
+{
+  int total_lines = lines_per_body * columns;
+  int chars_if_truncate = total_lines * (chars_per_column + 1);
+
+  if (line_vector != (int *) 0)
+    free ((int *) line_vector);
+  line_vector = (int *) xmalloc ((total_lines + 1) * sizeof (int));
+
+  if (end_vector != (int *) 0)
+    free ((int *) end_vector);
+  end_vector = (int *) xmalloc (total_lines * sizeof (int));
+
+  if (buff != (char *) 0)
+    free (buff);
+  /* Without truncation (-s) lines can exceed the column width, so start
+     with twice the room; store_char grows buff further on demand.  */
+  buff_allocated = use_column_separator ? 2 * chars_if_truncate
+    : chars_if_truncate;	/* Tune this. */
+  buff = (char *) xmalloc (buff_allocated * sizeof (char));
+}
+
+/* Store all but the rightmost column (or every column, when
+   balancing).
+   (Used when printing a single file in multiple downward columns)
+
+   For each column
+     set p->current_line to be the index in line_vector of the
+     first line in the column
+     For each line in the column
+       store the line in buff
+       add to line_vector the index of the line's first char
+       add to end_vector the input position reached by the line
+   buff_start is the index in buff of the first character in the
+   current line.
+
+   Storing stops early once no file is ready to read.  When balancing,
+   the stored lines are then redistributed evenly over the columns.  */
+
+void
+store_columns ()
+{
+  int i, j;
+  int line = 0;
+  int buff_start;
+  int last_col;		/* The rightmost column which will be saved in buff */
+  COLUMN *p;
+
+  buff_current = 0;
+  buff_start = 0;
+
+  if (balance_columns)
+    last_col = columns;
+  else
+    last_col = columns - 1;
+
+  for (i = 1, p = column_vector; i <= last_col; ++i, ++p)
+    p->lines_stored = 0;
+
+  for (i = 1, p = column_vector; i <= last_col && files_ready_to_read;
+       ++i, ++p)
+    {
+      p->current_line = line;
+      for (j = lines_per_body; j && files_ready_to_read; --j)
+
+        if (p->status == OPEN)	/* Redundant.  Clean up. */
+          {
+            input_position = 0;
+
+            /* read_line takes the column only.  */
+            if (!read_line (p))
+              read_rest_of_line (p);
+
+            /* Count the line unless the read hit EOF or a form feed
+               without storing anything.  */
+            if (p->status == OPEN
+                || buff_start != buff_current)
+              {
+                ++p->lines_stored;
+                line_vector[line] = buff_start;
+                end_vector[line++] = input_position;
+                buff_start = buff_current;
+              }
+          }
+    }
+
+  /* Keep track of the location of the last char in buff. */
+  line_vector[line] = buff_start;
+
+  /* P now points one past the last column processed, which is one past
+     the end of column_vector when every column was filled, so it must
+     not be dereferenced here.  Balancing a completely full page gives
+     each column lines_per_body lines again, so it is safe to balance
+     unconditionally.  */
+  if (balance_columns)
+    balance (line);
+}
+
+/* Spread TOTAL_STORED stored lines as evenly as possible over all the
+   columns.  Each column gets TOTAL_STORED / columns lines, and the
+   leftmost TOTAL_STORED % columns columns get one more.  Sets each
+   column's lines_stored, and its current_line to the index of its
+   first line.  */
+
+void
+balance (total_stored)
+     int total_stored;
+{
+  COLUMN *col = column_vector;
+  int next_first = 0;
+  int share;
+  int k;
+
+  for (k = 1; k <= columns; ++k, ++col)
+    {
+      share = total_stored / columns;
+      if (k <= total_stored % columns)
+        share = share + 1;
+
+      col->lines_stored = share;
+      col->current_line = next_first;
+      next_first += share;
+    }
+}
+
+/* Append C to buff at index buff_current, first doubling the size of
+   buff if it is already full.  */
+
+void
+store_char (c)
+     int c;
+{
+  if (buff_current >= buff_allocated)
+    {
+      /* May be too generous. */
+      buff_allocated *= 2;
+      buff = (char *) xrealloc (buff, buff_allocated * sizeof (char));
+    }
+
+  buff[buff_current] = (char) c;
+  ++buff_current;
+}
+
+/* Emit the current line number for column P through P's char_func,
+   then advance line_number.
+
+   The number is right-justified in chars_per_number characters.  It is
+   followed by number_separator, unless the separator is the input tab
+   character, in which case spaces fill out the field to number_width.
+   When lines are truncated and files are not printed in parallel, the
+   number's width is charged against the column's input_position.  */
+
+void
+number (p)
+     COLUMN *p;
+{
+  char *digit;
+  int left;
+
+  sprintf (number_buff, "%*d", chars_per_number, line_number++);
+
+  for (digit = number_buff, left = chars_per_number; left > 0;
+       --left, ++digit)
+    (p->char_func) ((int) *digit);
+
+  if (number_separator != input_tab_char)
+    (p->char_func) ((int) number_separator);
+  else
+    {
+      for (left = number_width - chars_per_number; left > 0; --left)
+        (p->char_func) ((int) ' ');
+    }
+
+  if (truncate_lines && !parallel_files)
+    input_position += number_width;
+}
+
+/* Arrange for the output to be at horizontal position POSITION.
+
+   When tabifying, nothing is printed: the distance is recorded in
+   spaces_not_printed for print_white_space to handle.  Otherwise
+   spaces are printed and output_position is updated.  */
+
+void
+pad_across_to (position)
+     int position;
+{
+  int col;
+
+  if (tabify_output)
+    {
+      spaces_not_printed = position - output_position;
+      return;
+    }
+
+  for (col = output_position + 1; col <= position; ++col)
+    putchar (' ');
+  output_position = position;
+}
+
+/* Finish the page: print one form feed if the user asked for form
+   feeds, otherwise print LINES newlines.  */
+
+void
+pad_down (lines)
+     int lines;
+{
+  int left;
+
+  if (use_form_feed)
+    {
+      putchar ('\f');
+      return;
+    }
+
+  left = lines;
+  while (left != 0)
+    {
+      putchar ('\n');
+      --left;
+    }
+}
+
+/* Discard input from column P's file up to and including the next
+   newline.  Used after a line has been truncated, when its remaining
+   characters are neither printed nor stored.
+
+   A form feed puts the file on hold, and end of file closes it; either
+   one also ends the scan.  */
+
+void
+read_rest_of_line (p)
+     COLUMN *p;
+{
+  FILE *stream = p->fp;
+  int ch;
+
+  for (;;)
+    {
+      ch = getc (stream);
+
+      if (ch == '\n')
+        return;
+
+      if (ch == '\f')
+        {
+          hold_file (p);
+          return;
+        }
+
+      if (ch == EOF)
+        {
+          close_file (p);
+          return;
+        }
+    }
+}
+
+/* Print the white space that print_char and pad_across_to have been
+   postponing (spaces_not_printed columns of it).
+
+   While more than one column remains and the next tab stop does not
+   overshoot the goal, print the output tab character; cover what is
+   left with spaces.  Then record the new output_position and clear
+   spaces_not_printed.  */
+
+void
+print_white_space ()
+{
+  int pos = output_position;
+  int target = pos + spaces_not_printed;
+  int after_tab;
+
+  while (target - pos > 1)
+    {
+      after_tab = pos_after_tab (chars_per_output_tab, pos);
+      if (after_tab > target)
+        break;
+
+      putchar (output_tab_char);
+      pos = after_tab;
+    }
+
+  for (++pos; pos <= target; ++pos)
+    putchar (' ');
+
+  output_position = target;
+  spaces_not_printed = 0;
+}
+
+/* Print the column separators that have been counted up in
+   separators_not_printed, leaving that count at zero.  Called once we
+   know a line really will be printed.  */
+
+void
+print_separators ()
+{
+  while (separators_not_printed > 0)
+    {
+      print_char (column_separator);
+      --separators_not_printed;
+    }
+}
+
+/* Hand the first N characters of CLUMP, in order, to column P's
+   char_func, which either prints or stores them.  */
+
+void
+print_clump (p, n, clump)
+     COLUMN *p;
+     int n;
+     int *clump;
+{
+  for (; n != 0; --n)
+    {
+      (p->char_func) (*clump);
+      ++clump;
+    }
+}
+
+/* Print the character C.
+
+   Without tabification C is simply written out.  With tabification,
+   tabs have already been turned into spaces by the time they get
+   here: a space is only counted in spaces_not_printed, and the first
+   nonspace that follows flushes the pending white space through
+   print_white_space before being written.  output_position advances
+   for printable characters, steps back for '\b', and is unchanged for
+   other nonprintables, which are assumed to have width 0.  */
+
+void
+print_char (c)
+     int c;
+{
+  if (!tabify_output)
+    {
+      putchar (c);
+      return;
+    }
+
+  if (c == ' ')
+    {
+      ++spaces_not_printed;
+      return;
+    }
+
+  if (spaces_not_printed > 0)
+    print_white_space ();
+
+  if (ISPRINT (c))
+    ++output_position;
+  else if (c == '\b')
+    --output_position;
+
+  putchar (c);
+}
+
+/* Discard input so that printing can start at page PAGE.
+
+   For each of the PAGE - 1 pages to be skipped, throw away
+   lines_per_body lines from every column, then take files that hit a
+   form feed off hold.  Return nonzero if some file is still ready to
+   read afterwards.  */
+
+int
+skip_to_page (page)
+     int page;
+{
+  int pages_skipped, line, k;
+  COLUMN *col;
+
+  for (pages_skipped = 0; pages_skipped < page - 1; ++pages_skipped)
+    {
+      for (line = 0; line < lines_per_body; ++line)
+        {
+          col = column_vector;
+          for (k = 0; k < columns; ++k)
+            {
+              read_rest_of_line (col);
+              ++col;
+            }
+        }
+      reset_status ();
+    }
+
+  return files_ready_to_read > 0;
+}
+
+/* Print the page header: two blank lines (omitted when form feeds are
+   in use, since a form feed is assumed to take up two lines at the
+   top of the page), the left margin, the header text followed by the
+   page number, and two more blank lines.  Advances page_number,
+   clears print_a_header and leaves output_position at 0.  */
+
+void
+print_header ()
+{
+  if (!use_form_feed)
+    fputs ("\n\n", stdout);
+
+  /* Indent the header by the left margin.  */
+  output_position = 0;
+  pad_across_to (chars_per_margin);
+  print_white_space ();
+
+  printf ("%s %d\n\n\n", header, page_number);
+  ++page_number;
+
+  print_a_header = FALSE;
+  output_position = 0;
+}
+
+/* Print (or store, if p->char_func is store_char()) a line.
+
+ Read a character to determine whether we have a line or not.
+ (We may hit EOF, \n, or \f)
+
+ Once we know we have a line,
+ set pad_vertically = TRUE, meaning it's safe
+ to pad down at the end of the page, since we do have a page.
+ print a header if needed.
+ pad across to padding_not_printed if needed.
+ print any separators which need to be printed.
+ print a line number if it needs to be printed.
+
+ Print the clump which corresponds to the first character.
+
+ Enter a loop and keep printing until an end of line condition
+ exists, or until we exceed chars_per_column.
+
+ Return FALSE if we exceed chars_per_column before reading
+ an end of line character, TRUE otherwise.
+
+ A form feed puts the file on hold and end of file closes it; both
+ end the line and return TRUE. */
+
+int
+read_line (p)
+ COLUMN *p;
+{
+ register int c, chars;
+ int last_input_position;
+
+ c = getc (p->fp);
+
+ last_input_position = input_position;
+ switch (c)
+ {
+ case '\f':
+ hold_file (p);
+ return TRUE;
+ case EOF:
+ close_file (p);
+ return TRUE;
+ case '\n':
+ break; /* Empty line: chars stays unset, but we return before using it. */
+ default:
+ chars = char_to_clump (c);
+ }
+
+ /* char_to_clump advanced input_position; undo that if the character
+ does not fit in the column. */
+ if (truncate_lines && input_position > chars_per_column)
+ {
+ input_position = last_input_position;
+ return FALSE;
+ }
+
+ /* The bookkeeping below applies only when output goes straight to
+ the page, not when the line is being stored. */
+ if (p->char_func != store_char)
+ {
+ pad_vertically = TRUE;
+
+ if (print_a_header)
+ print_header ();
+
+ if (padding_not_printed != ANYWHERE)
+ {
+ pad_across_to (padding_not_printed);
+ padding_not_printed = ANYWHERE;
+ }
+
+ if (use_column_separator)
+ print_separators ();
+ }
+
+ if (p->numbered)
+ number (p);
+
+ if (c == '\n')
+ return TRUE;
+
+ print_clump (p, chars, clump_buff);
+
+ for (;;)
+ {
+ c = getc (p->fp);
+
+ switch (c)
+ {
+ case '\n':
+ return TRUE;
+ case '\f':
+ hold_file (p);
+ return TRUE;
+ case EOF:
+ close_file (p);
+ return TRUE;
+ }
+
+ last_input_position = input_position;
+ chars = char_to_clump (c);
+ if (truncate_lines && input_position > chars_per_column)
+ {
+ input_position = last_input_position;
+ return FALSE;
+ }
+
+ print_clump (p, chars, clump_buff);
+ }
+}
+
+/* Print column P's next stored line from buff and step P on to the
+   line after it.
+
+   Being called at all means there is something to print, so
+   pad_vertically is set, and the header, the padding up to the
+   column's start and any pending separators are printed first if they
+   are still outstanding.  The line's characters run from
+   line_vector[line] up to, but not including, line_vector[line + 1].
+   If no white space is left pending afterwards, output_position is
+   set from the column start and the line's recorded end position.
+
+   Always return TRUE, so the caller never calls read_rest_of_line.  */
+
+int
+print_stored (p)
+     COLUMN *p;
+{
+  int line = p->current_line++;
+  char *cp = &buff[line_vector[line]];
+  char *end = &buff[line_vector[line + 1]];
+
+  pad_vertically = TRUE;
+
+  if (print_a_header)
+    print_header ();
+
+  if (padding_not_printed != ANYWHERE)
+    {
+      pad_across_to (padding_not_printed);
+      padding_not_printed = ANYWHERE;
+    }
+
+  if (use_column_separator)
+    print_separators ();
+
+  for (; cp != end; ++cp)
+    print_char (*cp);
+
+  if (spaces_not_printed == 0)
+    output_position = p->start_position + end_vector[line];
+
+  return TRUE;
+}
+
+/* Convert a character to the proper format and return the number of
+ characters in the resulting clump. Increment input_position by
+ the width of the clump.
+
+ Tabs are converted to clumps of spaces.
+ Nonprintable characters may be converted to clumps of escape
+ sequences or control prefixes.
+
+ The clump is left in clump_buff.
+
+ Note: the width of a clump is not necessarily equal to the number of
+ characters in clump_buff. (e.g, the width of '\b' is -1, while the
+ number of characters is 1.) */
+
+int
+char_to_clump (c)
+ int c;
+{
+ register int *s = clump_buff;
+ register int i;
+ char esc_buff[4]; /* Three octal digits and a terminating null. */
+ int width;
+ int chars;
+
+ if (c == input_tab_char)
+ {
+ width = tab_width (chars_per_input_tab, input_position);
+
+ if (untabify_input)
+ {
+ for (i = width; i; --i)
+ *s++ = ' ';
+ chars = width;
+ }
+ else
+ {
+ /* Keep the tab itself; it still advances input_position by width. */
+ *s = c;
+ chars = 1;
+ }
+
+ }
+ else if (!ISPRINT (c))
+ {
+ if (use_esc_sequence)
+ {
+ /* Backslash followed by three octal digits. */
+ width = 4;
+ chars = 4;
+ *s++ = '\\';
+ sprintf (esc_buff, "%03o", c);
+ for (i = 0; i <= 2; ++i)
+ *s++ = (int) esc_buff[i];
+ }
+ else if (use_cntrl_prefix)
+ {
+ if (c < 0200)
+ {
+ /* ^X notation: flipping bit 0100 turns control-g into G. */
+ width = 2;
+ chars = 2;
+ *s++ = '^';
+ *s++ = c ^ 0100;
+ }
+ else
+ {
+ /* 0200 and above are shown as an octal escape instead. */
+ width = 4;
+ chars = 4;
+ *s++ = '\\';
+ sprintf (esc_buff, "%03o", c);
+ for (i = 0; i <= 2; ++i)
+ *s++ = (int) esc_buff[i];
+ }
+ }
+ else if (c == '\b')
+ {
+ width = -1;
+ chars = 1;
+ *s = c;
+ }
+ else
+ {
+ /* Other nonprintables pass through and take no width. */
+ width = 0;
+ chars = 1;
+ *s = c;
+ }
+ }
+ else
+ {
+ width = 1;
+ chars = 1;
+ *s = c;
+ }
+
+ input_position += width;
+ return chars;
+}
+
+/* We've just printed some files and need to clean up things before
+   looking for more options and printing the next batch of files.
+
+   Free everything we've xmalloc'ed, except `header', and reset each
+   pointer to null.  The init_* functions free any non-null buffer
+   before allocating its replacement, so a stale pointer left behind
+   here would be freed a second time when the next batch is printed.  */
+
+void
+cleanup ()
+{
+  if (number_buff)
+    {
+      free (number_buff);
+      number_buff = (char *) 0;
+    }
+  if (clump_buff)
+    {
+      free (clump_buff);
+      clump_buff = (int *) 0;
+    }
+  if (column_vector)
+    {
+      free (column_vector);
+      column_vector = NULLCOL;
+    }
+  if (line_vector)
+    {
+      free (line_vector);
+      line_vector = (int *) 0;
+    }
+  if (end_vector)
+    {
+      free (end_vector);
+      end_vector = (int *) 0;
+    }
+  if (buff)
+    {
+      free (buff);
+      buff = (char *) 0;
+    }
+}
+
+/* Complain, print a usage message, and die.
+
+ If REASON is non-null it is printed on stderr, prefixed with the
+ program name, ahead of the usage summary. Never returns: exits with
+ status 2. */
+
+void
+usage (reason)
+ char *reason;
+{
+ if (reason)
+ fprintf (stderr, "%s: %s\n", program_name, reason);
+
+ fprintf (stderr, "\
+Usage: %s [+PAGE] [-COLUMN] [-abcdfFmrtv] [-e[in-tab-char[in-tab-width]]]\n\
+ [-h header] [-i[out-tab-char[out-tab-width]]] [-l page-length]\n\
+ [-n[number-separator[digits]]] [-o left-margin]\n\
+ [-s[column-separator]] [-w page-width] [file...]\n",
+ program_name);
+ exit (2);
+}
diff --git a/src/sort.c b/src/sort.c
new file mode 100644
index 000000000..de8b937e2
--- /dev/null
+++ b/src/sort.c
@@ -0,0 +1,1746 @@
+/* sort - sort lines of text (with all kinds of options).
+ Copyright (C) 1988, 1991 Free Software Foundation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Written December 1988 by Mike Haertel.
+ The author may be reached (Email) at the address mike@ai.mit.edu,
+ or (US mail) as Mike Haertel c/o Free Software Foundation. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <sys/types.h>
+#include <signal.h>
+#include <stdio.h>
+#include "system.h"
+#ifdef _POSIX_VERSION
+#include <limits.h>
+#else
+#ifndef UCHAR_MAX
+#define UCHAR_MAX 255
+#endif
+#endif
+#ifndef STDC_HEADERS
+char *malloc ();
+char *realloc ();
+void free ();
+#endif
+
+void error ();
+static void usage ();
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define UCHAR_LIM (UCHAR_MAX + 1)
+#define UCHAR(c) ((unsigned char) (c))
+
+#ifdef isascii
+#define ISALNUM(c) (isascii(c) && isalnum(c))
+#define ISDIGIT(c) (isascii(c) && isdigit(c))
+#define ISPRINT(c) (isascii(c) && isprint(c))
+#define ISLOWER(c) (isascii(c) && islower(c))
+#else
+#define ISALNUM(c) isalnum(c)
+#define ISDIGIT(c) isdigit(c)
+#define ISPRINT(c) isprint(c)
+#define ISLOWER(c) islower(c)
+#endif
+
+/* The kind of blanks for '-b' to skip in various options. */
+enum blanktype { bl_start, bl_end, bl_both };
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Table of digits. */
+static int digits[UCHAR_LIM];
+
+/* Table of white space. */
+static int blanks[UCHAR_LIM];
+
+/* Table of non-printing characters. */
+static int nonprinting[UCHAR_LIM];
+
+/* Table of non-dictionary characters (not letters, digits, or blanks). */
+static int nondictionary[UCHAR_LIM];
+
+/* Translation table folding lower case to upper. */
+static char fold_toupper[UCHAR_LIM];
+
+/* Table mapping 3-letter month names to integers.
+ Alphabetic order allows binary search. */
+static struct month
+{
+ char *name;
+ int val;
+} monthtab[] =
+{
+ "APR", 4,
+ "AUG", 8,
+ "DEC", 12,
+ "FEB", 2,
+ "JAN", 1,
+ "JUL", 7,
+ "JUN", 6,
+ "MAR", 3,
+ "MAY", 5,
+ "NOV", 11,
+ "OCT", 10,
+ "SEP", 9
+};
+
+/* During the merge phase, the number of files to merge at once. */
+#define NMERGE 16
+
+/* Initial buffer size for in core sorting. Will not grow unless a
+ line longer than this is seen. */
+static int sortalloc = 524288;
+
+/* Initial buffer size for in core merge buffers. Bear in mind that
+ up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
+static int mergealloc = 16384;
+
+/* Guess of average line length. */
+static int linelength = 30;
+
+/* Upper bound, in bytes, on the memory used for array(s) of struct line's. */
+#define LINEALLOC 262144
+
+/* Prefix for temporary file names. */
+static char *prefix;
+
+/* Flag to reverse the order of all comparisons. */
+static int reverse;
+
+/* Flag for stable sort. This turns off the last ditch bytewise
+ comparison of lines, and instead leaves lines in the same order
+ they were read if all keys compare equal. */
+static int stable;
+
+/* Tab character separating fields. If NUL, then fields are separated
+ by the empty string between a non-whitespace character and a whitespace
+ character. */
+static char tab;
+
+/* Flag to remove consecutive duplicate lines from the output.
+   Only the first of a sequence of equal lines will be output. */
+static int unique;
+
+/* Nonzero if any of the input files are the standard input. */
+static int have_read_stdin;
+
+/* Lines are held in core as counted strings. */
+struct line
+{
+ char *text; /* Text of the line. */
+ int length; /* Length not including final newline. */
+ char *keybeg; /* Start of first key. */
+ char *keylim; /* Limit of first key. */
+};
+
+/* Arrays of lines. */
+struct lines
+{
+ struct line *lines; /* Dynamically allocated array of lines. */
+ int used; /* Number of slots used. */
+ int alloc; /* Number of slots allocated. */
+ int limit; /* Max number of slots to allocate. */
+};
+
+/* Input buffers. */
+struct buffer
+{
+ char *buf; /* Dynamically allocated buffer. */
+ int used; /* Number of bytes used. */
+ int alloc; /* Number of bytes allocated. */
+ int left; /* Number of bytes left after line parsing. */
+};
+
+/* Lists of key field comparisons to be tried. */
+static struct keyfield
+{
+ int sword; /* Zero-origin 'word' to start at. */
+ int schar; /* Additional characters to skip. */
+ int skipsblanks; /* Skip leading white space at start. */
+ int eword; /* Zero-origin first word after field. */
+ int echar; /* Additional characters in field. */
+ int skipeblanks; /* Skip trailing white space at finish. */
+ int *ignore; /* Boolean array of characters to ignore. */
+ char *translate; /* Translation applied to characters. */
+ int numeric; /* Flag for numeric comparison. */
+ int month; /* Flag for comparison by month name. */
+ int reverse; /* Reverse the sense of comparison. */
+ struct keyfield *next; /* Next keyfield to try. */
+} keyhead;
+
+/* The list of temporary files. */
+static struct tempnode
+{
+ char *name;
+ struct tempnode *next;
+} temphead;
+
+/* Unlink every file still recorded on the temporary-file list.
+   The list itself is left untouched.  */
+
+static void
+cleanup ()
+{
+  struct tempnode *t = temphead.next;
+
+  while (t)
+    {
+      unlink (t->name);
+      t = t->next;
+    }
+}
+
+/* Return a pointer to N bytes obtained from malloc.
+   If malloc returns a null pointer, report "virtual memory exhausted",
+   remove the temporary files and exit with status 2.  */
+
+char *
+xmalloc (n)
+     unsigned n;
+{
+  char *block = malloc (n);
+
+  if (!block)
+    {
+      error (0, 0, "virtual memory exhausted");
+      cleanup ();
+      exit (2);
+    }
+
+  return block;
+}
+
+/* Resize the allocated block P to N bytes and return its (possibly
+   different) address, treating failure of realloc as fatal.
+   A null P makes this equivalent to xmalloc (N).
+   A zero N with a non-null P frees P and returns a null pointer.  */
+
+char *
+xrealloc (p, n)
+     char *p;
+     unsigned n;
+{
+  char *resized;
+
+  if (!p)
+    return xmalloc (n);
+
+  if (!n)
+    {
+      free (p);
+      return 0;
+    }
+
+  resized = realloc (p, n);
+  if (!resized)
+    {
+      error (0, 0, "virtual memory exhausted");
+      cleanup ();
+      exit (2);
+    }
+
+  return resized;
+}
+
+/* Return a stream for FILE opened with mode HOW.
+   The name "-" is not opened: it yields stdin.  Whenever the result is
+   stdin, `have_read_stdin' is set.
+   If fopen fails, the error is reported with errno, the temporary
+   files are removed and the program exits with status 2.  */
+
+static FILE *
+xfopen (file, how)
+     char *file, *how;
+{
+  FILE *stream;
+
+  if (strcmp (file, "-") == 0)
+    stream = stdin;
+  else
+    stream = fopen (file, how);
+
+  if (!stream)
+    {
+      error (0, errno, "%s", file);
+      cleanup ();
+      exit (2);
+    }
+
+  if (stream == stdin)
+    have_read_stdin = 1;
+
+  return stream;
+}
+
+/* Finish with the stream FP.
+
+   stdin is never closed; its end-of-file and error indicators are
+   cleared so that it can be read again.
+   stdout is flushed and left open, and its indicators are cleared.
+   Any other stream is closed (fclose flushes pending output itself).
+
+   A failed flush of stdout or a failed fclose is reported with errno,
+   the temporary files are removed and the program exits with status 2.  */
+
+static void
+xfclose (fp)
+     FILE *fp;
+{
+  if (fp == stdin)
+    {
+      /* Allow reading stdin from tty more than once.  No fflush here:
+         flushing an input stream is undefined in ISO C.  */
+      clearerr (fp);
+    }
+  else if (fp == stdout)
+    {
+      /* Buffered output may only fail now (for example on a full disk),
+         so the result of the flush must not be ignored.  */
+      if (fflush (fp) != 0)
+        {
+          error (0, errno, "write error");
+          cleanup ();
+          exit (2);
+        }
+      clearerr (fp);
+    }
+  else if (fclose (fp) != 0)
+    {
+      error (0, errno, "error closing file");
+      cleanup ();
+      exit (2);
+    }
+}
+
+/* Write NELEM items of SIZE bytes each from BUF to FP.
+   If fwrite reports fewer than NELEM items written, report a write
+   error with errno, remove the temporary files and exit with status 2.  */
+
+static void
+xfwrite (buf, size, nelem, fp)
+     char *buf;
+     int size, nelem;
+     FILE *fp;
+{
+  if (fwrite (buf, size, nelem, fp) == nelem)
+    return;
+
+  error (0, errno, "write error");
+  cleanup ();
+  exit (2);
+}
+
+/* Return a name for a temporary file.
+
+   The name is `prefix', a '/' if `prefix' is non-empty and does not
+   already end in one, then "sort" followed by the process id and a
+   per-process sequence number, each printed with at least five digits.
+
+   The returned string is xmalloc'ed and is also recorded in a new node
+   pushed onto the front of the temporary-file list, so that cleanup
+   and zaptemp can find it; zaptemp frees both the node and the name.
+
+   NOTE(review): the names are predictable and the callers create the
+   files with a plain fopen; in a shared directory that looks open to
+   symlink attacks -- consider an exclusive-create scheme.  */
+
+static char *
+tempname ()
+{
+  static int seq;
+  int len = strlen (prefix);
+  /* "%5.5d" is a minimum width, not a maximum, so size the buffer for
+     the widest possible int: a sign plus at most three decimal digits
+     per byte.  */
+  int intlen = 3 * sizeof (int) + 1;
+  /* prefix + '/' + "sort" + two numbers + terminating NUL.  */
+  char *name = xmalloc (len + 1 + 4 + 2 * intlen + 1);
+  struct tempnode *node =
+    (struct tempnode *) xmalloc (sizeof (struct tempnode));
+
+  if (len && prefix[len - 1] != '/')
+    sprintf (name, "%s/sort%5.5d%5.5d", prefix, (int) getpid (), ++seq);
+  else
+    sprintf (name, "%ssort%5.5d%5.5d", prefix, (int) getpid (), ++seq);
+  node->name = name;
+  node->next = temphead.next;
+  temphead.next = node;
+  return name;
+}
+
+/* Look NAME up on the list of temporary files.  If an entry with that
+   name exists, unlink the file, take the entry off the list and free
+   both the stored name and the entry.  Otherwise do nothing.  */
+
+static void
+zaptemp (name)
+     char *name;
+{
+  struct tempnode *prev = &temphead;
+  struct tempnode *victim;
+
+  /* Stop with PREV just before the first entry named NAME, or at the
+     last node if there is none.  */
+  while (prev->next && strcmp (name, prev->next->name) != 0)
+    prev = prev->next;
+
+  victim = prev->next;
+  if (!victim)
+    return;
+
+  unlink (victim->name);
+  free (victim->name);
+  prev->next = victim->next;
+  free ((char *) victim);
+}
+
+/* Fill in the character class tables `blanks', `digits', `nonprinting'
+   and `nondictionary' (an entry is set to 1 when the character belongs
+   to the class) and the case-folding table `fold_toupper', for every
+   character code below UCHAR_LIM.  */
+
+static void
+inittables ()
+{
+  int c;
+
+  for (c = 0; c < UCHAR_LIM; ++c)
+    {
+      if (isblank (c))
+        blanks[c] = 1;
+
+      if (ISDIGIT (c))
+        digits[c] = 1;
+
+      if (!ISPRINT (c))
+        nonprinting[c] = 1;
+
+      /* Dictionary characters are the alphanumerics and the blanks.  */
+      if (!(ISALNUM (c) || isblank (c)))
+        nondictionary[c] = 1;
+
+      fold_toupper[c] = ISLOWER (c) ? toupper (c) : c;
+    }
+}
+
+/* Set BUF up as an empty buffer backed by ALLOC freshly allocated
+   bytes: nothing used and nothing left over.  */
+
+static void
+initbuf (buf, alloc)
+     struct buffer *buf;
+     int alloc;
+{
+  buf->alloc = alloc;
+  buf->buf = xmalloc (buf->alloc);
+  buf->left = 0;
+  buf->used = 0;
+}
+
+/* Refill BUF from FP.  The BUF->left bytes that were not consumed by
+   line parsing are first moved from the end of the used region to the
+   start of BUF->buf.  Data is then read, doubling the buffer whenever
+   it is full, until the buffer contains a newline or FP is at EOF.
+   If the data ends at EOF without a final newline, one is supplied.
+   A read error is fatal.  Return the number of bytes now buffered.  */
+
+static int
+fillbuf (buf, fp)
+     struct buffer *buf;
+     FILE *fp;
+{
+  int nread;
+
+  bcopy (buf->buf + buf->used - buf->left, buf->buf, buf->left);
+  buf->used = buf->left;
+
+  for (;;)
+    {
+      if (feof (fp))
+        break;
+      /* Done as soon as at least one complete line is buffered.  */
+      if (buf->used != 0 && memchr (buf->buf, '\n', buf->used))
+        break;
+
+      if (buf->used == buf->alloc)
+        {
+          buf->alloc *= 2;
+          buf->buf = xrealloc (buf->buf, buf->alloc);
+        }
+
+      nread = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
+      if (ferror (fp))
+        {
+          error (0, errno, "read error");
+          cleanup ();
+          exit (2);
+        }
+      buf->used += nread;
+    }
+
+  if (feof (fp) && buf->used && buf->buf[buf->used - 1] != '\n')
+    {
+      /* Make room for, and add, the missing final newline.  */
+      if (buf->used == buf->alloc)
+        {
+          buf->alloc *= 2;
+          buf->buf = xrealloc (buf->buf, buf->alloc);
+        }
+      buf->buf[buf->used++] = '\n';
+    }
+
+  return buf->used;
+}
+
+/* Set LINES up as an empty table with room for ALLOC lines.
+   LIMIT is recorded as the largest number of lines the table is ever
+   allowed to hold.  */
+
+static void
+initlines (lines, alloc, limit)
+     struct lines *lines;
+     int alloc;
+     int limit;
+{
+  lines->alloc = alloc;
+  lines->limit = limit;
+  lines->used = 0;
+  lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
+}
+
+/* Return a pointer to the first character of the field that KEY
+   selects in LINE.  The result never points past the end of the line.
+
+   KEY->sword fields are skipped first.  With a `tab' separator a field
+   ends just after the next separator; otherwise a field is a run of
+   blanks followed by a run of non-blanks.  Leading blanks are then
+   skipped if KEY->skipsblanks is set, and finally KEY->schar further
+   characters are skipped.  */
+
+static char *
+begfield (line, key)
+     struct line *line;
+     struct keyfield *key;
+{
+  register char *p = line->text;
+  register char *end = p + line->length;
+  register int nwords = key->sword;
+  register int nchars = key->schar;
+
+  for (; p < end && nwords; --nwords)
+    {
+      if (tab)
+        {
+          while (p < end && *p != tab)
+            ++p;
+          if (p < end)
+            ++p;
+        }
+      else
+        {
+          while (p < end && blanks[UCHAR (*p)])
+            ++p;
+          while (p < end && !blanks[UCHAR (*p)])
+            ++p;
+        }
+    }
+
+  if (key->skipsblanks)
+    {
+      while (p < end && blanks[UCHAR (*p)])
+        ++p;
+    }
+
+  for (; p < end && nchars; --nchars)
+    ++p;
+
+  return p;
+}
+
+/* Return the limit of the field that KEY selects in LINE, that is, a
+   pointer to the first character after it.  The result never points
+   past the end of the line.
+
+   KEY->eword fields are skipped first.  With a `tab' separator, the
+   separator that ends a field is itself stepped over only if more
+   fields remain to be skipped or KEY->skipeblanks is set.  Blanks are
+   then skipped if KEY->skipeblanks is set, and finally KEY->echar
+   further characters are included.  */
+
+static char *
+limfield (line, key)
+     struct line *line;
+     struct keyfield *key;
+{
+  register char *p = line->text;
+  register char *end = p + line->length;
+  register int nwords = key->eword;
+  register int nchars = key->echar;
+
+  if (tab)
+    {
+      while (p < end && nwords != 0)
+        {
+          /* From here on NWORDS counts the fields still to skip after
+             this one.  */
+          --nwords;
+          while (p < end && *p != tab)
+            ++p;
+          if (p < end && (nwords || key->skipeblanks))
+            ++p;
+        }
+    }
+  else
+    {
+      for (; p < end && nwords; --nwords)
+        {
+          while (p < end && blanks[UCHAR (*p)])
+            ++p;
+          while (p < end && !blanks[UCHAR (*p)])
+            ++p;
+        }
+    }
+
+  if (key->skipeblanks)
+    {
+      while (p < end && blanks[UCHAR (*p)])
+        ++p;
+    }
+
+  for (; p < end && nchars; --nchars)
+    ++p;
+
+  return p;
+}
+
+/* Split the data in BUF into lines and record them in LINES, growing
+   the table as needed but never beyond LINES->limit entries.  Each
+   newline found is overwritten with a NUL.  When a key list exists,
+   the start and limit of the first key are computed and stored with
+   every line.  On return BUF->left is the number of bytes at the end
+   of the buffer that were not turned into lines.  */
+
+static void
+findlines (buf, lines)
+     struct buffer *buf;
+     struct lines *lines;
+{
+  register char *start = buf->buf;
+  register char *end = buf->buf + buf->used;
+  register char *nl;
+  struct keyfield *key = keyhead.next;
+  struct line *cur;
+
+  lines->used = 0;
+
+  while (start < end
+         && (nl = memchr (start, '\n', end - start))
+         && lines->used < lines->limit)
+    {
+      /* Other code relies on in-core lines ending in a NUL; NULs
+         inside a line are harmless.  */
+      *nl = '\0';
+
+      if (lines->used == lines->alloc)
+        {
+          lines->alloc *= 2;
+          lines->lines = (struct line *)
+            xrealloc ((char *) lines->lines,
+                      lines->alloc * sizeof (struct line));
+        }
+
+      cur = &lines->lines[lines->used];
+      cur->text = start;
+      cur->length = nl - start;
+
+      /* Locate the first key once here rather than on every compare.  */
+      if (key)
+        {
+          if (key->eword >= 0)
+            cur->keylim = limfield (cur, key);
+          else
+            cur->keylim = nl;
+
+          if (key->sword >= 0)
+            cur->keybeg = begfield (cur, key);
+          else
+            {
+              if (key->skipsblanks)
+                while (blanks[UCHAR (*start)])
+                  ++start;
+              cur->keybeg = start;
+            }
+        }
+
+      ++lines->used;
+      start = nl + 1;
+    }
+
+  buf->left = end - start;
+}
+
+/* Compare strings A and B containing decimal fractions < 1.  Each string
+   should begin with a decimal point followed immediately by the digits
+   of the fraction.  Strings not of this form are considered to be zero.
+
+   Return a negative, zero or positive value according to whether the
+   fraction in A is less than, equal to or greater than the one in B.
+   Trailing zeros do not affect the result.  */
+
+static int
+fraccompare (a, b)
+     register char *a, *b;
+{
+  register int tmpa = UCHAR (*a), tmpb = UCHAR (*b);
+
+  if (tmpa == '.' && tmpb == '.')
+    {
+      /* Skip the digits the two fractions have in common.  */
+      do
+        tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
+      while (tmpa == tmpb && digits[tmpa]);
+      if (digits[tmpa] && digits[tmpb])
+        return tmpa - tmpb;
+      if (digits[tmpa])
+        {
+          /* B has run out of digits: A is greater only if one of its
+             remaining digits is nonzero.  */
+          while (tmpa == '0')
+            tmpa = UCHAR (*++a);
+          if (digits[tmpa])
+            return 1;
+          return 0;
+        }
+      if (digits[tmpb])
+        {
+          while (tmpb == '0')
+            tmpb = UCHAR (*++b);
+          if (digits[tmpb])
+            return -1;
+          return 0;
+        }
+      return 0;
+    }
+  else if (tmpa == '.')
+    {
+      /* Only A is a fraction; B counts as zero.  */
+      do
+        tmpa = UCHAR (*++a);
+      while (tmpa == '0');
+      if (digits[tmpa])
+        return 1;
+      return 0;
+    }
+  else if (tmpb == '.')
+    {
+      /* Only B is a fraction; A counts as zero.  */
+      do
+        tmpb = UCHAR (*++b);
+      while (tmpb == '0');
+      if (digits[tmpb])
+        return -1;
+      return 0;
+    }
+  return 0;
+}
+
+/* Compare strings A and B as numbers without explicitly converting them to
+   machine numbers.  Comparatively slow for short strings, but asymptotically
+   hideously fast.
+
+   A number is an optional '-', digits, and an optional '.' followed by
+   digits; both strings must be NUL-terminated.  A string with no
+   leading number counts as zero.  Return a negative, zero or positive
+   value according to whether A is less than, equal to or greater
+   than B.  */
+
+static int
+numcompare (a, b)
+     register char *a, *b;
+{
+  register int tmpa, tmpb, loga, logb, tmp;
+
+  tmpa = UCHAR (*a), tmpb = UCHAR (*b);
+
+  if (tmpa == '-')
+    {
+      tmpa = UCHAR (*++a);
+      if (tmpb != '-')
+        {
+          /* A is negative or zero and B is positive or zero, so A is
+             less than B unless both are zero.  Look for a nonzero
+             leading digit, before or after the decimal point, in
+             either number.  */
+          while (tmpa == '0')
+            tmpa = UCHAR (*++a);
+          if (tmpa == '.')
+            do
+              tmpa = UCHAR (*++a);
+            while (tmpa == '0');
+          if (digits[tmpa])
+            return -1;
+          while (tmpb == '0')
+            tmpb = UCHAR (*++b);
+          if (tmpb == '.')
+            do
+              tmpb = UCHAR (*++b);
+            while (tmpb == '0');
+          if (digits[tmpb])
+            return -1;
+          return 0;
+        }
+      tmpb = UCHAR (*++b);
+
+      /* Both negative: compare magnitudes with the sense inverted.  */
+      while (tmpa == '0')
+        tmpa = UCHAR (*++a);
+      while (tmpb == '0')
+        tmpb = UCHAR (*++b);
+
+      while (tmpa == tmpb && digits[tmpa])
+        tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
+
+      if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
+        return -fraccompare (a, b);
+
+      /* Count the remaining integer digits; the longer run is the
+         larger magnitude.  */
+      if (digits[tmpa])
+        for (loga = 1; digits[UCHAR (*++a)]; ++loga)
+          ;
+      else
+        loga = 0;
+
+      if (digits[tmpb])
+        for (logb = 1; digits[UCHAR (*++b)]; ++logb)
+          ;
+      else
+        logb = 0;
+
+      if ((tmp = logb - loga) != 0)
+        return tmp;
+
+      if (!loga)
+        return 0;
+
+      return tmpb - tmpa;
+    }
+  else if (tmpb == '-')
+    {
+      /* A is positive or zero and B is negative or zero, so A is
+         greater than B unless both are zero.  */
+      tmpb = UCHAR (*++b);
+      while (tmpb == '0')
+        tmpb = UCHAR (*++b);
+      if (tmpb == '.')
+        do
+          tmpb = UCHAR (*++b);
+        while (tmpb == '0');
+      if (digits[tmpb])
+        return 1;
+      while (tmpa == '0')
+        tmpa = UCHAR (*++a);
+      if (tmpa == '.')
+        do
+          tmpa = UCHAR (*++a);
+        while (tmpa == '0');
+      if (digits[tmpa])
+        return 1;
+      return 0;
+    }
+  else
+    {
+      while (tmpa == '0')
+        tmpa = UCHAR (*++a);
+      while (tmpb == '0')
+        tmpb = UCHAR (*++b);
+
+      while (tmpa == tmpb && digits[tmpa])
+        tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
+
+      if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
+        return fraccompare (a, b);
+
+      if (digits[tmpa])
+        for (loga = 1; digits[UCHAR (*++a)]; ++loga)
+          ;
+      else
+        loga = 0;
+
+      if (digits[tmpb])
+        for (logb = 1; digits[UCHAR (*++b)]; ++logb)
+          ;
+      else
+        logb = 0;
+
+      if ((tmp = loga - logb) != 0)
+        return tmp;
+
+      if (!loga)
+        return 0;
+
+      return tmpa - tmpb;
+    }
+}
+
+/* Return the number (1 through 12) of the month whose three-letter
+   name starts the LEN characters at S, compared without regard to
+   case.  Return 0 if LEN is less than 3 or the name is not in
+   `monthtab'.  */
+
+static int
+getmonth (s, len)
+     char *s;
+     int len;
+{
+  char upper[4];
+  register int k, low, high, mid;
+
+  if (len < 3)
+    return 0;
+
+  for (k = 0; k < 3; ++k)
+    upper[k] = fold_toupper[UCHAR (s[k])];
+  upper[3] = '\0';
+
+  /* Binary search; `monthtab' is kept in alphabetical order.  */
+  low = 0;
+  high = 12;
+  while (high - low > 1)
+    {
+      mid = (low + high) / 2;
+      if (strcmp (upper, monthtab[mid].name) < 0)
+        high = mid;
+      else
+        low = mid;
+    }
+
+  if (strcmp (upper, monthtab[low].name) == 0)
+    return monthtab[low].val;
+  return 0;
+}
+
+/* Compare two lines A and B trying every key in sequence until there
+ are no more keys or a difference is found.
+
+ Return the first nonzero difference, negated when that key's own
+ `reverse' flag is set, or 0 if the lines are equal under every key.
+ For the first key the positions stored in the lines (keybeg, keylim)
+ are used when both lines have a non-null keybeg; otherwise, and for
+ every later key, the field is located with begfield and limfield. */
+
+static int
+keycompare (a, b)
+ struct line *a, *b;
+{
+ register char *texta, *textb, *lima, *limb, *translate;
+ register int *ignore;
+ struct keyfield *key;
+ int diff = 0, iter = 0, lena, lenb;
+
+ for (key = keyhead.next; key; key = key->next, ++iter)
+ {
+ ignore = key->ignore;
+ translate = key->translate;
+
+ /* Find the beginning and limit of each field. */
+ if (iter || a->keybeg == NULL || b->keybeg == NULL)
+ {
+ if (key->eword >= 0)
+ lima = limfield (a, key), limb = limfield (b, key);
+ else
+ lima = a->text + a->length, limb = b->text + b->length;
+
+ if (key->sword >= 0)
+ texta = begfield (a, key), textb = begfield (b, key);
+ else
+ {
+ texta = a->text, textb = b->text;
+ if (key->skipsblanks)
+ {
+ while (texta < lima && blanks[UCHAR (*texta)])
+ ++texta;
+ while (textb < limb && blanks[UCHAR (*textb)])
+ ++textb;
+ }
+ }
+ }
+ else
+ {
+ /* For the first iteration only, the key positions have
+ been precomputed for us. */
+ texta = a->keybeg, lima = a->keylim;
+ textb = b->keybeg, limb = b->keylim;
+ }
+
+ /* Find the lengths. */
+ /* A field whose start lies beyond its limit counts as empty. */
+ lena = lima - texta, lenb = limb - textb;
+ if (lena < 0)
+ lena = 0;
+ if (lenb < 0)
+ lenb = 0;
+
+ /* Actually compare the fields. */
+ if (key->numeric)
+ {
+ /* numcompare reads up to a NUL, so a NUL is planted at each
+ field limit for the duration of the call and the original
+ characters are put back afterwards. */
+ if (*lima || *limb)
+ {
+ char savea = *lima, saveb = *limb;
+
+ *lima = *limb = '\0';
+ diff = numcompare (texta, textb);
+ *lima = savea, *limb = saveb;
+ }
+ else
+ diff = numcompare (texta, textb);
+
+ if (diff)
+ return key->reverse ? -diff : diff;
+ continue;
+ }
+ else if (key->month)
+ {
+ diff = getmonth (texta, lena) - getmonth (textb, lenb);
+ if (diff)
+ return key->reverse ? -diff : diff;
+ continue;
+ }
+ /* The remaining cases compare character by character, skipping
+ characters marked in IGNORE and mapping through TRANSLATE when
+ those tables are present.
+ NOTE(review): when the loops below end without a difference, the
+ result falls through to `lena - lenb', and those lengths still
+ count ignored characters -- confirm that is intended. */
+ else if (ignore && translate)
+ while (texta < lima && textb < limb)
+ {
+ while (texta < lima && ignore[UCHAR (*texta)])
+ ++texta;
+ while (textb < limb && ignore[UCHAR (*textb)])
+ ++textb;
+ if (texta < lima && textb < limb &&
+ translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
+ {
+ diff = translate[UCHAR (*--texta)] - translate[UCHAR (*--textb)];
+ break;
+ }
+ }
+ else if (ignore)
+ while (texta < lima && textb < limb)
+ {
+ while (texta < lima && ignore[UCHAR (*texta)])
+ ++texta;
+ while (textb < limb && ignore[UCHAR (*textb)])
+ ++textb;
+ if (texta < lima && textb < limb && *texta++ != *textb++)
+ {
+ /* NOTE(review): this subtracts plain chars, whereas the
+ memcmp case below orders bytes as unsigned -- the two
+ may disagree where char is signed; confirm. */
+ diff = *--texta - *--textb;
+ break;
+ }
+ }
+ else if (translate)
+ while (texta < lima && textb < limb)
+ {
+ if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
+ {
+ diff = translate[UCHAR (*--texta)] - translate[UCHAR (*--textb)];
+ break;
+ }
+ }
+ else
+ diff = memcmp (texta, textb, min (lena, lenb));
+
+ if (diff)
+ return key->reverse ? -diff : diff;
+ /* Equal over the compared part: the shorter field sorts first. */
+ if (diff = lena - lenb)
+ return key->reverse ? -diff : diff;
+ }
+
+ return 0;
+}
+
+/* Compare two lines A and B, returning negative, zero, or positive
+   depending on whether A compares less than, equal to, or greater than B.
+
+   When a key list exists, keycompare decides and a nonzero result is
+   returned as is (each key applies its own `reverse' flag).  If the
+   keys tie, then unless `unique' or `stable' is set the whole lines
+   are compared bytewise as a last resort.  When there are no keys the
+   whole lines are compared bytewise, a proper prefix sorting first.
+   Results not returned directly by keycompare are negated when the
+   global `reverse' flag is set.  */
+
+static int
+compare (a, b)
+     register struct line *a, *b;
+{
+  int diff, tmpa, tmpb, mini;
+
+  if (keyhead.next)
+    {
+      diff = keycompare (a, b);
+      if (diff)
+        return diff;
+      if (!unique && !stable)
+        {
+          tmpa = a->length, tmpb = b->length;
+          diff = memcmp (a->text, b->text, min (tmpa, tmpb));
+          if (!diff)
+            diff = tmpa - tmpb;
+        }
+    }
+  else
+    {
+      tmpa = a->length, tmpb = b->length;
+      mini = min (tmpa, tmpb);
+      if (mini == 0)
+        diff = tmpa - tmpb;
+      else
+        {
+          char *ap = a->text, *bp = b->text;
+
+          /* Fast path on the first byte.  It must order bytes the way
+             memcmp does (as unsigned char), or lines containing bytes
+             with the high bit set would be ordered inconsistently
+             where plain char is signed.  */
+          diff = UCHAR (*ap) - UCHAR (*bp);
+          if (diff == 0)
+            {
+              diff = memcmp (ap, bp, mini);
+              if (diff == 0)
+                diff = tmpa - tmpb;
+            }
+        }
+    }
+
+  return reverse ? -diff : diff;
+}
+
+/* Check that the lines read from the given FP come in order.  Return
+   1 if they do and 0 if there is a disorder.  When `unique' is set,
+   two adjacent lines that compare equal also count as a disorder.
+   FP is handed to xfclose before returning.  */
+
+static int
+checkfp (fp)
+     FILE *fp;
+{
+  struct buffer buf;            /* Input buffer. */
+  struct lines lines;           /* Lines scanned from the buffer. */
+  struct line temp;             /* Copy of previous line. */
+  struct line *last;            /* Last line of the current buffer. */
+  int cc;                       /* Character count. */
+  int cmp;                      /* Result of calling compare. */
+  int alloc, i, success = 1;
+
+  initbuf (&buf, mergealloc);
+  initlines (&lines, mergealloc / linelength + 1,
+             LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
+  alloc = linelength;
+  temp.text = xmalloc (alloc);
+
+  cc = fillbuf (&buf, fp);
+  findlines (&buf, &lines);
+
+  if (cc)
+    do
+      {
+        /* Compare each line in the buffer with its successor. */
+        for (i = 0; i < lines.used - 1; ++i)
+          {
+            cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
+            if ((unique && cmp >= 0) || (cmp > 0))
+              {
+                success = 0;
+                goto finish;
+              }
+          }
+
+        /* Save the last line of the buffer and refill the buffer.
+           The copy includes the terminating NUL, so length + 1 bytes
+           are needed.  */
+        last = &lines.lines[lines.used - 1];
+        if (last->length + 1 > alloc)
+          {
+            while (last->length + 1 > alloc)
+              alloc *= 2;
+            temp.text = xrealloc (temp.text, alloc);
+          }
+        bcopy (last->text, temp.text, last->length + 1);
+        temp.length = last->length;
+        /* The copy has no precomputed key positions; a null keybeg
+           makes keycompare locate the first key itself.  */
+        temp.keybeg = temp.keylim = NULL;
+
+        cc = fillbuf (&buf, fp);
+        if (cc)
+          {
+            findlines (&buf, &lines);
+            /* Make sure the line saved from the old buffer contents is
+               less than or equal to the first line of the new buffer. */
+            cmp = compare (&temp, &lines.lines[0]);
+            if ((unique && cmp >= 0) || (cmp > 0))
+              {
+                success = 0;
+                break;
+              }
+          }
+      }
+    while (cc);
+
+finish:
+  xfclose (fp);
+  free (buf.buf);
+  free ((char *) lines.lines);
+  free (temp.text);
+  return success;
+}
+
+/* Merge lines from FPS onto OFP.  NFPS cannot be greater than NMERGE.
+   Close FPS before returning.
+
+   Each input is expected to be sorted already.  Lines that compare
+   equal are output in the order of their input files.  When `unique'
+   is set, only the first line of every run of equal lines is output.
+   The FPS array is modified: entries are shifted down as inputs are
+   exhausted.  */
+
+static void
+mergefps (fps, nfps, ofp)
+     FILE *fps[], *ofp;
+     register int nfps;
+{
+  struct buffer buffer[NMERGE]; /* Input buffers for each file. */
+  struct lines lines[NMERGE];   /* Line tables for each buffer. */
+  struct line saved;            /* Saved line for unique check. */
+  int savedflag = 0;            /* True if there is a saved line. */
+  int savealloc;                /* Size allocated for the saved line. */
+  int cur[NMERGE];              /* Current line in each line table. */
+  int ord[NMERGE];              /* Table representing a permutation of fps,
+                                   such that lines[ord[0]].lines[cur[ord[0]]]
+                                   is the smallest line and will be next
+                                   output. */
+  register int i, j, t;
+
+  /* Allocate space for a saved line if necessary. */
+  if (unique)
+    {
+      savealloc = linelength;
+      saved.text = xmalloc (savealloc);
+    }
+
+  /* Read initial lines from each input file. */
+  for (i = 0; i < nfps; ++i)
+    {
+      initbuf (&buffer[i], mergealloc);
+      /* If a file is empty, eliminate it from future consideration. */
+      while (i < nfps && !fillbuf (&buffer[i], fps[i]))
+        {
+          xfclose (fps[i]);
+          --nfps;
+          for (j = i; j < nfps; ++j)
+            fps[j] = fps[j + 1];
+        }
+      if (i == nfps)
+        free (buffer[i].buf);
+      else
+        {
+          initlines (&lines[i], mergealloc / linelength + 1,
+                     LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
+          findlines (&buffer[i], &lines[i]);
+          cur[i] = 0;
+        }
+    }
+
+  /* Set up the ord table according to comparisons among input lines.
+     Since this only reorders two items if one is strictly greater than
+     the other, it is stable. */
+  for (i = 0; i < nfps; ++i)
+    ord[i] = i;
+  for (i = 1; i < nfps; ++i)
+    if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
+                 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
+      t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
+
+  /* Repeatedly output the smallest line until no input remains. */
+  while (nfps)
+    {
+      /* If uniqified output is turned on, output only the first of
+         an identical series of lines. */
+      if (unique)
+        {
+          if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
+            {
+              xfwrite (saved.text, 1, saved.length, ofp);
+              putc ('\n', ofp);
+              savedflag = 0;
+            }
+          if (!savedflag)
+            {
+              if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
+                {
+                  while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
+                    savealloc *= 2;
+                  saved.text = xrealloc (saved.text, savealloc);
+                }
+              saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
+              bcopy (lines[ord[0]].lines[cur[ord[0]]].text, saved.text,
+                     saved.length + 1);
+              /* The copy has no precomputed key positions; a null
+                 keybeg makes keycompare locate the first key itself.  */
+              saved.keybeg = saved.keylim = NULL;
+              savedflag = 1;
+            }
+        }
+      else
+        {
+          xfwrite (lines[ord[0]].lines[cur[ord[0]]].text, 1,
+                   lines[ord[0]].lines[cur[ord[0]]].length, ofp);
+          putc ('\n', ofp);
+        }
+
+      /* Check if we need to read more lines into core. */
+      if (++cur[ord[0]] == lines[ord[0]].used)
+        {
+          if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
+            {
+              findlines (&buffer[ord[0]], &lines[ord[0]]);
+              cur[ord[0]] = 0;
+            }
+          else
+            {
+              /* We reached EOF on fps[ord[0]].  Renumber the files
+                 above it, drop it from every per-file table, and drop
+                 its entry from the front of ord. */
+              for (i = 1; i < nfps; ++i)
+                if (ord[i] > ord[0])
+                  --ord[i];
+              --nfps;
+              xfclose (fps[ord[0]]);
+              free (buffer[ord[0]].buf);
+              free ((char *) lines[ord[0]].lines);
+              for (i = ord[0]; i < nfps; ++i)
+                {
+                  fps[i] = fps[i + 1];
+                  buffer[i] = buffer[i + 1];
+                  lines[i] = lines[i + 1];
+                  cur[i] = cur[i + 1];
+                }
+              for (i = 0; i < nfps; ++i)
+                ord[i] = ord[i + 1];
+              continue;
+            }
+        }
+
+      /* The new line just read in may be larger than other lines
+         already in core; push it back in the queue until we encounter
+         a line larger than it.  Ties are broken by file index. */
+      for (i = 1; i < nfps; ++i)
+        {
+          t = compare (&lines[ord[0]].lines[cur[ord[0]]],
+                       &lines[ord[i]].lines[cur[ord[i]]]);
+          if (!t)
+            t = ord[0] - ord[i];
+          if (t < 0)
+            break;
+        }
+      t = ord[0];
+      for (j = 1; j < i; ++j)
+        ord[j - 1] = ord[j];
+      ord[i - 1] = t;
+    }
+
+  if (unique)
+    {
+      /* Flush the last pending line, then release the save buffer
+         whether or not a line was ever saved.  */
+      if (savedflag)
+        {
+          xfwrite (saved.text, 1, saved.length, ofp);
+          putc ('\n', ofp);
+        }
+      free (saved.text);
+    }
+}
+
+/* Sort the array LINES with NLINES members, using TEMP for temporary space.
+
+ This is a recursive merge sort.  Elements that compare equal keep
+ their original relative order, because the merge takes from the low
+ half whenever compare returns <= 0. */
+
+static void
+sortlines (lines, nlines, temp)
+ struct line *lines, *temp;
+ int nlines;
+{
+ register struct line *lo, *hi, *t;
+ register int nlo, nhi;
+
+ /* Two elements: at most one swap is needed. */
+ if (nlines == 2)
+ {
+ if (compare (&lines[0], &lines[1]) > 0)
+ *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
+ return;
+ }
+
+ /* Split into a low half of nlines / 2 elements and a high half
+ holding the rest. */
+ nlo = nlines / 2;
+ lo = lines;
+ nhi = nlines - nlo;
+ hi = lines + nlo;
+
+ if (nlo > 1)
+ sortlines (lo, nlo, temp);
+
+ if (nhi > 1)
+ sortlines (hi, nhi, temp);
+
+ t = temp;
+
+ /* Merge into TEMP until one of the halves is used up. */
+ while (nlo && nhi)
+ if (compare (lo, hi) <= 0)
+ *t++ = *lo++, --nlo;
+ else
+ *t++ = *hi++, --nhi;
+ /* Append whatever is left of the low half. */
+ while (nlo--)
+ *t++ = *lo++;
+
+ /* Copy the merged part back.  Any NHI elements still unmerged are
+ already in their final places at the end of LINES, so only
+ nlines - nhi elements are copied. */
+ for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
+ *lo++ = *t++;
+}
+
+/* Run checkfp over each of the NFILES FILES in turn.  For every file
+   found to be out of order, print a "disorder" message on standard
+   output.  Return the number of such files.  */
+
+static int
+check (files, nfiles)
+     char *files[];
+     int nfiles;
+{
+  int k;
+  int bad = 0;
+
+  for (k = 0; k < nfiles; ++k)
+    {
+      FILE *in = xfopen (files[k], "r");
+
+      if (checkfp (in))
+        continue;
+
+      printf ("%s: disorder on %s\n", program_name, files[k]);
+      ++bad;
+    }
+
+  return bad;
+}
+
+/* Merge NFILES FILES onto OFP.
+
+   At most NMERGE files can be merged at once.  While there are more,
+   they are merged in groups of NMERGE (plus one smaller group for any
+   remainder) into new temporary files, whose names replace the
+   originals at the start of FILES.  Each input is passed to zaptemp
+   once merged, which removes it if it is one of our temporary files.
+   The last pass writes to OFP.  */
+
+static void
+merge (files, nfiles, ofp)
+     char *files[];
+     int nfiles;
+     FILE *ofp;
+{
+  int i, j, t;
+  char *temp;
+  FILE *fps[NMERGE], *tfp;
+
+  while (nfiles > NMERGE)
+    {
+      int rest = nfiles % NMERGE;       /* Files beyond the full groups. */
+
+      t = 0;
+      for (i = 0; i < nfiles / NMERGE; ++i)
+        {
+          for (j = 0; j < NMERGE; ++j)
+            fps[j] = xfopen (files[i * NMERGE + j], "r");
+          tfp = xfopen (temp = tempname (), "w");
+          mergefps (fps, NMERGE, tfp);
+          xfclose (tfp);
+          for (j = 0; j < NMERGE; ++j)
+            zaptemp (files[i * NMERGE + j]);
+          files[t++] = temp;
+        }
+      /* Merge the leftover files, if any.  When the count is an exact
+         multiple of NMERGE there is nothing to do, and no empty
+         temporary file is created for the next pass to read.  */
+      if (rest)
+        {
+          for (j = 0; j < rest; ++j)
+            fps[j] = xfopen (files[i * NMERGE + j], "r");
+          tfp = xfopen (temp = tempname (), "w");
+          mergefps (fps, rest, tfp);
+          xfclose (tfp);
+          for (j = 0; j < rest; ++j)
+            zaptemp (files[i * NMERGE + j]);
+          files[t++] = temp;
+        }
+      nfiles = t;
+    }
+
+  for (i = 0; i < nfiles; ++i)
+    fps[i] = xfopen (files[i], "r");
+  mergefps (fps, i, ofp);
+  for (i = 0; i < nfiles; ++i)
+    zaptemp (files[i]);
+}
+
+/* Sort NFILES FILES onto OFP.
+
+ The input is read one buffer-load at a time.  Each load is sorted in
+ core and written out: straight to OFP if it turns out to be the only
+ load, otherwise to a new temporary file.  If any temporary files were
+ written they are finally merged onto OFP. */
+
+static void
+sort (files, nfiles, ofp)
+ char **files;
+ int nfiles;
+ FILE *ofp;
+{
+ struct buffer buf;
+ struct lines lines;
+ struct line *tmp;
+ int i, ntmp;
+ FILE *fp, *tfp;
+ struct tempnode *node;
+ int ntemp = 0;
+ char **tempfiles;
+
+ initbuf (&buf, sortalloc);
+ initlines (&lines, sortalloc / linelength + 1,
+ LINEALLOC / sizeof (struct line));
+ /* TMP is the scratch array for sortlines; it is kept at least as
+ large as the number of lines in use. */
+ ntmp = lines.alloc;
+ tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
+
+ while (nfiles--)
+ {
+ fp = xfopen (*files++, "r");
+ while (fillbuf (&buf, fp))
+ {
+ findlines (&buf, &lines);
+ if (lines.used > ntmp)
+ {
+ while (lines.used > ntmp)
+ ntmp *= 2;
+ tmp = (struct line *)
+ xrealloc ((char *) tmp, ntmp * sizeof (struct line));
+ }
+ sortlines (lines.lines, lines.used, tmp);
+ /* Write directly to OFP only when this load is all there is:
+ the current file is at EOF, no files remain, no temporary
+ file has been written and nothing is left in the buffer. */
+ if (feof (fp) && !nfiles && !ntemp && !buf.left)
+ tfp = ofp;
+ else
+ {
+ ++ntemp;
+ tfp = xfopen (tempname (), "w");
+ }
+ /* With `unique' set, skip a line that compares equal to the
+ one before it. */
+ for (i = 0; i < lines.used; ++i)
+ if (!unique || i == 0
+ || compare (&lines.lines[i], &lines.lines[i - 1]))
+ {
+ xfwrite (lines.lines[i].text, 1, lines.lines[i].length, tfp);
+ putc ('\n', tfp);
+ }
+ if (tfp != ofp)
+ xfclose (tfp);
+ }
+ xfclose (fp);
+ }
+
+ free (buf.buf);
+ free ((char *) lines.lines);
+ free ((char *) tmp);
+
+ if (ntemp)
+ {
+ tempfiles = (char **) xmalloc (ntemp * sizeof (char *));
+ /* The temporary-file list has the newest file first; filling the
+ array from the back puts the names in order of creation. */
+ i = ntemp;
+ for (node = temphead.next; node; node = node->next)
+ tempfiles[--i] = node->name;
+ merge (tempfiles, ntemp, ofp);
+ free ((char *) tempfiles);
+ }
+}
+
+/* Append KEY to the key list headed by `keyhead', making it the new
+   last element.  */
+
+static void
+insertkey (key)
+     struct keyfield *key;
+{
+  struct keyfield *tail;
+
+  /* Walk to the current last node (possibly `keyhead' itself).  */
+  for (tail = &keyhead; tail->next; tail = tail->next)
+    continue;
+
+  tail->next = key;
+  key->next = NULL;
+}
+
+/* Report that S is not a valid field specification, by calling
+ error with status 2.
+ NOTE(review): presumably error exits when given a nonzero status, so
+ this would not return -- confirm against the definition of error. */
+
+static void
+badfieldspec (s)
+ char *s;
+{
+ error (2, 0, "invalid field specification `%s'", s);
+}
+
+/* Handle interrupts and hangups.
+
+ Restore the default action for signal SIG, remove the temporary
+ files, then send SIG to this process again so that the default
+ action takes effect. */
+
+static void
+sighandler (sig)
+ int sig;
+{
+#ifdef _POSIX_VERSION
+ struct sigaction sigact;
+
+ sigact.sa_handler = SIG_DFL;
+ sigemptyset (&sigact.sa_mask);
+ sigact.sa_flags = 0;
+ sigaction (sig, &sigact, NULL);
+#else /* !_POSIX_VERSION */
+ signal (sig, SIG_DFL);
+#endif /* _POSIX_VERSION */
+ cleanup ();
+ kill (getpid (), sig);
+}
+
+/* Apply to KEY the ordering options given by the letters at the start
+   of S.  Return a pointer to the first character of S that is not an
+   ordering option letter, or to the terminating NUL if all are.
+   BLANKTYPE says which blanks the 'b' letter makes KEY skip: those at
+   the start of the field, those at the end, or both.  */
+
+static char *
+set_ordering (s, key, blanktype)
+     register char *s;
+     struct keyfield *key;
+     enum blanktype blanktype;
+{
+  for (; *s; ++s)
+    switch (*s)
+      {
+      case 'b':
+        if (blanktype == bl_both || blanktype == bl_start)
+          key->skipsblanks = 1;
+        if (blanktype == bl_both || blanktype == bl_end)
+          key->skipeblanks = 1;
+        break;
+
+      case 'd':
+        key->ignore = nondictionary;
+        break;
+
+      case 'f':
+        key->translate = fold_toupper;
+        break;
+
+#if 0
+      case 'g':
+        /* Reserved for comparing floating-point numbers. */
+        break;
+#endif
+
+      case 'i':
+        key->ignore = nonprinting;
+        break;
+
+      case 'M':
+        key->month = 1;
+        key->skipeblanks = 1;
+        key->skipsblanks = 1;
+        break;
+
+      case 'n':
+        key->numeric = 1;
+        key->skipeblanks = 1;
+        key->skipsblanks = 1;
+        break;
+
+      case 'r':
+        key->reverse = 1;
+        break;
+
+      default:
+        /* Not an ordering letter: stop here.  */
+        return s;
+      }
+
+  return s;
+}
+/* Entry point of sort.  Arranges for sighandler to run on SIGINT, SIGHUP,
+ SIGPIPE and SIGTERM (unless the signal is currently ignored), parses the
+ command line into sort keys, global ordering options and file names,
+ then checks (-c), merges (-m) or sorts the files onto the output file
+ or stdout.  With no file arguments the single name "-" is used.
+ Always leaves through exit. */
+void
+main (argc, argv)
+ int argc;
+ char *argv[];
+{
+ struct keyfield *key = NULL, gkey; /* KEY: key under construction; GKEY: global ordering options. */
+ char *s;
+ int i, t, t2;
+ int checkonly = 0, mergeonly = 0, nfiles = 0;
+ char *minus = "-", *outfile = minus, **files, *tmp;
+ FILE *ofp;
+#ifdef _POSIX_VERSION
+ struct sigaction oldact, newact;
+#endif /* _POSIX_VERSION */
+
+ program_name = argv[0];
+ have_read_stdin = 0;
+ inittables ();
+
+ prefix = getenv ("TMPDIR"); /* Falls back to "/tmp" when TMPDIR is unset. */
+ if (prefix == NULL)
+ prefix = "/tmp";
+
+#ifdef _POSIX_VERSION /* Install sighandler, but leave ignored signals ignored. */
+ newact.sa_handler = sighandler;
+ sigemptyset (&newact.sa_mask);
+ newact.sa_flags = 0;
+
+ sigaction (SIGINT, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGINT, &newact, NULL);
+ sigaction (SIGHUP, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGHUP, &newact, NULL);
+ sigaction (SIGPIPE, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGPIPE, &newact, NULL);
+ sigaction (SIGTERM, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGTERM, &newact, NULL);
+#else /* !_POSIX_VERSION */
+ if (signal (SIGINT, SIG_IGN) != SIG_IGN)
+ signal (SIGINT, sighandler);
+ if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
+ signal (SIGHUP, sighandler);
+ if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
+ signal (SIGPIPE, sighandler);
+ if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
+ signal (SIGTERM, sighandler);
+#endif /* !_POSIX_VERSION */
+
+ gkey.sword = gkey.eword = -1; /* Start with no global ordering options set. */
+ gkey.ignore = NULL;
+ gkey.translate = NULL;
+ gkey.numeric = gkey.month = gkey.reverse = 0;
+ gkey.skipsblanks = gkey.skipeblanks = 0;
+
+ files = (char **) xmalloc (sizeof (char *) * argc); /* At most argc file names can follow. */
+
+ for (i = 1; i < argc; ++i)
+ {
+ if (argv[i][0] == '+') /* Old-style key start: +POS1, stored without the -1 adjustment that -k applies. */
+ {
+ if (key)
+ insertkey (key);
+ key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
+ key->eword = -1;
+ key->ignore = NULL;
+ key->translate = NULL;
+ key->skipsblanks = key->skipeblanks = 0;
+ key->numeric = key->month = key->reverse = 0;
+ s = argv[i] + 1;
+ if (!digits[UCHAR (*s)])
+ badfieldspec (argv[i]);
+ for (t = 0; digits[UCHAR (*s)]; ++s)
+ t = 10 * t + *s - '0';
+ t2 = 0;
+ if (*s == '.')
+ for (++s; digits[UCHAR (*s)]; ++s)
+ t2 = 10 * t2 + *s - '0';
+ if (t2 || t)
+ {
+ key->sword = t;
+ key->schar = t2;
+ }
+ else
+ key->sword = -1; /* Both numbers zero: sword is set to -1 and schar is left unset. */
+ s = set_ordering (s, key, bl_start);
+ if (*s)
+ badfieldspec (argv[i]);
+ }
+ else if (argv[i][0] == '-' && argv[i][1]) /* A lone "-" is treated as a file name below. */
+ {
+ s = argv[i] + 1;
+ if (digits[UCHAR (*s)]) /* Old-style key end: -POS2, closing the pending +POS1 key. */
+ {
+ if (!key)
+ usage ();
+ for (t = 0; digits[UCHAR (*s)]; ++s)
+ t = t * 10 + *s - '0';
+ t2 = 0;
+ if (*s == '.')
+ for (++s; digits[UCHAR (*s)]; ++s)
+ t2 = t2 * 10 + *s - '0';
+ key->eword = t;
+ key->echar = t2;
+ s = set_ordering (s, key, bl_end);
+ if (*s)
+ badfieldspec (argv[i]);
+ insertkey (key);
+ key = NULL;
+ }
+ else
+ while (*s) /* A cluster of option letters; each pass handles one non-ordering letter. */
+ {
+ s = set_ordering (s, &gkey, bl_both); /* Ordering letters here are global; *s is now some other letter or the null. */
+ switch (*s)
+ {
+ case '\0':
+ break;
+ case 'c':
+ checkonly = 1;
+ break;
+ case 'k': /* The key spec is either the rest of this argument or the next one. */
+ if (s[1])
+ ++s;
+ else
+ {
+ if (i == argc - 1)
+ error (2, 0, "option `-k' requires an argument");
+ else
+ s = argv[++i];
+ }
+ if (key)
+ insertkey (key);
+ key = (struct keyfield *)
+ xmalloc (sizeof (struct keyfield));
+ key->eword = -1;
+ key->ignore = NULL;
+ key->translate = NULL;
+ key->skipsblanks = key->skipeblanks = 0;
+ key->numeric = key->month = key->reverse = 0;
+ /* Get POS1. Nonzero field and character numbers are each
+ reduced by one before being stored. */
+ if (!digits[UCHAR (*s)])
+ badfieldspec (argv[i]);
+ for (t = 0; digits[UCHAR (*s)]; ++s)
+ t = 10 * t + *s - '0';
+ if (t)
+ t--;
+ t2 = 0;
+ if (*s == '.')
+ {
+ for (++s; digits[UCHAR (*s)]; ++s)
+ t2 = 10 * t2 + *s - '0';
+ if (t2)
+ t2--;
+ }
+ if (t2 || t)
+ {
+ key->sword = t;
+ key->schar = t2;
+ }
+ else
+ key->sword = -1;
+ s = set_ordering (s, key, bl_start);
+ if (*s && *s != ',')
+ badfieldspec (argv[i]);
+ else if (*s++)
+ {
+ /* Get POS2. When a nonzero character offset is given, the
+ field number (not the offset) is reduced by one. */
+ for (t = 0; digits[UCHAR (*s)]; ++s)
+ t = t * 10 + *s - '0';
+ t2 = 0;
+ if (*s == '.')
+ {
+ for (++s; digits[UCHAR (*s)]; ++s)
+ t2 = t2 * 10 + *s - '0';
+ if (t2)
+ t--;
+ }
+ key->eword = t;
+ key->echar = t2;
+ s = set_ordering (s, key, bl_end);
+ if (*s)
+ badfieldspec (argv[i]);
+ }
+ insertkey (key);
+ key = NULL;
+ goto outer; /* The whole argument has been consumed. */
+ case 'm':
+ mergeonly = 1;
+ break;
+ case 'o':
+ if (s[1])
+ outfile = s + 1;
+ else
+ {
+ if (i == argc - 1)
+ error (2, 0, "option `-o' requires an argument");
+ else
+ outfile = argv[++i];
+ }
+ goto outer;
+ case 's':
+ stable = 1;
+ break;
+ case 't': /* Only the first character of the separator argument is used. */
+ if (s[1])
+ tab = *++s;
+ else if (i < argc - 1)
+ {
+ tab = *argv[++i];
+ goto outer;
+ }
+ else
+ error (2, 0, "option `-t' requires an argument");
+ break;
+ case 'u':
+ unique = 1;
+ break;
+ default:
+ fprintf (stderr, "%s: unrecognized option `-%c'\n",
+ argv[0], *s);
+ usage ();
+ }
+ if (*s)
+ ++s;
+ }
+ }
+ else /* Not an option. */
+ {
+ files[nfiles++] = argv[i];
+ }
+ outer:;
+ }
+
+ if (key) /* A trailing +POS1 with no -POS2 is still pending. */
+ insertkey (key);
+
+ /* Inheritance of global options to individual keys.
+ Only keys with no ordering option of their own inherit. */
+ for (key = keyhead.next; key; key = key->next)
+ if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
+ && !key->skipeblanks && !key->month && !key->numeric)
+ {
+ key->ignore = gkey.ignore;
+ key->translate = gkey.translate;
+ key->skipsblanks = gkey.skipsblanks;
+ key->skipeblanks = gkey.skipeblanks;
+ key->month = gkey.month;
+ key->numeric = gkey.numeric;
+ key->reverse = gkey.reverse;
+ }
+
+ if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
+ || gkey.reverse || gkey.skipeblanks
+ || gkey.month || gkey.numeric))
+ insertkey (&gkey); /* No explicit key, but global options were given: GKEY becomes the only key. */
+
+ if (nfiles == 0)
+ {
+ nfiles = 1;
+ files = &minus;
+ }
+
+ if (checkonly)
+ exit (check (files, nfiles) != 0);
+
+ if (strcmp (outfile, "-"))
+ {
+ for (i = 0; i < nfiles; ++i)
+ if (!strcmp (outfile, files[i]))
+ break;
+ if (i == nfiles)
+ ofp = xfopen (outfile, "w");
+ else /* The output file is also an input: copy it to a temporary file and read that instead. */
+ {
+ char buf[8192];
+ FILE *fp = xfopen (outfile, "r");
+ int cc;
+
+ tmp = tempname ();
+ ofp = xfopen (tmp, "w");
+ while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
+ xfwrite (buf, 1, cc, ofp);
+ if (ferror (fp))
+ {
+ error (0, errno, "%s", outfile);
+ cleanup ();
+ exit (2);
+ }
+ xfclose (ofp);
+ xfclose (fp);
+ files[i] = tmp; /* NOTE(review): only the first matching entry is redirected; confirm repeated names are not expected. */
+ ofp = xfopen (outfile, "w");
+ }
+ }
+ else
+ ofp = stdout;
+
+ if (mergeonly)
+ merge (files, nfiles, ofp);
+ else
+ sort (files, nfiles, ofp);
+ cleanup ();
+
+ if (have_read_stdin && fclose (stdin) == EOF)
+ error (1, errno, "-");
+ if (ferror (stdout) || fclose (stdout) == EOF)
+ error (1, 0, "write error");
+
+ exit (0);
+}
+
+/* Print the command-line synopsis on stderr and exit with status 2.  */
+
+static void
+usage ()
+{
+  /* The synopsis is emitted one output line per call.  */
+  fprintf (stderr,
+           "Usage: %s [-cmus] [-t separator] [-o output-file] [-bdfiMnr] [+POS1 [-POS2]]\n",
+           program_name);
+  fprintf (stderr, " [-k POS1[,POS2]] [file...]\n");
+  exit (2);
+}
diff --git a/src/split.c b/src/split.c
new file mode 100644
index 000000000..ccc4535c2
--- /dev/null
+++ b/src/split.c
@@ -0,0 +1,532 @@
+/* split.c -- split a file into pieces.
+ Copyright (C) 1988, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* By tege@sics.se, with rms.
+
+ To do:
+ * Implement -t CHAR or -t REGEX to specify break characters other
+ than newline. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <limits.h>
+#include <sys/types.h>
+#include "system.h"
+
+char *xmalloc ();
+void error ();
+
+int convint ();
+int isdigits ();
+int stdread ();
+void line_bytes_split ();
+void bytes_split ();
+void cwrite ();
+void lines_split ();
+void next_file_name ();
+
+/* Name under which this program was invoked. */
+char *program_name;
+
+/* Name of the current output file: the output prefix followed by the
+ generated suffix. Allocated and initialized in main; opened by cwrite. */
+char *outfile;
+
+/* Pointer to the end of the prefix in OUTFILE.
+ Suffixes are inserted here. next_file_name advances it by one
+ each time the suffix is lengthened. */
+char *outfile_mid;
+
+/* Pointer to the end of OUTFILE, i.e. just past the last suffix character. */
+char *outfile_end;
+
+/* Status for outfile name generation, maintained by next_file_name.
+ outfile_count starts at -1 (wrapping to the largest unsigned value) so
+ that the first increment yields 0. When it reaches outfile_name_limit
+ the suffix is lengthened, the limit is multiplied by 26 and
+ outfile_name_generation is incremented. */
+unsigned outfile_count = -1;
+unsigned outfile_name_limit = 25 * 26;
+unsigned outfile_name_generation = 1;
+
+/* Name of input file. May be "-". */
+char *infile;
+
+/* Descriptor on which input file is open. */
+int input_desc;
+
+/* Descriptor on which output file is open; -1 while none is open. */
+int output_desc;
+
+/* Print REASON (when it is not a null pointer) prefixed by the program
+   name, then the command-line synopsis, all on stderr, and exit with
+   status 2.  */
+
+void
+usage (reason)
+     char *reason;
+{
+  if (reason)
+    fprintf (stderr, "%s: %s\n", program_name, reason);
+
+  /* The synopsis is emitted one output line per call.  */
+  fprintf (stderr,
+           "Usage: %s [-lines] [-l lines] [-b bytes[bkm]] [-C bytes[bkm]]\n",
+           program_name);
+  fprintf (stderr,
+           " [--lines=lines] [--bytes=bytes[bkm]] [--line-bytes=bytes[bkm]]\n");
+  fprintf (stderr, " [infile [outfile-prefix]]\n");
+  exit (2);
+}
+
+struct option longopts[] = /* Long options handed to getopt_long in main. */
+{
+ {"bytes", 1, NULL, 'b'}, /* --bytes=N: takes an argument, handled as -b. */
+ {"lines", 1, NULL, 'l'}, /* --lines=N: takes an argument, handled as -l. */
+ {"line-bytes", 1, NULL, 'C'}, /* --line-bytes=N: takes an argument, handled as -C. */
+ {NULL, 0, NULL, 0} /* End-of-table marker. */
+};
+/* Entry point of split. Parses the options to find how to split and the
+ piece size, takes the optional input file and output prefix arguments,
+ opens the input (descriptor 0 when the name is "-"), sets up the output
+ name buffer and the I/O buffer, then calls lines_split, bytes_split or
+ line_bytes_split. Always leaves through exit (or through usage/error). */
+void
+main (argc, argv)
+ int argc;
+ char *argv[];
+{
+ struct stat stat_buf;
+ int num; /* numeric argument from command line */
+ enum
+ {
+ type_undef, type_bytes, type_byteslines, type_lines, type_digits
+ } split_type = type_undef;
+ int in_blk_size; /* optimal block size of input file device */
+ char *buf; /* file i/o buffer */
+ int accum = 0; /* piece size being accumulated from the options */
+ char *outbase;
+ int c;
+ int digits_optind = 0; /* argv index of the -DIGITS argument seen last, 0 if none */
+
+ program_name = argv[0];
+
+ /* Parse command line options. */
+
+ infile = "-";
+ outbase = "x";
+
+ while (1)
+ {
+ /* This is the argv-index of the option we will read next. */
+ int this_optind = optind ? optind : 1;
+
+ c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
+ if (c == EOF)
+ break;
+
+ switch (c)
+ {
+ case 'b':
+ if (split_type != type_undef)
+ usage ("cannot split in more than one way");
+ split_type = type_bytes;
+ if (convint (optarg, &accum) == -1)
+ usage ("invalid number of bytes");
+ break;
+
+ case 'l':
+ if (split_type != type_undef)
+ usage ("cannot split in more than one way");
+ split_type = type_lines;
+ if (!isdigits (optarg))
+ usage ("invalid number of lines");
+ accum = atoi (optarg);
+ break;
+
+ case 'C':
+ if (split_type != type_undef)
+ usage ("cannot split in more than one way");
+ split_type = type_byteslines;
+ if (convint (optarg, &accum) == -1)
+ usage ("invalid number of bytes");
+ break;
+
+ case '0': /* -DIGITS: each digit arrives as its own option character. */
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (split_type != type_undef && split_type != type_digits)
+ usage ("cannot split in more than one way");
+ if (digits_optind != 0 && digits_optind != this_optind)
+ accum = 0; /* More than one number given; ignore other. */
+ digits_optind = this_optind;
+ split_type = type_digits;
+ accum = accum * 10 + c - '0';
+ break;
+
+ default:
+ usage ((char *)0);
+ }
+ }
+
+ /* Handle default case: 1000 lines per piece. */
+ if (split_type == type_undef)
+ {
+ split_type = type_lines;
+ accum = 1000;
+ }
+
+ if (accum < 1)
+ usage ("invalid number");
+ num = accum;
+
+ /* Get out the filename arguments. */
+
+ if (optind < argc)
+ infile = argv[optind++];
+
+ if (optind < argc)
+ outbase = argv[optind++];
+
+ if (optind < argc)
+ usage ("too many arguments");
+
+ /* Open the input file. */
+ if (!strcmp (infile, "-"))
+ input_desc = 0;
+ else
+ {
+ input_desc = open (infile, O_RDONLY);
+ if (input_desc < 0)
+ error (1, errno, "%s", infile);
+ }
+
+ /* No output file is open now. */
+ output_desc = -1;
+
+ /* Copy the output file prefix so we can add suffixes to it.
+ 26**29 is certainly enough output files! */
+
+ outfile = xmalloc (strlen (outbase) + 30);
+ strcpy (outfile, outbase);
+ outfile_mid = outfile + strlen (outfile);
+ outfile_end = outfile_mid + 2;
+ bzero (outfile_mid, 30); /* Zero the whole suffix area, up to the end of the allocation. */
+ outfile_mid[0] = 'a';
+ outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
+
+ /* Get the optimal block size of input device and make a buffer. */
+
+ if (fstat (input_desc, &stat_buf) < 0)
+ error (1, errno, "%s", infile);
+ in_blk_size = ST_BLKSIZE (stat_buf);
+
+ buf = xmalloc (in_blk_size + 1); /* One extra byte: lines_split stores a sentinel newline after the data. */
+
+ switch (split_type)
+ {
+ case type_digits:
+ case type_lines:
+ lines_split (num, buf, in_blk_size);
+ break;
+
+ case type_bytes:
+ bytes_split (num, buf, in_blk_size);
+ break;
+
+ case type_byteslines:
+ line_bytes_split (num); /* Allocates its own buffer of NUM bytes. */
+ break;
+ }
+
+ if (close (input_desc) < 0)
+ error (1, errno, "%s", infile);
+ if (output_desc >= 0 && close (output_desc) < 0)
+ error (1, errno, "%s", outfile);
+
+ exit (0);
+}
+
+/* Return nonzero if the string STR is composed entirely of decimal digits.
+   The empty string is not accepted: it yields 0.  */
+
+int
+isdigits (str)
+     char *str;
+{
+  do
+    {
+      /* isdigit is only defined for values representable as unsigned
+         char (or EOF); a plain `char' may be negative, so convert
+         before the call.  */
+      if (!isdigit ((unsigned char) *str))
+        return 0;
+      str++;
+    }
+  while (*str);
+  return 1;
+}
+
+/* Put the value of the number in STR into *VAL.
+   STR can specify a non-negative decimal integer, optionally ending in
+   `b' to mean blocks of 512, `k' to mean kilo (1024) or `m' to mean
+   mega (1048576).  A suffix is only recognized when at least one other
+   character precedes it.
+   Return 0 if STR is valid, -1 if not; STR is invalid when it contains
+   no digits, contains anything other than digits before the optional
+   suffix, or when the scaled value does not fit in an `int'.
+   *VAL is written only on success and STR is never modified.  */
+
+int
+convint (str, val)
+     char *str;
+     int *val;
+{
+  int multiplier = 1;
+  int ndigits = strlen (str);
+  int value = 0;
+  int i;
+
+  if (ndigits > 1)
+    {
+      switch (str[ndigits - 1])
+        {
+        case 'b':
+          multiplier = 512;
+          ndigits--;
+          break;
+        case 'k':
+          multiplier = 1024;
+          ndigits--;
+          break;
+        case 'm':
+          multiplier = 1048576;
+          ndigits--;
+          break;
+        }
+    }
+
+  if (ndigits == 0)
+    return -1;
+
+  for (i = 0; i < ndigits; i++)
+    {
+      int digit;
+
+      if (!isdigit ((unsigned char) str[i]))
+        return -1;
+      digit = str[i] - '0';
+      /* Refuse to accumulate past INT_MAX rather than overflow.  */
+      if (value > (INT_MAX - digit) / 10)
+        return -1;
+      value = value * 10 + digit;
+    }
+
+  /* Likewise for the scaling by the suffix multiplier.  */
+  if (value > INT_MAX / multiplier)
+    return -1;
+
+  *val = value * multiplier;
+  return 0;
+}
+
+/* Copy the input to successive output files of exactly NCHARS bytes
+   each (the last one may be shorter).
+   BUF is the I/O buffer to use and BUFSIZE is its size.  */
+
+void
+bytes_split (nchars, buf, bufsize)
+     int nchars;
+     char *buf;
+     int bufsize;
+{
+  int got;                      /* Bytes obtained by the latest read.  */
+  int start_new = 1;            /* Nonzero if the next write opens a new file.  */
+  int room = nchars;            /* Bytes still missing from the current piece.  */
+
+  do
+    {
+      char *out;
+      int avail;
+
+      got = stdread (buf, bufsize);
+      if (got < 0)
+        error (1, errno, "%s", infile);
+
+      out = buf;
+      avail = got;
+
+      /* Emit every piece that this buffer-load can complete.  */
+      while (avail >= room)
+        {
+          cwrite (start_new, out, room);
+          out += room;
+          avail -= room;
+          start_new = 1;
+          room = nchars;
+        }
+
+      /* Whatever is left begins (or continues) a piece that the next
+         buffer-load will go on filling.  Never write 0 bytes.  */
+      if (avail != 0)
+        {
+          cwrite (start_new, out, avail);
+          room -= avail;
+          start_new = 0;
+        }
+    }
+  while (got == bufsize);
+}
+
+/* Split into pieces of exactly NLINES lines.
+ Use buffer BUF, whose size is BUFSIZE.
+ BUF must have room for one byte beyond BUFSIZE: a newline is stored
+ just past the data read, as a sentinel that stops the inner scan.
+ The count of lines seen in the current piece carries over from one
+ buffer-load to the next, so a piece may span several reads. */
+
+void
+lines_split (nlines, buf, bufsize)
+ int nlines;
+ char *buf;
+ int bufsize;
+{
+ int n_read;
+ char *bp, *bp_out, *eob; /* scan position, start of unwritten data, end of data */
+ int new_file_flag = 1;
+ int n = 0; /* lines seen so far in the current piece */
+
+ do
+ {
+ n_read = stdread (buf, bufsize);
+ if (n_read < 0)
+ error (1, errno, "%s", infile);
+ bp = bp_out = buf;
+ eob = bp + n_read;
+ *eob = '\n'; /* sentinel */
+ for (;;)
+ {
+ while (*bp++ != '\n')
+ ; /* this semicolon takes most of the time */
+ if (bp > eob) /* The newline just passed was the sentinel, not data. */
+ {
+ if (eob != bp_out) /* do not write 0 bytes! */
+ {
+ cwrite (new_file_flag, bp_out, eob - bp_out);
+ new_file_flag = 0;
+ }
+ break;
+ }
+ else
+ if (++n >= nlines) /* The piece is complete: write it and start a new file next time. */
+ {
+ cwrite (new_file_flag, bp_out, bp - bp_out);
+ bp_out = bp;
+ new_file_flag = 1;
+ n = 0;
+ }
+ }
+ }
+ while (n_read == bufsize);
+}
+
+/* Split into pieces that are as large as possible while still not more
+   than NCHARS bytes, and are split on line boundaries except
+   where lines longer than NCHARS bytes occur.
+   A private buffer of NCHARS bytes is allocated for the duration of
+   the call.  No output file is created once the input is exhausted,
+   so an empty input produces no output files at all.  */
+
+void
+line_bytes_split (nchars)
+     int nchars;
+{
+  int n_read;
+  char *bp;
+  int eof = 0;
+  int n_buffered = 0;
+  char *buf = (char *) xmalloc (nchars);
+
+  do
+    {
+      /* Fill up the full buffer size from the input file.  */
+
+      n_read = stdread (buf + n_buffered, nchars - n_buffered);
+      if (n_read < 0)
+        error (1, errno, "%s", infile);
+
+      n_buffered += n_read;
+      if (n_buffered != nchars)
+        {
+          /* The input ended exactly where the previous piece did (or
+             was empty): there is nothing to write, and writing 0 bytes
+             would leave a spurious empty output file behind.  */
+          if (n_buffered == 0)
+            break;
+          eof = 1;
+        }
+
+      /* Find where to end this chunk.  Only a full buffer is cut back
+         to the last newline; the final, short one is written whole.  */
+      bp = buf + n_buffered;
+      if (n_buffered == nchars)
+        {
+          while (bp > buf && bp[-1] != '\n')
+            bp--;
+        }
+
+      /* If chunk has no newlines, use all the chunk.  */
+      if (bp == buf)
+        bp = buf + n_buffered;
+
+      /* Output the chars as one output file.  */
+      cwrite (1, buf, bp - buf);
+
+      /* Discard the chars we just output; move rest of chunk
+         down to be the start of the next chunk.  */
+      n_buffered -= bp - buf;
+      if (n_buffered > 0)
+        bcopy (bp, buf, n_buffered);
+    }
+  while (!eof);
+  free (buf);
+}
+
+/* Write BYTES bytes at BP to an output file.
+   If NEW_FILE_FLAG is nonzero, close the output file in use (if any)
+   and open the next one first.
+   Otherwise add to the same output file already in use.
+   A write that transfers only part of the data is continued until all
+   BYTES bytes are out; any failure is reported through `error' with a
+   nonzero status.  */
+
+void
+cwrite (new_file_flag, bp, bytes)
+     int new_file_flag;
+     char *bp;
+     int bytes;
+{
+  if (new_file_flag)
+    {
+      if (output_desc >= 0 && close (output_desc) < 0)
+        error (1, errno, "%s", outfile);
+
+      next_file_name ();
+      output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+      if (output_desc < 0)
+        error (1, errno, "%s", outfile);
+    }
+
+  while (bytes > 0)
+    {
+      int n_written = write (output_desc, bp, bytes);
+
+      if (n_written < 0)
+        error (1, errno, "%s", outfile);
+      /* A zero return makes no progress and leaves errno unset; report
+         it instead of looping forever.  */
+      if (n_written == 0)
+        error (1, 0, "%s: write error", outfile);
+
+      bp += n_written;
+      bytes -= n_written;
+    }
+}
+
+/* Fill BUF with NCHARS bytes from the input file, reading repeatedly
+   until that many have arrived or end of file is reached.
+   Return the number of bytes actually stored, or -1 on a read error.
+   A result smaller than NCHARS means end of file: do not call
+   `stdread' again after that.  */
+
+int
+stdread (buf, nchars)
+     char *buf;
+     int nchars;
+{
+  int remaining;
+  int got;
+
+  for (remaining = nchars; remaining != 0; remaining -= got, buf += got)
+    {
+      got = read (input_desc, buf, remaining);
+      if (got < 0)
+        return -1;
+      if (got == 0)
+        break;                  /* End of file.  */
+    }
+  return nchars - remaining;
+}
+
+/* Advance the suffix stored in `outfile' (starting at `outfile_mid',
+   ending just before `outfile_end') to the next name in sequence.
+   While the current generation still has names left, the suffix is
+   stepped like an odometer over the letters `a'..`z'.  When the
+   generation is used up, a `z' is appended to the prefix part, the
+   suffix is restarted as a longer run of `a's, and the name limit is
+   multiplied by 26.  */
+
+void
+next_file_name ()
+{
+  if (++outfile_count >= outfile_name_limit)
+    {
+      int k;
+
+      /* Start a new generation of longer names.  */
+      outfile_count = 0;
+      outfile_name_limit *= 26;
+      outfile_name_generation++;
+      *outfile_mid = 'z';
+      outfile_mid++;
+      for (k = 0; k <= outfile_name_generation; k++)
+        outfile_mid[k] = 'a';
+      outfile_end += 2;
+    }
+  else
+    {
+      char *p = outfile_end - 1;
+
+      /* Turn trailing `z's back into `a's, then bump the first
+         character to their left.  */
+      while (*p == 'z')
+        {
+          *p = 'a';
+          p--;
+        }
+      *p = *p + 1;
+    }
+}
diff --git a/src/sum.c b/src/sum.c
new file mode 100644
index 000000000..9236614ca
--- /dev/null
+++ b/src/sum.c
@@ -0,0 +1,217 @@
+/* sum -- checksum and count the blocks in a file
+ Copyright (C) 1986, 1989, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Like BSD sum or SysV sum -r, except like SysV sum if -s option is given. */
+
+/* Written by Kayvan Aghaiepour and David MacKenzie. */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <getopt.h>
+#include "system.h"
+
+int bsd_sum_file ();
+int sysv_sum_file ();
+void error ();
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Nonzero if any of the files read were the standard input.
+ Set by bsd_sum_file and sysv_sum_file when FILE is "-"; main closes
+ stdin before exiting when it is set. */
+int have_read_stdin;
+
+/* Rotate the low 16 bits of integer variable C right by one bit: bit 0
+   moves to bit 15 (0x8000).  C is expected to fit in 16 bits already.
+   The body is wrapped in do ... while (0) so that the macro behaves as
+   one statement, even as the unbraced body of an if/else.  C is
+   evaluated more than once, so it must have no side effects.  */
+#define ROTATE_RIGHT(c) do { if ((c) & 01) (c) = ((c) >> 1) + 0x8000; else (c) >>= 1; } while (0)
+
+struct option longopts[] = /* Long options handed to getopt_long in main. */
+{
+ {"sysv", 0, NULL, 's'}, /* --sysv: no argument, handled as -s. */
+ {NULL, 0, NULL, 0} /* End-of-table marker. */
+};
+
+/* Entry point of sum.  Select the checksum routine from the options
+   (-r: BSD, the default; -s or --sysv: System V), run it on every file
+   argument, or on "-" when there is none, and exit with status 1 if
+   any file failed, 0 otherwise.  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int (*sum_func) () = bsd_sum_file;
+  int errors = 0;
+  int files_given;
+  int optc;
+  int i;
+
+  program_name = argv[0];
+  have_read_stdin = 0;
+
+  for (;;)
+    {
+      optc = getopt_long (argc, argv, "rs", longopts, (int *) 0);
+      if (optc == -1)
+        break;
+
+      if (optc == 's')
+        sum_func = sysv_sum_file;
+      else if (optc == 'r')     /* For SysV compatibility.  */
+        sum_func = bsd_sum_file;
+      else if (optc == '?')
+        {
+          fprintf (stderr, "Usage: %s [-rs] [--sysv] [file...]\n", argv[0]);
+          exit (1);
+        }
+    }
+
+  /* The routines use the number of file arguments to decide whether
+     to print the file name.  */
+  files_given = argc - optind;
+
+  if (files_given != 0)
+    {
+      for (i = optind; i < argc; i++)
+        if ((*sum_func) (argv[i], files_given) < 0)
+          errors = 1;
+    }
+  else if ((*sum_func) ("-", files_given) < 0)
+    errors = 1;
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    error (1, errno, "-");
+  exit (errors);
+}
+
+/* Compute the rotating 16-bit checksum of FILE (the standard input if
+   FILE is "-") and print it, zero-padded to five digits, followed by
+   the size of the file in 1K blocks, rounded up.
+   FILE itself is appended to the line only when PRINT_NAME is >1.
+   Return 0 if successful, -1 if the file cannot be opened, read or
+   closed (a diagnostic is issued through `error').  */
+
+int
+bsd_sum_file (file, print_name)
+     char *file;
+     int print_name;
+{
+  register FILE *fp;
+  register unsigned long checksum = 0;  /* Always kept below 2^16.  */
+  register long total_bytes = 0;        /* Bytes read so far.  */
+  register int ch;                      /* Latest value from getc.  */
+  int from_stdin = (strcmp (file, "-") == 0);
+
+  if (from_stdin)
+    {
+      fp = stdin;
+      have_read_stdin = 1;
+    }
+  else
+    {
+      fp = fopen (file, "r");
+      if (fp == NULL)
+        {
+          error (0, errno, "%s", file);
+          return -1;
+        }
+    }
+
+  /* For each byte: rotate the running sum right by one bit, add the
+     byte, and trim the result back to 16 bits.  */
+  for (;;)
+    {
+      ch = getc (fp);
+      if (ch == EOF)
+        break;
+      total_bytes++;
+      ROTATE_RIGHT (checksum);
+      checksum += ch;
+      checksum &= 0xffff;
+    }
+
+  if (ferror (fp))
+    {
+      error (0, errno, "%s", file);
+      if (!from_stdin)
+        fclose (fp);
+      return -1;
+    }
+
+  /* The standard input is left open; main closes it.  */
+  if (!from_stdin && fclose (fp) == EOF)
+    {
+      error (0, errno, "%s", file);
+      return -1;
+    }
+
+  printf ("%05lu %5ld", checksum, (total_bytes + 1024 - 1) / 1024);
+  if (print_name > 1)
+    printf (" %s", file);
+  putchar ('\n');
+
+  return 0;
+}
+
+/* Add up all the bytes of FILE (the standard input if FILE is "-") and
+   print that sum modulo 0xffff, followed by the size of the file in
+   512-byte blocks, rounded up.
+   FILE itself is appended to the line when PRINT_NAME is nonzero.
+   Return 0 if successful, -1 if the file cannot be opened, read or
+   closed (a diagnostic is issued through `error').  */
+
+int
+sysv_sum_file (file, print_name)
+     char *file;
+     int print_name;
+{
+  unsigned char buf[8192];
+  register unsigned long checksum = 0;
+  register int bytes_read;
+  long total_bytes = 0;
+  int from_stdin = (strcmp (file, "-") == 0);
+  int fd;
+
+  if (from_stdin)
+    {
+      fd = 0;
+      have_read_stdin = 1;
+    }
+  else
+    {
+      fd = open (file, O_RDONLY);
+      if (fd == -1)
+        {
+          error (0, errno, "%s", file);
+          return -1;
+        }
+    }
+
+  for (;;)
+    {
+      register unsigned char *p;
+      register unsigned char *limit;
+
+      bytes_read = read (fd, buf, sizeof buf);
+      if (bytes_read <= 0)
+        break;
+
+      limit = buf + bytes_read;
+      for (p = buf; p < limit; p++)
+        checksum += *p;
+      total_bytes += bytes_read;
+    }
+
+  if (bytes_read < 0)
+    {
+      error (0, errno, "%s", file);
+      if (!from_stdin)
+        close (fd);
+      return -1;
+    }
+
+  /* Descriptor 0 is left open; main closes stdin.  */
+  if (!from_stdin && close (fd) == -1)
+    {
+      error (0, errno, "%s", file);
+      return -1;
+    }
+
+  printf ("%lu %ld", checksum % 0xffff, (total_bytes + 512 - 1) / 512);
+  if (print_name)
+    printf (" %s", file);
+  putchar ('\n');
+
+  return 0;
+}
diff --git a/src/tac.c b/src/tac.c
new file mode 100644
index 000000000..78e18467b
--- /dev/null
+++ b/src/tac.c
@@ -0,0 +1,628 @@
+/* tac - concatenate and print files in reverse
+ Copyright (C) 1988, 1989, 1990, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Jay Lepreau (lepreau@cs.utah.edu).
+ GNU enhancements by David MacKenzie (djm@ai.mit.edu). */
+
+/* Copy each FILE, or the standard input if none are given or when a
+ FILE name of "-" is encountered, to the standard output with the
+ order of the records reversed. The records are separated by
+ instances of a string, or a newline if none is given. By default, the
+ separator string is attached to the end of the record that it
+ follows in the file.
+
+ Options:
+ -b, --before The separator is attached to the beginning
+ of the record that it precedes in the file.
+ -r, --regex The separator is a regular expression.
+ -s, --separator=separator Use SEPARATOR as the record separator.
+
+ To reverse a file byte by byte, use (in bash, ksh, or sh):
+tac -r -s '.\|
+' file */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <regex.h>
+#include "system.h"
+
+#ifndef STDC_HEADERS
+char *malloc ();
+char *realloc ();
+#endif
+
+/* The number of bytes per atomic read. */
+#define INITIAL_READSIZE 8192
+
+/* The number of bytes per atomic write. */
+#define WRITESIZE 8192
+
+char *mktemp ();
+
+RETSIGTYPE cleanup ();
+int tac ();
+int tac_file ();
+int tac_stdin ();
+char *xmalloc ();
+char *xrealloc ();
+void output ();
+void error ();
+void save_stdin ();
+void xwrite ();
+
+/* The name this program was run with. */
+char *program_name;
+
+/* The string that separates the records of the file.
+ Defaults to a newline; replaced by the argument of -s. */
+char *separator;
+
+/* If nonzero, print `separator' along with the record preceding it
+ in the file; otherwise with the record following it.
+ Set to 1 by default in main and cleared by -b / --before. */
+int separator_ends_record;
+
+/* 0 if `separator' is to be matched as a regular expression;
+ otherwise, the length of `separator', used as a sentinel to
+ stop the search. Cleared by -r / --regex. */
+int sentinel_length;
+
+/* The length of a match with `separator'. If `sentinel_length' is 0,
+ `match_length' is computed every time a match succeeds;
+ otherwise, it is simply the length of `separator'. */
+int match_length;
+
+/* The input buffer. main advances this pointer past the start of the
+ allocated block: by `sentinel_length' bytes (which hold a copy of
+ `separator') or, in regex mode, by one byte. */
+char *buffer;
+
+/* The number of bytes to read at once into `buffer'.
+ Starts at INITIAL_READSIZE and is doubled in main until it is more
+ than twice `sentinel_length'. */
+unsigned read_size;
+
+/* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
+ The extra 2 bytes allow `past_end' to have a value beyond the
+ end of `buffer' and `match_start' to run off the front of `buffer'. */
+unsigned buffer_size;
+
+/* The compiled regular expression representing `separator'.
+ Filled in by main only when `sentinel_length' is 0. */
+static struct re_pattern_buffer compiled_separator;
+
+struct option longopts[] = /* Long options handed to getopt_long in main. */
+{
+ {"before", 0, &separator_ends_record, 0}, /* Flag option: stores 0 in separator_ends_record. */
+ {"regex", 0, &sentinel_length, 0}, /* Flag option: stores 0 in sentinel_length (regex mode). */
+ {"separator", 1, NULL, 's'}, /* --separator=STRING: takes an argument, handled as -s. */
+ {NULL, 0, NULL, 0} /* End-of-table marker. */
+};
+/* Entry point of tac. Parses the options, compiles `separator' as a
+ regular expression when regex mode was requested, allocates the shared
+ input buffer, then reverses each file argument (the standard input
+ when there is none, or for an argument of "-"). The exit status is
+ the inclusive OR of the per-file results. */
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ char *error_message; /* Return value from re_compile_pattern. */
+ int optc, errors;
+ int have_read_stdin = 0;
+
+ program_name = argv[0];
+ errors = 0;
+ separator = "\n";
+ sentinel_length = 1;
+ separator_ends_record = 1;
+
+ while ((optc = getopt_long (argc, argv, "brs:", longopts, (int *) 0))
+ != EOF)
+ {
+ switch (optc)
+ {
+ case 0: /* A flag-type long option: its variable is set through the pointer in longopts. */
+ break;
+ case 'b':
+ separator_ends_record = 0;
+ break;
+ case 'r':
+ sentinel_length = 0;
+ break;
+ case 's':
+ separator = optarg;
+ if (*separator == 0)
+ error (1, 0, "separator cannot be empty");
+ break;
+ default:
+ fprintf (stderr, "\
+Usage: %s [-br] [-s separator] [--before] [--regex] [--separator=separator]\n\
+ [file...]\n",
+ program_name);
+ exit (1);
+ }
+ }
+
+ if (sentinel_length == 0) /* Regex mode: compile the separator once, up front. */
+ {
+ compiled_separator.allocated = 100;
+ compiled_separator.buffer = (unsigned char *)
+ xmalloc (compiled_separator.allocated);
+ compiled_separator.fastmap = xmalloc (256);
+ compiled_separator.translate = 0;
+ error_message = re_compile_pattern (separator, strlen (separator),
+ &compiled_separator);
+ if (error_message)
+ error (1, 0, "%s", error_message);
+ }
+ else
+ match_length = sentinel_length = strlen (separator);
+
+ read_size = INITIAL_READSIZE;
+ /* A precaution that will probably never be needed. */
+ while (sentinel_length * 2 >= read_size)
+ read_size *= 2;
+ buffer_size = read_size * 2 + sentinel_length + 2;
+ buffer = xmalloc (buffer_size);
+ if (sentinel_length)
+ {
+ strcpy (buffer, separator); /* Keep a copy of the separator right in front of the data area. */
+ buffer += sentinel_length;
+ }
+ else
+ ++buffer; /* Regex mode: leave one spare byte in front of the data area. */
+
+ if (optind == argc)
+ {
+ have_read_stdin = 1;
+ errors = tac_stdin ();
+ }
+ else
+ for (; optind < argc; ++optind)
+ {
+ if (strcmp (argv[optind], "-") == 0)
+ {
+ have_read_stdin = 1;
+ errors |= tac_stdin ();
+ }
+ else
+ errors |= tac_file (argv[optind]);
+ }
+
+ /* Flush the output buffer. */
+ output ((char *) NULL, (char *) NULL);
+
+ if (have_read_stdin && close (0) < 0)
+ error (1, errno, "-");
+ if (close (1) < 0)
+ error (1, errno, "write error");
+ exit (errors);
+}
+
+/* The name of a temporary file containing a copy of pipe input.
+ Set by save_stdin. */
+char *tempfile;
+
+/* Print the standard input in reverse, saving it to temporary
+ file `tempfile' first if it is a pipe.
+ More precisely, the copy is made whenever descriptor 0 is not a
+ regular file. While the temporary file exists, `cleanup' is installed
+ as the handler for SIGINT, SIGHUP, SIGPIPE and SIGTERM (except for
+ signals that were being ignored); the previous handlers are put back
+ before returning.
+ Return 0 if ok, 1 if an error occurs. */
+
+int
+tac_stdin ()
+{
+ /* Previous values of signal handlers. */
+ RETSIGTYPE (*sigint) (), (*sighup) (), (*sigpipe) (), (*sigterm) ();
+ int errors;
+ struct stat stats;
+#ifdef _POSIX_VERSION
+ struct sigaction oldact, newact;
+#endif /* _POSIX_VERSION */
+
+ /* No tempfile is needed for "tac < file".
+ Use fstat instead of checking for errno == ESPIPE because
+ lseek doesn't work on some special files but doesn't return an
+ error, either. */
+ if (fstat (0, &stats))
+ {
+ error (0, errno, "standard input");
+ return 1;
+ }
+ if (S_ISREG (stats.st_mode))
+ return tac (0, "standard input");
+
+#ifdef _POSIX_VERSION /* Remember each old handler, then install `cleanup' unless the signal is ignored. */
+ newact.sa_handler = cleanup;
+ sigemptyset (&newact.sa_mask);
+ newact.sa_flags = 0;
+
+ sigaction (SIGINT, NULL, &oldact);
+ sigint = oldact.sa_handler;
+ if (sigint != SIG_IGN)
+ sigaction (SIGINT, &newact, NULL);
+
+ sigaction (SIGHUP, NULL, &oldact);
+ sighup = oldact.sa_handler;
+ if (sighup != SIG_IGN)
+ sigaction (SIGHUP, &newact, NULL);
+
+ sigaction (SIGPIPE, NULL, &oldact);
+ sigpipe = oldact.sa_handler;
+ if (sigpipe != SIG_IGN)
+ sigaction (SIGPIPE, &newact, NULL);
+
+ sigaction (SIGTERM, NULL, &oldact);
+ sigterm = oldact.sa_handler;
+ if (sigterm != SIG_IGN)
+ sigaction (SIGTERM, &newact, NULL);
+#else /* !_POSIX_VERSION */
+ sigint = signal (SIGINT, SIG_IGN);
+ if (sigint != SIG_IGN)
+ signal (SIGINT, cleanup);
+
+ sighup = signal (SIGHUP, SIG_IGN);
+ if (sighup != SIG_IGN)
+ signal (SIGHUP, cleanup);
+
+ sigpipe = signal (SIGPIPE, SIG_IGN);
+ if (sigpipe != SIG_IGN)
+ signal (SIGPIPE, cleanup);
+
+ sigterm = signal (SIGTERM, SIG_IGN);
+ if (sigterm != SIG_IGN)
+ signal (SIGTERM, cleanup);
+#endif /* _POSIX_VERSION */
+
+ save_stdin (); /* Copies descriptor 0 into a new file and sets `tempfile'. */
+
+ errors = tac_file (tempfile);
+
+ unlink (tempfile);
+
+#ifdef _POSIX_VERSION /* Only the handler is restored: the mask and flags are the ones set in newact above. */
+ newact.sa_handler = sigint;
+ sigaction (SIGINT, &newact, NULL);
+ newact.sa_handler = sighup;
+ sigaction (SIGHUP, &newact, NULL);
+ newact.sa_handler = sigterm;
+ sigaction (SIGTERM, &newact, NULL);
+ newact.sa_handler = sigpipe;
+ sigaction (SIGPIPE, &newact, NULL);
+#else /* !_POSIX_VERSION */
+ signal (SIGINT, sigint);
+ signal (SIGHUP, sighup);
+ signal (SIGTERM, sigterm);
+ signal (SIGPIPE, sigpipe);
+#endif /* _POSIX_VERSION */
+
+ return errors;
+}
+
+/* Copy everything readable from descriptor 0 into a freshly named file
+   under $TMPDIR (or "/tmp" when TMPDIR is unset) and leave that name in
+   `tempfile'.  The name pattern buffer is allocated on the first call
+   and reused afterwards.  Every failure is reported through `error'
+   and followed by a call to `cleanup'.
+   NOTE(review): the name comes from mktemp and the file is created by
+   a separate creat call, so another process could create that name in
+   between -- confirm whether this matters here.  */
+
+void
+save_stdin ()
+{
+  static char *pattern = NULL;  /* Room for "DIR/tacXXXXXX".  */
+  static char *dir;
+  int out_fd;
+  int n;
+
+  if (!pattern)
+    {
+      dir = getenv ("TMPDIR");
+      if (!dir)
+        dir = "/tmp";
+      /* 11 = the 10 characters of "/tacXXXXXX" plus the final null.  */
+      pattern = xmalloc (strlen (dir) + 11);
+    }
+  sprintf (pattern, "%s/tacXXXXXX", dir);
+  tempfile = mktemp (pattern);
+
+  out_fd = creat (tempfile, 0600);
+  if (out_fd == -1)
+    {
+      error (0, errno, "%s", tempfile);
+      cleanup ();
+    }
+
+  for (;;)
+    {
+      n = read (0, buffer, read_size);
+      if (n <= 0)
+        break;
+      if (write (out_fd, buffer, n) != n)
+        {
+          error (0, errno, "%s", tempfile);
+          cleanup ();
+        }
+    }
+
+  if (close (out_fd) < 0)
+    {
+      error (0, errno, "%s", tempfile);
+      cleanup ();
+    }
+
+  /* The read failure, if any, is reported only after the copy has
+     been closed.  */
+  if (n == -1)
+    {
+      error (0, errno, "read error");
+      cleanup ();
+    }
+}
+
+/* Open FILE for reading, print it in reverse with `tac', and close it.
+   Return 1 if FILE cannot be opened or closed (after issuing a
+   diagnostic); otherwise return whatever `tac' returned.  */
+
+int
+tac_file (file)
+     char *file;
+{
+  int status;
+  int desc = open (file, 0);
+
+  if (desc == -1)
+    {
+      error (0, errno, "%s", file);
+      return 1;
+    }
+
+  status = tac (desc, file);
+
+  if (close (desc) >= 0)
+    return status;
+
+  error (0, errno, "%s", file);
+  return 1;
+}
+
+/* Print in reverse the file open on descriptor FD for reading FILE.
+   The file is read backward from its end, `read_size' bytes at a time,
+   into `buffer'.  Each time a separator is found, the record between it
+   and `past_end' is handed to `output'.  When the data held so far has
+   no separator, the pending bytes are shifted right and another chunk
+   is read in front of them, enlarging `buffer' when necessary.
+   FD is positioned with lseek; FILE is used only in error messages.
+   Return 0 if ok, 1 if an error occurs.  */
+
+int
+tac (fd, file)
+     int fd;
+     char *file;
+{
+  /* Pointer to the location in `buffer' where the search for
+     the next separator will begin.  */
+  char *match_start;
+  /* Pointer to one past the rightmost character in `buffer' that
+     has not been printed yet.  */
+  char *past_end;
+  unsigned saved_record_size;	/* Length of the record growing in `buffer'.  */
+  off_t file_pos;		/* Offset in the file of the next read.  */
+  /* Nonzero if `output' has not been called yet during this call.
+     Only used when the separator is attached to the preceding record.  */
+  int first_time = 1;
+  char first_char = *separator;	/* Speed optimization, non-regexp.  */
+  char *separator1 = separator + 1;	/* Speed optimization, non-regexp.  */
+  int match_length1 = match_length - 1;	/* Speed optimization, non-regexp.  */
+  struct re_registers regs;
+
+  /* Find the size of the input file.  A result below 1 (which includes
+     an lseek failure) is treated as an empty file.  */
+  file_pos = lseek (fd, (off_t) 0, SEEK_END);
+  if (file_pos < 1)
+    return 0;			/* It's an empty file.  */
+
+  /* Arrange for the first read to lop off enough to leave the rest of the
+     file a multiple of `read_size'.  Since `read_size' can change, this may
+     not always hold during the program run, but since it usually will, leave
+     it here for i/o efficiency (page/sector boundaries and all that).
+     Note: the efficiency gain has not been verified.  */
+  saved_record_size = file_pos % read_size;
+  if (saved_record_size == 0)
+    saved_record_size = read_size;
+  file_pos -= saved_record_size;
+  /* `file_pos' now points to the start of the last (probably partial) block
+     in the input file.  */
+
+  lseek (fd, file_pos, SEEK_SET);
+  if (read (fd, buffer, saved_record_size) != saved_record_size)
+    {
+      /* Report the system's error code, as the later read below does.  */
+      error (0, errno, "%s", file);
+      return 1;
+    }
+
+  match_start = past_end = buffer + saved_record_size;
+  /* For non-regexp search, move past impossible positions for a match.  */
+  if (sentinel_length)
+    match_start -= match_length1;
+
+  for (;;)
+    {
+      /* Search backward from `match_start' - 1 to `buffer' for a match
+         with `separator'; for speed, use strncmp if `separator' contains no
+         metacharacters.
+         If the match succeeds, set `match_start' to point to the start of
+         the match and `match_length' to the length of the match.
+         Otherwise, make `match_start' < `buffer'.  */
+      if (sentinel_length == 0)
+        {
+          int i = match_start - buffer;
+          int ret;
+
+          ret = re_search (&compiled_separator, buffer, i, i - 1, -i, &regs);
+          if (ret == -1)
+            match_start = buffer - 1;
+          else if (ret == -2)
+            {
+              error (0, 0, "error in regular expression search");
+              cleanup ();
+            }
+          else
+            {
+              match_start = buffer + regs.start[0];
+              match_length = regs.end[0] - regs.start[0];
+            }
+        }
+      else
+        {
+          /* `match_length' is constant for non-regexp boundaries.
+             NOTE(review): this loop has no explicit lower bound;
+             presumably a copy of the separator is kept just before
+             `buffer' as a sentinel (see `sentinel_length' and the
+             `buffer - offset' reallocation below) -- confirm where the
+             buffer is set up.  */
+          while (*--match_start != first_char
+                 || (match_length1 && strncmp (match_start + 1, separator1,
+                                               match_length1)))
+            /* Do nothing.  */ ;
+        }
+
+      /* Check whether we backed off the front of `buffer' without finding
+         a match for `separator'.  */
+      if (match_start < buffer)
+        {
+          if (file_pos == 0)
+            {
+              /* Hit the beginning of the file; print the remaining record.  */
+              output (buffer, past_end);
+              return 0;
+            }
+
+          saved_record_size = past_end - buffer;
+          if (saved_record_size > read_size)
+            {
+              /* `buffer_size' is about twice `read_size', so since
+                 we want to read in another `read_size' bytes before
+                 the data already in `buffer', we need to increase
+                 `buffer_size'.  */
+              char *newbuffer;
+              int offset = sentinel_length ? sentinel_length : 1;
+
+              read_size *= 2;
+              buffer_size = read_size * 2 + sentinel_length + 2;
+              newbuffer = xrealloc (buffer - offset, buffer_size) + offset;
+              /* Adjust the pointers for the new buffer location.  */
+              match_start += newbuffer - buffer;
+              past_end += newbuffer - buffer;
+              buffer = newbuffer;
+            }
+
+          /* Back up to the start of the next bufferfull of the file.  */
+          if (file_pos >= read_size)
+            file_pos -= read_size;
+          else
+            {
+              read_size = file_pos;
+              file_pos = 0;
+            }
+          lseek (fd, file_pos, SEEK_SET);
+
+          /* Shift the pending record data right to make room for the new.  */
+          bcopy (buffer, buffer + read_size, saved_record_size);
+          past_end = buffer + read_size + saved_record_size;
+          /* For non-regexp searches, avoid unnecessary scanning.  */
+          if (sentinel_length)
+            match_start = buffer + read_size;
+          else
+            match_start = past_end;
+
+          if (read (fd, buffer, read_size) != read_size)
+            {
+              error (0, errno, "%s", file);
+              return 1;
+            }
+        }
+      else
+        {
+          /* Found a match of `separator'.  */
+          if (separator_ends_record)
+            {
+              char *match_end = match_start + match_length;
+
+              /* If this match of `separator' isn't at the end of the
+                 file, print the record.  */
+              if (first_time == 0 || match_end != past_end)
+                output (match_end, past_end);
+              past_end = match_end;
+              first_time = 0;
+            }
+          else
+            {
+              output (match_start, past_end);
+              past_end = match_start;
+            }
+          /* Resume the search just left of this match.  */
+          match_start -= match_length - 1;
+        }
+    }
+}
+
+/* Queue the characters from START to PAST_END - 1 for standard output.
+   Data collects in a private WRITESIZE-byte buffer that is written out
+   each time it fills.  Calling with START equal to 0 writes whatever is
+   queued and empties the buffer.  */
+
+void
+output (start, past_end)
+     char *start;
+     char *past_end;
+{
+  static char outbuf[WRITESIZE];
+  static int pending = 0;	/* Bytes queued in `outbuf'.  */
+  int remaining;		/* Bytes of the caller's data not yet queued.  */
+  int room;			/* Free space left in `outbuf'.  */
+
+  if (start == 0)
+    {
+      xwrite (1, outbuf, pending);
+      pending = 0;
+      return;
+    }
+
+  remaining = past_end - start;
+  room = WRITESIZE - pending;
+
+  /* While the data would fill the buffer, top it up and write it.  */
+  for (; remaining >= room; room = WRITESIZE)
+    {
+      bcopy (start, outbuf + pending, room);
+      remaining -= room;
+      start += room;
+      xwrite (1, outbuf, WRITESIZE);
+      pending = 0;
+    }
+
+  /* What is left fits; keep it for a later call.  */
+  bcopy (start, outbuf + pending, remaining);
+  pending += remaining;
+}
+
+/* Remove the file named by `tempfile' and exit with status 1.
+   The error paths visible in this file report a problem and then call
+   this; it does not return.
+   NOTE(review): the RETSIGTYPE return type suggests it is also
+   installed as a signal handler -- confirm against the signal setup.  */
+RETSIGTYPE
+cleanup ()
+{
+ unlink (tempfile);
+ exit (1);
+}
+
+/* Write SIZE bytes from BUFFER to descriptor DESC.  If write () does
+   not report exactly SIZE bytes written, print "write error" and leave
+   through cleanup ().  */
+void
+xwrite (desc, buffer, size)
+     int desc;
+     char *buffer;
+     int size;
+{
+  if (write (desc, buffer, size) == size)
+    return;
+
+  error (0, errno, "write error");
+  cleanup ();
+}
+
+/* Return a pointer to N freshly allocated bytes.  If malloc fails,
+   print a message and leave through cleanup ().  */
+
+char *
+xmalloc (n)
+     unsigned n;
+{
+  char *block = malloc (n);
+
+  if (block != 0)
+    return block;
+
+  error (0, 0, "virtual memory exhausted");
+  cleanup ();
+  return block;
+}
+
+/* Resize the memory area P to N bytes and return its (possibly moved)
+   address.  If realloc fails, print a message and leave through
+   cleanup ().  */
+
+char *
+xrealloc (p, n)
+     char *p;
+     unsigned n;
+{
+  char *grown = realloc (p, n);
+
+  if (grown != 0)
+    return grown;
+
+  error (0, 0, "virtual memory exhausted");
+  cleanup ();
+  return grown;
+}
diff --git a/src/tail.c b/src/tail.c
new file mode 100644
index 000000000..050c1936f
--- /dev/null
+++ b/src/tail.c
@@ -0,0 +1,858 @@
+/* tail -- output last part of file(s)
+ Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Can display any amount of data, unlike the Unix version, which uses
+ a fixed size buffer and therefore can only deliver a limited number
+ of lines.
+
+ Options:
+ -b Tail by N 512-byte blocks.
+ -c, --bytes=N[bkm] Tail by N bytes
+ [or 512-byte blocks, kilobytes, or megabytes].
+ -f, --follow Loop forever trying to read more characters at the
+ end of the file, on the assumption that the file
+ is growing. Ignored if reading from a pipe.
+ Cannot be used if more than one file is given.
+ -k Tail by N kilobytes.
+ -N, -l, -n, --lines=N Tail by N lines.
+ -m Tail by N megabytes.
+ -q, --quiet, --silent Never print filename headers.
+ -v, --verbose Always print filename headers.
+
+ If a number (N) starts with a `+', begin printing with the Nth item
+ from the start of each file, instead of from the end.
+
+ Reads from standard input if no files are given or when a filename of
+ ``-'' is encountered.
+ By default, filename headers are printed only if more than one file
+ is given.
+ By default, prints the last 10 lines (tail -n 10).
+
+ Original version by Paul Rubin <phr@ocf.berkeley.edu>.
+ Extensions by David MacKenzie <djm@ai.mit.edu>. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include "system.h"
+
+/* ISDIGIT (C) is nonzero when C is a decimal digit.  When <ctype.h>
+   supplies `isascii' as a macro, C is first checked with it before
+   being given to `isdigit' (presumably because `isdigit' is not
+   reliable for non-ASCII values on such systems).  The argument is
+   evaluated twice in that case, so it should have no side effects.  */
+#ifdef isascii
+#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
+#else
+#define ISDIGIT(c) (isdigit ((c)))
+#endif
+
+/* Number of items to tail. */
+#define DEFAULT_NUMBER 10
+
+/* Size of atomic reads. */
+#define BUFSIZE (512 * 8)
+
+/* Number of bytes per item we are printing.
+ If 0, tail in lines. */
+int unit_size;
+
+/* If nonzero, read from end of file until killed. */
+int forever;
+
+/* If nonzero, count from start of file instead of end. */
+int from_start;
+
+/* If nonzero, print filename headers. */
+int print_headers;
+
+/* When to print the filename banners.
+   multiple_files: only when more than one file operand is given
+   (the initial setting in `main');
+   always: for every file (selected by -v / --verbose);
+   never: for no file (selected by -q / --quiet / --silent).  */
+enum header_mode
+{
+ multiple_files, always, never
+};
+
+char *xmalloc ();
+int file_lines ();
+int pipe_bytes ();
+int pipe_lines ();
+int start_bytes ();
+int start_lines ();
+int tail ();
+int tail_bytes ();
+int tail_file ();
+int tail_lines ();
+long atou();
+void dump_remainder ();
+void error ();
+void parse_unit ();
+void usage ();
+void write_header ();
+void xwrite ();
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+/* Long option table handed to getopt_long in `main'.  Each entry gives
+   the long name, whether it takes an argument (1) or not (0), a NULL
+   flag pointer, and the short option character that `main' handles it
+   as.  "quiet" and "silent" both map to 'q'.  The all-zero entry ends
+   the table.  */
+struct option long_options[] =
+{
+ {"bytes", 1, NULL, 'c'},
+ {"follow", 0, NULL, 'f'},
+ {"lines", 1, NULL, 'n'},
+ {"quiet", 0, NULL, 'q'},
+ {"silent", 0, NULL, 'q'},
+ {"verbose", 0, NULL, 'v'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Entry point of tail.  Parses an optional old-style first argument
+   (`-N...' or `+N...'), then the getopt-style options, sets the globals
+   `unit_size', `forever', `from_start' and `print_headers', and calls
+   tail_file on each file operand (or on "-" when there is none).
+   Exits with the OR of the tail_file results, or with 1 through
+   error () / usage () on bad usage or a failed close.  */
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ enum header_mode header_mode = multiple_files;
+ int exit_status = 0;
+ /* If from_start, the number of items to skip before printing; otherwise,
+ the number of items at the end of the file to print. Initially, -1
+ means the value has not been set. */
+ long number = -1;
+ int c; /* Option character. */
+
+ program_name = argv[0];
+ have_read_stdin = 0;
+ unit_size = 0;
+ forever = from_start = print_headers = 0;
+
+ /* The old syntax is recognized only in the first argument: `-' followed
+ by a digit, or `+' followed by a digit or by nothing at all. */
+ if (argc > 1
+ && ((argv[1][0] == '-' && ISDIGIT (argv[1][1]))
+ || (argv[1][0] == '+' && (ISDIGIT (argv[1][1]) || argv[1][1] == 0))))
+ {
+ /* Old option syntax: a dash or plus, one or more digits (zero digits
+ are acceptable with a plus), and zero or more option letters. */
+ if (argv[1][0] == '+')
+ from_start = 1;
+ if (argv[1][1] != 0)
+ {
+ /* Accumulate the decimal count, advancing argv[1] itself. */
+ for (number = 0, ++argv[1]; ISDIGIT (*argv[1]); ++argv[1])
+ number = number * 10 + *argv[1] - '0';
+ /* Parse any appended option letters. */
+ while (*argv[1])
+ {
+ switch (*argv[1])
+ {
+ case 'b':
+ unit_size = 512;
+ break;
+
+ case 'c':
+ unit_size = 1;
+ break;
+
+ case 'f':
+ forever = 1;
+ break;
+
+ case 'k':
+ unit_size = 1024;
+ break;
+
+ case 'l':
+ unit_size = 0;
+ break;
+
+ case 'm':
+ unit_size = 1048576;
+ break;
+
+ case 'q':
+ header_mode = never;
+ break;
+
+ case 'v':
+ header_mode = always;
+ break;
+
+ default:
+ error (0, 0, "unrecognized option `-%c'", *argv[1]);
+ usage ();
+ }
+ ++argv[1];
+ }
+ }
+ /* Make the options we just parsed invisible to getopt. */
+ argv[1] = argv[0];
+ argv++;
+ argc--;
+ }
+
+ while ((c = getopt_long (argc, argv, "c:n:fqv", long_options, (int *) 0))
+ != EOF)
+ {
+ switch (c)
+ {
+ case 'c':
+ /* Count in bytes; parse_unit may raise unit_size and strips a
+ trailing b, k or m from optarg before the number is read. */
+ unit_size = 1;
+ parse_unit (optarg);
+ goto getnum;
+ case 'n':
+ unit_size = 0;
+ getnum:
+ /* Shared by -c and -n: a leading `+' counts from the start of
+ the file, a leading `-' is accepted and skipped. */
+ if (*optarg == '+')
+ {
+ from_start = 1;
+ ++optarg;
+ }
+ else if (*optarg == '-')
+ ++optarg;
+ number = atou (optarg);
+ if (number == -1)
+ error (1, 0, "invalid number `%s'", optarg);
+ break;
+
+ case 'f':
+ forever = 1;
+ break;
+
+ case 'q':
+ header_mode = never;
+ break;
+
+ case 'v':
+ header_mode = always;
+ break;
+
+ default:
+ usage ();
+ }
+ }
+
+ if (number == -1)
+ number = DEFAULT_NUMBER;
+
+ /* To start printing with item `number' from the start of the file, skip
+ `number' - 1 items. `tail +0' is actually meaningless, but for Unix
+ compatibility it's treated the same as `tail +1'. */
+ if (from_start)
+ {
+ if (number)
+ --number;
+ }
+
+ /* Turn a count of blocks, kilobytes or megabytes into bytes. */
+ if (unit_size > 1)
+ number *= unit_size;
+
+ /* `optind < argc - 1' means at least two file operands remain. */
+ if (optind < argc - 1 && forever)
+ error (1, 0, "cannot follow the ends of multiple files");
+
+ if (header_mode == always
+ || (header_mode == multiple_files && optind < argc - 1))
+ print_headers = 1;
+
+ /* With no file operands, read the standard input. */
+ if (optind == argc)
+ exit_status |= tail_file ("-", number);
+
+ for (; optind < argc; ++optind)
+ exit_status |= tail_file (argv[optind], number);
+
+ /* Closing explicitly lets a failure on stdin or stdout be reported. */
+ if (have_read_stdin && close (0) < 0)
+ error (1, errno, "-");
+ if (close (1) < 0)
+ error (1, errno, "write error");
+ exit (exit_status);
+}
+
+/* Run tail () on the file named FILENAME with count NUMBER, printing a
+   header first when `print_headers' is set.  A FILENAME of "-" selects
+   the standard input, which is labelled "standard input" and is not
+   closed here.
+   Return tail's result, or 1 if the file cannot be opened or closed.  */
+
+int
+tail_file (filename, number)
+     char *filename;
+     long number;
+{
+  int fd;
+  int status;
+
+  if (strcmp (filename, "-") == 0)
+    {
+      have_read_stdin = 1;
+      filename = "standard input";
+      if (print_headers)
+        write_header (filename);
+      return tail (filename, 0, number);
+    }
+
+  fd = open (filename, O_RDONLY);
+  if (fd < 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (print_headers)
+    write_header (filename);
+  status = tail (filename, fd, number);
+
+  if (close (fd) != 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+  return status;
+}
+
+/* Write the banner `==> FILENAME <==' and a newline to standard output.
+   Every banner after the first is preceded by a blank line.  */
+void
+write_header (filename)
+     char *filename;
+{
+  static int first_file = 1;
+  char *lead = first_file ? "==> " : "\n==> ";
+  int lead_len = first_file ? 4 : 5;
+
+  first_file = 0;
+  xwrite (1, lead, lead_len);
+  xwrite (1, filename, strlen (filename));
+  xwrite (1, " <==\n", 5);
+}
+
+/* Display the end of file FILENAME, open for reading on FD.  NUMBER is
+   a byte count when `unit_size' is nonzero and a line count otherwise;
+   the work is passed to tail_bytes or tail_lines accordingly.
+   Return 0 if successful, 1 if an error occurred.  */
+
+int
+tail (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  return unit_size
+    ? tail_bytes (filename, fd, number)
+    : tail_lines (filename, fd, number);
+}
+
+/* Display part of file FILENAME, open for reading on FD, measured in
+   characters.  When `from_start' is set, NUMBER characters are skipped
+   and the rest is printed; otherwise the last NUMBER characters are
+   printed.  Regular files are positioned with lseek; anything else is
+   read through start_bytes or pipe_bytes.
+   Return 0 if successful, 1 if an error occurred.  */
+
+int
+tail_bytes (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  struct stat stats;
+
+  /* Use fstat instead of checking for errno == ESPIPE because
+     lseek doesn't work on some special files but doesn't return an
+     error, either.  */
+  if (fstat (fd, &stats) != 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (!S_ISREG (stats.st_mode))
+    {
+      /* Not seekable: the tail has to be buffered, the head read past.  */
+      if (!from_start)
+        return pipe_bytes (filename, fd, number);
+      if (start_bytes (filename, fd, number))
+        return 1;
+    }
+  else if (from_start)
+    lseek (fd, number, SEEK_SET);
+  else if (lseek (fd, 0L, SEEK_END) > number)
+    /* The file is longer than we want, so go back.  */
+    lseek (fd, -number, SEEK_END);
+  else
+    /* The file is shorter than we want, or just the right size, so
+       print the whole file.  */
+    lseek (fd, 0L, SEEK_SET);
+
+  dump_remainder (filename, fd);
+  return 0;
+}
+
+/* Display part of file FILENAME, open for reading on FD, measured in
+   lines.  When `from_start' is set, NUMBER lines are skipped and the
+   rest is printed; otherwise the last NUMBER lines are printed, using
+   file_lines on regular files and pipe_lines on anything else.
+   Return 0 if successful, 1 if an error occurred.  */
+
+int
+tail_lines (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  struct stat stats;
+  long length;
+
+  if (fstat (fd, &stats) != 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (from_start)
+    {
+      if (start_lines (filename, fd, number))
+        return 1;
+    }
+  else if (!S_ISREG (stats.st_mode))
+    return pipe_lines (filename, fd, number);
+  else
+    {
+      /* An empty regular file needs no backward scan.  */
+      length = lseek (fd, 0L, SEEK_END);
+      if (length != 0 && file_lines (filename, fd, number, length))
+        return 1;
+    }
+
+  dump_remainder (filename, fd);
+  return 0;
+}
+
+/* Find where the last NUMBER lines of the seekable file FD begin.
+   The file is read backward in `BUFSIZE' blocks (the first one is
+   usually shorter) while newlines are counted.  When the starting point
+   is found, the rest of that block is written and FD is left just after
+   the block, so the caller can copy what follows.  When the file has
+   too few lines, FD is rewound to offset 0 instead.
+   POS starts out as the length of the file (the offset of the last
+   byte of the file + 1).  FILENAME is used only in error messages.
+   Return 0 if successful, 1 if an error occurred.  */
+
+int
+file_lines (filename, fd, number, pos)
+     char *filename;
+     int fd;
+     long number;
+     long pos;
+{
+  char block[BUFSIZE];
+  int got;			/* Bytes held in `block'.  */
+  int idx;			/* Scanning index into `block'.  */
+
+  if (number == 0)
+    return 0;
+
+  /* Size the first read as the last, probably partial, block:
+     0 < `got' <= `BUFSIZE'.  */
+  got = pos % BUFSIZE;
+  if (got == 0)
+    got = BUFSIZE;
+  /* Make `pos' a multiple of `BUFSIZE' (0 if the file is short), so that
+     all reads will be on block boundaries.  */
+  pos -= got;
+  lseek (fd, pos, SEEK_SET);
+  got = read (fd, block, got);
+  if (got == -1)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  /* A final line without a newline still counts as a line.  */
+  if (got && block[got - 1] != '\n')
+    --number;
+
+  for (;;)
+    {
+      /* Walk this block from its end, counting newlines.  */
+      idx = got;
+      while (idx-- > 0)
+        {
+          if (block[idx] != '\n')
+            continue;
+          if (number-- != 0)
+            continue;
+          /* This newline ends the line before the wanted ones; print
+             whatever follows it in the block.  */
+          if (idx != got - 1)
+            xwrite (1, &block[idx + 1], got - (idx + 1));
+          return 0;
+        }
+
+      /* Not enough newlines in that block.  */
+      if (pos == 0)
+        {
+          /* Not enough lines in the file; print the entire file.  */
+          lseek (fd, 0L, SEEK_SET);
+          return 0;
+        }
+      pos -= BUFSIZE;
+      lseek (fd, pos, SEEK_SET);
+
+      got = read (fd, block, BUFSIZE);
+      if (got <= 0)
+        break;
+    }
+
+  if (got == -1)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+  return 0;
+}
+
+/* Print the last NUMBER lines from the end of the standard input,
+   open for reading as pipe FD.
+   Buffer the text as a linked list of LBUFFERs, adding them as needed;
+   buffers whose lines can no longer be wanted are reused for later reads.
+   FILENAME is used only in error messages.
+   Return 0 if successful, 1 if an error occurred.  */
+
+int
+pipe_lines (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  struct linebuffer
+  {
+    int nbytes, nlines;
+    char buffer[BUFSIZE];
+    struct linebuffer *next;
+  };
+  typedef struct linebuffer LBUFFER;
+  LBUFFER *first, *last, *tmp;
+  int i;			/* Index into buffers.  */
+  int total_lines = 0;		/* Total number of newlines in all buffers.  */
+  int errors = 0;
+
+  first = last = (LBUFFER *) xmalloc (sizeof (LBUFFER));
+  first->nbytes = first->nlines = 0;
+  first->next = NULL;
+  tmp = (LBUFFER *) xmalloc (sizeof (LBUFFER));
+
+  /* Input is always read into a fresh buffer.  */
+  while ((tmp->nbytes = read (fd, tmp->buffer, BUFSIZE)) > 0)
+    {
+      tmp->nlines = 0;
+      tmp->next = NULL;
+
+      /* Count the number of newlines just read.  */
+      for (i = 0; i < tmp->nbytes; i++)
+        if (tmp->buffer[i] == '\n')
+          ++tmp->nlines;
+      total_lines += tmp->nlines;
+
+      /* If there is enough room in the last buffer read, just append the new
+         one to it.  This is because when reading from a pipe, `nbytes' can
+         often be very small.  */
+      if (tmp->nbytes + last->nbytes < BUFSIZE)
+        {
+          bcopy (tmp->buffer, &last->buffer[last->nbytes], tmp->nbytes);
+          last->nbytes += tmp->nbytes;
+          last->nlines += tmp->nlines;
+        }
+      else
+        {
+          /* If there's not enough room, link the new buffer onto the end of
+             the list, then either free up the oldest buffer for the next
+             read if that would leave enough lines, or else malloc a new one.
+             Some compaction mechanism is possible but probably not
+             worthwhile.  */
+          last = last->next = tmp;
+          if (total_lines - first->nlines > number)
+            {
+              tmp = first;
+              total_lines -= first->nlines;
+              first = first->next;
+            }
+          else
+            tmp = (LBUFFER *) xmalloc (sizeof (LBUFFER));
+        }
+    }
+  if (tmp->nbytes == -1)
+    {
+      error (0, errno, "%s", filename);
+      errors = 1;
+      free ((char *) tmp);
+      goto free_lbuffers;
+    }
+
+  free ((char *) tmp);
+
+  /* This prevents a core dump when the pipe contains no newlines.  */
+  if (number == 0)
+    goto free_lbuffers;
+
+  /* Nothing was read at all, so there is nothing to print.  Stopping
+     here also keeps the check below from looking at the byte before
+     the start of an empty buffer.  */
+  if (last->nbytes == 0)
+    goto free_lbuffers;
+
+  /* Count the incomplete line on files that don't end with a newline.  */
+  if (last->buffer[last->nbytes - 1] != '\n')
+    {
+      ++last->nlines;
+      ++total_lines;
+    }
+
+  /* Run through the list, printing lines.  First, skip over unneeded
+     buffers.  */
+  for (tmp = first; total_lines - tmp->nlines > number; tmp = tmp->next)
+    total_lines -= tmp->nlines;
+
+  /* Find the correct beginning, then print the rest of the file.  */
+  if (total_lines > number)
+    {
+      char *cp;
+
+      /* Skip `total_lines' - `number' newlines.  We made sure that
+         `total_lines' - `number' <= `tmp->nlines'.  */
+      cp = tmp->buffer;
+      for (i = total_lines - number; i; --i)
+        while (*cp++ != '\n')
+          /* Do nothing.  */ ;
+      i = cp - tmp->buffer;
+    }
+  else
+    i = 0;
+  xwrite (1, &tmp->buffer[i], tmp->nbytes - i);
+
+  for (tmp = tmp->next; tmp; tmp = tmp->next)
+    xwrite (1, tmp->buffer, tmp->nbytes);
+
+free_lbuffers:
+  while (first)
+    {
+      tmp = first->next;
+      free ((char *) first);
+      first = tmp;
+    }
+  return errors;
+}
+
+/* Print the last NUMBER characters read from pipe FD.
+   The input is kept in a chain of CBUFFERs; once the chain holds more
+   than NUMBER characters beyond its oldest buffer, that buffer is
+   reused for the next read.  FILENAME is used only in error messages.
+   Return 0 if successful, 1 if an error occurred.  */
+
+int
+pipe_bytes (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  struct charbuffer
+  {
+    int nbytes;
+    char buffer[BUFSIZE];
+    struct charbuffer *next;
+  };
+  typedef struct charbuffer CBUFFER;
+  CBUFFER *head, *tail_buf, *spare;
+  int skip;			/* Offset of the first byte to print.  */
+  int total_bytes = 0;		/* Total characters in all buffers.  */
+  int errors = 0;
+
+  head = tail_buf = (CBUFFER *) xmalloc (sizeof (CBUFFER));
+  head->nbytes = 0;
+  head->next = NULL;
+  spare = (CBUFFER *) xmalloc (sizeof (CBUFFER));
+
+  /* Each read lands in `spare', a buffer not yet on the chain.  */
+  for (;;)
+    {
+      spare->nbytes = read (fd, spare->buffer, BUFSIZE);
+      if (spare->nbytes <= 0)
+        break;
+      spare->next = NULL;
+      total_bytes += spare->nbytes;
+
+      if (spare->nbytes + tail_buf->nbytes < BUFSIZE)
+        {
+          /* Small read: fold it into the newest chained buffer, since
+             reads from a pipe are often short.  */
+          bcopy (spare->buffer, &tail_buf->buffer[tail_buf->nbytes],
+                 spare->nbytes);
+          tail_buf->nbytes += spare->nbytes;
+          continue;
+        }
+
+      /* No room: chain the buffer, then take the next spare either
+         from the oldest buffer (if enough characters remain without
+         it) or from the allocator.  */
+      tail_buf = tail_buf->next = spare;
+      if (total_bytes - head->nbytes > number)
+        {
+          spare = head;
+          total_bytes -= head->nbytes;
+          head = head->next;
+        }
+      else
+        spare = (CBUFFER *) xmalloc (sizeof (CBUFFER));
+    }
+
+  if (spare->nbytes == -1)
+    {
+      error (0, errno, "%s", filename);
+      errors = 1;
+      free ((char *) spare);
+    }
+  else
+    {
+      free ((char *) spare);
+
+      /* Pass over whole buffers that lie before the wanted tail.  */
+      for (spare = head; total_bytes - spare->nbytes > number;
+           spare = spare->next)
+        total_bytes -= spare->nbytes;
+
+      /* The tail begins inside this buffer; the skipping above ensured
+         that `total_bytes' - `number' <= `spare->nbytes'.  */
+      skip = 0;
+      if (total_bytes > number)
+        skip = total_bytes - number;
+      xwrite (1, &spare->buffer[skip], spare->nbytes - skip);
+
+      for (spare = spare->next; spare; spare = spare->next)
+        xwrite (1, spare->buffer, spare->nbytes);
+    }
+
+  /* Release the chain.  */
+  while (head)
+    {
+      spare = head->next;
+      free ((char *) head);
+      head = spare;
+    }
+  return errors;
+}
+
+/* Read past the first NUMBER characters of pipe FD.  The last block
+   read may reach beyond that point; the surplus is written to standard
+   output.  FILENAME is used only in error messages.
+   Return 1 on error, 0 if ok.  */
+
+int
+start_bytes (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  char buffer[BUFSIZE];
+  int got = 0;
+
+  while (number > 0)
+    {
+      got = read (fd, buffer, BUFSIZE);
+      if (got <= 0)
+        break;
+      number -= got;
+    }
+
+  if (got == -1)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+  /* A negative `number' is how far the last read overshot.  */
+  if (number < 0)
+    xwrite (1, &buffer[got + number], -number);
+  return 0;
+}
+
+/* Read past the first NUMBER lines of file or pipe FD.  The block that
+   holds the last skipped newline may hold more text after it; that
+   text is written to standard output.  FILENAME is used only in error
+   messages.
+   Return 1 on error, 0 if ok.  */
+
+int
+start_lines (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  char buffer[BUFSIZE];
+  int got = 0;			/* Bytes in the current block.  */
+  int skip = 0;			/* Bytes of that block already passed.  */
+
+  while (number != 0)
+    {
+      got = read (fd, buffer, BUFSIZE);
+      if (got <= 0)
+        break;
+
+      /* Step through the block, counting off one line per newline.  */
+      for (skip = 0; skip < got;)
+        {
+          if (buffer[skip++] != '\n')
+            continue;
+          if (--number == 0)
+            break;
+        }
+    }
+
+  if (got == -1)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+  if (skip < got)
+    xwrite (1, &buffer[skip], got - skip);
+  return 0;
+}
+
+/* Copy file FILENAME, open on FD, to standard output from the current
+   position to end of file.  When `forever' is nonzero, sleep for one
+   second at end of file and try again, without end.  A read error is
+   fatal: it is reported through error () with exit status 1.  */
+
+void
+dump_remainder (filename, fd)
+     char *filename;
+     int fd;
+{
+  char buffer[BUFSIZE];
+  int got;
+
+  for (;;)
+    {
+      while ((got = read (fd, buffer, BUFSIZE)) > 0)
+        xwrite (1, buffer, got);
+      if (got == -1)
+        error (1, errno, "%s", filename);
+
+      if (!forever)
+        break;
+      /* Give the file a moment to grow before polling again.  */
+      sleep (1);
+    }
+}
+
+/* If STR ends in a unit letter, set `unit_size' to the matching number
+   of bytes (`b' 512, `k' 1024, `m' 1048576) and remove the letter from
+   STR in place.  An empty STR, or one ending in anything else, changes
+   nothing.  */
+void
+parse_unit (str)
+     char *str;
+{
+  int arglen = strlen (str);
+  int size;
+
+  if (arglen == 0)
+    return;
+
+  switch (str[arglen - 1])
+    {
+    case 'b':
+      size = 512;
+      break;
+    case 'k':
+      size = 1024;
+      break;
+    case 'm':
+      size = 1048576;
+      break;
+    default:
+      /* No unit suffix.  */
+      return;
+    }
+
+  unit_size = size;
+  str[arglen - 1] = '\0';
+}
+
+/* Convert STR, a string of ASCII digits, into a number.
+   Return -1 if a character other than a digit is found before the end
+   of STR.  An empty STR yields 0.  Overflow is not checked.  */
+
+long
+atou (str)
+     char *str;
+{
+  unsigned long value = 0;
+  char *p = str;
+
+  while (ISDIGIT (*p))
+    {
+      value = value * 10 + *p - '0';
+      ++p;
+    }
+
+  /* Anything left over means STR was not all digits.  */
+  if (*p != '\0')
+    return -1;
+  return value;
+}
+
+/* Print a usage summary on standard error, covering both the getopt
+   syntax and the old `{-,+}N' syntax, then exit with status 1.
+   `program_name' is substituted for both occurrences of %s.  */
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-c [+]N[bkm]] [-n [+]N] [-fqv] [--bytes=[+]N[bkm]] [--lines=[+]N]\n\
+ [--follow] [--quiet] [--silent] [--verbose] [file...]\n\
+ %s [{-,+}Nbcfklmqv] [file...]\n", program_name, program_name);
+ exit (1);
+}
diff --git a/src/tr.c b/src/tr.c
new file mode 100644
index 000000000..bd12f383f
--- /dev/null
+++ b/src/tr.c
@@ -0,0 +1,1813 @@
+/* tr -- a filter to translate characters
+ Copyright (C) 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Jim Meyering. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifndef isgraph
+#define isgraph(c) (isprint (c) && !isspace (c))
+#endif
+#include <stdio.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include "getopt.h"
+#include "system.h"
+
+#ifndef LONG_MAX
+#define LONG_MAX 0x7FFFFFFF
+#endif
+
+#ifndef UCHAR_MAX
+#define UCHAR_MAX 0xFF
+#endif
+
+#define N_CHARS (UCHAR_MAX + 1)
+
+/* A pointer to a function that returns an int. */
+typedef int (*PFI) ();
+
+/* Convert from character C to its index in the collating
+ sequence array. Just cast to an unsigned int to avoid
+ problems with sign-extension. */
+#define ORD(c) (unsigned int)(c)
+
+/* The inverse of ORD. */
+#define CHR(i) (unsigned char)(i)
+
+/* The value for Spec_list->state that indicates to
+   get_next that it should initialize the tail pointer.
+   get_next also stores in Spec_list->state either a character code
+   (while expanding a range or a character class) or a running repeat
+   counter (while expanding [c*n]), so this sentinel must lie above
+   every value either of those can take.  A small value such as
+   2 * N_CHARS is reached by the counter of a construct like [c*600],
+   which makes get_next restart the list from its head; hence the
+   sentinels sit at the very top of the unsigned int range.  */
+#define BEGIN_STATE ((unsigned int) -2)
+
+/* The value for Spec_list->state that indicates to
+   get_next that the element pointed to by Spec_list->tail is
+   being considered for the first time on this pass through the
+   list -- it indicates that get_next should make any necessary
+   initializations.  */
+#define NEW_ELEMENT (BEGIN_STATE + 1)
+
+/* A value distinct from any character that may have been stored in a
+ buffer as the result of a block-read in the function squeeze_filter. */
+#define NOT_A_CHAR (unsigned int)(-1)
+
+/* Identifiers for the character classes accepted in [:name:]
+ constructs. Every enumerator except CC_NO_CLASS is the index of the
+ matching name in the char_class_name array, so the two lists must be
+ kept in the same order. CC_NO_CLASS is what look_up_char_class
+ returns for a name that is not in that array. */
+enum Char_class
+{
+ CC_ALNUM = 0, CC_ALPHA = 1, CC_BLANK = 2, CC_CNTRL = 3,
+ CC_DIGIT = 4, CC_GRAPH = 5, CC_LOWER = 6, CC_PRINT = 7,
+ CC_PUNCT = 8, CC_SPACE = 9, CC_UPPER = 10, CC_XDIGIT = 11,
+ CC_NO_CLASS = 9999
+};
+
+/* Character class to which a character (returned by get_next) belonged;
+ but it is set only if the construct from which the character was obtained
+ was one of the character classes [:upper:] or [:lower:]. The value
+ is used only when translating and then, only to make sure that upper
+ and lower class constructs have the same relative positions in string1
+ and string2. The numeric values double as indices into the class_ok
+ table, so they must stay 0, 1 and 2. */
+enum Upper_Lower_class
+{
+ UL_LOWER = 0, /* character came from [:lower:] */
+ UL_UPPER = 1, /* character came from [:upper:] */
+ UL_NONE = 2 /* character came from any other construct */
+};
+
+/* A shortcut to ensure that when constructing the translation array,
+ one of the values returned by paired calls to get_next (from s1 and s2) is
+ from [:upper:] and the other is from [:lower:], or neither is
+ from upper or lower. In fact, no other character classes are allowed
+ when translating, but that condition is tested elsewhere. This array
+ is indexed by values of type enum Upper_Lower_class: one index is the
+ class reported for the character from one string, the other index the
+ class reported for the character from the other string. An entry is
+ non-zero only for the pairs (lower, upper), (upper, lower) and
+ (none, none). */
+static int class_ok[3][3] =
+{
+ {0, 1, 0},
+ {1, 0, 0},
+ {0, 0, 1}
+};
+
+/* The type of a List_element, i.e. which kind of construct from a
+ tr argument string the element stands for. See build_spec_list for
+ more details. */
+enum Range_element_type
+{
+ RE_NO_TYPE = 0, /* never a valid element; get_next aborts on it */
+ RE_NORMAL_CHAR, /* c */
+ RE_RANGE, /* r-s */
+ RE_CHAR_CLASS, /* [:str:] */
+ RE_EQUIV_CLASS, /* [=c=] */
+ RE_REPEATED_CHAR /* [c*n] or [c*] */
+};
+
+/* One construct in one of tr's argument strings.
+ For example, consider the POSIX version of the
+ classic tr command:
+ tr -cs 'a-zA-Z_' '[\n*]'
+ String1 has 3 constructs, two of which are ranges (a-z and A-Z),
+ and a single normal character, `_'. String2 has one construct.
+ The member `type' says which member of the union `u' is meaningful. */
+struct List_element
+{
+ enum Range_element_type type;
+ struct List_element *next; /* following construct, or NULL at the end */
+ union
+ {
+ int normal_char; /* RE_NORMAL_CHAR: the character itself */
+ struct /* unnamed */
+ {
+ unsigned int first_char; /* RE_RANGE: low endpoint */
+ unsigned int last_char; /* RE_RANGE: high endpoint, never below first_char */
+ } range;
+ enum Char_class char_class; /* RE_CHAR_CLASS */
+ int equiv_code; /* RE_EQUIV_CLASS: the single character c of [=c=] */
+ struct /* unnamed */
+ {
+ unsigned int the_repeated_char; /* RE_REPEATED_CHAR: the c of [c*n] */
+ long repeat_count; /* 0 when no count was given, i.e. [c*] */
+ } repeated_char;
+ } u;
+};
+
+/* Each of tr's argument strings is parsed into a form that is easier
+ to work with: a linked list of constructs (struct List_element).
+ Each Spec_list structure also encapsulates various attributes of
+ the corresponding argument string. The attributes are used mainly
+ to verify that the strings are legal in the context of any options
+ specified (like -s, -d, or -c). The main exception is the member
+ `tail', which is first used to construct the list. After construction,
+ it is used by get_next to save its state when traversing the list.
+ The member `state' serves a similar function. The attribute members
+ from `length' onward are filled in by get_spec_stats. */
+struct Spec_list
+{
+ /* Points to the head of the list of range elements.
+ The first struct is a dummy; its members are never used. */
+ struct List_element *head;
+
+ /* When appending, points to the last element. When traversing via
+ get_next(), points to the element to process next. Setting
+ Spec_list.state to the value BEGIN_STATE before calling get_next
+ signals get_next to initialize tail to point to head->next. */
+ struct List_element *tail;
+
+ /* Used to save state between calls to get_next(). Holds BEGIN_STATE,
+ NEW_ELEMENT, the code of the character last produced from a range or
+ character class, or the number of characters produced so far from a
+ [c*n] construct. */
+ unsigned int state;
+
+ /* Length, in the sense that length('a-z[:digit:]123abc')
+ is 42 ( = 26 + 10 + 6). For a complemented string1 it is instead
+ the number of characters not in the set. */
+ int length;
+
+ /* The number of [c*] and [c*0] constructs that appear in this spec. */
+ int n_indefinite_repeats;
+
+ /* Non-zero if this spec contains at least one equivalence
+ class construct e.g. [=c=]. */
+ int has_equiv_class;
+
+ /* Non-zero if this spec contains at least one of [:upper:] or
+ [:lower:] class constructs. */
+ int has_upper_or_lower;
+
+ /* Non-zero if this spec contains at least one of the character class
+ constructs (all but upper and lower) that aren't allowed in s2. */
+ int has_restricted_char_class;
+};
+
+char *xmalloc ();
+char *stpcpy ();
+void error ();
+
+/* The name by which this program was run. */
+char *program_name;
+
+/* When non-zero, each sequence in the input of a repeated character
+ (call it c) is replaced (in the output) by a single occurrence of c
+ for every c in the squeeze set. */
+static int squeeze_repeats = 0;
+
+/* When non-zero, removes characters in the delete set from input. */
+static int delete = 0;
+
+/* Use the complement of set1 in place of set1. */
+static int complement = 0;
+
+/* When non-zero, this flag causes GNU tr to provide strict
+ compliance with POSIX draft 1003.2.11.2. The POSIX spec
+ says that when -d is used without -s, string2 (if present)
+ must be ignored. Silently ignoring arguments is a bad idea.
+ The default GNU behavior is to give a usage message and exit.
+ Additionally, when this flag is zero, tr prints warnings
+ on stderr if it is being used in a manner that is not portable.
+ That is, applicable warnings are given by default, but are suppressed
+ if the environment variable `POSIXLY_CORRECT' is set, since
+ being POSIX conformant means we can't issue such messages.
+ Warnings on the following topics are suppressed when this
+ variable is non-zero:
+ 1. Ambiguous octal escapes. */
+static int posix_pedantic;
+
+/* When tr is performing translation and string1 is longer than string2,
+ POSIX says that the result is undefined. That gives the implementor
+ of a POSIX conforming version of tr two reasonable choices for the
+ semantics of this case.
+
+ * The BSD tr pads string2 to the length of string1 by
+ repeating the last character in string2.
+
+ * System V tr ignores characters in string1 that have no
+ corresponding character in string2. That is, string1 is effectively
+ truncated to the length of string2.
+
+ When non-zero, this flag causes GNU tr to imitate the behavior
+ of System V tr when translating with string1 longer than string2.
+ The default is to emulate BSD tr. This flag is ignored in modes where
+ no translation is performed. Emulating the System V tr
+ in this exceptional case causes the relatively common BSD idiom:
+
+ tr -cs A-Za-z0-9 '\012'
+
+ to break (it would convert only zero bytes, rather than all
+ non-alphanumerics, to newlines).
+
+ WARNING: This switch does not provide general BSD or System V
+ compatibility. For example, it doesn't disable the interpretation
+ of the POSIX constructs [:alpha:], [=c=], and [c*10], so if by
+ some unfortunate coincidence you use such constructs in scripts
+ expecting to use some other version of tr, the scripts will break. */
+static int truncate_set1 = 0;
+
+/* An alias for (!delete && non_option_args == 2).
+ It is set in main and used there and in validate(). */
+static int translating;
+
+#ifndef BUFSIZ
+#define BUFSIZ 8192
+#endif
+
+#define IO_BUF_SIZE BUFSIZ
+static unsigned char io_buf[IO_BUF_SIZE];
+
+char *char_class_name[] =
+{
+ "alnum", "alpha", "blank", "cntrl", "digit", "graph",
+ "lower", "print", "punct", "space", "upper", "xdigit"
+};
+#define N_CHAR_CLASSES (sizeof(char_class_name) / sizeof(char_class_name[0]))
+
+typedef char SET_TYPE;
+
+/* Array of boolean values. A character `c' is a member of the
+ squeeze set if and only if in_squeeze_set[c] is true. The squeeze
+ set is defined by the last (possibly, the only) string argument
+ on the command line when the squeeze option is given. */
+static SET_TYPE in_squeeze_set[N_CHARS];
+
+/* Array of boolean values. A character `c' is a member of the
+ delete set if and only if in_delete_set[c] is true. The delete
+ set is defined by the first (or only) string argument on the
+ command line when the delete option is given. */
+static SET_TYPE in_delete_set[N_CHARS];
+
+/* Array of character values defining the translation (if any) that
+ tr is to perform. Translation is performed only when there are
+ two specification strings and the delete switch is not given. */
+static char xlate[N_CHARS];
+
+static struct option long_options[] =
+{
+ {"complement", 0, NULL, 'c'},
+ {"delete", 0, NULL, 'd'},
+ {"squeeze-repeats", 0, NULL, 's'},
+ {"truncate-set1", 0, NULL, 't'},
+ {NULL, 0, NULL, 0}
+};
+
+
+/* Print the command-line synopsis on stderr, naming the program by
+   the global program_name, and exit with status 2.  */
+
+static void
+usage ()
+{
+  char *prog = program_name;
+
+  fprintf (stderr, "\
+Usage: %s [-cdst] [--complement] [--delete] [--squeeze-repeats]\n\
+ [--truncate-set1] string1 [string2]\n", prog);
+  exit (2);
+}
+
+/* Report whether character C belongs to the equivalence class named
+   by the character EQUIV_CLASS.  Each character is treated as the
+   only member of its own class, so this is a plain equality test.
+   Return 1 for a member and 0 otherwise.  */
+
+static int
+is_equiv_class_member (equiv_class, c)
+     unsigned int equiv_class;
+     unsigned int c;
+{
+  if (c != equiv_class)
+    return 0;
+  return 1;
+}
+
+/* Return non-zero if the character C is a member of the
+   character class CHAR_CLASS, as decided by the corresponding
+   <ctype.h> predicate.  CHAR_CLASS must be one of the real classes:
+   CC_NO_CLASS, or any value that is not an enumerator at all, is a
+   caller error and aborts the program instead of running off the end
+   of the function without a return value.  */
+
+static int
+is_char_class_member (char_class, c)
+     enum Char_class char_class;
+     unsigned int c;
+{
+  switch (char_class)
+    {
+    case CC_ALNUM:
+      return isalnum (c);
+    case CC_ALPHA:
+      return isalpha (c);
+    case CC_BLANK:
+      return isblank (c);
+    case CC_CNTRL:
+      return iscntrl (c);
+    case CC_DIGIT:
+      return isdigit (c);
+    case CC_GRAPH:
+      return isgraph (c);
+    case CC_LOWER:
+      return islower (c);
+    case CC_PRINT:
+      return isprint (c);
+    case CC_PUNCT:
+      return ispunct (c);
+    case CC_SPACE:
+      return isspace (c);
+    case CC_UPPER:
+      return isupper (c);
+    case CC_XDIGIT:
+      return isxdigit (c);
+    case CC_NO_CLASS:
+    default:
+      break;
+    }
+  abort ();
+  return 0;
+}
+
+/* Perform the first pass over each range-spec argument S,
+ converting all \c and \ddd escapes to their one-byte representations.
+ The conversion is done in-place, so S must point to writable
+ storage. If an illegal quote sequence is found, an error message is
+ printed and the function returns non-zero. Otherwise the length of
+ the resulting string is returned through LEN and the function returns 0.
+ The resulting array of characters may contain zero-bytes; however,
+ on input, S is assumed to be null-terminated, and hence
+ cannot contain actual (non-escaped) zero bytes.
+
+ An octal escape takes one to three digits, but a third digit is used
+ only when the resulting value stays below N_CHARS; otherwise the
+ escape ends after two digits, the third digit is later copied as an
+ ordinary character, and a warning is printed unless posix_pedantic
+ is set. No terminating zero byte is stored after the converted
+ data, so callers must rely on *LEN. */
+
+static int
+unquote (s, len)
+ unsigned char *s;
+ int *len;
+{
+ int i, j; /* i reads S, j writes it; j never gets ahead of i */
+
+ j = 0;
+ for (i = 0; s[i]; i++)
+ {
+ switch (s[i])
+ {
+ int c; /* the byte an escape sequence stands for */
+ case '\\':
+ switch (s[i + 1])
+ {
+ int oct_digit;
+ case '\\':
+ c = '\\';
+ break;
+ case 'a':
+ c = '\007';
+ break;
+ case 'b':
+ c = '\b';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ case 'v':
+ c = '\v';
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ c = s[i + 1] - '0';
+ oct_digit = s[i + 2] - '0'; /* may be the terminator; then out of range */
+ if (0 <= oct_digit && oct_digit <= 7)
+ {
+ c = 8 * c + oct_digit;
+ ++i; /* second digit consumed */
+ oct_digit = s[i + 2] - '0';
+ if (0 <= oct_digit && oct_digit <= 7)
+ {
+ if (8 * c + oct_digit < N_CHARS)
+ {
+ c = 8 * c + oct_digit;
+ ++i; /* third digit consumed */
+ }
+ else if (!posix_pedantic)
+ {
+ /* Any octal number larger than 0377 won't
+ fit in 8 bits. So we stop when adding the
+ next digit would put us over the limit and
+ give a warning about the ambiguity. POSIX
+ isn't clear on this, but one person has said
+ that in his interpretation, POSIX says tr
+ can't even give a warning. At this point
+ s[i], s[i + 1] and s[i + 2] are the three
+ digits, because i was advanced once above. */
+ error (0, 0, "warning: the ambiguous octal escape \
+\\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, `%c'",
+ s[i], s[i + 1], s[i + 2],
+ s[i], s[i + 1], s[i + 2]);
+ }
+ }
+ }
+ break;
+ case '\0':
+ error (0, 0, "invalid backslash escape at end of string");
+ return 1;
+ break;
+ default:
+ error (0, 0, "invalid backslash escape `\\%c'", s[i + 1]);
+ return 1;
+ break;
+ }
+ ++i; /* step over the character following the backslash */
+ s[j++] = c;
+ break;
+ default:
+ s[j++] = s[i];
+ break;
+ }
+ }
+ *len = j;
+ return 0;
+}
+
+/* Map the class name CLASS_STR to its enum Char_class value, which is
+   the position of the name in the global char_class_name array.
+   Return CC_NO_CLASS when the name is not in that array.  */
+
+static enum Char_class
+look_up_char_class (class_str)
+     unsigned char *class_str;
+{
+  unsigned int idx = 0;
+
+  while (idx < N_CHAR_CLASSES)
+    {
+      if (!strcmp (class_str, char_class_name[idx]))
+        return (enum Char_class) idx;
+      ++idx;
+    }
+  return CC_NO_CLASS;
+}
+
+/* Build a printable rendering of the character C in freshly allocated
+   storage that the caller must free: the character itself when it is
+   printable, otherwise a backslash followed by three octal digits.
+   C must be below N_CHARS.  Used only when formatting error
+   messages.  */
+
+static char *
+make_printable_char (c)
+     unsigned int c;
+{
+  /* Room for a backslash, three octal digits and the terminator.  */
+  char *text = xmalloc (5);
+
+  assert (c < N_CHARS);
+  if (!isprint (c))
+    sprintf (text, "\\%03o", c);
+  else
+    {
+      text[0] = c;
+      text[1] = '\0';
+    }
+  return text;
+}
+
+/* Return a newly allocated copy of S which is suitable for printing;
+   the caller must free it.  LEN is the number of characters in S,
+   which may contain zero bytes.  Most non-printing (isprint)
+   characters are represented by a backslash followed by 3 octal
+   digits.  However, the characters represented by \c escapes where c
+   is one of [abfnrtv] are represented by their 2-character \c
+   sequences.  The result is always null-terminated, including when
+   LEN is 0 (in which case it is the empty string).  This function is
+   used solely for printing error messages.  */
+
+static char *
+make_printable_str (s, len)
+     unsigned char *s;
+     int len;
+{
+  /* Worst case is that every character expands to a backslash
+     followed by a 3-character octal escape sequence.  */
+  char *printable_buf = xmalloc (4 * len + 1);
+  char *p = printable_buf;
+  int i;
+
+  /* Terminate up front: with LEN == 0 the loop below stores nothing,
+     and the buffer would otherwise be returned uninitialized.  */
+  *p = '\0';
+
+  for (i = 0; i < len; i++)
+    {
+      char buf[5];
+      char *tmp = NULL;
+
+      switch (s[i])
+        {
+        case '\\':
+          tmp = "\\";
+          break;
+        case '\007':
+          tmp = "\\a";
+          break;
+        case '\b':
+          tmp = "\\b";
+          break;
+        case '\f':
+          tmp = "\\f";
+          break;
+        case '\n':
+          tmp = "\\n";
+          break;
+        case '\r':
+          tmp = "\\r";
+          break;
+        case '\t':
+          tmp = "\\t";
+          break;
+        case '\v':
+          tmp = "\\v";
+          break;
+        default:
+          if (isprint (s[i]))
+            {
+              buf[0] = s[i];
+              buf[1] = '\0';
+            }
+          else
+            sprintf (buf, "\\%03o", s[i]);
+          tmp = buf;
+          break;
+        }
+      /* stpcpy returns the address of the terminator it wrote, which
+         is where the next piece goes.  */
+      p = stpcpy (p, tmp);
+    }
+  return printable_buf;
+}
+
+/* Allocate an RE_NORMAL_CHAR element holding the character C and
+   link it after the current tail of LIST, which must not be NULL.  */
+
+static void
+append_normal_char (list, c)
+     struct Spec_list *list;
+     unsigned int c;
+{
+  struct List_element *elem
+    = (struct List_element *) xmalloc (sizeof *elem);
+
+  elem->type = RE_NORMAL_CHAR;
+  elem->u.normal_char = c;
+  elem->next = NULL;
+
+  assert (list->tail);
+  list->tail->next = elem;
+  list->tail = elem;
+}
+
+/* Allocate an RE_RANGE element covering the characters FIRST through
+   LAST and link it after the current tail of LIST.  If LAST precedes
+   FIRST, print an error message, append nothing and return non-zero;
+   otherwise return zero.  Equal endpoints are accepted, so `c-c' is
+   a valid range.  */
+
+static int
+append_range (list, first, last)
+     struct Spec_list *list;
+     unsigned int first;
+     unsigned int last;
+{
+  struct List_element *elem;
+
+  if (ORD (last) < ORD (first))
+    {
+      char *first_text = make_printable_char (first);
+      char *last_text = make_printable_char (last);
+
+      error (0, 0,
+             "range-endpoints of `%s-%s' are in reverse collating sequence order",
+             first_text, last_text);
+      free (first_text);
+      free (last_text);
+      return 1;
+    }
+
+  elem = (struct List_element *) xmalloc (sizeof *elem);
+  elem->type = RE_RANGE;
+  elem->u.range.first_char = first;
+  elem->u.range.last_char = last;
+  elem->next = NULL;
+
+  assert (list->tail);
+  list->tail->next = elem;
+  list->tail = elem;
+  return 0;
+}
+
+/* Look up the class name CHAR_CLASS_STR (LEN characters long).  If it
+   names a known character class, allocate an RE_CHAR_CLASS element
+   for it, link that after the current tail of LIST and return 0.
+   Otherwise print an error message and return non-zero without
+   touching LIST.  */
+
+static int
+append_char_class (list, char_class_str, len)
+     struct Spec_list *list;
+     unsigned char *char_class_str;
+     int len;
+{
+  struct List_element *elem;
+  enum Char_class which = look_up_char_class (char_class_str);
+
+  if (which == CC_NO_CLASS)
+    {
+      char *shown = make_printable_str (char_class_str, len);
+
+      error (0, 0, "invalid character class `%s'", shown);
+      free (shown);
+      return 1;
+    }
+
+  elem = (struct List_element *) xmalloc (sizeof *elem);
+  elem->type = RE_CHAR_CLASS;
+  elem->u.char_class = which;
+  elem->next = NULL;
+
+  assert (list->tail);
+  list->tail->next = elem;
+  list->tail = elem;
+  return 0;
+}
+
+/* Allocate an RE_REPEATED_CHAR element for a [c*n] construct and link
+   it after the current tail of LIST.  THE_CHAR is the character to
+   repeat and REPEAT_COUNT the non-negative number of repetitions.  */
+
+static void
+append_repeated_char (list, the_char, repeat_count)
+     struct Spec_list *list;
+     unsigned int the_char;
+     long int repeat_count;
+{
+  struct List_element *elem
+    = (struct List_element *) xmalloc (sizeof *elem);
+
+  elem->type = RE_REPEATED_CHAR;
+  elem->u.repeated_char.repeat_count = repeat_count;
+  elem->u.repeated_char.the_repeated_char = the_char;
+  elem->next = NULL;
+
+  assert (list->tail);
+  list->tail->next = elem;
+  list->tail = elem;
+}
+
+/* EQUIV_CLASS_STR is the LEN-character operand found between the
+   delimiters of a [=str=] construct.  When LEN is exactly one,
+   allocate an RE_EQUIV_CLASS element for that character, link it
+   after the current tail of LIST and return zero.  For any other
+   LEN, print an error message and return non-zero.  */
+
+static int
+append_equiv_class (list, equiv_class_str, len)
+     struct Spec_list *list;
+     unsigned char *equiv_class_str;
+     int len;
+{
+  struct List_element *elem;
+
+  if (len == 1)
+    {
+      elem = (struct List_element *) xmalloc (sizeof *elem);
+      elem->type = RE_EQUIV_CLASS;
+      elem->u.equiv_code = equiv_class_str[0];
+      elem->next = NULL;
+
+      assert (list->tail);
+      list->tail->next = elem;
+      list->tail = elem;
+      return 0;
+    }
+  else
+    {
+      char *shown = make_printable_str (equiv_class_str, len);
+
+      error (0, 0, "%s: equivalence class operand must be a single character",
+             shown);
+      free (shown);
+      return 1;
+    }
+}
+
+/* Return a newly allocated, null-terminated copy of
+   P[FIRST_IDX..LAST_IDX] (both ends inclusive); the caller must free
+   it.  FIRST_IDX must not exceed LAST_IDX.  The copied bytes may
+   themselves include zero bytes.  */
+
+static unsigned char *
+substr (p, first_idx, last_idx)
+     unsigned char *p;
+     int first_idx;
+     int last_idx;
+{
+  int len;
+  unsigned char *tmp;
+
+  /* Check the bounds before they are used to size the allocation.  */
+  assert (first_idx <= last_idx);
+  len = last_idx - first_idx + 1;
+
+  /* One byte more than the data, for the terminator stored below;
+     allocating only LEN bytes would make that store overrun the
+     buffer.  */
+  tmp = (unsigned char *) xmalloc (len + 1);
+
+  /* We must use bcopy or memcpy rather than strncpy
+     because `p' may contain zero-bytes.  */
+  bcopy (p + first_idx, tmp, len);
+  tmp[len] = '\0';
+  return tmp;
+}
+
+/* Scan the P_LEN-character string P, which may contain zero bytes,
+   from index START_IDX onward for the first occurrence of the
+   character PRE_BRACKET_CHAR immediately followed by `]'.  Return
+   the index of PRE_BRACKET_CHAR in that pair, or -1 if there is no
+   such pair.  */
+
+static int
+find_closing_delim (p, start_idx, p_len, pre_bracket_char)
+     unsigned char *p;
+     int start_idx;
+     int p_len;
+     unsigned int pre_bracket_char;
+{
+  /* The pair needs two characters, so the last candidate position is
+     one before the final character.  */
+  int last = p_len - 1;
+  int idx = start_idx;
+
+  while (idx < last)
+    {
+      if (p[idx + 1] == ']' && p[idx] == pre_bracket_char)
+        return idx;
+      idx++;
+    }
+  return -1;
+}
+
+/* Convert the LEN characters at S (no terminator required) to a long
+   integer.  A leading `0' selects octal, any other leading digit
+   selects decimal.  On success store the value in *VAL and return
+   zero.  Return non-zero, leaving *VAL untouched, if LEN is not
+   positive, if any of the LEN characters is not a digit of the
+   selected base, or if the value would exceed LONG_MAX.  */
+
+static int
+non_neg_strtol (s, len, val)
+     unsigned char *s;
+     int len;
+     long int *val;
+{
+  unsigned int radix;
+  long total;
+  int pos;
+
+  if (len <= 0)
+    return 1;
+  if (!isdigit (s[0]))
+    return 1;
+  radix = (s[0] == '0') ? 8 : 10;
+
+  total = 0;
+  for (pos = 0; pos < len; pos++)
+    {
+      int digit = s[pos] - '0';
+
+      if (digit < 0 || digit >= radix)
+        return 1;
+      /* The first nine digits cannot overflow, so the range test is
+         applied only from the tenth digit on.  */
+      if (pos > 8 && total > (LONG_MAX - digit) / radix)
+        return 1;
+      total = total * radix + digit;
+    }
+
+  *val = total;
+  return 0;
+}
+
+/* Parse the tail of a bracketed repeat construct.  START_IDX is the
+   index in the P_LEN-character string P of the character that follows
+   the opening bracket, i.e. the `c' of a possible [c*n].
+
+   Return -1 if P[START_IDX + 1] is not `*' or if no `]' follows it.
+   Otherwise the text between the `*' and the first following `]' is
+   the repeat count: if it is empty, store 0 in *N; if it is a valid
+   octal or decimal number, store its value in *N; in both cases store
+   the repeated character in *CHAR_TO_REPEAT and return the index of
+   the closing bracket.  If the count text is not a valid number,
+   print an error message and return -2.  */
+
+static int
+find_bracketed_repeat (p, start_idx, p_len, char_to_repeat, n)
+     unsigned char *p;
+     int start_idx;
+     int p_len;
+     unsigned int *char_to_repeat;
+     long int *n;
+{
+  int close_idx;
+  int count_len;
+  unsigned char *count_str;
+  char *shown;
+
+  assert (start_idx + 1 < p_len);
+  if (p[start_idx + 1] != '*')
+    return -1;
+
+  /* Locate the first `]' after the `*'.  */
+  close_idx = start_idx + 2;
+  while (close_idx < p_len && p[close_idx] != ']')
+    ++close_idx;
+  if (close_idx >= p_len)
+    return -1;			/* No bracket found.  */
+
+  *char_to_repeat = p[start_idx];
+
+  count_len = close_idx - (start_idx + 2);
+  if (count_len == 0)
+    {
+      /* We've matched [c*] -- no explicit repeat count.  */
+      *n = 0;
+      return close_idx;
+    }
+
+  /* Here, we have found [c*s] where s should be a string
+     of octal or decimal digits.  */
+  count_str = p + start_idx + 2;
+  if (non_neg_strtol (count_str, count_len, n) == 0)
+    return close_idx;
+
+  shown = make_printable_str (count_str, count_len);
+  error (0, 0, "invalid repeat count `%s' in [c*n] construct", shown);
+  free (shown);
+  return -2;
+}
+
+/* Convert string UNESCAPED_STRING (which has been preprocessed to
+   convert backslash-escape sequences) of length LEN characters into
+   a linked list of the following 5 types of constructs, appended to
+   RESULT:
+   - [:str:] Character class where `str' is one of the 12 valid strings.
+   - [=c=] Equivalence class where `c' is any single character.
+   - [c*n] Repeat the single character `c' `n' times.  n may be omitted.
+     However, if `n' is present, it must be a non-negative octal or
+     decimal integer.
+   - r-s Range of characters from `r' to `s'.  The second endpoint must
+     not precede the first in the current collating sequence.
+   - c Any other character is interpreted as itself.
+   Return zero on success.  Return non-zero, after an error message has
+   been printed, if a construct is malformed; this includes the empty
+   operands `[::]' and `[==]', which have no class name or character
+   to work with.  */
+
+static int
+build_spec_list (unescaped_string, len, result)
+     unsigned char *unescaped_string;
+     int len;
+     struct Spec_list *result;
+{
+  unsigned char *p;
+  int i;
+
+  p = unescaped_string;
+
+  /* The main for-loop below recognizes the 4 multi-character constructs.
+     A character that matches (in its context) none of the multi-character
+     constructs is classified as `normal'.  Since all multi-character
+     constructs have at least 3 characters, any strings of length 2 or
+     less are composed solely of normal characters.  Hence, the index of
+     the outer for-loop runs only as far as LEN-2.  */
+
+  for (i = 0; i < len - 2;)
+    {
+      switch (p[i])
+        {
+          int fall_through;
+        case '[':
+          fall_through = 0;
+          switch (p[i + 1])
+            {
+              int closing_delim_idx;
+              int closing_bracket_idx;
+              unsigned int char_to_repeat;
+              long repeat_count;
+            case ':':
+            case '=':
+              closing_delim_idx = find_closing_delim (p, i + 2, len, p[i + 1]);
+              if (closing_delim_idx >= 0)
+                {
+                  int parse_failed;
+                  /* Number of characters between the two delimiters.  */
+                  int opnd_len = closing_delim_idx - (i + 2);
+                  unsigned char *opnd_str;
+
+                  /* The closing delimiter directly follows the opening
+                     one.  substr cannot represent an empty slice (it
+                     asserts first <= last), so diagnose this here.  */
+                  if (opnd_len == 0)
+                    {
+                      if (p[i + 1] == ':')
+                        error (0, 0, "missing character class name `[::]'");
+                      else
+                        error (0, 0,
+                               "missing equivalence class character `[==]'");
+                      return 1;
+                    }
+
+                  opnd_str = substr (p, i + 2, closing_delim_idx - 1);
+                  if (p[i + 1] == ':')
+                    parse_failed = append_char_class (result, opnd_str,
+                                                      opnd_len);
+                  else
+                    parse_failed = append_equiv_class (result, opnd_str,
+                                                       opnd_len);
+                  free (opnd_str);
+
+                  /* Return non-zero if append_*_class reports a problem.  */
+                  if (parse_failed)
+                    return 1;
+                  else
+                    i = closing_delim_idx + 2;
+                  break;
+                }
+              /* Else fall through.  This could be [:*] or [=*].  */
+            default:
+              /* Determine whether this is a bracketed repeat range
+                 matching the RE \[.\*(dec_or_oct_number)?\].  */
+              closing_bracket_idx = find_bracketed_repeat (p, i + 1,
+                                       len, &char_to_repeat, &repeat_count);
+              if (closing_bracket_idx >= 0)
+                {
+                  append_repeated_char (result, char_to_repeat, repeat_count);
+                  i = closing_bracket_idx + 1;
+                  break;
+                }
+              else if (closing_bracket_idx == -1)
+                {
+                  fall_through = 1;
+                }
+              else
+                /* Found a string that looked like [c*n] but the
+                   numeric part was invalid.  */
+                return 1;
+              break;
+            }
+          if (!fall_through)
+            break;
+
+          /* Here if we've tried to match [c*n], [:str:], and [=c=]
+             and none of them fit.  So we still have to consider the
+             range `[-c' (from `[' to `c').  */
+        default:
+          /* Look ahead one char for ranges like a-z.  */
+          if (p[i + 1] == '-')
+            {
+              if (append_range (result, p[i], p[i + 2]))
+                return 1;
+              i += 3;
+            }
+          else
+            {
+              append_normal_char (result, p[i]);
+              ++i;
+            }
+          break;
+        }
+    }
+
+  /* Now handle the (2 or fewer) remaining characters p[i]..p[len - 1].  */
+  for (; i < len; i++)
+    append_normal_char (result, p[i]);
+
+  return 0;
+}
+
+
+/* Given a Spec_list S (with its saved state implicit in the values
+ of its members `tail' and `state'), return the next single character
+ in the expansion of S's constructs. If the last character of S was
+ returned on the previous call or if S was empty, this function
+ returns -1. For example, successive calls to get_next where S
+ represents the spec-string 'a-d[y*3]' will return the sequence
+ of values a, b, c, d, y, y, y, -1. Finally, if the construct from
+ which the returned character comes is [:upper:] or [:lower:], the
+ parameter CLASS is given a value to indicate which it was. Otherwise
+ CLASS is set to UL_NONE. CLASS may be NULL, in which case nothing is
+ reported. This value is used only when constructing
+ the translation table to verify that any occurrences of upper and
+ lower class constructs in the spec-strings appear in the same relative
+ positions.
+
+ To start a traversal the caller sets S->state to BEGIN_STATE. While
+ an element is being expanded, S->state holds the code of the character
+ last returned (ranges and character classes) or the number of
+ characters returned so far (repeats).
+ NOTE(review): because the repeat counter shares S->state with the
+ BEGIN_STATE and NEW_ELEMENT sentinels, a repeat count large enough for
+ the counter to reach either sentinel value would be misread on the
+ following call -- confirm the sentinels are out of reach. */
+
+static int
+get_next (s, class)
+ struct Spec_list *s;
+ enum Upper_Lower_class *class;
+{
+ struct List_element *p;
+ int return_val;
+ int i;
+
+ if (class)
+ *class = UL_NONE;
+
+ if (s->state == BEGIN_STATE)
+ {
+ s->tail = s->head->next; /* skip the dummy head element */
+ s->state = NEW_ELEMENT;
+ }
+
+ p = s->tail;
+ if (p == NULL)
+ return -1;
+
+ switch (p->type)
+ {
+ case RE_NORMAL_CHAR:
+ return_val = p->u.normal_char;
+ s->state = NEW_ELEMENT;
+ s->tail = p->next;
+ break;
+
+ case RE_RANGE:
+ if (s->state == NEW_ELEMENT)
+ s->state = ORD (p->u.range.first_char);
+ else
+ ++(s->state);
+ return_val = CHR (s->state);
+ if (s->state == ORD (p->u.range.last_char)) /* range exhausted */
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ }
+ break;
+
+ case RE_CHAR_CLASS:
+ if (s->state == NEW_ELEMENT)
+ {
+ for (i = 0; i < N_CHARS; i++) /* find the first member of the class */
+ if (is_char_class_member (p->u.char_class, i))
+ break;
+ assert (i < N_CHARS);
+ s->state = i;
+ }
+ assert (is_char_class_member (p->u.char_class, s->state));
+ return_val = CHR (s->state);
+ for (i = s->state + 1; i < N_CHARS; i++) /* look ahead for the next member */
+ if (is_char_class_member (p->u.char_class, i))
+ break;
+ if (i < N_CHARS)
+ s->state = i;
+ else
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ }
+ if (class)
+ {
+ switch (p->u.char_class)
+ {
+ case CC_LOWER:
+ *class = UL_LOWER;
+ break;
+ case CC_UPPER:
+ *class = UL_UPPER;
+ break;
+ default:
+ /* empty: *class keeps the UL_NONE stored on entry */
+ break;
+ }
+ }
+ break;
+
+ case RE_EQUIV_CLASS:
+ /* FIXME: this assumes that each character is alone in its own
+ equivalence class (which appears to be correct for my
+ LC_COLLATE). But I don't know of any function that allows
+ one to determine a character's equivalence class. */
+
+ return_val = p->u.equiv_code;
+ s->state = NEW_ELEMENT;
+ s->tail = p->next;
+ break;
+
+ case RE_REPEATED_CHAR:
+ /* Here, a repeat count of n == 0 means don't repeat at all:
+ the element contributes no characters, so move past it and
+ return whatever the rest of the list produces. */
+ assert (p->u.repeated_char.repeat_count >= 0);
+ if (p->u.repeated_char.repeat_count == 0)
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ return_val = get_next (s, class);
+ }
+ else
+ {
+ if (s->state == NEW_ELEMENT)
+ {
+ s->state = 0;
+ }
+ ++(s->state); /* characters returned so far, this one included */
+ return_val = p->u.repeated_char.the_repeated_char;
+ if (p->u.repeated_char.repeat_count > 0
+ && s->state == p->u.repeated_char.repeat_count)
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ }
+ }
+ break;
+
+ case RE_NO_TYPE:
+ abort ();
+ break;
+ }
+ return return_val;
+}
+
+/* This is a minor kludge.  This function is called from
+   get_spec_stats to determine the cardinality of a set derived
+   from a complemented string.  It's a kludge in that some of
+   the same operations are (duplicated) performed in set_initialize.
+
+   Walk the whole expansion of S with get_next (resetting S->state to
+   BEGIN_STATE first) and return the number of the N_CHARS character
+   codes that never occur in it.  A character is counted once no
+   matter how many times it occurs.  */
+
+static int
+card_of_complement (s)
+     struct Spec_list *s;
+{
+  int c;
+  int cardinality = N_CHARS;
+  SET_TYPE in_set[N_CHARS];
+
+  bzero (in_set, N_CHARS * sizeof (in_set[0]));
+  s->state = BEGIN_STATE;
+  while ((c = get_next (s, NULL)) != -1)
+    {
+      /* Record membership as a plain flag.  Counting occurrences in a
+         char-sized element would eventually wrap back to zero for a
+         heavily repeated character and subtract it a second time.  */
+      if (!in_set[c])
+        {
+          in_set[c] = 1;
+          --cardinality;
+        }
+    }
+  return cardinality;
+}
+
+/* Gather statistics about the spec-list S in preparation for the tests
+ in validate that determine the legality of the specs. This function
+ is called at most twice; once for string1, and again for any string2.
+ LEN_S1 < 0 indicates that this is the first call and that S represents
+ string1. When LEN_S1 >= 0, it is the length of the expansion of the
+ constructs in string1, and we can use its value to resolve any
+ indefinite repeat construct in S (which represents string2). Hence,
+ this function has the side-effect that it converts a valid [c*]
+ construct in string2 to [c*n] where n is large enough (or 0) to give
+ string2 the same length as string1. For example, with the command
+ tr a-z 'A[\n*]Z' on the second call to get_spec_stats, LEN_S1 would
+ be 26 and S (representing string2) would be converted to 'A[\n*24]Z'.
+
+ The results are stored in S: n_indefinite_repeats, has_equiv_class,
+ has_restricted_char_class, has_upper_or_lower and length. When the
+ global `complement' is set and this is the call for string1, length
+ is the value returned by card_of_complement rather than the length of
+ the expansion. */
+
+static void
+get_spec_stats (s, len_s1)
+ struct Spec_list *s;
+ int len_s1;
+{
+ struct List_element *p;
+ struct List_element *indefinite_repeat_element = NULL; /* last [c*] seen */
+ int len = 0; /* expansion length, not counting indefinite repeats */
+
+ s->n_indefinite_repeats = 0;
+ s->has_equiv_class = 0;
+ s->has_restricted_char_class = 0;
+ s->has_upper_or_lower = 0;
+ for (p = s->head->next; p; p = p->next)
+ {
+ switch (p->type)
+ {
+ int i;
+ case RE_NORMAL_CHAR:
+ ++len;
+ break;
+
+ case RE_RANGE:
+ assert (p->u.range.last_char >= p->u.range.first_char);
+ len += p->u.range.last_char - p->u.range.first_char + 1;
+ break;
+
+ case RE_CHAR_CLASS:
+ for (i = 0; i < N_CHARS; i++) /* count the members of the class */
+ if (is_char_class_member (p->u.char_class, i))
+ ++len;
+ switch (p->u.char_class)
+ {
+ case CC_UPPER:
+ case CC_LOWER:
+ s->has_upper_or_lower = 1;
+ break;
+ default:
+ s->has_restricted_char_class = 1;
+ break;
+ }
+ break;
+
+ case RE_EQUIV_CLASS:
+ for (i = 0; i < N_CHARS; i++)
+ if (is_equiv_class_member (p->u.equiv_code, i))
+ ++len;
+ s->has_equiv_class = 1;
+ break;
+
+ case RE_REPEATED_CHAR:
+ if (p->u.repeated_char.repeat_count > 0)
+ len += p->u.repeated_char.repeat_count;
+ else if (p->u.repeated_char.repeat_count == 0)
+ {
+ indefinite_repeat_element = p;
+ ++(s->n_indefinite_repeats);
+ }
+ break;
+
+ case RE_NO_TYPE:
+ assert (0);
+ break;
+ }
+ }
+
+ if (len_s1 >= len && s->n_indefinite_repeats == 1) /* exactly one [c*] to stretch */
+ {
+ indefinite_repeat_element->u.repeated_char.repeat_count = len_s1 - len;
+ len = len_s1;
+ }
+ if (complement && len_s1 < 0)
+ s->length = card_of_complement (s);
+ else
+ s->length = len;
+ return;
+}
+
+/* Make SPEC_LIST an empty list: allocate the leading placeholder
+   element, point both head and tail at it and terminate the chain.  */
+
+static void
+spec_init (spec_list)
+     struct Spec_list *spec_list;
+{
+  struct List_element *first;
+
+  first = (struct List_element *) xmalloc (sizeof (struct List_element));
+  spec_list->tail = first;
+  spec_list->head = first;
+  first->next = NULL;
+}
+
+/* Parse the argument string S in two passes.  The first pass (unquote)
+   rewrites S in place, replacing every \c and \ddd escape by its
+   one-byte value and yielding the resulting length.  The second pass
+   (build_spec_list) turns those bytes into the linked specification
+   list SPEC_LIST.  Return non-zero if either pass reports an error,
+   zero otherwise; the second pass is not run if the first one fails.  */
+
+static int
+parse_str (s, spec_list)
+     unsigned char *s;
+     struct Spec_list *spec_list;
+{
+  int len;
+
+  return (unquote (s, &len) || build_spec_list (s, len, spec_list)) ? 1 : 0;
+}
+
+/* Given two specification lists, S1 and S2, and assuming that
+   S1->length > S2->length, append a single [c*n] element to S2 where c
+   is the last character in the expansion of S2 and n is the difference
+   between the two lengths.
+   Upon successful completion, S2->length is set to S1->length.  The only
+   way this function can fail to make S2 as long as S1 is when S2 has
+   zero-length, since in that case, there is no last character to repeat.
+   So S2->length is required to be at least 1.
+
+   Providing this functionality allows the user to do some pretty
+   non-BSD (and non-portable) things:  For example, the command
+       tr -cs '[:upper:]0-9' '[:lower:]'
+   is almost guaranteed to give results that depend on your collating
+   sequence.  */
+
+static void
+string2_extend (s1, s2)
+     struct Spec_list *s1;
+     struct Spec_list *s2;
+{
+  struct List_element *p;
+  int char_to_repeat;
+  int i;
+
+  assert (translating);
+  assert (s1->length > s2->length);
+  assert (s2->length > 0);
+
+  /* The character to repeat is the last one produced by the final
+     construct of string2.  */
+  p = s2->tail;
+  switch (p->type)
+    {
+    case RE_NORMAL_CHAR:
+      char_to_repeat = p->u.normal_char;
+      break;
+    case RE_RANGE:
+      char_to_repeat = p->u.range.last_char;
+      break;
+    case RE_CHAR_CLASS:
+      /* Find the highest-valued member of the class.  Valid character
+         codes run from 0 to N_CHARS - 1, so the search must start at
+         N_CHARS - 1; starting at N_CHARS would query a value that is
+         not a character at all.  */
+      for (i = N_CHARS - 1; i >= 0; i--)
+        if (is_char_class_member (p->u.char_class, i))
+          break;
+      assert (i >= 0);
+      char_to_repeat = CHR (i);
+      break;
+
+    case RE_REPEATED_CHAR:
+      char_to_repeat = p->u.repeated_char.the_repeated_char;
+      break;
+
+    case RE_EQUIV_CLASS:
+      /* This shouldn't happen, because validate exits with an error
+         if it finds an equiv class in string2 when translating.  */
+      abort ();
+      break;
+
+    case RE_NO_TYPE:
+      abort ();
+      break;
+    }
+  append_repeated_char (s2, char_to_repeat, s1->length - s2->length);
+  s2->length = s1->length;
+  return;
+}
+
+/* Die with an error message if S1 and S2 describe strings that
+   are not valid with the given command line switches.  S2 is NULL
+   when only one string was given.
+   A side effect of this function is that if a legal [c*] or
+   [c*0] construct appears in string2, it is converted to [c*n]
+   with a value for n that makes s2->length == s1->length.  By
+   the same token, if the --truncate-set1 option is not
+   given, S2 may be extended.
+
+   Restrictions that are specific to translation are only enforced
+   when translating, so that e.g. `tr -cd [:upper:]' and
+   `tr -ds a [:space:]' are not rejected with a message about
+   translation.  */
+
+static void
+validate (s1, s2)
+     struct Spec_list *s1;
+     struct Spec_list *s2;
+{
+  get_spec_stats (s1, -1);
+  if (s1->n_indefinite_repeats > 0)
+    {
+      error (1, 0, "the [c*] repeat construct may not appear in string1");
+    }
+
+  /* FIXME: it isn't clear from the POSIX spec that this is illegal,
+     but in the spirit of the other restrictions put on translation
+     with character classes, this seems a logical interpretation.
+     It only concerns translation, so deleting or squeezing with a
+     complemented class is left alone.  */
+  if (translating && complement && s1->has_upper_or_lower)
+    {
+      error (1, 0,
+             "character classes may not be used when translating and complementing");
+    }
+
+  if (s2)
+    {
+      get_spec_stats (s2, s1->length);
+
+      /* When string2 is merely a squeeze set (delete + squeeze), any
+         character class is acceptable in it.  */
+      if (translating && s2->has_restricted_char_class)
+        {
+          error (1, 0,
+                 "when translating, the only character classes that may appear in\n\
+\tstring2 are `upper' and `lower'");
+        }
+
+      if (s2->n_indefinite_repeats > 1)
+        {
+          error (1, 0, "only one [c*] repeat construct may appear in string2");
+        }
+
+      if (translating)
+        {
+          if (s2->has_equiv_class)
+            {
+              error (1, 0,
+                     "[=c=] expressions may not appear in string2 when translating");
+            }
+
+          if (s1->length > s2->length)
+            {
+              if (!truncate_set1)
+                {
+                  /* string2 must be non-empty unless --truncate-set1 is
+                     given or string1 is empty.  */
+
+                  if (s2->length == 0)
+                    error (1, 0,
+                           "when not truncating set1, string2 must be non-empty");
+                  string2_extend (s1, s2);
+                }
+            }
+
+          if (complement && s2->has_upper_or_lower)
+            error (1, 0,
+                   "character classes may not be used when translating and complementing");
+        }
+      else
+        /* Not translating.  */
+        {
+          if (s2->n_indefinite_repeats > 0)
+            error (1, 0,
+                   "the [c*] construct may appear in string2 only when translating");
+        }
+    }
+}
+
+/* Read buffers of SIZE bytes via the function READER (if READER is
+   NULL, read from stdin) until EOF.  When non-NULL, READER is either
+   read_and_delete or read_and_xlate.  After each buffer is read, it is
+   processed and written to stdout.  The buffers are processed so that
+   multiple consecutive occurrences of the same character in the input
+   stream are replaced by a single occurrence of that character if the
+   character is in the squeeze set.
+
+   BUF is the work buffer of at least SIZE bytes.  The function exits
+   with an error on a read failure or on a failed or short write.  */
+
+static void
+squeeze_filter (buf, size, reader)
+     unsigned char *buf;
+     long int size;
+     PFI reader;
+{
+  /* The character currently being squeezed, or NOT_A_CHAR when we are
+     looking for the next member of the squeeze set.  It is kept across
+     buffer refills so that a run spanning two buffers is squeezed.  */
+  unsigned int char_to_squeeze = NOT_A_CHAR;
+  int i = 0;                    /* Index of the next byte to examine.  */
+  int nr = 0;                   /* Number of valid bytes in BUF.  */
+
+  for (;;)
+    {
+      int begin;
+
+      /* Refill the buffer once it has been consumed.  */
+      if (i >= nr)
+        {
+          if (reader == NULL)
+            nr = read (0, (char *) buf, size);
+          else
+            nr = (*reader) (buf, size, NULL);
+
+          if (nr < 0)
+            error (1, errno, "read error");
+          if (nr == 0)
+            break;
+          i = 0;
+        }
+
+      begin = i;
+
+      if (char_to_squeeze == NOT_A_CHAR)
+        {
+          int out_len;
+          /* Here, by being a little tricky, we can get a significant
+             performance increase in most cases when the input is
+             reasonably large.  Since tr will modify the input only
+             if two consecutive (and identical) input characters are
+             in the squeeze set, we can step by two through the data
+             when searching for a character in the squeeze set.  This
+             means there may be a little more work in a few cases and
+             perhaps twice as much work in the worst cases where most
+             of the input is removed by squeezing repeats.  But most
+             uses of this functionality seem to remove less than 20-30%
+             of the input.  */
+          for (; i < nr && !in_squeeze_set[buf[i]]; i += 2)
+            ;                   /* empty */
+
+          /* There is a special case when i == nr and we've just
+             skipped a character (the last one in buf) that is in
+             the squeeze set.  */
+          if (i == nr && in_squeeze_set[buf[i - 1]])
+            --i;
+
+          if (i >= nr)
+            out_len = nr - begin;
+          else
+            {
+              char_to_squeeze = buf[i];
+              /* We're about to output buf[begin..i].  */
+              out_len = i - begin + 1;
+
+              /* But since we stepped by 2 in the loop above,
+                 out_len may be one too large.  */
+              if (i > 0 && buf[i - 1] == char_to_squeeze)
+                --out_len;
+
+              /* Advance i to the index of first character to be
+                 considered when looking for a char different from
+                 char_to_squeeze.  */
+              ++i;
+            }
+
+          /* fwrite returns the number of items written, so anything
+             other than out_len (not just zero) is a failed write.  */
+          if (out_len > 0
+              && fwrite ((char *) &buf[begin], 1, out_len, stdout) != out_len)
+            error (1, errno, "write error");
+        }
+
+      if (char_to_squeeze != NOT_A_CHAR)
+        {
+          /* Advance i to index of first char != char_to_squeeze
+             (or to nr if all the rest of the characters in this
+             buffer are the same as char_to_squeeze).  */
+          for (; i < nr && buf[i] == char_to_squeeze; i++)
+            ;                   /* empty */
+          if (i < nr)
+            char_to_squeeze = NOT_A_CHAR;
+          /* If (i >= nr) we've squeezed the last character in this buffer.
+             So now we have to read a new buffer and continue comparing
+             characters against char_to_squeeze.  */
+        }
+    }
+}
+
+/* Fill BUF with the next chunk of input after removing every character
+   that belongs to the delete set.  Chunks of up to SIZE bytes are read
+   from stdin until one of them leaves at least one character behind.
+   Return the number of characters kept in BUF, or 0 once EOF has been
+   seen (and on every later call).  NOT_USED must be NULL.  A read
+   failure is reported through error.  */
+
+static long
+read_and_delete (buf, size, not_used)
+     unsigned char *buf;
+     long int size;
+     PFI not_used;
+{
+  long n_saved;
+  static int hit_eof = 0;
+
+  assert (not_used == NULL);
+  assert (size > 0);
+
+  if (hit_eof)
+    return 0;
+
+  /* Keep reading while whole buffers get deleted: returning zero is
+     reserved for EOF.  */
+  for (;;)
+    {
+      int src;
+      int nr = read (0, (char *) buf, size);
+
+      if (nr < 0)
+        error (1, errno, "read error");
+      if (nr == 0)
+        {
+          hit_eof = 1;
+          return 0;
+        }
+
+      /* Skip the leading stretch of characters that are kept; they
+         are already in their final position, so no copy is needed.  */
+      src = 0;
+      while (src < nr && !in_delete_set[buf[src]])
+        src++;
+      n_saved = src;
+
+      /* buf[src], if any, is deleted.  Compact what follows it.  */
+      src++;
+      while (src < nr)
+        {
+          if (!in_delete_set[buf[src]])
+            buf[n_saved++] = buf[src];
+          src++;
+        }
+
+      if (n_saved != 0)
+        return n_saved;
+    }
+}
+
+/* Read up to SIZE bytes from stdin into BUF and replace each byte, in
+   place, by its image in the global translation table `xlate'.
+   Return the number of bytes read, or 0 once EOF has been seen (and on
+   every later call).  NOT_USED must be NULL.  A read failure is
+   reported through error.  */
+
+static long
+read_and_xlate (buf, size, not_used)
+     unsigned char *buf;
+     long int size;
+     PFI not_used;
+{
+  static int hit_eof = 0;
+  long chars_read;
+  unsigned char *cp;
+  unsigned char *limit;
+
+  assert (not_used == NULL);
+  assert (size > 0);
+
+  if (hit_eof)
+    return 0;
+
+  chars_read = read (0, (char *) buf, size);
+  if (chars_read < 0)
+    error (1, errno, "read error");
+  if (chars_read == 0)
+    {
+      hit_eof = 1;
+      return 0;
+    }
+
+  limit = buf + chars_read;
+  for (cp = buf; cp < limit; cp++)
+    *cp = xlate[*cp];
+
+  return chars_read;
+}
+
+/* Build the membership table IN_SET (N_CHARS entries) for the list of
+   constructs S: clear the table, then mark every character delivered
+   by `get_next' while walking S from its beginning.  When
+   COMPLEMENT_THIS_SET is non-zero, every entry is inverted afterwards
+   so that the table describes the complement.  */
+
+static void
+set_initialize (s, complement_this_set, in_set)
+     struct Spec_list *s;
+     int complement_this_set;
+     SET_TYPE *in_set;
+{
+  int c;
+  int i;
+
+  bzero (in_set, N_CHARS * sizeof (in_set[0]));
+
+  s->state = BEGIN_STATE;
+  for (c = get_next (s, NULL); c != -1; c = get_next (s, NULL))
+    in_set[c] = 1;
+
+  if (!complement_this_set)
+    return;
+
+  for (i = 0; i < N_CHARS; i++)
+    in_set[i] = (in_set[i] == 0);
+}
+
+/* Entry point of tr.  Parse the options, check that the number of
+   string operands fits the requested operation, build and validate the
+   spec-lists for string1 (and string2), then filter stdin to stdout:
+   squeeze only, delete only, delete then squeeze, or translate
+   (optionally followed by squeezing).  Exits with status 0 on success;
+   every error is reported through error/usage with status 1.  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int non_option_args;
+  struct Spec_list buf1, buf2;
+  struct Spec_list *s1 = &buf1;
+  struct Spec_list *s2 = &buf2;
+
+  program_name = argv[0];
+
+  while ((c = getopt_long (argc, argv, "cdst", long_options,
+                           (int *) 0)) != EOF)
+    {
+      switch (c)
+        {
+        case 0:
+          break;
+
+        case 'c':
+          complement = 1;
+          break;
+
+        case 'd':
+          delete = 1;
+          break;
+
+        case 's':
+          squeeze_repeats = 1;
+          break;
+
+        case 't':
+          truncate_set1 = 1;
+          break;
+
+        default:
+          usage ();
+          break;
+        }
+    }
+
+  posix_pedantic = (getenv ("POSIXLY_CORRECT") != 0);
+
+  non_option_args = argc - optind;
+  translating = (non_option_args == 2 && !delete);
+
+  /* Change this test if it is legal to give tr no options and
+     no args at all.  POSIX doesn't specifically say anything
+     either way, but it looks like they implied it's illegal
+     by omission.  If you want to make tr do a slow imitation
+     of `cat' use `tr a a'.  */
+  if (non_option_args > 2)
+    usage ();
+
+  if (!delete && !squeeze_repeats && non_option_args != 2)
+    error (1, 0, "two strings must be given when translating");
+
+  if (delete && squeeze_repeats && non_option_args != 2)
+    error (1, 0, "two strings must be given when both \
+deleting and squeezing repeats");
+
+  /* If --delete is given without --squeeze-repeats, then
+     only one string argument may be specified.  But POSIX
+     says to ignore any string2 in this case, so if POSIXLY_CORRECT
+     is set, pretend we never saw string2.  But I think
+     this deserves a fatal error, so that's the default.  */
+  if ((delete && !squeeze_repeats) && non_option_args != 1)
+    {
+      if (posix_pedantic && non_option_args == 2)
+        --non_option_args;
+      else
+        error (1, 0,
+               "only one string may be given when deleting without squeezing repeats");
+    }
+
+  /* The tests above still let `tr -s' with no operand through.  Stop
+     here rather than hand a null argv entry to parse_str.  */
+  if (non_option_args == 0)
+    error (1, 0, "at least one string must be given when squeezing repeats");
+
+  spec_init (s1);
+  if (parse_str ((unsigned char *) argv[optind], s1))
+    exit (1);
+
+  if (non_option_args == 2)
+    {
+      spec_init (s2);
+      if (parse_str ((unsigned char *) argv[optind + 1], s2))
+        exit (1);
+    }
+  else
+    s2 = NULL;
+
+  validate (s1, s2);
+
+  if (squeeze_repeats && non_option_args == 1)
+    {
+      /* Squeeze only: string1 is the squeeze set.  */
+      set_initialize (s1, complement, in_squeeze_set);
+      squeeze_filter (io_buf, IO_BUF_SIZE, NULL);
+    }
+  else if (delete && non_option_args == 1)
+    {
+      /* Delete only: string1 is the delete set.  */
+      int nr;
+
+      set_initialize (s1, complement, in_delete_set);
+      do
+        {
+          nr = read_and_delete (io_buf, IO_BUF_SIZE, NULL);
+          /* A short write is a failure too, not only a zero return.  */
+          if (nr > 0 && fwrite ((char *) io_buf, 1, nr, stdout) != nr)
+            error (1, errno, "write error");
+        }
+      while (nr > 0);
+    }
+  else if (squeeze_repeats && delete && non_option_args == 2)
+    {
+      /* Delete the members of string1, then squeeze those of string2.  */
+      set_initialize (s1, complement, in_delete_set);
+      set_initialize (s2, 0, in_squeeze_set);
+      squeeze_filter (io_buf, IO_BUF_SIZE, (PFI) read_and_delete);
+    }
+  else if (translating)
+    {
+      if (complement)
+        {
+          int i;
+          /* in_delete_set is free in this mode; borrow it to hold the
+             membership table of string1.  */
+          SET_TYPE *in_s1 = in_delete_set;
+
+          set_initialize (s1, 0, in_s1);
+          s2->state = BEGIN_STATE;
+          for (i = 0; i < N_CHARS; i++)
+            xlate[i] = i;
+          /* Map the characters that are NOT in string1, in ascending
+             order, to the successive characters of string2.  */
+          for (i = 0; i < N_CHARS; i++)
+            {
+              if (!in_s1[i])
+                {
+                  int c = get_next (s2, NULL);
+                  assert (c != -1 || truncate_set1);
+                  if (c == -1)
+                    {
+                      /* This will happen when tr is invoked like e.g.
+                         tr -cs A-Za-z0-9 '\012'.  */
+                      break;
+                    }
+                  xlate[i] = c;
+                }
+            }
+          assert (get_next (s2, NULL) == -1 || truncate_set1);
+        }
+      else
+        {
+          int c1, c2;
+          int i;
+          enum Upper_Lower_class class_s1;
+          enum Upper_Lower_class class_s2;
+
+          for (i = 0; i < N_CHARS; i++)
+            xlate[i] = i;
+          s1->state = BEGIN_STATE;
+          s2->state = BEGIN_STATE;
+          /* Walk both strings in step, mapping each character of
+             string1 to the corresponding character of string2.  */
+          for (;;)
+            {
+              c1 = get_next (s1, &class_s1);
+              c2 = get_next (s2, &class_s2);
+              if (!class_ok[(int) class_s1][(int) class_s2])
+                error (1, 0,
+                       "misaligned or mismatched upper and/or lower classes");
+              /* Stop as soon as either string is exhausted.  string2
+                 may legitimately be longer than string1 (`tr ab xyz');
+                 without the test on c1 the store below would then
+                 write to xlate[-1].  */
+              if (c1 == -1 || c2 == -1)
+                break;
+              xlate[c1] = c2;
+            }
+          assert (c1 == -1 || truncate_set1);
+        }
+      if (squeeze_repeats)
+        {
+          set_initialize (s2, 0, in_squeeze_set);
+          squeeze_filter (io_buf, IO_BUF_SIZE, (PFI) read_and_xlate);
+        }
+      else
+        {
+          int chars_read;
+
+          do
+            {
+              chars_read = read_and_xlate (io_buf, IO_BUF_SIZE, NULL);
+              /* A short write is a failure too, not only a zero
+                 return.  */
+              if (chars_read > 0
+                  && fwrite ((char *) io_buf, 1, chars_read, stdout)
+                     != chars_read)
+                error (1, errno, "write error");
+            }
+          while (chars_read > 0);
+        }
+    }
+
+  exit (0);
+}
+
diff --git a/src/unexpand.c b/src/unexpand.c
new file mode 100644
index 000000000..2733ef77a
--- /dev/null
+++ b/src/unexpand.c
@@ -0,0 +1,432 @@
+/* unexpand - convert spaces to tabs
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* By default, convert only maximal strings of initial blanks and tabs
+ into tabs.
+ Preserves backspace characters in the output; they decrement the
+ column count for tab calculations.
+ The default action is equivalent to -8.
+
+ Options:
+ --tabs=tab1[,tab2[,...]]
+ -t tab1[,tab2[,...]]
+ -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
+ spaces apart instead of the default 8. Otherwise,
+ set the tabs at columns tab1, tab2, etc. (numbered from
+ 0); replace any tabs beyond the tabstops given with
+ single spaces.
+ --all
+ -a Use tabs wherever they would replace 2 or more spaces,
+ not just at the beginnings of lines.
+
+ David MacKenzie <djm@ai.mit.edu> */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISDIGIT(c) (isascii((c)) && isdigit((c)))
+#else
+#define ISDIGIT(c) (isdigit((c)))
+#endif
+
+/* The number of bytes added at a time to the amount of memory
+ allocated for the output line. */
+#define OUTPUT_BLOCK 256
+
+/* The number of bytes added at a time to the amount of memory
+ allocated for the list of tabstops. */
+#define TABLIST_BLOCK 256
+
+char *xmalloc ();
+char *xrealloc ();
+void error ();
+
+FILE *next_file ();
+void add_tabstop ();
+void parse_tabstops ();
+void unexpand ();
+void usage ();
+void validate_tabstops ();
+
+/* If nonzero, convert blanks even after nonblank characters have been
+ read on the line. */
+int convert_entire_line;
+
+/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
+int tab_size;
+
+/* Array of the explicit column numbers of the tab stops;
+ after `tab_list' is exhausted, the rest of the line is printed
+ unchanged. The first column is column 0. */
+int *tab_list;
+
+/* The index of the first invalid element of `tab_list',
+ where the next element can be added. */
+int first_free_tab;
+
+/* Null-terminated array of input filenames. */
+char **file_list;
+
+/* Default for `file_list' if no files are given on the command line. */
+char *stdin_argv[] =
+{
+ "-", NULL
+};
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+/* Status to return to the system. */
+int exit_status;
+
+/* The name this program was run with. */
+char *program_name;
+
+struct option longopts[] =
+{
+ {"tabs", 1, NULL, 't'},
+ {"all", 0, NULL, 'a'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Entry point of unexpand.  Collect the tab stops given with -t/--tabs
+   or in the old -tab1[,tab2...] form, pick the tab mode (fixed size or
+   explicit list), select the input files and run the conversion.
+   Exits with `exit_status', or with status 1 if closing stdin or
+   stdout fails.  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int tabval = -1;              /* Value of tabstop being read, or -1. */
+  int c;                        /* Option character. */
+
+  program_name = argv[0];
+  tab_list = NULL;
+  first_free_tab = 0;
+  convert_entire_line = 0;
+  have_read_stdin = 0;
+  exit_status = 0;
+
+  for (;;)
+    {
+      c = getopt_long (argc, argv, "at:,0123456789", longopts, (int *) 0);
+      if (c == EOF)
+        break;
+
+      switch (c)
+        {
+        case '?':
+          usage ();
+          /* No break here, exactly as in the original switch.  */
+        case 'a':
+          convert_entire_line = 1;
+          break;
+        case 't':
+          convert_entire_line = 1;
+          parse_tabstops (optarg);
+          break;
+        case ',':
+          /* End of one number in the old -tab1,tab2 syntax.  */
+          add_tabstop (tabval);
+          tabval = -1;
+          break;
+        default:
+          /* A digit of the old-style tab stop: append it.  */
+          tabval = (tabval == -1 ? 0 : tabval) * 10 + c - '0';
+          break;
+        }
+    }
+
+  /* Store the last old-style tab stop, if there is one pending.  */
+  add_tabstop (tabval);
+
+  validate_tabstops (tab_list, first_free_tab);
+
+  switch (first_free_tab)
+    {
+    case 0:
+      tab_size = 8;
+      break;
+    case 1:
+      tab_size = tab_list[0];
+      break;
+    default:
+      tab_size = 0;
+      break;
+    }
+
+  file_list = (optind == argc) ? stdin_argv : &argv[optind];
+
+  unexpand ();
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    error (1, errno, "-");
+  if (fclose (stdout) == EOF)
+    error (1, errno, "write error");
+  exit (exit_status);
+}
+
+/* Parse STOPS, a list of decimal tab stops separated by commas or
+   blanks, and append each number to the list of tabstops with
+   add_tabstop.  Empty items (e.g. two separators in a row) add
+   nothing.  Any character that is neither a digit nor a separator is
+   a fatal error.  */
+
+void
+parse_tabstops (stops)
+     char *stops;
+{
+  int tabval = -1;              /* Number being read, or -1 if none.  */
+  char *p = stops;
+
+  while (*p != '\0')
+    {
+      if (*p == ',' || isblank (*p))
+        {
+          add_tabstop (tabval);
+          tabval = -1;
+        }
+      else if (!ISDIGIT (*p))
+        error (1, 0, "tab size contains an invalid character");
+      else
+        tabval = (tabval == -1 ? 0 : tabval) * 10 + *p - '0';
+      p++;
+    }
+
+  /* The last number has no separator after it.  */
+  add_tabstop (tabval);
+}
+
+/* Add tab stop TABVAL to the end of `tab_list', except
+   if TABVAL is -1, do nothing.
+   `tab_list' grows by TABLIST_BLOCK entries whenever the number of
+   stored entries reaches a multiple of TABLIST_BLOCK.  */
+
+void
+add_tabstop (tabval)
+     int tabval;
+{
+  if (tabval == -1)
+    return;
+  if (first_free_tab % TABLIST_BLOCK == 0)
+    {
+      /* xrealloc takes a size in bytes, so the entry count has to be
+         scaled by the size of one entry; otherwise the array is too
+         small for the TABLIST_BLOCK entries stored before the next
+         reallocation.  */
+      tab_list = (int *) xrealloc (tab_list,
+                                   (first_free_tab + TABLIST_BLOCK)
+                                   * sizeof (tab_list[0]));
+    }
+  tab_list[first_free_tab++] = tabval;
+}
+
+/* Verify the ENTRIES tab stops in TABS: none may be zero and each one
+   must be greater than the one before it.  A violation is a fatal
+   error; an empty list is accepted.  */
+
+void
+validate_tabstops (tabs, entries)
+     int *tabs;
+     int entries;
+{
+  int last_stop = 0;
+  int i = 0;
+
+  while (i < entries)
+    {
+      int stop = tabs[i++];
+
+      if (stop == 0)
+        error (1, 0, "tab size cannot be 0");
+      if (stop <= last_stop)
+        error (1, 0, "tab sizes must be ascending");
+      last_stop = stop;
+    }
+}
+
+/* Change spaces to tabs, writing to stdout.
+   Read each file in `file_list', in order.
+
+   Blanks are not written as they are read.  Their total width is
+   accumulated in `pending' and, when a character that is not converted
+   arrives, written out as as many tabs as fit followed by spaces.
+   Conversion stops after the first non-blank of a line unless
+   `convert_entire_line' is set, and for the rest of the line once a
+   tab is found beyond the last explicit tab stop.  */
+
+void
+unexpand ()
+{
+  FILE *fp;                     /* Input stream. */
+  int c;                        /* Each input character. */
+  /* Index in `tab_list' of next tabstop: */
+  int tab_index = 0;            /* For calculating width of pending tabs. */
+  int print_tab_index = 0;      /* For printing as many tabs as possible. */
+  int column = 0;               /* Column on screen of next char. */
+  int next_tab_column;          /* Column the next tab stop is on. */
+  int convert = 1;              /* If nonzero, perform translations. */
+  int pending = 0;              /* Pending columns of blanks. */
+
+  fp = next_file ((FILE *) NULL);
+  for (;;)
+    {
+      c = getc (fp);
+      if (c == EOF)
+        {
+          fp = next_file (fp);
+          /* NOTE(review): blanks still counted in `pending' are not
+             written when the last file ends here -- confirm whether
+             that is intended.  */
+          if (fp == NULL)
+            break;              /* No more files. */
+          else
+            continue;
+        }
+
+      if (c == ' ' && convert)
+        {
+          ++pending;
+          ++column;
+        }
+      else if (c == '\t' && convert)
+        {
+          if (tab_size == 0)
+            {
+              /* Do not let tab_index == first_free_tab;
+                 stop when it is 1 less. */
+              while (tab_index < first_free_tab - 1
+                     && column >= tab_list[tab_index])
+                tab_index++;
+              next_tab_column = tab_list[tab_index];
+              if (tab_index < first_free_tab - 1)
+                tab_index++;
+              if (column >= next_tab_column)
+                {
+                  convert = 0;  /* Ran out of tab stops. */
+                  goto flush_pend;
+                }
+            }
+          else
+            {
+              next_tab_column = column + tab_size - column % tab_size;
+            }
+          pending += next_tab_column - column;
+          column = next_tab_column;
+        }
+      else
+        {
+        flush_pend:
+          /* Flush pending spaces.  Print as many tabs as possible,
+             then print the rest as spaces. */
+          if (pending == 1)
+            {
+              putchar (' ');
+              pending = 0;
+            }
+          /* Go back to the column where the pending blanks started.  */
+          column -= pending;
+          while (pending != 0)
+            {
+              if (tab_size == 0)
+                {
+                  /* Do not let print_tab_index == first_free_tab;
+                     stop when it is 1 less.  The loop must test the
+                     index it advances: testing tab_index here would
+                     leave the condition unchanged by the loop body, so
+                     the loop could never end once entered.  */
+                  while (print_tab_index < first_free_tab - 1
+                         && column >= tab_list[print_tab_index])
+                    print_tab_index++;
+                  next_tab_column = tab_list[print_tab_index];
+                  if (print_tab_index < first_free_tab - 1)
+                    print_tab_index++;
+                }
+              else
+                {
+                  next_tab_column = column + tab_size - column % tab_size;
+                }
+              if (next_tab_column - column <= pending)
+                {
+                  putchar ('\t');
+                  pending -= next_tab_column - column;
+                  column = next_tab_column;
+                }
+              else
+                {
+                  /* The next stop is out of reach: finish with spaces.  */
+                  --print_tab_index;
+                  column += pending;
+                  while (pending != 0)
+                    {
+                      putchar (' ');
+                      pending--;
+                    }
+                }
+            }
+
+          if (convert)
+            {
+              if (c == '\b')
+                {
+                  if (column > 0)
+                    --column;
+                }
+              else
+                {
+                  ++column;
+                  if (convert_entire_line == 0)
+                    convert = 0;
+                }
+            }
+
+          putchar (c);
+
+          if (c == '\n')
+            {
+              /* Start afresh for the next line.  */
+              tab_index = print_tab_index = 0;
+              column = pending = 0;
+              convert = 1;
+            }
+        }
+    }
+}
+
+/* Finish with the stream FP (if it is non-NULL) and return a stream
+   for the next entry of `file_list', advancing `file_list' past it.
+   A read error on FP or a failure to close it is reported and sets
+   `exit_status' to 1; stdin is not closed, only its error/EOF state is
+   cleared.  The name `-' stands for the standard input.  Files that
+   cannot be opened are reported and skipped.
+   Return NULL if there are no more input files.  */
+
+FILE *
+next_file (fp)
+     FILE *fp;
+{
+  static char *prev_file;       /* Name of the file FP was opened on.  */
+  char *file;
+
+  if (fp != NULL)
+    {
+      if (ferror (fp))
+        {
+          error (0, errno, "%s", prev_file);
+          exit_status = 1;
+        }
+      if (fp == stdin)
+        clearerr (fp);          /* Also clear EOF. */
+      else if (fclose (fp) == EOF)
+        {
+          error (0, errno, "%s", prev_file);
+          exit_status = 1;
+        }
+    }
+
+  for (;;)
+    {
+      file = *file_list++;
+      if (file == NULL)
+        return NULL;
+
+      if (file[0] == '-' && file[1] == '\0')
+        {
+          have_read_stdin = 1;
+          prev_file = file;
+          return stdin;
+        }
+
+      fp = fopen (file, "r");
+      if (fp != NULL)
+        {
+          prev_file = file;
+          return fp;
+        }
+
+      error (0, errno, "%s", file);
+      exit_status = 1;
+    }
+}
+
+/* Print a summary of the command line syntax of unexpand on stderr,
+ using `program_name' as the command name, and exit with status 1. */
+
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-tab1[,tab2[,...]]] [-t tab1[,tab2[,...]]] [-a]\n\
+ [--tabs=tab1[,tab2[,...]]] [--all] [file...]\n",
+ program_name);
+ exit (1);
+}
diff --git a/src/uniq.c b/src/uniq.c
new file mode 100644
index 000000000..0968cbae4
--- /dev/null
+++ b/src/uniq.c
@@ -0,0 +1,321 @@
+/* uniq -- remove duplicate lines from a sorted file
+ Copyright (C) 1986, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Richard Stallman and David MacKenzie. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+#include "linebuffer.h"
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+char *find_field ();
+int different ();
+void check_file ();
+void error ();
+void usage ();
+void writeline ();
+
+/* Number of fields to skip on each line when doing comparisons. */
+int skip_fields;
+
+/* Number of chars to skip after skipping any fields. */
+int skip_chars;
+
+/* Number of chars to compare; if 0, compare the whole lines. */
+int check_chars;
+
+enum countmode
+{
+ count_occurrences, /* -c Print count before output lines. */
+ count_none /* Default. Do not print counts. */
+};
+
+/* Whether and how to precede the output lines with a count of the number of
+ times they occurred in the input. */
+enum countmode countmode;
+
+enum output_mode
+{
+ output_repeated, /* -d Only lines that are repeated. */
+ output_unique, /* -u Only lines that are not repeated. */
+ output_all /* Default. Print first copy of each line. */
+};
+
+/* Which lines to output. */
+enum output_mode mode;
+
+/* The name this program was run with. */
+char *program_name;
+
+struct option longopts[] =
+{
+ {"count", 0, NULL, 'c'},
+ {"repeated", 0, NULL, 'd'},
+ {"unique", 0, NULL, 'u'},
+ {"skip-fields", 1, NULL, 'f'},
+ {"skip-chars", 1, NULL, 's'},
+ {"check-chars", 1, NULL, 'w'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Entry point of uniq.  Parse the options (including the old -N
+   skip-fields and +N skip-chars forms), take up to two file operands,
+   input then output, both defaulting to "-", and process them with
+   check_file.  Extra operands or unknown options print the usage
+   message.  */
+
+void
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  int optc;
+  char *infile = "-";
+  char *outfile = "-";
+
+  program_name = argv[0];
+  countmode = count_none;
+  mode = output_all;
+  check_chars = 0;
+  skip_fields = 0;
+  skip_chars = 0;
+
+  while ((optc = getopt_long (argc, argv, "0123456789cdf:s:uw:", longopts,
+                              (int *) 0)) != EOF)
+    {
+      /* Old-style -N: each digit arrives as an option of its own.  */
+      if (optc >= '0' && optc <= '9')
+        {
+          skip_fields = skip_fields * 10 + optc - '0';
+          continue;
+        }
+
+      switch (optc)
+        {
+        case 'c':
+          countmode = count_occurrences;
+          break;
+
+        case 'd':
+          mode = output_repeated;
+          break;
+
+        case 'u':
+          mode = output_unique;
+          break;
+
+        case 'f':               /* Like '-#'. */
+          skip_fields = atoi (optarg);
+          break;
+
+        case 's':               /* Like '+#'. */
+          skip_chars = atoi (optarg);
+          break;
+
+        case 'w':
+          check_chars = atoi (optarg);
+          break;
+
+        default:
+          usage ();
+        }
+    }
+
+  /* Old-style +N operands; the last one given wins.  */
+  for (; optind < argc && argv[optind][0] == '+'; optind++)
+    skip_chars = atoi (argv[optind]);
+
+  if (optind < argc)
+    infile = argv[optind++];
+
+  if (optind < argc)
+    outfile = argv[optind++];
+
+  if (optind < argc)
+    usage ();                   /* Extra arguments. */
+
+  check_file (infile, outfile);
+
+  exit (0);
+}
+
+/* Process input file INFILE with output to OUTFILE.
+ If either is "-", use the standard I/O stream for it instead.
+
+ Lines are read one at a time and compared, on the field selected by
+ find_field, with the previous line. A line that matches only bumps
+ a counter; a line that differs causes the previous line to be handed
+ to writeline together with the number of matches it had. Failure to
+ open either file, or an error detected when the streams are closed,
+ is fatal. */
+
+void
+check_file (infile, outfile)
+ char *infile, *outfile;
+{
+ FILE *istream;
+ FILE *ostream;
+ struct linebuffer lb1, lb2;
+ struct linebuffer *thisline, *prevline, *exch;
+ char *prevfield, *thisfield;
+ int prevlen, thislen;
+ /* Number of lines seen so far that match PREVLINE (0 = seen once). */
+ int match_count = 0;
+
+ if (!strcmp (infile, "-"))
+ istream = stdin;
+ else
+ istream = fopen (infile, "r");
+ if (istream == NULL)
+ error (1, errno, "%s", infile);
+
+ if (!strcmp (outfile, "-"))
+ ostream = stdout;
+ else
+ ostream = fopen (outfile, "w");
+ if (ostream == NULL)
+ error (1, errno, "%s", outfile);
+
+ thisline = &lb1;
+ prevline = &lb2;
+
+ initbuffer (thisline);
+ initbuffer (prevline);
+
+ /* Nothing to output if the very first read yields nothing. */
+ if (readline (prevline, istream) == 0)
+ goto closefiles;
+ prevfield = find_field (prevline);
+ prevlen = prevline->length - (prevfield - prevline->buffer);
+
+ while (!feof (istream))
+ {
+ if (readline (thisline, istream) == 0)
+ break;
+ thisfield = find_field (thisline);
+ thislen = thisline->length - (thisfield - thisline->buffer);
+ if (!different (thisfield, prevfield, thislen, prevlen))
+ match_count++;
+ else
+ {
+ writeline (prevline, ostream, match_count);
+ match_count = 0;
+
+ /* Swap the two buffers so that the line just read becomes the
+ previous line; PREVFIELD must follow, since it points into
+ the buffer that is now PREVLINE. */
+ exch = prevline;
+ prevline = thisline;
+ thisline = exch;
+ prevfield = thisfield;
+ prevlen = thislen;
+ }
+ }
+
+ /* The last group of lines is still pending. */
+ writeline (prevline, ostream, match_count);
+
+ closefiles:
+ if (ferror (istream) || fclose (istream) == EOF)
+ error (1, errno, "error reading %s", infile);
+
+ if (ferror (ostream) || fclose (ostream) == EOF)
+ error (1, errno, "error writing %s", outfile);
+
+ free (lb1.buffer);
+ free (lb2.buffer);
+}
+
+/* Return a pointer to the place in the linebuffer LINE where the
+   comparison is to start: first skip `skip_fields' fields (a field is
+   a possibly empty run of blanks followed by a run of non-blanks),
+   then skip `skip_chars' more characters.  The result never points
+   beyond the end of the line.  */
+
+char *
+find_field (line)
+     struct linebuffer *line;
+{
+  register char *lp = line->buffer;
+  register int size = line->length;
+  register int pos = 0;
+  register int left;
+
+  for (left = skip_fields; left > 0 && pos < size; left--)
+    {
+      while (pos < size && isblank (lp[pos]))
+        pos++;
+      while (pos < size && !isblank (lp[pos]))
+        pos++;
+    }
+
+  /* Skip up to `skip_chars' characters, but stay within the line.  */
+  left = size - pos;
+  if (skip_chars > 0 && left > 0)
+    pos += (skip_chars < left ? skip_chars : left);
+
+  return lp + pos;
+}
+
+/* Compare the fields OLD and NEW, whose lengths are OLDLEN and NEWLEN.
+   OLD and NEW point to the beginnings of the fields to compare, not to
+   the beginnings of the lines.  If `check_chars' is nonzero, at most
+   that many characters of each field take part in the comparison.
+   Return zero if the fields match, nonzero if they do not.  */
+
+int
+different (old, new, oldlen, newlen)
+     char *old;
+     char *new;
+     int oldlen;
+     int newlen;
+{
+  register int order;
+  int common;
+
+  if (check_chars)
+    {
+      oldlen = min (oldlen, check_chars);
+      newlen = min (newlen, check_chars);
+    }
+
+  /* Compare the part both fields have; if it is equal, the fields
+     differ exactly when their lengths do.  */
+  common = min (oldlen, newlen);
+  order = memcmp (old, new, common);
+  return order != 0 ? order : oldlen - newlen;
+}
+
+/* Write the line held in linebuffer LINE, followed by a newline, to
+   STREAM, unless the output mode excludes it: in `output_unique' mode
+   only lines that were not repeated are written, in `output_repeated'
+   mode only lines that were.  LINECOUNT + 1 is the number of times the
+   line occurred; it is printed in front of the line when counts were
+   requested.  */
+
+void
+writeline (line, stream, linecount)
+     struct linebuffer *line;
+     FILE *stream;
+     int linecount;
+{
+  switch (mode)
+    {
+    case output_unique:
+      if (linecount != 0)
+        return;
+      break;
+
+    case output_repeated:
+      if (linecount == 0)
+        return;
+      break;
+
+    default:
+      break;
+    }
+
+  if (countmode == count_occurrences)
+    fprintf (stream, "%7d\t", linecount + 1);
+
+  fwrite (line->buffer, sizeof (char), line->length, stream);
+  putc ('\n', stream);
+}
+
+/* Print a summary of the command line syntax of uniq on stderr,
+ using `program_name' as the command name, and exit with status 1. */
+
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-cdu] [-f skip-fields] [-s skip-chars] [-w check-chars]\n\
+ [-#skip-fields] [+#skip-chars] [--count] [--repeated] [--unique]\n\
+ [--skip-fields=skip-fields] [--skip-chars=skip-chars]\n\
+ [--check-chars=check-chars] [infile] [outfile]\n",
+ program_name);
+ exit (1);
+}
diff --git a/src/wc.c b/src/wc.c
new file mode 100644
index 000000000..72d6ea665
--- /dev/null
+++ b/src/wc.c
@@ -0,0 +1,231 @@
+/* wc - print the number of bytes, words, and lines in files
+ Copyright (C) 1985, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Paul Rubin, phr@ocf.berkeley.edu
+ and David MacKenzie, djm@gnu.ai.mit.edu. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+/* Size, in bytes, of the buffer that wc fills with each call to read. */
+#define BUFFER_SIZE (16 * 1024)
+
+/* Old-style forward declarations (no parameter lists) for functions
+   that are called before their definitions appear below.  `error' has
+   no definition in this file. */
+void error ();
+void wc ();
+void wc_file ();
+void write_counts ();
+
+/* Cumulative number of lines, words, and chars in all files so far;
+   wc adds each file's counts to these after printing them. */
+unsigned long total_lines, total_words, total_chars;
+
+/* Which counts to print: each is nonzero when the corresponding count
+   was requested.  main turns all three on when no option selects one. */
+int print_lines, print_words, print_chars;
+
+/* Nonzero if we have ever read the standard input; main then closes
+   descriptor 0 before exiting. */
+int have_read_stdin;
+
+/* The name this program was run with (argv[0]). */
+char *program_name;
+
+/* The error code to return to the system: 0 initially, set to 1 when
+   a file cannot be opened, read, or closed. */
+int exit_status;
+
+/* Long options passed to getopt_long in main.  Each entry maps a long
+   name onto the short option letter handled by the switch there;
+   --bytes and --chars both select 'c'.  The second field is 0 for
+   every entry, i.e. no option takes an argument.  The all-zero entry
+   terminates the table. */
+struct option longopts[] =
+{
+ {"bytes", 0, NULL, 'c'},
+ {"chars", 0, NULL, 'c'},
+ {"lines", 0, NULL, 'l'},
+ {"words", 0, NULL, 'w'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Entry point.  Parse the -c, -l and -w options (and their long
+   forms), then count each file named on the command line, or the
+   standard input when no file is named.  When more than one file is
+   named, a final "total" line is printed from the cumulative counts.
+   If no count option is given, lines, words and chars are all printed.
+   An unrecognized option prints a usage line on standard error and
+   exits with status 1.
+
+   The process exits with exit_status, which starts at 0 and is set
+   to 1 by wc_file and wc when a file cannot be processed.
+
+   main is declared to return int, as the C standard requires for a
+   hosted program; every path still leaves through exit, so the end
+   of the function is never reached. */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int optc;
+  int nfiles;
+
+  program_name = argv[0];
+  exit_status = 0;
+  print_lines = print_words = print_chars = 0;
+  total_lines = total_words = total_chars = 0;
+
+  while ((optc = getopt_long (argc, argv, "clw", longopts, (int *) 0)) != EOF)
+    switch (optc)
+      {
+      case 'c':
+	print_chars = 1;
+	break;
+
+      case 'l':
+	print_lines = 1;
+	break;
+
+      case 'w':
+	print_words = 1;
+	break;
+
+      default:
+	fprintf (stderr, "\
+Usage: %s [-clw] [--bytes] [--chars] [--lines] [--words] [file...]\n", argv[0]);
+	exit (1);
+      }
+
+  /* No count selected on the command line: print all three.  */
+  if (print_lines + print_words + print_chars == 0)
+    print_lines = print_words = print_chars = 1;
+
+  nfiles = argc - optind;
+
+  if (nfiles == 0)
+    {
+      /* No operands: count standard input.  The empty name keeps
+	 write_counts from printing a file label.  */
+      have_read_stdin = 1;
+      wc (0, "");
+    }
+  else
+    {
+      for (; optind < argc; ++optind)
+	wc_file (argv[optind]);
+
+      if (nfiles > 1)
+	write_counts (total_lines, total_words, total_chars, "total");
+    }
+
+  /* A failure to close standard input is reported through error
+     with a nonzero status argument.  */
+  if (have_read_stdin && close (0))
+    error (1, errno, "-");
+
+  exit (exit_status);
+}
+
+/* Count the file named FILE and print its counts.  The name "-"
+   stands for the standard input, which is counted through
+   descriptor 0 and recorded in have_read_stdin.  Any other name is
+   opened read-only, counted, and closed again; a failure to open or
+   to close it is reported with error and sets exit_status to 1. */
+
+void
+wc_file (file)
+     char *file;
+{
+  int fd;
+
+  if (strcmp (file, "-") == 0)
+    {
+      have_read_stdin = 1;
+      wc (0, file);
+      return;
+    }
+
+  fd = open (file, O_RDONLY);
+  if (fd == -1)
+    {
+      error (0, errno, "%s", file);
+      exit_status = 1;
+      return;
+    }
+
+  wc (fd, file);
+
+  if (close (fd) != 0)
+    {
+      error (0, errno, "%s", file);
+      exit_status = 1;
+    }
+}
+
+/* Count the lines, words and chars readable from descriptor FD, print
+   them through write_counts labelled with FILE, and add them to the
+   cumulative totals.  A read error is reported with error and sets
+   exit_status to 1; whatever was counted before the error is still
+   printed and added to the totals.
+
+   Each newline counts as a line.  A word is a maximal run of bytes
+   other than newline, carriage return, form feed, tab, vertical tab
+   and space. */
+
+void
+wc (fd, file)
+     int fd;
+     char *file;
+{
+  char buf[BUFFER_SIZE];
+  unsigned long lines = 0, words = 0, chars = 0;
+  int in_word = 0;
+  int nread;
+  struct stat stats;
+
+  if (print_chars && !print_words && !print_lines
+      && fstat (fd, &stats) == 0 && S_ISREG (stats.st_mode))
+    {
+      /* Only the char count is wanted and FD is a regular file: take
+	 the size reported by fstat instead of reading the data.
+	 NOTE(review): the descriptor's current offset is not consulted
+	 here, so the whole file size is reported.  */
+      chars = stats.st_size;
+    }
+  else
+    {
+      while ((nread = read (fd, buf, BUFFER_SIZE)) > 0)
+	{
+	  char *p;
+	  char *end = buf + nread;
+
+	  chars += nread;
+	  for (p = buf; p < end; p++)
+	    {
+	      char c = *p;
+
+	      if (c == '\n')
+		lines++;
+
+	      if (c == '\n' || c == '\r' || c == '\f'
+		  || c == '\t' || c == '\v' || c == ' ')
+		{
+		  /* A separator ends the word in progress, if any.  */
+		  if (in_word)
+		    {
+		      in_word = 0;
+		      words++;
+		    }
+		}
+	      else
+		in_word = 1;
+	    }
+	  /* in_word is kept across reads, so a word that straddles two
+	     buffers is counted once.  */
+	}
+
+      if (nread < 0)
+	{
+	  error (0, errno, "%s", file);
+	  exit_status = 1;
+	}
+
+      /* Input that ends in the middle of a word still has that word.  */
+      if (in_word)
+	words++;
+    }
+
+  write_counts (lines, words, chars, file);
+  total_lines += lines;
+  total_words += words;
+  total_chars += chars;
+}
+
+/* Print one line of counts on standard output.  LC, WC and CC are the
+   line, word and char counts; each is printed, in that order, only if
+   the matching print_lines, print_words or print_chars flag is set.
+   Every count is printed with the "%7lu" format and the counts are
+   separated by single spaces.  If FILE is not the empty string it is
+   appended after a space.  The line always ends with a newline. */
+
+void
+write_counts (lc, wc, cc, file)
+     unsigned long lc, wc, cc;
+     char *file;
+{
+  /* Text to emit before the next count: empty until a count is printed.  */
+  char *sep = "";
+
+  if (print_lines)
+    {
+      printf ("%s%7lu", sep, lc);
+      sep = " ";
+    }
+  if (print_words)
+    {
+      printf ("%s%7lu", sep, wc);
+      sep = " ";
+    }
+  if (print_chars)
+    printf ("%s%7lu", sep, cc);
+
+  if (*file != '\0')
+    printf (" %s", file);
+  putchar ('\n');
+}