diff options
author | Jim Meyering <jim@meyering.net> | 1992-11-08 02:50:43 +0000 |
---|---|---|
committer | Jim Meyering <jim@meyering.net> | 1992-11-08 02:50:43 +0000 |
commit | b25038ce9a234ea0906ddcbd8a0012e917e6c661 (patch) | |
tree | a4360f1b307910d9266f65fc851479c218219009 /src | |
parent | f33e06711c51330972e2adf07d21a4e69c8f44f6 (diff) | |
download | coreutils-b25038ce9a234ea0906ddcbd8a0012e917e6c661.tar.xz |
Initial revision
Diffstat (limited to 'src')
-rw-r--r-- | src/cat.c | 660 | ||||
-rw-r--r-- | src/cksum.c | 274 | ||||
-rw-r--r-- | src/comm.c | 221 | ||||
-rw-r--r-- | src/csplit.c | 1308 | ||||
-rw-r--r-- | src/cut.c | 586 | ||||
-rw-r--r-- | src/expand.c | 377 | ||||
-rw-r--r-- | src/fold.c | 250 | ||||
-rw-r--r-- | src/head.c | 380 | ||||
-rw-r--r-- | src/join.c | 690 | ||||
-rw-r--r-- | src/nl.c | 546 | ||||
-rw-r--r-- | src/od.c | 1697 | ||||
-rw-r--r-- | src/paste.c | 458 | ||||
-rw-r--r-- | src/pr.c | 1844 | ||||
-rw-r--r-- | src/sort.c | 1746 | ||||
-rw-r--r-- | src/split.c | 532 | ||||
-rw-r--r-- | src/sum.c | 217 | ||||
-rw-r--r-- | src/tac.c | 628 | ||||
-rw-r--r-- | src/tail.c | 858 | ||||
-rw-r--r-- | src/tr.c | 1813 | ||||
-rw-r--r-- | src/unexpand.c | 432 | ||||
-rw-r--r-- | src/uniq.c | 321 | ||||
-rw-r--r-- | src/wc.c | 231 |
22 files changed, 16069 insertions, 0 deletions
diff --git a/src/cat.c b/src/cat.c new file mode 100644 index 000000000..34c438491 --- /dev/null +++ b/src/cat.c @@ -0,0 +1,660 @@ +/* cat -- concatenate files and print on the standard output. + Copyright (C) 1988, 1990, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Differences from the Unix cat: + * Always unbuffered, -u is ignored. + * 100 times faster with -v -u. + * 20 times faster with -v. + + By tege@sics.se, Torbjorn Granlund, advised by rms, Richard Stallman. */ + +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#ifndef _POSIX_SOURCE +#include <sys/ioctl.h> +#endif +#include "system.h" + +#define max(h,i) ((h) > (i) ? (h) : (i)) + +char *stpcpy (); +char *xmalloc (); +void cat (); +void error (); +void next_line_num (); +void simple_cat (); + +/* Name under which this program was invoked. */ +char *program_name; + +/* Name of input file. May be "-". */ +char *infile; + +/* Descriptor on which input file is open. */ +int input_desc; + +/* Descriptor on which output file is open. Always is 1. */ +int output_desc; + +/* Buffer for line numbers. */ +char line_buf[13] = +{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0', '\t', '\0'}; + +/* Position in `line_buf' where printing starts. This will not change + unless the number of lines are more than 999999. 
*/ +char *line_num_print = line_buf + 5; + +/* Position of the first digit in `line_buf'. */ +char *line_num_start = line_buf + 10; + +/* Position of the last digit in `line_buf'. */ +char *line_num_end = line_buf + 10; + +/* Preserves the `cat' function's local `newlines' between invocations. */ +int newlines2 = 0; + +/* Count of non-fatal error conditions. */ +int exit_stat = 0; + +void +usage (reason) + char *reason; +{ + if (reason != NULL) + fprintf (stderr, "%s: %s\n", program_name, reason); + + fprintf (stderr, "\ +Usage: %s [-benstuvAET] [--number] [--number-nonblank] [--squeeze-blank]\n\ + [--show-nonprinting] [--show-ends] [--show-tabs] [--show-all]\n\ + [file...]\n", + program_name); + + exit (2); +} + + +void +main (argc, argv) + int argc; + char *argv[]; +{ + /* Optimal size of i/o operations of output. */ + int outsize; + + /* Optimal size of i/o operations of input. */ + int insize; + + /* Pointer to the input buffer. */ + unsigned char *inbuf; + + /* Pointer to the output buffer. */ + unsigned char *outbuf; + + int c; + + /* Index in argv to processed argument. */ + int argind; + + /* Device number of the output (file or whatever). */ + int out_dev; + + /* I-node number of the output. */ + int out_ino; + + /* Nonzero if the output file should not be the same as any input file. */ + int check_redirection = 1; + + /* Nonzero if we have ever read standard input. */ + int have_read_stdin = 0; + + struct stat stat_buf; + + /* Variables that are set according to the specified options. 
*/ + int numbers = 0; + int numbers_at_empty_lines = 1; + int squeeze_empty_lines = 0; + int mark_line_ends = 0; + int quote = 0; + int output_tabs = 1; + int options = 0; + + static struct option long_options[] = + { + {"number-nonblank", 0, NULL, 'b'}, + {"number", 0, NULL, 'n'}, + {"squeeze-blank", 0, NULL, 's'}, + {"show-nonprinting", 0, NULL, 'v'}, + {"show-ends", 0, NULL, 'E'}, + {"show-tabs", 0, NULL, 'T'}, + {"show-all", 0, NULL, 'A'}, + {NULL, 0, NULL, 0} + }; + + program_name = argv[0]; + + /* Parse command line options. */ + + while ((c = getopt_long (argc, argv, "benstuvAET", long_options, (int *) 0)) + != EOF) + { + options++; + switch (c) + { + case 'b': + numbers = 1; + numbers_at_empty_lines = 0; + break; + + case 'e': + mark_line_ends = 1; + quote = 1; + break; + + case 'n': + numbers = 1; + break; + + case 's': + squeeze_empty_lines = 1; + break; + + case 't': + output_tabs = 0; + quote = 1; + break; + + case 'u': + /* We provide the -u feature unconditionally. */ + options--; + break; + + case 'v': + quote = 1; + break; + + case 'A': + quote = 1; + mark_line_ends = 1; + output_tabs = 0; + break; + + case 'E': + mark_line_ends = 1; + break; + + case 'T': + output_tabs = 0; + break; + + default: + usage ((char *) 0); + } + } + + output_desc = 1; + + /* Get device, i-node number, and optimal blocksize of output. */ + + if (fstat (output_desc, &stat_buf) < 0) + error (1, errno, "standard output"); + + outsize = ST_BLKSIZE (stat_buf); + /* Input file can be output file for non-regular files. + fstat on pipes returns S_IFSOCK on some systems, S_IFIFO + on others, so the checking should not be done for those types, + and to allow things like cat < /dev/tty > /dev/tty, checking + is not done for device files either. */ + + if (S_ISREG (stat_buf.st_mode)) + { + out_dev = stat_buf.st_dev; + out_ino = stat_buf.st_ino; + } + else + check_redirection = 0; + + /* Check if any of the input files are the same as the output file. */ + + /* Main loop. 
*/ + + infile = "-"; + argind = optind; + + do + { + if (argind < argc) + infile = argv[argind]; + + if (infile[0] == '-' && infile[1] == 0) + { + have_read_stdin = 1; + input_desc = 0; + } + else + { + input_desc = open (infile, O_RDONLY); + if (input_desc < 0) + { + error (0, errno, "%s", infile); + exit_stat = 1; + continue; + } + } + + if (fstat (input_desc, &stat_buf) < 0) + { + error (0, errno, "%s", infile); + exit_stat = 1; + goto contin; + } + insize = ST_BLKSIZE (stat_buf); + + /* Compare the device and i-node numbers of this input file with + the corresponding values of the (output file associated with) + stdout, and skip this input file if they coincide. Input + files cannot be redirected to themselves. */ + + if (check_redirection + && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino) + { + error (0, 0, "%s: input file is output file", infile); + exit_stat = 1; + goto contin; + } + + /* Select which version of `cat' to use. If any options (more than -u) + were specified, use `cat', otherwise use `simple_cat'. */ + + if (options == 0) + { + insize = max (insize, outsize); + inbuf = (unsigned char *) xmalloc (insize); + + simple_cat (inbuf, insize); + } + else + { + inbuf = (unsigned char *) xmalloc (insize + 1); + + /* Why are (OUTSIZE - 1 + INSIZE * 4 + 13) bytes allocated for + the output buffer? + + A test whether output needs to be written is done when the input + buffer empties or when a newline appears in the input. After + output is written, at most (OUTSIZE - 1) bytes will remain in the + buffer. Now INSIZE bytes of input are read. Each input character + may grow by a factor of 4 (by the prepending of M-^). If all + characters do, and no newlines appear in this block of input, we + will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer. If + the last character in the preceding block of input was a + newline, a line number may be written (according to the given + options) as the first thing in the output buffer.
(Done after the + new input is read, but before processing of the input begins.) A + line number requires seldom more than 13 positions. */ + + outbuf = (unsigned char *) xmalloc (outsize - 1 + insize * 4 + 13); + + cat (inbuf, insize, outbuf, outsize, quote, + output_tabs, numbers, numbers_at_empty_lines, mark_line_ends, + squeeze_empty_lines); + + free (outbuf); + } + + free (inbuf); + + contin: + if (strcmp (infile, "-") && close (input_desc) < 0) + { + error (0, errno, "%s", infile); + exit_stat = 1; + } + } + while (++argind < argc); + + if (have_read_stdin && close (0) < 0) + error (1, errno, "-"); + if (close (1) < 0) + error (1, errno, "write error"); + + exit (exit_stat); +} + +/* Plain cat. Copies the file behind `input_desc' to the file behind + `output_desc'. */ + +void +simple_cat (buf, bufsize) + /* Pointer to the buffer, used by reads and writes. */ + unsigned char *buf; + + /* Number of characters preferably read or written by each read and write + call. */ + int bufsize; +{ + /* Actual number of characters read, and therefore written. */ + int n_read; + + /* Loop until the end of the file. */ + + for (;;) + { + /* Read a block of input. */ + + n_read = read (input_desc, buf, bufsize); + if (n_read < 0) + { + error (0, errno, "%s", infile); + exit_stat = 1; + return; + } + + /* End of this file? */ + + if (n_read == 0) + break; + + /* Write this block out. */ + + if (write (output_desc, buf, n_read) != n_read) + error (1, errno, "write error"); + } +} + +/* Cat the file behind INPUT_DESC to the file behind OUTPUT_DESC. + Called if any option more than -u was specified. + + A newline character is always put at the end of the buffer, to make + an explicit test for buffer end unnecessary. */ + +void +cat (inbuf, insize, outbuf, outsize, quote, + output_tabs, numbers, numbers_at_empty_lines, + mark_line_ends, squeeze_empty_lines) + + /* Pointer to the beginning of the input buffer. 
*/ + unsigned char *inbuf; + + /* Number of characters read in each read call. */ + int insize; + + /* Pointer to the beginning of the output buffer. */ + unsigned char *outbuf; + + /* Number of characters written by each write call. */ + int outsize; + + /* Variables that have values according to the specified options. */ + int quote; + int output_tabs; + int numbers; + int numbers_at_empty_lines; + int mark_line_ends; + int squeeze_empty_lines; +{ + /* Last character read from the input buffer. */ + unsigned char ch; + + /* Pointer to the next character in the input buffer. */ + unsigned char *bpin; + + /* Pointer to the first non-valid byte in the input buffer, i.e. the + current end of the buffer. */ + unsigned char *eob; + + /* Pointer to the position where the next character shall be written. */ + unsigned char *bpout; + + /* Number of characters read by the last read call. */ + int n_read; + + /* Determines how many consecutive newlines there have been in the + input. 0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 1, + etc. Initially 0 to indicate that we are at the beginning of a + new line. The "state" of the procedure is determined by + NEWLINES. */ + int newlines = newlines2; + +#ifdef FIONREAD + /* If nonzero, use the FIONREAD ioctl, as an optimization. + (On Ultrix, it is not supported on NFS filesystems.) */ + int use_fionread = 1; +#endif + + /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input + is read immediately. */ + + eob = inbuf; + bpin = eob + 1; + + bpout = outbuf; + + for (;;) + { + do + { + /* Write if there are at least OUTSIZE bytes in OUTBUF. */ + + if (bpout - outbuf >= outsize) + { + unsigned char *wp = outbuf; + do + { + if (write (output_desc, wp, outsize) != outsize) + error (1, errno, "write error"); + wp += outsize; + } + while (bpout - wp >= outsize); + + /* Move the remaining bytes to the beginning of the + buffer.
*/ + + bcopy (wp, outbuf, bpout - wp); + bpout = outbuf + (bpout - wp); + } + + /* Is INBUF empty? */ + + if (bpin > eob) + { +#ifdef FIONREAD + int n_to_read = 0; + + /* Is there any input to read immediately? + If not, we are about to wait, + so write all buffered output before waiting. */ + + if (use_fionread + && ioctl (input_desc, FIONREAD, &n_to_read) < 0) + { + /* Ultrix returns EOPNOTSUPP on NFS; + HP-UX returns ENOTTY on pipes. */ + if (errno == EOPNOTSUPP || errno == ENOTTY) + use_fionread = 0; + else + { + error (0, errno, "cannot do ioctl on `%s'", infile); + exit_stat = 1; + newlines2 = newlines; + return; + } + } + if (n_to_read == 0) +#endif + { + int n_write = bpout - outbuf; + + if (write (output_desc, outbuf, n_write) != n_write) + error (1, errno, "write error"); + bpout = outbuf; + } + + /* Read more input into INBUF. */ + + n_read = read (input_desc, inbuf, insize); + if (n_read < 0) + { + error (0, errno, "%s", infile); + exit_stat = 1; + newlines2 = newlines; + return; + } + if (n_read == 0) + { + newlines2 = newlines; + return; + } + + /* Update the pointers and insert a sentinel at the buffer + end. */ + + bpin = inbuf; + eob = bpin + n_read; + *eob = '\n'; + } + else + { + /* It was a real (not a sentinel) newline. */ + + /* Was the last line empty? + (i.e. have two or more consecutive newlines been read?) */ + + if (++newlines > 0) + { + /* Are multiple adjacent empty lines to be substituted by + single ditto (-s), and this was the second empty line? */ + + if (squeeze_empty_lines && newlines >= 2) + { + ch = *bpin++; + continue; + } + + /* Are line numbers to be written at empty lines (-n)? */ + + if (numbers && numbers_at_empty_lines) + { + next_line_num (); + bpout = (unsigned char *) stpcpy (bpout, line_num_print); + } + } + + /* Output a currency symbol if requested (-e). */ + + if (mark_line_ends) + *bpout++ = '$'; + + /* Output the newline. 
*/ + + *bpout++ = '\n'; + } + ch = *bpin++; + } + while (ch == '\n'); + + /* Are we at the beginning of a line, and line numbers are requested? */ + + if (newlines >= 0 && numbers) + { + next_line_num (); + bpout = (unsigned char *) stpcpy (bpout, line_num_print); + } + + /* Here CH cannot contain a newline character. */ + + /* The loops below continue until a newline character is found, + which means that the buffer is empty or that a proper newline + has been found. */ + + /* If quoting, i.e. at least one of -v, -e, or -t specified, + scan for chars that need conversion. */ + if (quote) + for (;;) + { + if (ch >= 32) + { + if (ch < 127) + *bpout++ = ch; + else if (ch == 127) + *bpout++ = '^', + *bpout++ = '?'; + else + { + *bpout++ = 'M', + *bpout++ = '-'; + if (ch >= 128 + 32) + if (ch < 128 + 127) + *bpout++ = ch - 128; + else + *bpout++ = '^', + *bpout++ = '?'; + else + *bpout++ = '^', + *bpout++ = ch - 128 + 64; + } + } + else if (ch == '\t' && output_tabs) + *bpout++ = '\t'; + else if (ch == '\n') + { + newlines = -1; + break; + } + else + *bpout++ = '^', + *bpout++ = ch + 64; + + ch = *bpin++; + } + else + /* Not quoting, neither of -v, -e, or -t specified. */ + for (;;) + { + if (ch == '\t' && !output_tabs) + *bpout++ = '^', + *bpout++ = ch + 64; + else if (ch != '\n') + *bpout++ = ch; + else + { + newlines = -1; + break; + } + + ch = *bpin++; + } + } +} + +/* Compute the next line number. */ + +void +next_line_num () +{ + char *endp = line_num_end; + do + { + if ((*endp)++ < '9') + return; + *endp-- = '0'; + } + while (endp >= line_num_start); + *--line_num_start = '1'; + if (line_num_start < line_num_print) + line_num_print--; +} diff --git a/src/cksum.c b/src/cksum.c new file mode 100644 index 000000000..df9c3130b --- /dev/null +++ b/src/cksum.c @@ -0,0 +1,274 @@ +/* cksum -- calculate and print POSIX.2 checksums and sizes of files + Copyright (C) 1992 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Q. Frank Xia, qx@math.columbia.edu. + Cosmetic changes and reorganization by David MacKenzie, djm@gnu.ai.mit.edu. + + Usage: cksum [file...] + + The code segment between "#ifdef CRCTAB" and "#else" is the code + which calculates the "crctab". It is included for those who want to + verify the correctness of the "crctab". To recreate the "crctab", + do the following: + + cc -DCRCTAB -o crctab cksum.c + crctab > crctab.h + + As Bruce Evans pointed out to me, the crctab in the sample C code + in 4.9.10 Rationale of P1003.2/D11.2 is represented in reversed order. + Namely, 0x01 is represented as 0x80, 0x02 is represented as 0x40, etc. + The generating polynomial is crctab[0x80]=0xedb88320 instead of + crctab[1]=0x04C11DB7. But the code works only for a non-reverse order + crctab. Therefore, the sample implementation is wrong. + + This software is compatible with neither the System V nor the BSD + `sum' program. It is supposed to conform to P1003.2/D11.2, + except foreign language interface (4.9.5.3 of P1003.2/D11.2) support. + Any inconsistency with the standard except 4.9.5.3 is a bug.
*/ + +#ifdef CRCTAB + +#include <stdio.h> + +#define BIT(x) ( (unsigned long)1 << (x) ) +#define SBIT BIT(31) + +/* The generating polynomial is + + 32 26 23 22 16 12 11 10 8 7 5 4 2 1 + G(X)=X + X + X + X + X + X + X + X + X + X + X + X + X + X + 1 + + The i bit in GEN is set if X^i is a summand of G(X) except X^32. */ + +#define GEN (BIT(26)|BIT(23)|BIT(22)|BIT(16)|BIT(12)|BIT(11)|BIT(10)\ + |BIT(8) |BIT(7) |BIT(5) |BIT(4) |BIT(2) |BIT(1) |BIT(0)); + +unsigned long r[8]; + +void +fill_r () +{ + int i; + + r[0] = GEN; + for (i = 1; i < 8; i++) + r[i] = (r[i - 1] & SBIT) ? (r[i - 1] << 1) ^ r[0] : r[i - 1] << 1; +} + +unsigned long +remainder (m) + int m; +{ + unsigned long rem = 0; + int i; + + for (i = 0; i < 8; i++) + if (BIT (i) & m) + rem = rem ^ r[i]; + + return rem & 0xFFFFFFFF; /* Make it run on 64-bit machine. */ +} + +void +main () +{ + int i; + + fill_r (); + printf ("unsigned long crctab[256] = {\n 0x0"); + for (i = 0; i < 51; i++) + { + printf (",\n 0x%08X, 0x%08X, 0x%08X, 0x%08X, 0x%08X", + remainder (i * 5 + 1), remainder (i * 5 + 2), remainder (i * 5 + 3), + remainder (i * 5 + 4), remainder (i * 5 + 5)); + } + printf ("\n};\n"); + exit (0); +} + +#else /* !CRCTAB */ + +#include <stdio.h> +#include <sys/types.h> +#include "system.h" + +/* Number of bytes to read at once. 
*/ +#define BUFLEN (1 << 16) + +unsigned long crctab[256] = +{ + 0x0, + 0x04C11DB7, 0x09823B6E, 0x0D4326D9, 0x130476DC, 0x17C56B6B, + 0x1A864DB2, 0x1E475005, 0x2608EDB8, 0x22C9F00F, 0x2F8AD6D6, + 0x2B4BCB61, 0x350C9B64, 0x31CD86D3, 0x3C8EA00A, 0x384FBDBD, + 0x4C11DB70, 0x48D0C6C7, 0x4593E01E, 0x4152FDA9, 0x5F15ADAC, + 0x5BD4B01B, 0x569796C2, 0x52568B75, 0x6A1936C8, 0x6ED82B7F, + 0x639B0DA6, 0x675A1011, 0x791D4014, 0x7DDC5DA3, 0x709F7B7A, + 0x745E66CD, 0x9823B6E0, 0x9CE2AB57, 0x91A18D8E, 0x95609039, + 0x8B27C03C, 0x8FE6DD8B, 0x82A5FB52, 0x8664E6E5, 0xBE2B5B58, + 0xBAEA46EF, 0xB7A96036, 0xB3687D81, 0xAD2F2D84, 0xA9EE3033, + 0xA4AD16EA, 0xA06C0B5D, 0xD4326D90, 0xD0F37027, 0xDDB056FE, + 0xD9714B49, 0xC7361B4C, 0xC3F706FB, 0xCEB42022, 0xCA753D95, + 0xF23A8028, 0xF6FB9D9F, 0xFBB8BB46, 0xFF79A6F1, 0xE13EF6F4, + 0xE5FFEB43, 0xE8BCCD9A, 0xEC7DD02D, 0x34867077, 0x30476DC0, + 0x3D044B19, 0x39C556AE, 0x278206AB, 0x23431B1C, 0x2E003DC5, + 0x2AC12072, 0x128E9DCF, 0x164F8078, 0x1B0CA6A1, 0x1FCDBB16, + 0x018AEB13, 0x054BF6A4, 0x0808D07D, 0x0CC9CDCA, 0x7897AB07, + 0x7C56B6B0, 0x71159069, 0x75D48DDE, 0x6B93DDDB, 0x6F52C06C, + 0x6211E6B5, 0x66D0FB02, 0x5E9F46BF, 0x5A5E5B08, 0x571D7DD1, + 0x53DC6066, 0x4D9B3063, 0x495A2DD4, 0x44190B0D, 0x40D816BA, + 0xACA5C697, 0xA864DB20, 0xA527FDF9, 0xA1E6E04E, 0xBFA1B04B, + 0xBB60ADFC, 0xB6238B25, 0xB2E29692, 0x8AAD2B2F, 0x8E6C3698, + 0x832F1041, 0x87EE0DF6, 0x99A95DF3, 0x9D684044, 0x902B669D, + 0x94EA7B2A, 0xE0B41DE7, 0xE4750050, 0xE9362689, 0xEDF73B3E, + 0xF3B06B3B, 0xF771768C, 0xFA325055, 0xFEF34DE2, 0xC6BCF05F, + 0xC27DEDE8, 0xCF3ECB31, 0xCBFFD686, 0xD5B88683, 0xD1799B34, + 0xDC3ABDED, 0xD8FBA05A, 0x690CE0EE, 0x6DCDFD59, 0x608EDB80, + 0x644FC637, 0x7A089632, 0x7EC98B85, 0x738AAD5C, 0x774BB0EB, + 0x4F040D56, 0x4BC510E1, 0x46863638, 0x42472B8F, 0x5C007B8A, + 0x58C1663D, 0x558240E4, 0x51435D53, 0x251D3B9E, 0x21DC2629, + 0x2C9F00F0, 0x285E1D47, 0x36194D42, 0x32D850F5, 0x3F9B762C, + 0x3B5A6B9B, 0x0315D626, 0x07D4CB91, 0x0A97ED48, 0x0E56F0FF, + 
0x1011A0FA, 0x14D0BD4D, 0x19939B94, 0x1D528623, 0xF12F560E, + 0xF5EE4BB9, 0xF8AD6D60, 0xFC6C70D7, 0xE22B20D2, 0xE6EA3D65, + 0xEBA91BBC, 0xEF68060B, 0xD727BBB6, 0xD3E6A601, 0xDEA580D8, + 0xDA649D6F, 0xC423CD6A, 0xC0E2D0DD, 0xCDA1F604, 0xC960EBB3, + 0xBD3E8D7E, 0xB9FF90C9, 0xB4BCB610, 0xB07DABA7, 0xAE3AFBA2, + 0xAAFBE615, 0xA7B8C0CC, 0xA379DD7B, 0x9B3660C6, 0x9FF77D71, + 0x92B45BA8, 0x9675461F, 0x8832161A, 0x8CF30BAD, 0x81B02D74, + 0x857130C3, 0x5D8A9099, 0x594B8D2E, 0x5408ABF7, 0x50C9B640, + 0x4E8EE645, 0x4A4FFBF2, 0x470CDD2B, 0x43CDC09C, 0x7B827D21, + 0x7F436096, 0x7200464F, 0x76C15BF8, 0x68860BFD, 0x6C47164A, + 0x61043093, 0x65C52D24, 0x119B4BE9, 0x155A565E, 0x18197087, + 0x1CD86D30, 0x029F3D35, 0x065E2082, 0x0B1D065B, 0x0FDC1BEC, + 0x3793A651, 0x3352BBE6, 0x3E119D3F, 0x3AD08088, 0x2497D08D, + 0x2056CD3A, 0x2D15EBE3, 0x29D4F654, 0xC5A92679, 0xC1683BCE, + 0xCC2B1D17, 0xC8EA00A0, 0xD6AD50A5, 0xD26C4D12, 0xDF2F6BCB, + 0xDBEE767C, 0xE3A1CBC1, 0xE760D676, 0xEA23F0AF, 0xEEE2ED18, + 0xF0A5BD1D, 0xF464A0AA, 0xF9278673, 0xFDE69BC4, 0x89B8FD09, + 0x8D79E0BE, 0x803AC667, 0x84FBDBD0, 0x9ABC8BD5, 0x9E7D9662, + 0x933EB0BB, 0x97FFAD0C, 0xAFB010B1, 0xAB710D06, 0xA6322BDF, + 0xA2F33668, 0xBCB4666D, 0xB8757BDA, 0xB5365D03, 0xB1F740B4 +}; + +/* The name this program was run with. */ +char *program_name; + +/* Nonzero if any of the files read were the standard input. */ +int have_read_stdin; + +/* Calculate and print the checksum and length in bytes + of file FILE, or of the standard input if FILE is "-". + If PRINT_NAME is nonzero, print FILE next to the checksum and size. + Return 0 if successful, -1 if an error occurs. 
*/ + +int +cksum (file, print_name) + char *file; + int print_name; +{ + unsigned char buf[BUFLEN]; + unsigned long crc = 0; + long length = 0; + long bytes_read; + register FILE *fp; + + if (!strcmp (file, "-")) + { + fp = stdin; + have_read_stdin = 1; + } + else + { + fp = fopen (file, "r"); + if (fp == NULL) + { + error (0, errno, "%s", file); + return -1; + } + } + + while ((bytes_read = fread (buf, 1, BUFLEN, fp)) > 0) + { + unsigned char *cp = buf; + + length += bytes_read; + while (bytes_read--) + crc = (crc << 8) ^ crctab[((crc >> 24) ^ *(cp++)) & 0xFF]; + } + + if (ferror (fp)) + { + error (0, errno, "%s", file); + if (strcmp (file, "-")) + fclose (fp); + return -1; + } + + if (strcmp (file, "-") && fclose (fp) == EOF) + { + error (0, errno, "%s", file); + return -1; + } + + bytes_read = length; + while (bytes_read > 0) + { + crc = (crc << 8) ^ crctab[((crc >> 24) ^ bytes_read) & 0xFF]; + bytes_read >>= 8; + } + + crc = ~crc & 0xFFFFFFFF; + + printf ("%10lu %8ld", crc, length); + if (print_name) + printf (" %s", file); + putchar ('\n'); + + return 0; +} + +void +main (argc, argv) + int argc; + char **argv; +{ + int errors = 0; + + program_name = argv[0]; + have_read_stdin = 0; + + if (argc == 1) + { + if (cksum ("-", 0) < 0) + errors = 1; + } + else + { + int optind; + + for (optind = 1; optind < argc; ++optind) + if (cksum (argv[optind], 1) < 0) + errors = 1; + } + + if (have_read_stdin && fclose (stdin) == EOF) + error (1, errno, "-"); + exit (errors); +} + +#endif /* !CRCTAB */ diff --git a/src/comm.c b/src/comm.c new file mode 100644 index 000000000..4362b6475 --- /dev/null +++ b/src/comm.c @@ -0,0 +1,221 @@ +/* comm -- compare two sorted files line by line. + Copyright (C) 1986, 1990, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Richard Stallman and David MacKenzie. */ + +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" +#include "linebuffer.h" + +#define min(x, y) ((x) < (y) ? (x) : (y)) + +/* If nonzero, print lines that are found only in file 1. */ +int only_file_1; + +/* If nonzero, print lines that are found only in file 2. */ +int only_file_2; + +/* If nonzero, print lines that are found in both files. */ +int both; + +/* The name this program was run with. */ +char *program_name; + +int compare_files (); +void error (); +void writeline (); +void usage (); + +void +main (argc, argv) + int argc; + char *argv[]; +{ + int c; + + program_name = argv[0]; + + only_file_1 = 1; + only_file_2 = 1; + both = 1; + + while ((c = getopt (argc, argv, "123")) != EOF) + switch (c) + { + case '1': + only_file_1 = 0; + break; + + case '2': + only_file_2 = 0; + break; + + case '3': + both = 0; + break; + + default: + usage (); + } + + if (optind + 2 != argc) + usage (); + + exit (compare_files (argv + optind)); +} + +/* Compare INFILES[0] and INFILES[1]. + If either is "-", use the standard input for that file. + Assume that each input file is sorted; + merge them and output the result. + Return 0 if successful, 1 if any errors occur. */ + +int +compare_files (infiles) + char **infiles; +{ + /* For each file, we have one linebuffer in lb1. */ + struct linebuffer lb1[2]; + + /* thisline[i] points to the linebuffer holding the next available line + in file i, or is NULL if there are no lines left in that file. 
*/ + struct linebuffer *thisline[2]; + + /* streams[i] holds the input stream for file i. */ + FILE *streams[2]; + + int i, ret = 0; + + /* Initialize the storage. */ + for (i = 0; i < 2; i++) + { + initbuffer (&lb1[i]); + thisline[i] = &lb1[i]; + streams[i] = strcmp (infiles[i], "-") + ? fopen (infiles[i], "r") : stdin; + if (!streams[i]) + { + error (0, errno, "%s", infiles[i]); + return 1; + } + + thisline[i] = readline (thisline[i], streams[i]); + } + + while (thisline[0] || thisline[1]) + { + int order; + + /* Compare the next available lines of the two files. */ + + if (!thisline[0]) + order = 1; + else if (!thisline[1]) + order = -1; + else + { + /* Cannot use bcmp -- it only returns a boolean value. */ + order = memcmp (thisline[0]->buffer, thisline[1]->buffer, + min (thisline[0]->length, thisline[1]->length)); + if (order == 0) + order = thisline[0]->length - thisline[1]->length; + } + + /* Output the line that is lesser. */ + if (order == 0) + writeline (thisline[1], stdout, 3); + else if (order > 0) + writeline (thisline[1], stdout, 2); + else + writeline (thisline[0], stdout, 1); + + /* Step the file the line came from. + If the files match, step both files. */ + if (order >= 0) + thisline[1] = readline (thisline[1], streams[1]); + if (order <= 0) + thisline[0] = readline (thisline[0], streams[0]); + } + + /* Free all storage and close all input streams. */ + for (i = 0; i < 2; i++) + { + free (lb1[i].buffer); + if (ferror (streams[i]) || fclose (streams[i]) == EOF) + { + error (0, errno, "%s", infiles[i]); + ret = 1; + } + } + if (ferror (stdout) || fclose (stdout) == EOF) + { + error (0, errno, "write error"); + ret = 1; + } + return ret; +} + +/* Output the line in linebuffer LINE to stream STREAM + provided the switches say it should be output. + CLASS is 1 for a line found only in file 1, + 2 for a line only in file 2, 3 for a line in both. 
*/ + +void +writeline (line, stream, class) + struct linebuffer *line; + FILE *stream; + int class; +{ + switch (class) + { + case 1: + if (!only_file_1) + return; + break; + + case 2: + if (!only_file_2) + return; + /* Skip the tab stop for case 1, if we are printing case 1. */ + if (only_file_1) + putc ('\t', stream); + break; + + case 3: + if (!both) + return; + /* Skip the tab stop for case 1, if we are printing case 1. */ + if (only_file_1) + putc ('\t', stream); + /* Skip the tab stop for case 2, if we are printing case 2. */ + if (only_file_2) + putc ('\t', stream); + break; + } + + fwrite (line->buffer, sizeof (char), line->length, stream); + putc ('\n', stream); +} + +void +usage () +{ + fprintf (stderr, "Usage: %s [-123] file1 file2\n", program_name); + exit (1); +} diff --git a/src/csplit.c b/src/csplit.c new file mode 100644 index 000000000..56bffa385 --- /dev/null +++ b/src/csplit.c @@ -0,0 +1,1308 @@ +/* csplit - split a file into sections determined by context lines + Copyright (C) 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au. + Modified by David MacKenzie, djm@gnu.ai.mit.edu. 
*/ + +#include <stdio.h> +#include <getopt.h> +#include <ctype.h> +#include <sys/types.h> +#include <signal.h> +#include "regex.h" +#include "system.h" + +#if !defined(USG) && !defined(STDC_HEADERS) +char *memchr (); +#endif + +#ifdef STDC_HEADERS +#include <stdlib.h> +#else +char *malloc (); +char *realloc (); +#endif + +void error (); + +void cleanup (); +void close_output_file (); +void create_output_file (); +void save_line_to_file (); +void usage (); + +#ifndef TRUE +#define FALSE 0 +#define TRUE 1 +#endif + +/* Increment size of area for control records. */ +#define ALLOC_SIZE 20 + +/* The default prefix for output file names. */ +#define DEFAULT_PREFIX "xx" + +typedef int boolean; + +/* A compiled pattern arg. */ +struct control +{ + char *regexpr; /* Non-compiled regular expression. */ + struct re_pattern_buffer re_compiled; /* Compiled regular expression. */ + int offset; /* Offset from regexp to split at. */ + int lines_required; /* Number of lines required. */ + int repeat; /* Repeat count. */ + int argnum; /* ARGV index. */ + boolean ignore; /* If true, produce no output (for regexp). */ +}; + +/* Initial size of data area in buffers. */ +#define START_SIZE 8191 + +/* Increment size for data area. */ +#define INCR_SIZE 2048 + +/* Number of lines kept in each node in line list. */ +#define CTRL_SIZE 80 + +#ifdef DEBUG +/* Some small values to test the algorithms. */ +#define START_SIZE 200 +#define INCR_SIZE 10 +#define CTRL_SIZE 1 +#endif + +/* A string with a length count. */ +struct cstring +{ + int len; + char *str; +}; + +/* Pointers to the beginnings of lines in the buffer area. + These structures are linked together if needed. */ +struct line +{ + unsigned used; /* Number of offsets used in this struct. */ + unsigned insert_index; /* Next offset to use when inserting line. */ + unsigned retrieve_index; /* Next index to use when retrieving line. */ + struct cstring starts[CTRL_SIZE]; /* Lines in the data area. 
*/ + struct line *next; /* Next in linked list. */ +}; + +/* The structure to hold the input lines. + Contains a pointer to the data area and a list containing + pointers to the individual lines. */ +struct buffer_record +{ + unsigned bytes_alloc; /* Size of the buffer area. */ + unsigned bytes_used; /* Bytes used in the buffer area. */ + unsigned start_line; /* First line number in this buffer. */ + unsigned first_available; /* First line that can be retrieved. */ + unsigned num_lines; /* Number of complete lines in this buffer. */ + char *buffer; /* Data area. */ + struct line *line_start; /* Head of list of pointers to lines. */ + struct line *curr_line; /* The line start record currently in use. */ + struct buffer_record *next; +}; + +/* Input file descriptor. */ +int input_desc = 0; + +/* List of available buffers. */ +struct buffer_record *free_list = NULL; + +/* Start of buffer list. */ +struct buffer_record *head = NULL; + +/* Partially read line. */ +char *hold_area = NULL; + +/* Number of chars in `hold_area'. */ +unsigned hold_count = 0; + +/* Number of the last line in the buffers. */ +unsigned last_line_number = 0; + +/* Number of the line currently being examined. */ +unsigned current_line = 0; + +/* Number of the last line in the input file. */ +unsigned last_line_in_file = 0; + +/* If TRUE, we have read EOF. */ +boolean have_read_eof = FALSE; + +/* Name of output files. */ +char *filename_space = NULL; + +/* Prefix part of output file names. */ +char *prefix = NULL; + +/* Number of digits to use in output file names. */ +int digits = 2; + +/* Number of files created so far. */ +unsigned files_created = 0; + +/* Number of bytes written to current file. */ +unsigned bytes_written; + +/* Output file pointer. */ +FILE *output_stream = NULL; + +/* Perhaps it would be cleaner to pass arg values instead of indexes. */ +char **global_argv; + +/* If TRUE, do not print the count of bytes in each output file. 
*/ +boolean suppress_count; + +/* If TRUE, remove output files on error. */ +boolean remove_files; + +/* The compiled pattern arguments, which determine how to split + the input file. */ +struct control *controls; + +/* Number of elements in `controls'. */ +unsigned control_used; + +/* The name this program was run with. */ +char *program_name; + +/* Allocate N bytes of memory dynamically, with error checking. */ + +char * +xmalloc (n) + unsigned n; +{ + char *p; + + p = malloc (n); + if (p == NULL) + { + error (0, 0, "virtual memory exhausted"); + cleanup (); + } + return p; +} + +/* Change the size of an allocated block of memory P to N bytes, + with error checking. + If P is NULL, run xmalloc. + If N is 0, run free and return NULL. */ + +char * +xrealloc (p, n) + char *p; + unsigned n; +{ + if (p == NULL) + return xmalloc (n); + if (n == 0) + { + free (p); + return 0; + } + p = realloc (p, n); + if (p == NULL) + { + error (0, 0, "virtual memory exhausted"); + cleanup (); + } + return p; +} + +/* Keep track of NUM chars of a partial line in buffer START. + These chars will be retrieved later when another large buffer is read. + It is not necessary to create a new buffer for these chars; instead, + we keep a pointer to the existing buffer. This buffer *is* on the + free list, and when the next buffer is obtained from this list + (even if it is this one), these chars will be placed at the + start of the new buffer. */ + +void +save_to_hold_area (start, num) + char *start; + unsigned num; +{ + hold_area = start; + hold_count = num; +} + +/* Read up to MAX chars from the input stream into DEST. + Return the number of chars read. 
*/ + +int +read_input (dest, max) + char *dest; + unsigned max; +{ + int bytes_read; + + if (max == 0) + return 0; + + bytes_read = read (input_desc, dest, max); + + if (bytes_read == 0) + have_read_eof = TRUE; + + if (bytes_read < 0) + { + error (0, errno, "read error"); + cleanup (); + } + + return bytes_read; +} + +/* Initialize existing line record P. */ + +void +clear_line_control (p) + struct line *p; +{ + p->used = 0; + p->insert_index = 0; + p->retrieve_index = 0; +} + +/* Initialize all line records in B. */ + +void +clear_all_line_control (b) + struct buffer_record *b; +{ + struct line *l; + + for (l = b->line_start; l; l = l->next) + clear_line_control (l); +} + +/* Return a new, initialized line record. */ + +struct line * +new_line_control () +{ + struct line *p; + + p = (struct line *) xmalloc (sizeof (struct line)); + + p->next = NULL; + clear_line_control (p); + + return p; +} + +/* Record LINE_START, which is the address of the start of a line + of length LINE_LEN in the large buffer, in the lines buffer of B. */ + +void +keep_new_line (b, line_start, line_len) + struct buffer_record *b; + char *line_start; + int line_len; +{ + struct line *l; + + /* If there is no existing area to keep line info, get some. */ + if (b->line_start == NULL) + b->line_start = b->curr_line = new_line_control (); + + /* If existing area for lines is full, get more. */ + if (b->curr_line->used == CTRL_SIZE) + { + b->curr_line->next = new_line_control (); + b->curr_line = b->curr_line->next; + } + + l = b->curr_line; + + /* Record the start of the line, and update counters. */ + l->starts[l->insert_index].str = line_start; + l->starts[l->insert_index].len = line_len; + l->used++; + l->insert_index++; +} + +/* Scan the buffer in B for newline characters + and record the line start locations and lengths in B. + Return the number of lines found in this buffer. 
+ + There may be an incomplete line at the end of the buffer; + a pointer is kept to this area, which will be used when + the next buffer is filled. */ + +unsigned +record_line_starts (b) + struct buffer_record *b; +{ + char *line_start; /* Start of current line. */ + char *line_end; /* End of each line found. */ + unsigned bytes_left; /* Length of incomplete last line. */ + unsigned lines; /* Number of lines found. */ + unsigned line_length; /* Length of each line found. */ + + if (b->bytes_used == 0) + return 0; + + lines = 0; + line_start = b->buffer; + bytes_left = b->bytes_used; + + for (;;) + { + line_end = memchr (line_start, '\n', bytes_left); + if (line_end == NULL) + break; + line_length = line_end - line_start + 1; + keep_new_line (b, line_start, line_length); + bytes_left -= line_length; + line_start = line_end + 1; + lines++; + } + + /* Check for an incomplete last line. */ + if (bytes_left) + { + if (have_read_eof) + { + keep_new_line (b, line_start, bytes_left); + lines++; + last_line_in_file = last_line_number + lines; + } + else + save_to_hold_area (line_start, bytes_left); + } + + b->num_lines = lines; + b->first_available = b->start_line = last_line_number + 1; + last_line_number += lines; + + return lines; +} + +/* Return a new buffer with room to store SIZE bytes, plus + an extra byte for safety. */ + +struct buffer_record * +create_new_buffer (size) + unsigned size; +{ + struct buffer_record *new_buffer; + + new_buffer = (struct buffer_record *) + xmalloc (sizeof (struct buffer_record)); + + new_buffer->buffer = (char *) xmalloc (size + 1); + + new_buffer->bytes_alloc = size; + new_buffer->line_start = new_buffer->curr_line = NULL; + + return new_buffer; +} + +/* Return a new buffer of at least MINSIZE bytes. If a buffer of at + least that size is currently free, use it, otherwise create a new one. 
*/ + +struct buffer_record * +get_new_buffer (min_size) + unsigned min_size; +{ + struct buffer_record *p, *q; + struct buffer_record *new_buffer; /* Buffer to return. */ + unsigned alloc_size; /* Actual size that will be requested. */ + + alloc_size = START_SIZE; + while (min_size > alloc_size) + alloc_size += INCR_SIZE; + + if (free_list == NULL) + new_buffer = create_new_buffer (alloc_size); + else + { + /* Use first-fit to find a buffer. */ + p = new_buffer = NULL; + q = free_list; + + do + { + if (q->bytes_alloc >= min_size) + { + if (p == NULL) + free_list = q->next; + else + p->next = q->next; + break; + } + p = q; + q = q->next; + } + while (q); + + new_buffer = (q ? q : create_new_buffer (alloc_size)); + + new_buffer->curr_line = new_buffer->line_start; + clear_all_line_control (new_buffer); + } + + new_buffer->num_lines = 0; + new_buffer->bytes_used = 0; + new_buffer->start_line = new_buffer->first_available = last_line_number + 1; + new_buffer->next = NULL; + + return new_buffer; +} + +/* Add buffer BUF to the list of free buffers. */ + +void +free_buffer (buf) + struct buffer_record *buf; +{ + buf->next = free_list; + free_list = buf; +} + +/* Append buffer BUF to the linked list of buffers that contain + some data yet to be processed. */ + +void +save_buffer (buf) + struct buffer_record *buf; +{ + struct buffer_record *p; + + buf->next = NULL; + buf->curr_line = buf->line_start; + + if (head == NULL) + head = buf; + else + { + for (p = head; p->next; p = p->next) + /* Do nothing. */ ; + p->next = buf; + } +} + +/* Fill a buffer of input. + + Set the initial size of the buffer to a default. + Fill the buffer (from the hold area and input stream) + and find the individual lines. + If no lines are found (the buffer is too small to hold the next line), + release the current buffer (whose contents would have been put in the + hold area) and repeat the process with another large buffer until at least + one entire line has been read. 
+ + Return TRUE if a new buffer was obtained, otherwise false + (in which case end-of-file must have been encountered). */ + +boolean +load_buffer () +{ + struct buffer_record *b; + unsigned bytes_wanted = START_SIZE; /* Minimum buffer size. */ + unsigned bytes_avail; /* Size of new buffer created. */ + unsigned lines_found; /* Number of lines in this new buffer. */ + char *p; /* Place to load into buffer. */ + + if (have_read_eof) + return FALSE; + + /* We must make the buffer at least as large as the amount of data + in the partial line left over from the last call. */ + if (bytes_wanted < hold_count) + bytes_wanted = hold_count; + + do + { + b = get_new_buffer (bytes_wanted); + bytes_avail = b->bytes_alloc; /* Size of buffer returned. */ + p = b->buffer; + + /* First check the `holding' area for a partial line. */ + if (hold_count) + { + if (p != hold_area) + bcopy (hold_area, p, hold_count); + p += hold_count; + b->bytes_used += hold_count; + bytes_avail -= hold_count; + hold_count = 0; + } + + b->bytes_used += (unsigned) read_input (p, bytes_avail); + + lines_found = record_line_starts (b); + bytes_wanted = b->bytes_alloc + INCR_SIZE; + if (!lines_found) + free_buffer (b); + } + while (!lines_found && !have_read_eof); + + if (lines_found) + save_buffer (b); + + return lines_found != 0; +} + +/* Return the line number of the first line that has not yet been retrieved. */ + +unsigned +get_first_line_in_buffer () +{ + if (head == NULL && !load_buffer ()) + error (1, errno, "input disappeared"); + + return head->first_available; +} + +/* Return a pointer to the logical first line in the buffer and make the + next line the logical first line. + Return NULL if there is no more input. */ + +struct cstring * +remove_line () +{ + struct cstring *line; /* Return value. */ + unsigned line_got; /* Number of the line retrieved. */ + struct line *l; /* For convenience. 
*/ + + if (head == NULL && !load_buffer ()) + return NULL; + + if (current_line < head->first_available) + current_line = head->first_available; + + line_got = head->first_available++; + + l = head->curr_line; + + line = &l->starts[l->retrieve_index]; + + /* Advance index to next line. */ + if (++l->retrieve_index == l->used) + { + /* Go on to the next line record. */ + head->curr_line = l->next; + if (head->curr_line == NULL || head->curr_line->used == 0) + { + /* Go on to the next data block. */ + struct buffer_record *b = head; + head = head->next; + free_buffer (b); + } + } + + return line; +} + +/* Search the buffers for line LINENUM, reading more input if necessary. + Return a pointer to the line, or NULL if it is not found in the file. */ + +struct cstring * +find_line (linenum) + unsigned linenum; +{ + struct buffer_record *b; + + if (head == NULL && !load_buffer ()) + return NULL; + + if (linenum < head->start_line) + return NULL; + + for (b = head;;) + { + if (linenum < b->start_line + b->num_lines) + { + /* The line is in this buffer. */ + struct line *l; + unsigned offset; /* How far into the buffer the line is. */ + + l = b->line_start; + offset = linenum - b->start_line; + /* Find the control record. */ + while (offset >= CTRL_SIZE) + { + l = l->next; + offset -= CTRL_SIZE; + } + return &l->starts[offset]; + } + if (b->next == NULL && !load_buffer ()) + return NULL; + b = b->next; /* Try the next data block. */ + } +} + +/* Return TRUE if at least one more line is available for input. */ + +boolean +no_more_lines () +{ + return (find_line (current_line + 1) == NULL) ? TRUE : FALSE; +} + +/* Set the name of the input file to NAME and open it. 
*/ + +void +set_input_file (name) + char *name; +{ + if (!strcmp (name, "-")) + input_desc = 0; + else + { + input_desc = open (name, O_RDONLY); + if (input_desc < 0) + error (1, errno, "%s", name); + } +} + +/* Write all lines from the beginning of the buffer up to, but + not including, line LAST_LINE, to the current output file. + If IGNORE is TRUE, do not output lines selected here. + ARGNUM is the index in ARGV of the current pattern. */ + +void +write_to_file (last_line, ignore, argnum) + unsigned last_line; + boolean ignore; + int argnum; +{ + struct cstring *line; + unsigned first_line; /* First available input line. */ + unsigned lines; /* Number of lines to output. */ + unsigned i; + + first_line = get_first_line_in_buffer (); + + if (first_line > last_line) + { + error (0, 0, "%s: line number out of range", global_argv[argnum]); + cleanup (); + } + + lines = last_line - first_line; + + for (i = 0; i < lines; i++) + { + line = remove_line (); + if (line == NULL) + { + error (0, 0, "%s: line number out of range", global_argv[argnum]); + cleanup (); + } + if (!ignore) + save_line_to_file (line); + } +} + +/* Output any lines left after all regexps have been processed. */ + +void +dump_rest_of_file () +{ + struct cstring *line; + + while ((line = remove_line ()) != NULL) + save_line_to_file (line); +} + +/* Handle an attempt to read beyond EOF under the control of record P, + on iteration REPETITION if nonzero. */ + +void +handle_line_error (p, repetition) + struct control *p; + int repetition; +{ + fprintf (stderr, "%s: `%d': line number out of range", + program_name, p->lines_required); + if (repetition) + fprintf (stderr, " on repetition %d\n", repetition); + else + fprintf (stderr, "\n"); + + cleanup (); +} + +/* Determine the line number that marks the end of this file, + then get those lines and save them to the output file. + P is the control record. + REPETITION is the repetition number. 
*/ + +void +process_line_count (p, repetition) + struct control *p; + int repetition; +{ + unsigned linenum; + unsigned last_line_to_save = p->lines_required * (repetition + 1); + struct cstring *line; + + create_output_file (); + + linenum = get_first_line_in_buffer (); + + /* Check for requesting a line that has already been written out. + If this ever happens, it's due to a bug in csplit. */ + if (linenum >= last_line_to_save) + handle_line_error (p, repetition); + + while (linenum++ < last_line_to_save) + { + line = remove_line (); + if (line == NULL) + handle_line_error (p, repetition); + save_line_to_file (line); + } + + close_output_file (); + + /* Ensure that the line number specified is not 1 greater than + the number of lines in the file. */ + if (no_more_lines ()) + handle_line_error (p, repetition); +} + +void +regexp_error (p, repetition, ignore) + struct control *p; + int repetition; + boolean ignore; +{ + fprintf (stderr, "%s: `%s': match not found", + program_name, global_argv[p->argnum]); + + if (repetition) + fprintf (stderr, " on repetition %d\n", repetition); + else + fprintf (stderr, "\n"); + + if (!ignore) + { + dump_rest_of_file (); + close_output_file (); + } + cleanup (); +} + +/* Read the input until a line matches the regexp in P, outputting + it unless P->IGNORE is TRUE. + REPETITION is this repeat-count; 0 means the first time. */ + +void +process_regexp (p, repetition) + struct control *p; + int repetition; +{ + struct cstring *line; /* From input file. */ + register unsigned line_len; /* To make "$" in regexps work. */ + unsigned break_line; /* First line number of next file. */ + boolean ignore = p->ignore; /* If TRUE, skip this section. */ + int ret; + + if (!ignore) + create_output_file (); + + /* If there is no offset for the regular expression, or + it is positive, then it is not necessary to buffer the lines. 
*/ + + if (p->offset >= 0) + { + for (;;) + { + line = find_line (++current_line); + if (line == NULL) + regexp_error (p, repetition, ignore); + line_len = line->len; + if (line->str[line_len - 1] == '\n') + line_len--; + ret = re_search (&p->re_compiled, line->str, line_len, + 0, line_len, (struct re_registers *) 0); + if (ret == -2) + { + error (0, 0, "error in regular expression search"); + cleanup (); + } + if (ret == -1) + { + line = remove_line (); + if (!ignore) + save_line_to_file (line); + } + else + break; + } + } + else + { + /* Buffer the lines. */ + for (;;) + { + line = find_line (++current_line); + if (line == NULL) + regexp_error (p, repetition, ignore); + line_len = line->len; + if (line->str[line_len - 1] == '\n') + line_len--; + ret = re_search (&p->re_compiled, line->str, line_len, + 0, line_len, (struct re_registers *) 0); + if (ret == -2) + { + error (0, 0, "error in regular expression search"); + cleanup (); + } + if (ret >= 0) + break; + } + } + + /* Account for any offset from this regexp. */ + break_line = current_line + p->offset; + + write_to_file (break_line, ignore, p->argnum); + + if (!ignore) + close_output_file (); + + current_line = break_line; +} + +/* Split the input file according to the control records we have built. */ + +void +split_file () +{ + register int i, j; + + for (i = 0; i < control_used; i++) + { + if (controls[i].regexpr) + { + for (j = 0; j <= controls[i].repeat; j++) + process_regexp (&controls[i], j); + } + else + { + for (j = 0; j <= controls[i].repeat; j++) + process_line_count (&controls[i], j); + } + } + + create_output_file (); + dump_rest_of_file (); + close_output_file (); +} + +/* Return the name of output file number NUM. */ + +char * +make_filename (num) + int num; +{ + sprintf (filename_space, "%s%0*d", prefix, digits, num); + return filename_space; +} + +/* Create the next output file. 
*/ + +void +create_output_file () +{ + char *name; + + name = make_filename (files_created); + output_stream = fopen (name, "w"); + if (output_stream == NULL) + { + error (0, errno, "%s", name); + cleanup (); + } + files_created++; + bytes_written = 0; +} + +/* Delete all the files we have created. */ + +void +delete_all_files () +{ + int i; + char *name; + + for (i = 0; i < files_created; i++) + { + name = make_filename (i); + if (unlink (name)) + error (0, errno, "%s", name); + } +} + +/* Close the current output file and print the count + of characters in this file. */ + +void +close_output_file () +{ + if (output_stream) + { + if (fclose (output_stream) == EOF) + { + error (0, errno, "write error"); + cleanup (); + } + if (!suppress_count) + fprintf (stdout, "%d\n", bytes_written); + output_stream = NULL; + } +} + +/* Optionally remove files created so far; then exit. + Called when an error detected. */ + +void +cleanup () +{ + if (output_stream) + close_output_file (); + + if (remove_files) + delete_all_files (); + + exit (1); +} + +/* Save line LINE to the output file and + increment the character count for the current file. */ + +void +save_line_to_file (line) + struct cstring *line; +{ + fwrite (line->str, sizeof (char), line->len, output_stream); + bytes_written += line->len; +} + +/* Return a new, initialized control record. */ + +struct control * +new_control_record () +{ + static unsigned control_allocated = 0; /* Total space allocated. 
*/ + register struct control *p; + + if (control_allocated == 0) + { + control_allocated = ALLOC_SIZE; + controls = (struct control *) + xmalloc (sizeof (struct control) * control_allocated); + } + else if (control_used == control_allocated) + { + control_allocated += ALLOC_SIZE; + controls = (struct control *) + xrealloc (controls, sizeof (struct control) * control_allocated); + } + p = &controls[control_used++]; + p->regexpr = NULL; + p->repeat = 0; + p->lines_required = 0; + p->offset = 0; + return p; +} + +/* Convert string NUM to an integer and put the value in *RESULT. + Return a TRUE if the string consists entirely of digits, + FALSE if not. */ + +boolean +string_to_number (result, num) + int *result; + char *num; +{ + register char ch; + register int val = 0; + + if (*num == '\0') + return FALSE; + + while (ch = *num++) + { + if (!isdigit (ch)) + return FALSE; + val = val * 10 + ch - '0'; + } + + *result = val; + return TRUE; +} + +/* Check if there is a numeric offset after a regular expression. + STR is the entire command line argument. + ARGNUM is the index in ARGV of STR. + P is the control record for this regular expression. + NUM is the numeric part of STR. */ + +void +check_for_offset (argnum, p, str, num) + int argnum; + struct control *p; + char *str; + char *num; +{ + if (*num != '-' && *num != '+') + error (1, 0, "%s: `+' or `-' expected after delimeter", str); + + if (!string_to_number (&p->offset, num + 1)) + error (1, 0, "%s: integer expected after `%c'", str, *num); + + if (*num == '-') + p->offset = -p->offset; +} + +/* Given that the first character of command line arg STR is '{', + make sure that the rest of the string is a valid repeat count + and store its value in P. + ARGNUM is the ARGV index of STR. 
*/ + +void +parse_repeat_count (argnum, p, str) + int argnum; + struct control *p; + char *str; +{ + char *end; + + end = str + strlen (str) - 1; + if (*end != '}') + error (1, 0, "%s: `}' is required in repeat count", str); + *end = '\0'; + + if (!string_to_number (&p->repeat, str + 1)) + error (1, 0, "%s}: integer required between `{' and `}'", + global_argv[argnum]); + + *end = '}'; +} + +/* Extract the regular expression from STR and check for a numeric offset. + STR should start with the regexp delimiter character. + Return a new control record for the regular expression. + ARGNUM is the ARGV index of STR. + Unless IGNORE is TRUE, mark these lines for output. */ + +struct control * +extract_regexp (argnum, ignore, str) + int argnum; + boolean ignore; + char *str; +{ + int len; /* Number of chars in this regexp. */ + char delim = *str; + char *closing_delim; + struct control *p; + char *err; + + closing_delim = rindex (str + 1, delim); + if (closing_delim == NULL) + error (1, 0, "%s: closing delimeter `%c' missing", str, delim); + + len = closing_delim - str - 1; + p = new_control_record (); + p->argnum = argnum; + p->ignore = ignore; + + p->regexpr = (char *) xmalloc ((unsigned) (len + 1)); + strncpy (p->regexpr, str + 1, len); + p->re_compiled.allocated = len * 2; + p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated); + p->re_compiled.fastmap = xmalloc (256); + p->re_compiled.translate = 0; + err = re_compile_pattern (p->regexpr, len, &p->re_compiled); + if (err) + { + error (0, 0, "%s: invalid regular expression: %s", str, err); + cleanup (); + } + + if (closing_delim[1]) + check_for_offset (argnum, p, str, closing_delim + 1); + + return p; +} + +/* Extract the break patterns from args START through ARGC - 1 of ARGV. + After each pattern, check if the next argument is a repeat count. */ + +void +parse_patterns (argc, start, argv) + int argc; + int start; + char **argv; +{ + int i; /* Index into ARGV. 
*/ + struct control *p; /* New control record created. */ + + for (i = start; i < argc; i++) + { + if (*argv[i] == '/' || *argv[i] == '%') + { + p = extract_regexp (i, *argv[i] == '%', argv[i]); + } + else + { + p = new_control_record (); + p->argnum = i; + if (!string_to_number (&p->lines_required, argv[i])) + error (1, 0, "%s: invalid pattern", argv[i]); + } + + if (i + 1 < argc && *argv[i + 1] == '{') + { + /* We have a repeat count. */ + i++; + parse_repeat_count (i, p, argv[i]); + } + } +} + +void +interrupt_handler () +{ + error (0, 0, "interrupted"); + cleanup (); +} + +struct option longopts[] = +{ + {"digits", 1, NULL, 'n'}, + {"quiet", 0, NULL, 's'}, + {"silent", 0, NULL, 's'}, + {"keep-files", 0, NULL, 'k'}, + {"prefix", 1, NULL, 'f'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + int optc; +#ifdef _POSIX_VERSION + struct sigaction oldact, newact; +#endif /* _POSIX_VERSION */ + + program_name = argv[0]; + global_argv = argv; + controls = NULL; + control_used = 0; + suppress_count = FALSE; + remove_files = TRUE; + prefix = DEFAULT_PREFIX; + +#ifdef _POSIX_VERSION + newact.sa_handler = interrupt_handler; + sigemptyset (&newact.sa_mask); + newact.sa_flags = 0; + + sigaction (SIGHUP, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGHUP, &newact, NULL); + + sigaction (SIGINT, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGINT, &newact, NULL); + + sigaction (SIGQUIT, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGQUIT, &newact, NULL); + + sigaction (SIGTERM, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGTERM, &newact, NULL); +#else /* !_POSIX_VERSION */ + if (signal (SIGHUP, SIG_IGN) != SIG_IGN) + signal (SIGHUP, interrupt_handler); + if (signal (SIGINT, SIG_IGN) != SIG_IGN) + signal (SIGINT, interrupt_handler); + if (signal (SIGQUIT, SIG_IGN) != SIG_IGN) + signal (SIGQUIT, interrupt_handler); + if (signal (SIGTERM, SIG_IGN) != SIG_IGN) + 
signal (SIGTERM, interrupt_handler); +#endif + + while ((optc = getopt_long (argc, argv, "f:kn:s", longopts, (int *) 0)) + != EOF) + switch (optc) + { + case 'f': + prefix = optarg; + break; + + case 'k': + remove_files = FALSE; + break; + + case 'n': + if (!string_to_number (&digits, optarg)) + error (1, 0, "%s: invalid number", optarg); + break; + + case 's': + suppress_count = TRUE; + break; + + default: + usage (); + } + + if (optind >= argc - 1) + usage (); + + filename_space = (char *) xmalloc (strlen (prefix) + digits + 2); + + set_input_file (argv[optind++]); + + parse_patterns (argc, optind, argv); + + split_file (); + + if (close (input_desc) < 0) + { + error (0, errno, "read error"); + cleanup (); + } + + exit (0); +} + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-sk] [-f prefix] [-n digits] [--prefix=prefix]\n\ + [--digits=digits] [--quiet] [--silent] [--keep-files] file pattern...\n", + program_name); + exit (1); +} diff --git a/src/cut.c b/src/cut.c new file mode 100644 index 000000000..93808b063 --- /dev/null +++ b/src/cut.c @@ -0,0 +1,586 @@ +/* cut - remove parts of lines of files + Copyright (C) 1984 by David M. Ihnat + + This program is a total rewrite of the Bell Laboratories Unix(Tm) + command of the same name, as of System V. It contains no proprietary + code, and therefore may be used without violation of any proprietary + agreements whatsoever. However, you will notice that the program is + copyrighted by me. This is to assure the program does *not* fall + into the public domain. Thus, I may specify just what I am now: + This program may be freely copied and distributed, provided this notice + remains; it may not be sold for profit without express written consent of + the author. + Please note that I recreated the behavior of the Unix(Tm) 'cut' command + as faithfully as possible; however, I haven't run a full set of regression + tests. 
Thus, the user of this program accepts full responsibility for any + effects or loss; in particular, the author is not responsible for any losses, + explicit or incidental, that may be incurred through use of this program. + + I ask that any bugs (and, if possible, fixes) be reported to me when + possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us + + POSIX changes, bug fixes, long-named options, and cleanup + by David MacKenzie <djm@ai.mit.edu>. + + Options: + --bytes=byte-list + -b byte-list Print only the bytes in positions listed + in BYTE-LIST. + Tabs and backspaces are treated like any + other character; they take up 1 byte. + + --characters=character-list + -c character-list Print only characters in positions listed + in CHARACTER-LIST. + The same as -b for now, but + internationalization will change that. + Tabs and backspaces are treated like any + other character; they take up 1 character. + + --fields=field-list + -f field-list Print only the fields listed in FIELD-LIST. + Fields are separated by a TAB by default. + + --delimiter=delim + -d delim For -f, fields are separated by the first + character in DELIM instead of TAB. + + -n Do not split multibyte chars (no-op for now). + + --only-delimited + -s For -f, do not print lines that do not contain + the field separator character. + + The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers + or ranges separated by commas. The first byte, character, and field + are numbered 1. + + A FILE of `-' means standard input. 
*/ + +#define _GNU_SOURCE +#include <ctype.h> +#ifndef isblank +#define isblank(c) ((c) == ' ' || (c) == '\t') +#endif +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" + +#ifdef isascii +#define ISDIGIT(c) (isascii ((c)) && isdigit ((c))) +#else +#define ISDIGIT(c) (isdigit ((c))) +#endif + +char *xmalloc (); +char *xrealloc (); +int set_fields (); +int cut_file (); +void cut_stream (); +void cut_bytes (); +void cut_fields (); +void enlarge_line (); +void error (); +void invalid_list (); +void usage (); + +/* The number of elements allocated for the input line + and the byte or field number. + Enlarged as necessary. */ +int line_size; + +/* Processed output buffer. */ +char *outbuf; + +/* Where to save next char to output. */ +char *outbufptr; + +/* Raw line buffer for field mode. */ +char *inbuf; + +/* Where to save next input char. */ +char *inbufptr; + +/* What can be done about a byte or field. */ +enum field_action +{ + field_omit, + field_output +}; + +/* In byte mode, which bytes to output. + In field mode, which `delim'-separated fields to output. + Both bytes and fields are numbered starting with 1, + so the first element of `fields' is unused. */ +enum field_action *fields; + +enum operating_mode +{ + undefined_mode, + + /* Output characters that are in the given bytes. */ + byte_mode, + + /* Output the given delimeter-separated fields. */ + field_mode +}; + +enum operating_mode operating_mode; + +/* If nonzero, + for field mode, do not output lines containing no delimeter characters. */ +int delimited_lines_only; + +/* The delimeter character for field mode. */ +unsigned char delim; + +/* Nonzero if we have ever read standard input. */ +int have_read_stdin; + +/* The name this program was run with. 
*/ +char *program_name; + +struct option longopts[] = +{ + {"bytes", 1, 0, 'b'}, + {"characters", 1, 0, 'c'}, + {"fields", 1, 0, 'f'}, + {"delimiter", 1, 0, 'd'}, + {"only-delimited", 0, 0, 's'}, + {0, 0, 0, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + int optc, exit_status = 0; + + program_name = argv[0]; + + line_size = 512; + operating_mode = undefined_mode; + delimited_lines_only = 0; + delim = '\0'; + have_read_stdin = 0; + + fields = (enum field_action *) + xmalloc (line_size * sizeof (enum field_action)); + outbuf = (char *) xmalloc (line_size); + inbuf = (char *) xmalloc (line_size); + + for (optc = 0; optc < line_size; optc++) + fields[optc] = field_omit; + + while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0)) + != EOF) + { + switch (optc) + { + case 'b': + case 'c': + /* Build the byte list. */ + if (operating_mode != undefined_mode) + usage (); + operating_mode = byte_mode; + if (set_fields (optarg) == 0) + error (2, 0, "no fields given"); + break; + + case 'f': + /* Build the field list. */ + if (operating_mode != undefined_mode) + usage (); + operating_mode = field_mode; + if (set_fields (optarg) == 0) + error (2, 0, "no fields given"); + break; + + case 'd': + /* New delimiter. 
*/ + if (optarg[0] == '\0') + error (2, 0, "no delimiter given"); + if (optarg[1] != '\0') + error (2, 0, "delimiter must be a single character"); + delim = optarg[0]; + break; + + case 'n': + break; + + case 's': + delimited_lines_only++; + break; + + default: + usage (); + } + } + + if (operating_mode == undefined_mode) + usage (); + + if ((delimited_lines_only || delim != '\0') && operating_mode != field_mode) + usage (); + + if (delim == '\0') + delim = '\t'; + + if (optind == argc) + exit_status |= cut_file ("-"); + else + for (; optind < argc; optind++) + exit_status |= cut_file (argv[optind]); + + if (have_read_stdin && fclose (stdin) == EOF) + { + error (0, errno, "-"); + exit_status = 1; + } + if (ferror (stdout) || fclose (stdout) == EOF) + error (1, 0, "write error"); + + exit (exit_status); +} + +/* Select for printing the positions in `fields' that are listed in + byte or field specification FIELDSTR. FIELDSTR should be + composed of one or more numbers or ranges of numbers, separated by + blanks or commas. Incomplete ranges may be given: `-m' means + `1-m'; `n-' means `n' through end of line or last field. + + Return the number of fields selected. */ + +int +set_fields (fieldstr) + char *fieldstr; +{ + int initial = 1; /* Value of first number in a range. */ + int dash_found = 0; /* Nonzero if a '-' is found in this field. */ + int value = 0; /* If nonzero, a number being accumulated. */ + int fields_selected = 0; /* Number of fields selected so far. */ + /* If nonzero, index of first field in a range that goes to end of line. */ + int eol_range_start = 0; + + for (;;) + { + if (*fieldstr == '-') + { + /* Starting a range. */ + if (dash_found) + invalid_list (); + dash_found++; + fieldstr++; + + if (value) + { + if (value >= line_size) + enlarge_line (value); + initial = value; + value = 0; + } + else + initial = 1; + } + else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0') + { + /* Ending the string, or this field/byte sublist. 
*/ + if (dash_found) + { + dash_found = 0; + + /* A range. Possibilites: -n, m-n, n-. + In any case, `initial' contains the start of the range. */ + if (value == 0) + { + /* `n-'. From `initial' to end of line. */ + eol_range_start = initial; + fields_selected++; + } + else + { + /* `m-n' or `-n' (1-n). */ + if (value < initial) + invalid_list (); + + if (value >= line_size) + enlarge_line (value); + + /* Is there already a range going to end of line? */ + if (eol_range_start != 0) + { + /* Yes. Is the new sequence already contained + in the old one? If so, no processing is + necessary. */ + if (initial < eol_range_start) + { + /* No, the new sequence starts before the + old. Does the old range going to end of line + extend into the new range? */ + if (eol_range_start < value) + /* Yes. Simply move the end of line marker. */ + eol_range_start = initial; + else + { + /* No. A simple range, before and disjoint from + the range going to end of line. Fill it. */ + for (; initial <= value; initial++) + fields[initial] = field_output; + } + + /* In any case, some fields were selected. */ + fields_selected++; + } + } + else + { + /* There is no range going to end of line. */ + for (; initial <= value; initial++) + fields[initial] = field_output; + fields_selected++; + } + value = 0; + } + } + else if (value != 0) + { + /* A simple field number, not a range. */ + if (value >= line_size) + enlarge_line (value); + + fields[value] = field_output; + value = 0; + fields_selected++; + } + + if (*fieldstr == '\0') + { + /* If there was a range going to end of line, fill the + array from the end of line point. */ + if (eol_range_start) + for (initial = eol_range_start; initial < line_size; initial++) + fields[initial] = field_output; + + return fields_selected; + } + + fieldstr++; + } + else if (ISDIGIT (*fieldstr)) + { + value = 10 * value + *fieldstr - '0'; + fieldstr++; + } + else + invalid_list (); + } +} + +/* Process file FILE to standard output. 
+ Return 0 if successful, 1 if not. */ + +int +cut_file (file) + char *file; +{ + FILE *stream; + + if (!strcmp (file, "-")) + { + have_read_stdin = 1; + stream = stdin; + } + else + { + stream = fopen (file, "r"); + if (stream == NULL) + { + error (0, errno, "%s", file); + return 1; + } + } + + cut_stream (stream); + + if (ferror (stream)) + { + error (0, errno, "%s", file); + return 1; + } + if (!strcmp (file, "-")) + clearerr (stream); /* Also clear EOF. */ + else if (fclose (stream) == EOF) + { + error (0, errno, "%s", file); + return 1; + } + return 0; +} + +void +cut_stream (stream) + FILE *stream; +{ + if (operating_mode == byte_mode) + cut_bytes (stream); + else + cut_fields (stream); +} + +/* Print the file open for reading on stream STREAM + with the bytes marked `field_omit' in `fields' removed from each line. */ + +void +cut_bytes (stream) + FILE *stream; +{ + register int c; /* Each character from the file. */ + int doneflag = 0; /* Nonzero if EOF reached. */ + int char_count; /* Number of chars in the line so far. */ + + while (doneflag == 0) + { + /* Start processing a line. */ + outbufptr = outbuf; + char_count = 0; + + do + { + c = getc (stream); + if (c == EOF) + { + doneflag++; + break; + } + + /* If this character is to be sent, stow it in the outbuffer. */ + + if (++char_count == line_size - 1) + enlarge_line (char_count); + + if (fields[char_count] == field_output || c == '\n') + *outbufptr++ = c; + } + while (c != '\n'); + + if (char_count) + fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout); + } +} + +/* Print the file open for reading on stream STREAM + with the fields marked `field_omit' in `fields' removed from each line. + All characters are initially stowed in the raw input buffer, until + at least one field has been found. */ + +void +cut_fields (stream) + FILE *stream; +{ + register int c; /* Each character from the file. */ + int doneflag = 0; /* Nonzero if EOF reached. 
*/ + int char_count; /* Number of chars in line before any delim. */ + int fieldfound; /* Nonzero if any fields to print found. */ + int curr_field; /* Current index in `fields'. */ + + while (doneflag == 0) + { + char_count = 0; + fieldfound = 0; + curr_field = 1; + outbufptr = outbuf; + inbufptr = inbuf; + + do + { + c = getc (stream); + if (c == EOF) + { + doneflag++; + break; + } + + if (fields[curr_field] == field_output && c != '\n') + { + /* Working on a field. It, and its terminating + delimiter, go only into the processed buffer. */ + fieldfound = 1; + if (outbufptr - outbuf == line_size - 2) + enlarge_line (outbufptr - outbuf); + *outbufptr++ = c; + } + else if (fieldfound == 0) + { + if (++char_count == line_size - 1) + enlarge_line (char_count); + *inbufptr++ = c; + } + + if (c == delim && ++curr_field == line_size - 1) + enlarge_line (curr_field); + } + while (c != '\n'); + + if (fieldfound) + { + /* Something was found. Print it. */ + if (outbufptr[-1] == delim) + --outbufptr; /* Suppress trailing delimiter. */ + + fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout); + if (c == '\n') + putc (c, stdout); + } + else if (!delimited_lines_only && char_count) + /* A line with some characters, no delimiters, and no + suppression. Print it. */ + fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout); + } +} + +/* Extend the buffers to accomodate at least NEW_SIZE characters. */ + +void +enlarge_line (new_size) + int new_size; +{ + char *newp; + int i; + + new_size += 256; /* Leave some room to grow. 
*/ + + fields = (enum field_action *) + xrealloc (fields, new_size * sizeof (enum field_action)); + + newp = (char *) xrealloc (outbuf, new_size); + outbufptr += newp - outbuf; + outbuf = newp; + + newp = (char *) xrealloc (inbuf, new_size); + inbufptr += newp - inbuf; + inbuf = newp; + + for (i = line_size; i < new_size; i++) + fields[i] = field_omit; + line_size = new_size; +} + +void +invalid_list () +{ + error (2, 0, "invalid byte or field list"); +} + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n\ + %s {-c character-list,--characters=character-list} [file...]\n\ + %s {-f field-list,--fields=field-list} [-d delim] [-s]\n\ + [--delimiter=delim] [--only-delimited] [file...]\n", + program_name, program_name, program_name); + exit (2); +} diff --git a/src/expand.c b/src/expand.c new file mode 100644 index 000000000..8e471379e --- /dev/null +++ b/src/expand.c @@ -0,0 +1,377 @@ +/* expand - convert tabs to spaces + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* By default, convert all tabs to spaces. + Preserves backspace characters in the output; they decrement the + column count for tab calculations. + The default action is equivalent to -8. 
+ + Options: + --tabs=tab1[,tab2[,...]] + -t tab1[,tab2[,...]] + -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1 + spaces apart instead of the default 8. Otherwise, + set the tabs at columns tab1, tab2, etc. (numbered from + 0); replace any tabs beyond the tabstops given with + single spaces. + --initial + -i Only convert initial tabs on each line to spaces. + + David MacKenzie <djm@ai.mit.edu> */ + +#define _GNU_SOURCE +#include <ctype.h> +#ifndef isblank +#define isblank(c) ((c) == ' ' || (c) == '\t') +#endif +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" + +#ifdef isascii +#define ISDIGIT(c) (isascii ((c)) && isdigit ((c))) +#else +#define ISDIGIT(c) (isdigit ((c))) +#endif + +/* The number of bytes added at a time to the amount of memory + allocated for the output line. */ +#define OUTPUT_BLOCK 256 + +/* The number of bytes added at a time to the amount of memory + allocated for the list of tabstops. */ +#define TABLIST_BLOCK 256 + +char *xmalloc (); +char *xrealloc (); +void error (); + +FILE *next_file (); +void add_tabstop (); +void expand (); +void parse_tabstops (); +void usage (); +void validate_tabstops (); + +/* If nonzero, convert blanks even after nonblank characters have been + read on the line. */ +int convert_entire_line; + +/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */ +int tab_size; + +/* Array of the explicit column numbers of the tab stops; + after `tab_list' is exhausted, each additional tab is replaced + by a space. The first column is column 0. */ +int *tab_list; + +/* The index of the first invalid element of `tab_list', + where the next element can be added. */ +int first_free_tab; + +/* Null-terminated array of input filenames. */ +char **file_list; + +/* Default for `file_list' if no files are given on the command line. */ +char *stdin_argv[] = +{ + "-", NULL +}; + +/* Nonzero if we have ever read standard input. 
*/ +int have_read_stdin; + +/* Status to return to the system. */ +int exit_status; + +/* The name this program was run with. */ +char *program_name; + +struct option longopts[] = +{ + {"tabs", 1, NULL, 't'}, + {"initial", 0, NULL, 'i'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + int tabval = -1; /* Value of tabstop being read, or -1. */ + int c; /* Option character. */ + + have_read_stdin = 0; + exit_status = 0; + convert_entire_line = 1; + tab_list = NULL; + first_free_tab = 0; + program_name = argv[0]; + + while ((c = getopt_long (argc, argv, "it:,0123456789", longopts, (int *) 0)) + != EOF) + { + switch (c) + { + case '?': + usage (); + case 'i': + convert_entire_line = 0; + break; + case 't': + parse_tabstops (optarg); + break; + case ',': + add_tabstop (tabval); + tabval = -1; + break; + default: + if (tabval == -1) + tabval = 0; + tabval = tabval * 10 + c - '0'; + break; + } + } + + add_tabstop (tabval); + + validate_tabstops (tab_list, first_free_tab); + + if (first_free_tab == 0) + tab_size = 8; + else if (first_free_tab == 1) + tab_size = tab_list[0]; + else + tab_size = 0; + + if (optind == argc) + file_list = stdin_argv; + else + file_list = &argv[optind]; + + expand (); + + if (have_read_stdin && fclose (stdin) == EOF) + error (1, errno, "-"); + if (ferror (stdout) || fclose (stdout) == EOF) + error (1, 0, "write error"); + + exit (exit_status); +} + +/* Add the comma or blank separated list of tabstops STOPS + to the list of tabstops. 
*/ + +void +parse_tabstops (stops) + char *stops; +{ + int tabval = -1; + + for (; *stops; stops++) + { + if (*stops == ',' || isblank (*stops)) + { + add_tabstop (tabval); + tabval = -1; + } + else if (ISDIGIT (*stops)) + { + if (tabval == -1) + tabval = 0; + tabval = tabval * 10 + *stops - '0'; + } + else + error (1, 0, "tab size contains an invalid character"); + } + + add_tabstop (tabval); +} + +/* Add tab stop TABVAL to the end of `tab_list', except + if TABVAL is -1, do nothing. */ + +void +add_tabstop (tabval) + int tabval; +{ + if (tabval == -1) + return; + if (first_free_tab % TABLIST_BLOCK == 0) + tab_list = (int *) xrealloc (tab_list, first_free_tab + TABLIST_BLOCK); + tab_list[first_free_tab++] = tabval; +} + +/* Check that the list of tabstops TABS, with ENTRIES entries, + contains only nonzero, ascending values. */ + +void +validate_tabstops (tabs, entries) + int *tabs; + int entries; +{ + int prev_tab = 0; + int i; + + for (i = 0; i < entries; i++) + { + if (tabs[i] == 0) + error (1, 0, "tab size cannot be 0"); + if (tabs[i] <= prev_tab) + error (1, 0, "tab sizes must be ascending"); + prev_tab = tabs[i]; + } +} + +/* Change tabs to spaces, writing to stdout. + Read each file in `file_list', in order. */ + +void +expand () +{ + FILE *fp; /* Input stream. */ + int c; /* Each input character. */ + int tab_index = 0; /* Index in `tab_list' of next tabstop. */ + int column = 0; /* Column on screen of the next char. */ + int next_tab_column; /* Column the next tab stop is on. */ + int convert = 1; /* If nonzero, perform translations. */ + + fp = next_file ((FILE *) NULL); + for (;;) + { + c = getc (fp); + if (c == EOF) + { + fp = next_file (fp); + if (fp == NULL) + break; /* No more files. */ + else + continue; + } + + if (c == '\n') + { + putchar (c); + tab_index = 0; + column = 0; + convert = 1; + } + else if (c == '\t' && convert) + { + if (tab_size == 0) + { + /* Do not let tab_index == first_free_tab; + stop when it is 1 less. 
*/ + while (tab_index < first_free_tab - 1 + && column >= tab_list[tab_index]) + tab_index++; + next_tab_column = tab_list[tab_index]; + if (tab_index < first_free_tab - 1) + tab_index++; + if (column >= next_tab_column) + next_tab_column = column + 1; /* Ran out of tab stops. */ + } + else + { + next_tab_column = column + tab_size - column % tab_size; + } + while (column < next_tab_column) + { + putchar (' '); + ++column; + } + } + else + { + if (convert) + { + if (c == '\b') + { + if (column > 0) + --column; + } + else + { + ++column; + if (convert_entire_line == 0) + convert = 0; + } + } + putchar (c); + } + } +} + +/* Close the old stream pointer FP if it is non-NULL, + and return a new one opened to read the next input file. + Open a filename of `-' as the standard input. + Return NULL if there are no more input files. */ + +FILE * +next_file (fp) + FILE *fp; +{ + static char *prev_file; + char *file; + + if (fp) + { + if (ferror (fp)) + { + error (0, errno, "%s", prev_file); + exit_status = 1; + } + if (fp == stdin) + clearerr (fp); /* Also clear EOF. */ + else if (fclose (fp) == EOF) + { + error (0, errno, "%s", prev_file); + exit_status = 1; + } + } + + while ((file = *file_list++) != NULL) + { + if (file[0] == '-' && file[1] == '\0') + { + have_read_stdin = 1; + prev_file = file; + return stdin; + } + fp = fopen (file, "r"); + if (fp) + { + prev_file = file; + return fp; + } + error (0, errno, "%s", file); + exit_status = 1; + } + return NULL; +} + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-tab1[,tab2[,...]]] [-t tab1[,tab2[,...]]] [-i]\n\ + [--tabs=tab1[,tab2[,...]]] [--initial] [file...]\n", + program_name); + exit (1); +} diff --git a/src/fold.c b/src/fold.c new file mode 100644 index 000000000..d5d4ae3fe --- /dev/null +++ b/src/fold.c @@ -0,0 +1,250 @@ +/* fold -- wrap each input line to fit in specified width. + Copyright (C) 1991 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by David MacKenzie. */ + +#define _GNU_SOURCE +#include <ctype.h> +#ifndef isblank +#define isblank(c) ((c) == ' ' || (c) == '\t') +#endif +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" + +char *xrealloc (); +int adjust_column (); +int fold_file (); +void error (); + +/* If nonzero, try to break on whitespace. */ +int break_spaces; + +/* If nonzero, count bytes, not column positions. */ +int count_bytes; + +/* If nonzero, at least one of the files we read was standard input. */ +int have_read_stdin; + +/* The name this program was run with. */ +char *program_name; + +struct option longopts[] = +{ + {"bytes", 0, NULL, 'b'}, + {"spaces", 0, NULL, 's'}, + {"width", 1, NULL, 'w'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + int width = 80; + int i; + int optc; + int errs = 0; + + program_name = argv[0]; + break_spaces = count_bytes = have_read_stdin = 0; + + while ((optc = getopt_long (argc, argv, "bsw:", longopts, (int *) 0)) + != EOF) + { + switch (optc) + { + case 'b': /* Count bytes rather than columns. */ + count_bytes = 1; + break; + + case 's': /* Break at word boundaries. */ + break_spaces = 1; + break; + + case 'w': /* Line width. 
*/ + width = atoi (optarg); + if (width < 1) + error (1, 0, "%s: invalid line width", optarg); + break; + + default: + fprintf (stderr, "\ +Usage: %s [-bs] [-w width] [--bytes] [--spaces] [--width=width] [file...]\n", + argv[0]); + exit (1); + } + } + + if (argc == optind) + errs |= fold_file ("-", width); + else + for (i = optind; i < argc; i++) + errs |= fold_file (argv[i], width); + + if (have_read_stdin && fclose (stdin) == EOF) + error (1, errno, "-"); + if (fclose (stdout) == EOF) + error (1, errno, "write error"); + + exit (errs); +} + +/* Fold file FILENAME, or standard input if FILENAME is "-", + to stdout, with maximum line length WIDTH. + Return 0 if successful, 1 if an error occurs. */ + +int +fold_file (filename, width) + char *filename; + int width; +{ + FILE *istream; + register int c; + int column = 0; /* Screen column where next char will go. */ + int offset_out = 0; /* Index in `line_out' for next char. */ + static char *line_out = NULL; + static size_t allocated_out = 0; + + if (!strcmp (filename, "-")) + { + istream = stdin; + have_read_stdin = 1; + } + else + istream = fopen (filename, "r"); + + if (istream == NULL) + { + error (0, errno, "%s", filename); + return 1; + } + + while ((c = getc (istream)) != EOF) + { + if (offset_out + 1 >= allocated_out) + { + allocated_out += 1024; + line_out = xrealloc (line_out, allocated_out); + } + + if (c == '\n') + { + line_out[offset_out++] = c; + fwrite (line_out, sizeof (char), offset_out, stdout); + column = offset_out = 0; + continue; + } + + rescan: + column = adjust_column (column, c); + + if (column > width) + { + /* This character would make the line too long. + Print the line plus a newline, and make this character + start the next line. */ + if (break_spaces) + { + /* Look for the last blank. */ + int logical_end; + + for (logical_end = offset_out - 1; logical_end >= 0; + logical_end--) + if (isblank (line_out[logical_end])) + break; + if (logical_end >= 0) + { + int i; + + /* Found a blank. 
Don't output the part after it. */ + logical_end++; + fwrite (line_out, sizeof (char), logical_end, stdout); + putchar ('\n'); + /* Move the remainder to the beginning of the next line. + The areas being copied here might overlap. */ + bcopy (line_out + logical_end, line_out, + offset_out - logical_end); + offset_out -= logical_end; + for (column = i = 0; i < offset_out; i++) + column = adjust_column (column, line_out[i]); + goto rescan; + } + } + line_out[offset_out++] = '\n'; + fwrite (line_out, sizeof (char), offset_out, stdout); + column = offset_out = 0; + goto rescan; + } + + line_out[offset_out++] = c; + } + + if (offset_out) + fwrite (line_out, sizeof (char), offset_out, stdout); + + if (ferror (istream)) + { + error (0, errno, "%s", filename); + if (strcmp (filename, "-")) + fclose (istream); + return 1; + } + if (strcmp (filename, "-") && fclose (istream) == EOF) + { + error (0, errno, "%s", filename); + return 1; + } + + if (ferror (stdout)) + { + error (0, errno, "write error"); + return 1; + } + + return 0; +} + +/* Assuming the current column is COLUMN, return the column that + printing C will move the cursor to. + The first column is 0. */ + +int +adjust_column (column, c) + int column; + char c; +{ + if (!count_bytes) + { + if (c == '\b') + { + if (column > 0) + column--; + } + else if (c == '\r') + column = 0; + else if (c == '\t') + column = column + 8 - column % 8; + else /* if (isprint (c)) */ + column++; + } + else + column++; + return column; +} diff --git a/src/head.c b/src/head.c new file mode 100644 index 000000000..0302b60ac --- /dev/null +++ b/src/head.c @@ -0,0 +1,380 @@ +/* head -- output first part of file(s) + Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Options: + -b Print first N 512-byte blocks. + -c, --bytes=N[bkm] Print first N bytes + [or 512-byte blocks, kilobytes, or megabytes]. + -k Print first N kilobytes. + -N, -l, -n, --lines=N Print first N lines. + -m Print first N megabytes. + -q, --quiet, --silent Never print filename headers. + -v, --verbose Always print filename headers. + + Reads from standard input if no files are given or when a filename of + ``-'' is encountered. + By default, filename headers are printed only if more than one file + is given. + By default, prints the first 10 lines (head -n 10). + + David MacKenzie <djm@ai.mit.edu> */ + +#include <stdio.h> +#include <getopt.h> +#include <ctype.h> +#include <sys/types.h> +#include "system.h" + +#ifdef isascii +#define ISDIGIT(c) (isascii ((c)) && isdigit ((c))) +#else +#define ISDIGIT(c) (isdigit ((c))) +#endif + +/* Number of lines/chars/blocks to head. */ +#define DEFAULT_NUMBER 10 + +/* Size of atomic reads. */ +#define BUFSIZE (512 * 8) + +/* Number of bytes per item we are printing. + If 0, head in lines. */ +int unit_size; + +/* If nonzero, print filename headers. */ +int print_headers; + +/* When to print the filename banners. */ +enum header_mode +{ + multiple_files, always, never +}; + +int head (); +int head_bytes (); +int head_file (); +int head_lines (); +long atou (); +void error (); +void parse_unit (); +void usage (); +void write_header (); +void xwrite (); + +/* The name this program was run with. */ +char *program_name; + +/* Have we ever read standard input? 
*/ +int have_read_stdin; + +struct option long_options[] = +{ + {"bytes", 1, NULL, 'c'}, + {"lines", 1, NULL, 'n'}, + {"quiet", 0, NULL, 'q'}, + {"silent", 0, NULL, 'q'}, + {"verbose", 0, NULL, 'v'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + enum header_mode header_mode = multiple_files; + int exit_status = 0; + long number = -1; /* Number of items to print (-1 if undef.). */ + int c; /* Option character. */ + + program_name = argv[0]; + have_read_stdin = 0; + unit_size = 0; + print_headers = 0; + + if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1])) + { + /* Old option syntax; a dash, one or more digits, and one or + more option letters. Move past the number. */ + for (number = 0, ++argv[1]; ISDIGIT (*argv[1]); ++argv[1]) + number = number * 10 + *argv[1] - '0'; + /* Parse any appended option letters. */ + while (*argv[1]) + { + switch (*argv[1]) + { + case 'b': + unit_size = 512; + break; + + case 'c': + unit_size = 1; + break; + + case 'k': + unit_size = 1024; + break; + + case 'l': + unit_size = 0; + break; + + case 'm': + unit_size = 1048576; + break; + + case 'q': + header_mode = never; + break; + + case 'v': + header_mode = always; + break; + + default: + error (0, 0, "unrecognized option `-%c'", *argv[1]); + usage (); + } + ++argv[1]; + } + /* Make the options we just parsed invisible to getopt. 
*/ + argv[1] = argv[0]; + argv++; + argc--; + } + + while ((c = getopt_long (argc, argv, "c:n:qv", long_options, (int *) 0)) + != EOF) + { + switch (c) + { + case 'c': + unit_size = 1; + parse_unit (optarg); + goto getnum; + case 'n': + unit_size = 0; + getnum: + number = atou (optarg); + if (number == -1) + error (1, 0, "invalid number `%s'", optarg); + break; + + case 'q': + header_mode = never; + break; + + case 'v': + header_mode = always; + break; + + default: + usage (); + } + } + + if (number == -1) + number = DEFAULT_NUMBER; + + if (unit_size > 1) + number *= unit_size; + + if (header_mode == always + || (header_mode == multiple_files && optind < argc - 1)) + print_headers = 1; + + if (optind == argc) + exit_status |= head_file ("-", number); + + for (; optind < argc; ++optind) + exit_status |= head_file (argv[optind], number); + + if (have_read_stdin && close (0) < 0) + error (1, errno, "-"); + if (close (1) < 0) + error (1, errno, "write error"); + + exit (exit_status); +} + +int +head_file (filename, number) + char *filename; + long number; +{ + int fd; + + if (!strcmp (filename, "-")) + { + have_read_stdin = 1; + filename = "standard input"; + if (print_headers) + write_header (filename); + return head (filename, 0, number); + } + else + { + fd = open (filename, O_RDONLY); + if (fd >= 0) + { + int errors; + + if (print_headers) + write_header (filename); + errors = head (filename, fd, number); + if (close (fd) == 0) + return errors; + } + error (0, errno, "%s", filename); + return 1; + } +} + +void +write_header (filename) + char *filename; +{ + static int first_file = 1; + + if (first_file) + { + xwrite (1, "==> ", 4); + first_file = 0; + } + else + xwrite (1, "\n==> ", 5); + xwrite (1, filename, strlen (filename)); + xwrite (1, " <==\n", 5); +} + +int +head (filename, fd, number) + char *filename; + int fd; + long number; +{ + if (unit_size) + return head_bytes (filename, fd, number); + else + return head_lines (filename, fd, number); +} + +int 
+head_bytes (filename, fd, bytes_to_write) + char *filename; + int fd; + long bytes_to_write; +{ + char buffer[BUFSIZE]; + int bytes_read; + + while (bytes_to_write) + { + bytes_read = read (fd, buffer, BUFSIZE); + if (bytes_read == -1) + { + error (0, errno, "%s", filename); + return 1; + } + if (bytes_read == 0) + break; + if (bytes_read > bytes_to_write) + bytes_read = bytes_to_write; + xwrite (1, buffer, bytes_read); + bytes_to_write -= bytes_read; + } + return 0; +} + +int +head_lines (filename, fd, lines_to_write) + char *filename; + int fd; + long lines_to_write; +{ + char buffer[BUFSIZE]; + int bytes_read; + int bytes_to_write; + + while (lines_to_write) + { + bytes_read = read (fd, buffer, BUFSIZE); + if (bytes_read == -1) + { + error (0, errno, "%s", filename); + return 1; + } + if (bytes_read == 0) + break; + bytes_to_write = 0; + while (bytes_to_write < bytes_read) + if (buffer[bytes_to_write++] == '\n' && --lines_to_write == 0) + break; + xwrite (1, buffer, bytes_to_write); + } + return 0; +} + +void +parse_unit (str) + char *str; +{ + int arglen = strlen (str); + + if (arglen == 0) + return; + + switch (str[arglen - 1]) + { + case 'b': + unit_size = 512; + str[arglen - 1] = '\0'; + break; + case 'k': + unit_size = 1024; + str[arglen - 1] = '\0'; + break; + case 'm': + unit_size = 1048576; + str[arglen - 1] = '\0'; + break; + } +} + +/* Convert STR, a string of ASCII digits, into an unsigned integer. + Return -1 if STR does not represent a valid unsigned integer. */ + +long +atou (str) + char *str; +{ + int value; + + for (value = 0; ISDIGIT (*str); ++str) + value = value * 10 + *str - '0'; + return *str ? 
-1 : value; +} + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-c N[bkm]] [-n N] [-qv] [--bytes=N[bkm]] [--lines=N]\n\ + [--quiet] [--silent] [--verbose] [file...]\n\ + %s [-Nbcklmqv] [file...]\n", program_name, program_name); + exit (1); +} diff --git a/src/join.c b/src/join.c new file mode 100644 index 000000000..9ac82e0fd --- /dev/null +++ b/src/join.c @@ -0,0 +1,690 @@ +/* join - join lines of two files on a common field + Copyright (C) 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Written by Mike Haertel, mike@gnu.ai.mit.edu. */ + +#define _GNU_SOURCE +#include <ctype.h> +#ifndef isblank +#define isblank(c) ((c) == ' ' || (c) == '\t') +#endif +#include <stdio.h> +#include <sys/types.h> +#include <getopt.h> +#include "system.h" + +#ifdef isascii +#define ISSPACE(c) (isascii(c) && isspace(c)) +#define ISDIGIT(c) (isascii(c) && isdigit(c)) +#else +#define ISSPACE(c) isspace(c) +#define ISDIGIT(c) isdigit(c) +#endif + +char *xmalloc (); +char *xrealloc (); +void error (); +static void usage (); + +#define min(A, B) ((A) < (B) ? (A) : (B)) + +/* An element of the list describing the format of each + output line. */ +struct outlist +{ + int file; /* File to take field from (1 or 2). */ + int field; /* Field number to print. */ + struct outlist *next; +}; + +/* A field of a line. 
*/ +struct field +{ + char *beg; /* First character in field. */ + char *lim; /* Character after last character in field. */ +}; + +/* A line read from an input file. Newlines are not stored. */ +struct line +{ + char *beg; /* First character in line. */ + char *lim; /* Character after last character in line. */ + int nfields; /* Number of elements in `fields'. */ + struct field *fields; +}; + +/* One or more consecutive lines read from a file that all have the + same join field value. */ +struct seq +{ + int count; /* Elements used in `lines'. */ + int alloc; /* Elements allocated in `lines'. */ + struct line *lines; +}; + +/* If nonzero, print unpairable lines in file 1 or 2. */ +static int print_unpairables_1, print_unpairables_2; + +/* If nonzero, print pairable lines. */ +static int print_pairables; + +/* Empty output field filler. */ +static char *empty_filler; + +/* Field to join on. */ +static int join_field_1, join_field_2; + +/* List of fields to print. */ +struct outlist *outlist; + +/* Last element in `outlist', where a new element can be added. */ +struct outlist *outlist_end; + +/* Tab character separating fields; if this is NUL fields are separated + by any nonempty string of white space, otherwise by exactly one + tab character. */ +static char tab; + +/* The name this program was run with. */ +char *program_name; + +/* Fill in the `fields' structure in LINE. 
*/ + +static void +xfields (line) + struct line *line; +{ + static int nfields = 2; + int i; + register char *ptr, *lim; + + line->fields = (struct field *) xmalloc (nfields * sizeof (struct field)); + + ptr = line->beg; + lim = line->lim; + + for (i = 0; ptr < lim; ++i) + { + if (i == nfields) + { + nfields *= 2; + line->fields = (struct field *) + xrealloc ((char *) line->fields, nfields * sizeof (struct field)); + } + if (tab) + { + line->fields[i].beg = ptr; + while (ptr < lim && *ptr != tab) + ++ptr; + line->fields[i].lim = ptr; + if (ptr < lim) + ++ptr; + } + else + { + line->fields[i].beg = ptr; + while (ptr < lim && !ISSPACE (*ptr)) + ++ptr; + line->fields[i].lim = ptr; + while (ptr < lim && ISSPACE (*ptr)) + ++ptr; + } + } + + line->nfields = i; +} + +/* Read a line from FP into LINE and split it into fields. + Return 0 if EOF, 1 otherwise. */ + +static int +get_line (fp, line) + FILE *fp; + struct line *line; +{ + static int linesize = 80; + int c, i; + char *ptr; + + if (feof (fp)) + return 0; + + ptr = xmalloc (linesize); + + for (i = 0; (c = getc (fp)) != EOF && c != '\n'; ++i) + { + if (i == linesize) + { + linesize *= 2; + ptr = xrealloc (ptr, linesize); + } + ptr[i] = c; + } + + if (c == EOF && i == 0) + { + free (ptr); + return 0; + } + + line->beg = ptr; + line->lim = line->beg + i; + xfields (line); + return 1; +} + +static void +freeline (line) + struct line *line; +{ + free ((char *) line->fields); + free (line->beg); +} + +static void +initseq (seq) + struct seq *seq; +{ + seq->count = 0; + seq->alloc = 1; + seq->lines = (struct line *) xmalloc (seq->alloc * sizeof (struct line)); +} + +/* Read a line from FP and add it to SEQ. Return 0 if EOF, 1 otherwise. 
*/ + +static int +getseq (fp, seq) + FILE *fp; + struct seq *seq; +{ + if (seq->count == seq->alloc) + { + seq->alloc *= 2; + seq->lines = (struct line *) + xrealloc ((char *) seq->lines, seq->alloc * sizeof (struct line)); + } + + if (get_line (fp, &seq->lines[seq->count])) + { + ++seq->count; + return 1; + } + return 0; +} + +static void +delseq (seq) + struct seq *seq; +{ + free ((char *) seq->lines); +} + +/* Return <0 if the join field in LINE1 compares less than the one in LINE2; + >0 if it compares greater; 0 if it compares equal. */ + +static int +keycmp (line1, line2) + struct line *line1; + struct line *line2; +{ + char *beg1, *beg2; /* Start of field to compare in each file. */ + int len1, len2; /* Length of fields to compare. */ + int diff; + + if (join_field_1 < line1->nfields) + { + beg1 = line1->fields[join_field_1].beg; + len1 = line1->fields[join_field_1].lim + - line1->fields[join_field_1].beg; + } + else + { + beg1 = NULL; + len1 = 0; + } + + if (join_field_2 < line2->nfields) + { + beg2 = line2->fields[join_field_2].beg; + len2 = line2->fields[join_field_2].lim + - line2->fields[join_field_2].beg; + } + else + { + beg2 = NULL; + len2 = 0; + } + + if (len1 == 0) + return len2 == 0 ? 0 : -1; + if (len2 == 0) + return 1; + diff = memcmp (beg1, beg2, min (len1, len2)); + if (diff) + return diff; + return len1 - len2; +} + +/* Print field N of LINE if it exists and is nonempty, otherwise + `empty_filler' if it is nonempty. */ + +static void +prfield (n, line) + int n; + struct line *line; +{ + int len; + + if (n < line->nfields) + { + len = line->fields[n].lim - line->fields[n].beg; + if (len) + fwrite (line->fields[n].beg, 1, len, stdout); + else if (empty_filler) + fputs (empty_filler, stdout); + } + else if (empty_filler) + fputs (empty_filler, stdout); +} + +/* Print LINE, with its fields separated by `tab'. 
*/ + +static void +prline (line) + struct line *line; +{ + int i; + + for (i = 0; i < line->nfields; ++i) + { + prfield (i, line); + if (i == line->nfields - 1) + putchar ('\n'); + else + putchar (tab ? tab : ' '); + } +} + +/* Print the join of LINE1 and LINE2. */ + +static void +prjoin (line1, line2) + struct line *line1; + struct line *line2; +{ + if (outlist) + { + struct outlist *o; + + prfield (outlist->field - 1, outlist->file == 1 ? line1 : line2); + for (o = outlist->next; o; o = o->next) + { + putchar (tab ? tab : ' '); + prfield (o->field - 1, o->file == 1 ? line1 : line2); + } + putchar ('\n'); + } + else + { + int i; + + prfield (join_field_1, line1); + for (i = 0; i < join_field_1 && i < line1->nfields; ++i) + { + putchar (tab ? tab : ' '); + prfield (i, line1); + } + for (i = join_field_1 + 1; i < line1->nfields; ++i) + { + putchar (tab ? tab : ' '); + prfield (i, line1); + } + + for (i = 0; i < join_field_2 && i < line2->nfields; ++i) + { + putchar (tab ? tab : ' '); + prfield (i, line2); + } + for (i = join_field_2 + 1; i < line2->nfields; ++i) + { + putchar (tab ? tab : ' '); + prfield (i, line2); + } + putchar ('\n'); + } +} + +/* Print the join of the files in FP1 and FP2. */ + +static void +join (fp1, fp2) + FILE *fp1; + FILE *fp2; +{ + struct seq seq1, seq2; + struct line line; + int diff, i, j, eof1, eof2; + + /* Read the first line of each file. */ + initseq (&seq1); + getseq (fp1, &seq1); + initseq (&seq2); + getseq (fp2, &seq2); + + while (seq1.count && seq2.count) + { + diff = keycmp (&seq1.lines[0], &seq2.lines[0]); + if (diff < 0) + { + if (print_unpairables_1) + prline (&seq1.lines[0]); + freeline (&seq1.lines[0]); + seq1.count = 0; + getseq (fp1, &seq1); + continue; + } + if (diff > 0) + { + if (print_unpairables_2) + prline (&seq2.lines[0]); + freeline (&seq2.lines[0]); + seq2.count = 0; + getseq (fp2, &seq2); + continue; + } + + /* Keep reading lines from file1 as long as they continue to + match the current line from file2. 
*/ + eof1 = 0; + do + if (!getseq (fp1, &seq1)) + { + eof1 = 1; + ++seq1.count; + break; + } + while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0])); + + /* Keep reading lines from file2 as long as they continue to + match the current line from file1. */ + eof2 = 0; + do + if (!getseq (fp2, &seq2)) + { + eof2 = 1; + ++seq2.count; + break; + } + while (!keycmp (&seq1.lines[0], &seq2.lines[seq2.count - 1])); + + if (print_pairables) + { + for (i = 0; i < seq1.count - 1; ++i) + for (j = 0; j < seq2.count - 1; ++j) + prjoin (&seq1.lines[i], &seq2.lines[j]); + } + + for (i = 0; i < seq1.count - 1; ++i) + freeline (&seq1.lines[i]); + if (!eof1) + { + seq1.lines[0] = seq1.lines[seq1.count - 1]; + seq1.count = 1; + } + else + seq1.count = 0; + + for (i = 0; i < seq2.count - 1; ++i) + freeline (&seq2.lines[i]); + if (!eof2) + { + seq2.lines[0] = seq2.lines[seq2.count - 1]; + seq2.count = 1; + } + else + seq2.count = 0; + } + + if (print_unpairables_1 && seq1.count) + { + prline (&seq1.lines[0]); + freeline (&seq1.lines[0]); + while (get_line (fp1, &line)) + { + prline (&line); + freeline (&line); + } + } + + if (print_unpairables_2 && seq2.count) + { + prline (&seq2.lines[0]); + freeline (&seq2.lines[0]); + while (get_line (fp2, &line)) + { + prline (&line); + freeline (&line); + } + } + + delseq (&seq1); + delseq (&seq2); +} + +/* Add a field spec for field FIELD of file FILE to `outlist' and return 1, + unless either argument is invalid; then just return 0. */ + +static int +add_field (file, field) + int file; + int field; +{ + struct outlist *o; + + if (file < 1 || file > 2 || field < 1) + return 0; + o = (struct outlist *) xmalloc (sizeof (struct outlist)); + o->file = file; + o->field = field; + o->next = NULL; + + /* Add to the end of the list so the fields are in the right order. */ + if (outlist == NULL) + outlist = o; + else + outlist_end->next = o; + outlist_end = o; + + return 1; +} + +/* Add the comma or blank separated field spec(s) in STR to `outlist'. 
+ Return the number of fields added. */ + +static int +add_field_list (str) + char *str; +{ + int added = 0; + int file = -1, field = -1; + int dot_found = 0; + + for (; *str; str++) + { + if (*str == ',' || isblank (*str)) + { + added += add_field (file, field); + file = field = -1; + dot_found = 0; + } + else if (*str == '.') + dot_found = 1; + else if (ISDIGIT (*str)) + { + if (!dot_found) + { + if (file == -1) + file = 0; + file = file * 10 + *str - '0'; + } + else + { + if (field == -1) + field = 0; + field = field * 10 + *str - '0'; + } + } + else + return 0; + } + + added += add_field (file, field); + return added; +} + +/* When using getopt_long_only, no long option can start with + a character that is a short option. */ +static struct option longopts[] = +{ + {"j", 1, NULL, 'j'}, + {"j1", 1, NULL, '1'}, + {"j2", 1, NULL, '2'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char *argv[]; +{ + char *names[2]; + FILE *fp1, *fp2; + int optc, prev_optc = 0, nfiles, val; + + program_name = argv[0]; + nfiles = 0; + print_pairables = 1; + + while ((optc = getopt_long_only (argc, argv, "-a:e:1:2:o:t:v:", longopts, + (int *) 0)) != EOF) + { + switch (optc) + { + case 'a': + val = atoi (optarg); + if (val == 1) + print_unpairables_1 = 1; + else if (val == 2) + print_unpairables_2 = 1; + else + error (2, 0, "invalid file number for `-a'"); + break; + + case 'e': + empty_filler = optarg; + break; + + case '1': + val = atoi (optarg); + if (val <= 0) + error (2, 0, "invalid field number for `-1'"); + join_field_1 = val - 1; + break; + + case '2': + val = atoi (optarg); + if (val <= 0) + error (2, 0, "invalid field number for `-2'"); + join_field_2 = val - 1; + break; + + case 'j': + val = atoi (optarg); + if (val <= 0) + error (2, 0, "invalid field number for `-j'"); + join_field_1 = join_field_2 = val - 1; + break; + + case 'o': + if (add_field_list (optarg) == 0) + error (2, 0, "invalid field list for `-o'"); + break; + + case 't': + tab = *optarg; + 
break; + + case 'v': + val = atoi (optarg); + if (val == 1) + print_unpairables_1 = 1; + else if (val == 2) + print_unpairables_2 = 1; + else + error (2, 0, "invalid file number for `-v'"); + print_pairables = 0; + break; + + case 1: /* Non-option argument. */ + if (prev_optc == 'o') + { + /* Might be continuation of args to -o. */ + if (add_field_list (optarg) > 0) + continue; /* Don't change `prev_optc'. */ + } + + if (nfiles > 1) + usage (); + names[nfiles++] = optarg; + break; + + case '?': + usage (); + } + prev_optc = optc; + } + + if (nfiles != 2) + usage (); + + fp1 = strcmp (names[0], "-") ? fopen (names[0], "r") : stdin; + if (!fp1) + error (1, errno, "%s", names[0]); + fp2 = strcmp (names[1], "-") ? fopen (names[1], "r") : stdin; + if (!fp2) + error (1, errno, "%s", names[1]); + if (fp1 == fp2) + error (1, errno, "both files cannot be standard input"); + join (fp1, fp2); + + if ((fp1 == stdin || fp2 == stdin) && fclose (stdin) == EOF) + error (1, errno, "-"); + if (ferror (stdout) || fclose (stdout) == EOF) + error (1, 0, "write error"); + + exit (0); +} + +static void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-a 1|2] [-v 1|2] [-e empty-string] [-o field-list...] [-t char]\n\ + [-j[1|2] field] [-1 field] [-2 field] file1 file2\n", + program_name); + exit (1); +} diff --git a/src/nl.c b/src/nl.c new file mode 100644 index 000000000..368001adc --- /dev/null +++ b/src/nl.c @@ -0,0 +1,546 @@ +/* nl -- number lines of files + Copyright (C) 1989, 1992 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Scott Bartram (nancy!scott@uunet.uu.net) + Revised by David MacKenzie (djm@ai.mit.edu) */ + +#include <stdio.h> +#include <sys/types.h> +#include <getopt.h> +#include <regex.h> +#include "linebuffer.h" +#include "system.h" + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + +/* Line-number formats. */ +enum number_format +{ + FORMAT_RIGHT_NOLZ, /* Right justified, no leading zeroes. */ + FORMAT_RIGHT_LZ, /* Right justified, leading zeroes. */ + FORMAT_LEFT /* Left justified, no leading zeroes. */ +}; + +/* Default section delimiter characters. */ +#define DEFAULT_SECTION_DELIMITERS "\\:" + +/* Types of input lines: either one of the section delimiters, + or text to output. */ +enum section +{ + Header, Body, Footer, Text +}; + +/* Format of body lines (-b). */ +char *body_type = "t"; + +/* Format of header lines (-h). */ +char *header_type = "n"; + +/* Format of footer lines (-f). */ +char *footer_type = "n"; + +/* Format currently being used (body, header, or footer). */ +char *current_type; + +/* Regex for body lines to number (-bp). */ +struct re_pattern_buffer body_regex; + +/* Regex for header lines to number (-hp). */ +struct re_pattern_buffer header_regex; + +/* Regex for footer lines to number (-fp). */ +struct re_pattern_buffer footer_regex; + +/* Pointer to current regex, if any. */ +struct re_pattern_buffer *current_regex = NULL; + +/* Separator string to print after line number (-s). */ +char *separator_str = "\t"; + +/* Input section delimiter string (-d). */ +char *section_del = DEFAULT_SECTION_DELIMITERS; + +/* Header delimiter string. */ +char *header_del = NULL; + +/* Header section delimiter length. */ +int header_del_len; + +/* Body delimiter string. */ +char *body_del = NULL; + +/* Body section delimiter length. 
*/ +int body_del_len; + +/* Footer delimiter string. */ +char *footer_del = NULL; + +/* Footer section delimiter length. */ +int footer_del_len; + +/* Input buffer. */ +struct linebuffer line_buf; + +/* printf format string for line number. */ +char *print_fmt; + +/* printf format string for unnumbered lines. */ +char *print_no_line_fmt = NULL; + +/* Starting line number on each page (-v). */ +int page_start = 1; + +/* Line number increment (-i). */ +int page_incr = 1; + +/* If TRUE, reset line number at start of each page (-p). */ +int reset_numbers = TRUE; + +/* Number of blank lines to consider to be one line for numbering (-l). */ +int blank_join = 1; + +/* Width of line numbers (-w). */ +int lineno_width = 6; + +/* Line number format (-n). */ +enum number_format lineno_format = FORMAT_RIGHT_NOLZ; + +/* Current print line number. */ +int line_no; + +/* The name this program was run with. */ +char *program_name; + +/* Nonzero if we have ever read standard input. */ +int have_read_stdin; + +enum section check_section (); +char *xmalloc (); +char *xrealloc (); +int build_type_arg (); +int nl_file (); +void usage (); +void process_file (); +void proc_header (); +void proc_body (); +void proc_footer (); +void proc_text (); +void print_lineno (); +void build_print_fmt (); +void error (); + +struct option longopts[] = +{ + {"header-numbering", 1, NULL, 'h'}, + {"body-numbering", 1, NULL, 'b'}, + {"footer-numbering", 1, NULL, 'f'}, + {"first-page", 1, NULL, 'v'}, + {"page-increment", 1, NULL, 'i'}, + {"no-renumber", 0, NULL, 'p'}, + {"join-blank-lines", 1, NULL, 'l'}, + {"number-separator", 1, NULL, 's'}, + {"number-width", 1, NULL, 'w'}, + {"number-format", 1, NULL, 'n'}, + {"section-delimiter", 1, NULL, 'd'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + int c, exit_status = 0; + + program_name = argv[0]; + have_read_stdin = 0; + + while ((c = getopt_long (argc, argv, "h:b:f:v:i:pl:s:w:n:d:", longopts, + (int *) 0)) != EOF) + { + 
switch (c) + { + case 'h': + if (build_type_arg (&header_type, &header_regex) != TRUE) + usage (); + break; + case 'b': + if (build_type_arg (&body_type, &body_regex) != TRUE) + usage (); + break; + case 'f': + if (build_type_arg (&footer_type, &footer_regex) != TRUE) + usage (); + break; + case 'v': + page_start = atoi (optarg); + break; + case 'i': + page_incr = atoi (optarg); + if (page_incr < 1) + page_incr = 1; + break; + case 'p': + reset_numbers = FALSE; + break; + case 'l': + blank_join = atoi (optarg); + break; + case 's': + separator_str = optarg; + break; + case 'w': + lineno_width = atoi (optarg); + if (lineno_width < 1) + lineno_width = 1; + break; + case 'n': + switch (*optarg) + { + case 'l': + if (optarg[1] == 'n') + lineno_format = FORMAT_LEFT; + else + usage (); + break; + case 'r': + switch (optarg[1]) + { + case 'n': + lineno_format = FORMAT_RIGHT_NOLZ; + break; + case 'z': + lineno_format = FORMAT_RIGHT_LZ; + break; + default: + usage (); + break; + } + break; + default: + usage (); + break; + } + break; + case 'd': + section_del = optarg; + break; + default: + usage (); + break; + } + } + + /* Initialize the section delimiters. */ + c = strlen (section_del); + + header_del_len = c * 3; + header_del = xmalloc (header_del_len + 1); + strcat (strcat (strcpy (header_del, section_del), section_del), section_del); + + body_del_len = c * 2; + body_del = xmalloc (body_del_len + 1); + strcat (strcpy (body_del, section_del), section_del); + + footer_del_len = c; + footer_del = xmalloc (footer_del_len + 1); + strcpy (footer_del, section_del); + + /* Initialize the input buffer. */ + initbuffer (&line_buf); + + /* Initialize the printf format for unnumbered lines. 
*/ + c = strlen (separator_str); + print_no_line_fmt = xmalloc (lineno_width + c + 1); + memset (print_no_line_fmt, ' ', lineno_width + c); + print_no_line_fmt[lineno_width + c] = '\0'; + + line_no = page_start; + current_type = body_type; + current_regex = &body_regex; + build_print_fmt (); + + /* Main processing. */ + + if (optind == argc) + exit_status |= nl_file ("-"); + else + for (; optind < argc; optind++) + exit_status |= nl_file (argv[optind]); + + if (have_read_stdin && fclose (stdin) == EOF) + { + error (0, errno, "-"); + exit_status = 1; + } + if (ferror (stdout) || fclose (stdout) == EOF) + error (1, 0, "write error"); + + exit (exit_status); +} + +/* Process file FILE to standard output. + Return 0 if successful, 1 if not. */ + +int +nl_file (file) + char *file; +{ + FILE *stream; + + if (!strcmp (file, "-")) + { + have_read_stdin = 1; + stream = stdin; + } + else + { + stream = fopen (file, "r"); + if (stream == NULL) + { + error (0, errno, "%s", file); + return 1; + } + } + + process_file (stream); + + if (ferror (stream)) + { + error (0, errno, "%s", file); + return 1; + } + if (!strcmp (file, "-")) + clearerr (stream); /* Also clear EOF. */ + else if (fclose (stream) == EOF) + { + error (0, errno, "%s", file); + return 1; + } + return 0; +} + +/* Read and process the file pointed to by FP. */ + +void +process_file (fp) + FILE *fp; +{ + while (readline (&line_buf, fp)) + { + switch ((int) check_section ()) + { + case Header: + proc_header (); + break; + case Body: + proc_body (); + break; + case Footer: + proc_footer (); + break; + case Text: + proc_text (); + break; + } + } +} + +/* Return the type of line in `line_buf'. 
*/ + +enum section +check_section () +{ + if (line_buf.length < 2 || memcmp (line_buf.buffer, section_del, 2)) + return Text; + if (line_buf.length == header_del_len + && !memcmp (line_buf.buffer, header_del, header_del_len)) + return Header; + if (line_buf.length == body_del_len + && !memcmp (line_buf.buffer, body_del, body_del_len)) + return Body; + if (line_buf.length == footer_del_len + && !memcmp (line_buf.buffer, footer_del, footer_del_len)) + return Footer; + return Text; +} + +/* Switch to a header section. */ + +void +proc_header () +{ + current_type = header_type; + current_regex = &header_regex; + if (reset_numbers) + line_no = page_start; + putchar ('\n'); +} + +/* Switch to a body section. */ + +void +proc_body () +{ + current_type = body_type; + current_regex = &body_regex; + putchar ('\n'); +} + +/* Switch to a footer section. */ + +void +proc_footer () +{ + current_type = footer_type; + current_regex = &footer_regex; + putchar ('\n'); +} + +/* Process a regular text line in `line_buf'. */ + +void +proc_text () +{ + static int blank_lines = 0; /* Consecutive blank lines so far. */ + + switch (*current_type) + { + case 'a': + if (blank_join > 1) + { + if (line_buf.length || ++blank_lines == blank_join) + { + print_lineno (); + blank_lines = 0; + } + else + printf (print_no_line_fmt); + } + else + print_lineno (); + break; + case 't': + if (line_buf.length) + print_lineno (); + else + printf (print_no_line_fmt); + break; + case 'n': + printf (print_no_line_fmt); + break; + case 'p': + if (re_search (current_regex, line_buf.buffer, line_buf.length, + 0, line_buf.length, (struct re_registers *) 0) < 0) + printf (print_no_line_fmt); + else + print_lineno (); + break; + } + fwrite (line_buf.buffer, sizeof (char), line_buf.length, stdout); + putchar ('\n'); +} + +/* Print and increment the line number. */ + +void +print_lineno () +{ + printf (print_fmt, line_no); + line_no += page_incr; +} + +/* Build the printf format string, based on `lineno_format'. 
*/ + +void +build_print_fmt () +{ + /* 12 = 10 chars for lineno_width, 1 for %, 1 for \0. */ + print_fmt = xmalloc (strlen (separator_str) + 12); + switch (lineno_format) + { + case FORMAT_RIGHT_NOLZ: + sprintf (print_fmt, "%%%dd%s", lineno_width, separator_str); + break; + case FORMAT_RIGHT_LZ: + sprintf (print_fmt, "%%0%dd%s", lineno_width, separator_str); + break; + case FORMAT_LEFT: + sprintf (print_fmt, "%%-%dd%s", lineno_width, separator_str); + break; + } +} + +/* Set the command line flag TYPEP and possibly the regex pointer REGEXP, + according to `optarg'. */ + +int +build_type_arg (typep, regexp) + char **typep; + struct re_pattern_buffer *regexp; +{ + char *errmsg; + int rval = TRUE; + int optlen; + + switch (*optarg) + { + case 'a': + case 't': + case 'n': + *typep = optarg; + break; + case 'p': + *typep = optarg++; + optlen = strlen (optarg); + regexp->allocated = optlen * 2; + regexp->buffer = (unsigned char *) xmalloc (regexp->allocated); + regexp->translate = NULL; + regexp->fastmap = xmalloc (256); + regexp->fastmap_accurate = 0; + errmsg = re_compile_pattern (optarg, optlen, regexp); + if (errmsg) + error (1, 0, "%s", errmsg); + break; + default: + rval = FALSE; + break; + } + return rval; +} + +/* Print a usage message and quit. 
*/ + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-h header-style] [-b body-style] [-f footer-style] [-p] [-d cc]\n\ + [-v start-number] [-i increment] [-l lines] [-s line-separator]\n\ + [-w line-no-width] [-n {ln,rn,rz}] [--header-numbering=style]\n\ + [--body-numbering=style] [--footer-numbering=style]\n\ + [--first-page=number] [--page-increment=number] [--no-renumber]\n\ + [--join-blank-lines=number] [--number-separator=string]\n\ + [--number-width=number] [--number-format={ln,rn,rz}]\n\ + [--section-delimiter=cc] [file...]\n", + program_name); + exit (2); +} diff --git a/src/od.c b/src/od.c new file mode 100644 index 000000000..f13c6b7bc --- /dev/null +++ b/src/od.c @@ -0,0 +1,1697 @@ +/* od -- dump in octal (and other formats) the contents of files + Copyright (C) 1992 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Jim Meyering. */ + +/* AIX requires this to be the first thing in the file. 
*/ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if HAVE_ALLOCA_H +#include <alloca.h> +#else /* not HAVE_ALLOCA_H */ +#ifdef _AIX + #pragma alloca +#else /* not _AIX */ +char *alloca (); +#endif /* not _AIX */ +#endif /* not HAVE_ALLOCA_H */ +#endif /* not __GNUC__ */ + +#include <stdio.h> +#include <ctype.h> +#include <assert.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" + +#if defined(__GNUC__) || defined(STDC_HEADERS) +#include <float.h> +#endif + +#ifdef __GNUC__ +typedef long double LONG_DOUBLE; +#else +typedef double LONG_DOUBLE; +#endif + +#if HAVE_LIMITS_H +#include <limits.h> +#endif +#ifndef SCHAR_MAX +#define SCHAR_MAX 127 +#endif +#ifndef SHRT_MAX +#define SHRT_MAX 32767 +#endif +#ifndef ULONG_MAX +#define ULONG_MAX ((unsigned long) ~(unsigned long) 0) +#endif + +#define STREQ(a,b) (strcmp((a), (b)) == 0) + +#ifndef MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#ifndef MIN +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +/* The default number of input bytes per output line. */ +#define DEFAULT_BYTES_PER_BLOCK 16 + +/* The number of decimal digits of precision in a float. */ +#ifndef FLT_DIG +#define FLT_DIG 7 +#endif + +/* The number of decimal digits of precision in a double. */ +#ifndef DBL_DIG +#define DBL_DIG 15 +#endif + +/* The number of decimal digits of precision in a long double. 
*/ +#ifndef LDBL_DIG +#define LDBL_DIG DBL_DIG +#endif + +char *xmalloc (); +char *xrealloc (); +void error (); + +enum size_spec +{ + NO_SIZE, + CHAR, + SHORT, + INT, + LONG, + FP_SINGLE, + FP_DOUBLE, + FP_LONG_DOUBLE +}; + +enum output_format +{ + SIGNED_DECIMAL, + UNSIGNED_DECIMAL, + OCTAL, + HEXADECIMAL, + FLOATING_POINT, + NAMED_CHARACTER, + CHARACTER +}; + +enum strtoul_error +{ + UINT_OK, UINT_INVALID, UINT_INVALID_SUFFIX_CHAR, UINT_OVERFLOW +}; +typedef enum strtoul_error strtoul_error; + +/* Each output format specification (from POSIX `-t spec' or from + old-style options) is represented by one of these structures. */ +struct tspec +{ + enum output_format fmt; + enum size_spec size; + void (*print_function) (); + char *fmt_string; +}; + +/* Convert the number of 8-bit bytes of a binary representation to + the number of characters (digits + sign if the type is signed) + required to represent the same quantity in the specified base/type. + For example, a 32-bit (4-byte) quantity may require a field width + as wide as the following for these types: + 11 unsigned octal + 11 signed decimal + 10 unsigned decimal + 8 unsigned hexadecimal */ + +static const unsigned int bytes_to_oct_digits[] = +{0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43}; + +static const unsigned int bytes_to_signed_dec_digits[] = +{1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40}; + +static const unsigned int bytes_to_unsigned_dec_digits[] = +{0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39}; + +static const unsigned int bytes_to_hex_digits[] = +{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; + +/* Convert enum size_spec to the size of the named type. */ +static const int width_bytes[] = +{ + -1, + sizeof (char), + sizeof (short int), + sizeof (int), + sizeof (long int), + sizeof (float), + sizeof (double), + sizeof (LONG_DOUBLE) +}; + +/* Names for some non-printing characters. 
*/ +static const char *const charname[33] = +{ + "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", + "bs", "ht", "nl", "vt", "ff", "cr", "so", "si", + "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", + "can", "em", "sub", "esc", "fs", "gs", "rs", "us", + "sp" +}; + +/* A printf control string for printing a file offset. */ +static const char *output_address_fmt_string; + +/* FIXME: make this the number of octal digits in an unsigned long. */ +#define MAX_ADDRESS_LENGTH 13 +static char address_fmt_buffer[MAX_ADDRESS_LENGTH + 1]; +static char address_pad[MAX_ADDRESS_LENGTH + 1]; + +static unsigned long int string_min; +static unsigned long int flag_dump_strings; + +/* The number of input bytes to skip before formatting and writing. */ +static unsigned long int n_bytes_to_skip = 0; + +/* When non-zero, MAX_BYTES_TO_FORMAT is the maximum number of bytes + to be read and formatted. Otherwise all input is formatted. */ +static int limit_bytes_to_format = 0; + +/* The maximum number of bytes that will be formatted. This + value is used only when LIMIT_BYTES_TO_FORMAT is non-zero. */ +static unsigned long int max_bytes_to_format; + +/* When non-zero and two or more consecutive blocks are equal, format + only the first block and output an asterisk alone on the following + line to indicate that identical blocks have been elided. */ +static int abbreviate_duplicate_blocks = 1; + +/* An array of specs describing how to format each input block. */ +static struct tspec *spec; + +/* The number of format specs. */ +static unsigned int n_specs; + +/* The allocated length of SPEC. */ +static unsigned int n_specs_allocated; + +/* The number of input bytes formatted per output line. It must be + a multiple of the least common multiple of the sizes associated with + the specified output types. It should be as large as possible, but + no larger than 16 -- unless specified with the -w option. 
*/ +static unsigned int bytes_per_block; + +/* Human-readable representation of *file_list (for error messages). + It differs from *file_list only when *file_list is "-". */ +static char const *input_filename; + +/* A NULL-terminated list of the file-arguments from the command line. + If no file-arguments were specified, this variable is initialized + to { "-", NULL }. */ +static char const *const *file_list; + +/* The input stream associated with the current file. */ +static FILE *in_stream; + +#define LONGEST_INTEGRAL_TYPE long int + +#define MAX_INTEGRAL_TYPE_SIZE sizeof(LONGEST_INTEGRAL_TYPE) +static enum size_spec integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1]; + +#define MAX_FP_TYPE_SIZE sizeof(LONG_DOUBLE) +static enum size_spec fp_type_size[MAX_FP_TYPE_SIZE + 1]; + +static struct option long_options[] = +{ + /* POSIX options. */ + {"skip-bytes", 1, NULL, 'j'}, + {"address-radix", 1, NULL, 'A'}, + {"read-bytes", 1, NULL, 'N'}, + {"format", 1, NULL, 't'}, + {"output-duplicates", 0, NULL, 'v'}, + + /* non-POSIX options. */ + {"strings", 2, NULL, 's'}, + {"width", 2, NULL, 'w'}, + {NULL, 0, NULL, 0} +}; + +/* The name this program was run with. */ +char *program_name; + +static void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-abcdfhiloxv] [-s[bytes]] [-w[bytes]] [-A radix] [-j bytes]\n\ + [-N bytes] [-t type] [--skip-bytes=bytes] [--address-radix=radix]\n\ + [--read-bytes=bytes] [--format=type] [--output-duplicates]\n\ + [--strings[=bytes]] [--width[=bytes]] [file...]\n", + program_name); + exit (1); +} + +/* Compute the greatest common denominator of U and V + using Euclid's algorithm. */ + +static unsigned int +gcd (u, v) + unsigned int u; + unsigned int v; +{ + unsigned int t; + while (v != 0) + { + t = u % v; + u = v; + v = t; + } + return u; +} + +/* Compute the least common multiple of U and V. 
*/ + +static unsigned int +lcm (u, v) + unsigned int u; + unsigned int v; +{ + unsigned int t = gcd (u, v); + if (t == 0) + return 0; + return u * v / t; +} + +static strtoul_error +my_strtoul (s, base, val, allow_bkm_suffix) + const char *s; + int base; + long unsigned int *val; + int allow_bkm_suffix; +{ + char *p; + unsigned long int tmp; + + assert (0 <= base && base <= 36); + + tmp = strtoul (s, &p, base); + if (errno != 0) + return UINT_OVERFLOW; + if (p == s) + return UINT_INVALID; + if (!allow_bkm_suffix) + { + if (*p == '\0') + { + *val = tmp; + return UINT_OK; + } + else + return UINT_INVALID_SUFFIX_CHAR; + } + + switch (*p) + { + case '\0': + break; + +#define BKM_SCALE(x,scale_factor) \ + do \ + { \ + if (x > (double) ULONG_MAX / scale_factor) \ + return UINT_OVERFLOW; \ + x *= scale_factor; \ + } \ + while (0) + + case 'b': + BKM_SCALE (tmp, 512); + break; + + case 'k': + BKM_SCALE (tmp, 1024); + break; + + case 'm': + BKM_SCALE (tmp, 1024 * 1024); + break; + + default: + return UINT_INVALID_SUFFIX_CHAR; + break; + } + + *val = tmp; + return UINT_OK; +} + +static void +uint_fatal_error (str, argument_type_string, err) + const char *str; + const char *argument_type_string; + strtoul_error err; +{ + switch (err) + { + case UINT_OK: + abort (); + + case UINT_INVALID: + error (2, 0, "invalid %s `%s'", argument_type_string, str); + break; + + case UINT_INVALID_SUFFIX_CHAR: + error (2, 0, "invalid character following %s `%s'", + argument_type_string, str); + break; + + case UINT_OVERFLOW: + error (2, 0, "%s `%s' larger than maximum unsigned long", + argument_type_string, str); + break; + } +} + +static void +print_s_char (n_bytes, block, fmt_string) + long unsigned int n_bytes; + const char *block; + const char *fmt_string; +{ + int i, err; + err = 0; + for (i = n_bytes; i > 0; i--) + { + int tmp = (unsigned) *(unsigned char *) block; + if (tmp > SCHAR_MAX) + tmp = (SCHAR_MAX - tmp); + assert (tmp <= SCHAR_MAX); + err |= (printf (fmt_string, tmp, (i == 1 ? 
'\n' : ' ')) == EOF); + block += sizeof (unsigned char); + } + if (err) + error (2, errno, "standard output"); +} + +static void +print_char (n_bytes, block, fmt_string) + long unsigned int n_bytes; + const char *block; + const char *fmt_string; +{ + int i, err; + err = 0; + for (i = n_bytes; i > 0; i--) + { + unsigned int tmp = *(unsigned char *) block; + err |= (printf (fmt_string, tmp, (i == 1 ? '\n' : ' ')) == EOF); + block += sizeof (unsigned char); + } + if (err) + error (2, errno, "standard output"); +} + +static void +print_s_short (n_bytes, block, fmt_string) + long unsigned int n_bytes; + const char *block; + const char *fmt_string; +{ + int i, err; + err = 0; + for (i = n_bytes / sizeof (unsigned short); i > 0; i--) + { + int tmp = (unsigned) *(unsigned short *) block; + if (tmp > SHRT_MAX) + tmp = (SHRT_MAX - tmp); + assert (tmp <= SHRT_MAX); + err |= (printf (fmt_string, tmp, (i == 1 ? '\n' : ' ')) == EOF); + block += sizeof (unsigned short); + } + if (err) + error (2, errno, "standard output"); +} + +static void +print_short (n_bytes, block, fmt_string) + long unsigned int n_bytes; + const char *block; + const char *fmt_string; +{ + int i, err; + err = 0; + for (i = n_bytes / sizeof (unsigned short); i > 0; i--) + { + unsigned int tmp = *(unsigned short *) block; + err |= (printf (fmt_string, tmp, (i == 1 ? '\n' : ' ')) == EOF); + block += sizeof (unsigned short); + } + if (err) + error (2, errno, "standard output"); +} + +static void +print_int (n_bytes, block, fmt_string) + long unsigned int n_bytes; + const char *block; + const char *fmt_string; +{ + int i, err; + err = 0; + for (i = n_bytes / sizeof (unsigned int); i > 0; i--) + { + unsigned int tmp = *(unsigned int *) block; + err |= (printf (fmt_string, tmp, (i == 1 ? 
'\n' : ' ')) == EOF); + block += sizeof (unsigned int); + } + if (err) + error (2, errno, "standard output"); +} + +static void +print_long (n_bytes, block, fmt_string) + long unsigned int n_bytes; + const char *block; + const char *fmt_string; +{ + int i, err; + err = 0; + for (i = n_bytes / sizeof (unsigned long); i > 0; i--) + { + unsigned long tmp = *(unsigned long *) block; + err |= (printf (fmt_string, tmp, (i == 1 ? '\n' : ' ')) == EOF); + block += sizeof (unsigned long); + } + if (err) + error (2, errno, "standard output"); +} + +static void +print_float (n_bytes, block, fmt_string) + long unsigned int n_bytes; + const char *block; + const char *fmt_string; +{ + int i, err; + err = 0; + for (i = n_bytes / sizeof (float); i > 0; i--) + { + float tmp = *(float *) block; + err |= (printf (fmt_string, tmp, (i == 1 ? '\n' : ' ')) == EOF); + block += sizeof (float); + } + if (err) + error (2, errno, "standard output"); +} + +static void +print_double (n_bytes, block, fmt_string) + long unsigned int n_bytes; + const char *block; + const char *fmt_string; +{ + int i, err; + err = 0; + for (i = n_bytes / sizeof (double); i > 0; i--) + { + double tmp = *(double *) block; + err |= (printf (fmt_string, tmp, (i == 1 ? '\n' : ' ')) == EOF); + block += sizeof (double); + } + if (err) + error (2, errno, "standard output"); +} + +#ifdef __GNUC__ +static void +print_long_double (n_bytes, block, fmt_string) + long unsigned int n_bytes; + const char *block; + const char *fmt_string; +{ + int i, err; + err = 0; + for (i = n_bytes / sizeof (LONG_DOUBLE); i > 0; i--) + { + LONG_DOUBLE tmp = *(LONG_DOUBLE *) block; + err |= (printf (fmt_string, tmp, (i == 1 ? 
'\n' : ' ')) == EOF); + block += sizeof (LONG_DOUBLE); + } + if (err) + error (2, errno, "standard output"); +} + +#endif + +static void +print_named_ascii (n_bytes, block, unused_fmt_string) + long unsigned int n_bytes; + const char *block; + const char *unused_fmt_string; +{ + int i; + for (i = n_bytes; i > 0; i--) + { + unsigned int c = *(unsigned char *) block; + unsigned int masked_c = (0x7f & c); + const char *s; + char buf[5]; + + if (masked_c == 127) + s = "del"; + else if (masked_c <= 040) + s = charname[masked_c]; + else + { + sprintf (buf, " %c", masked_c); + s = buf; + } + + if (printf ("%3s%c", s, (i == 1 ? '\n' : ' ')) == EOF) + error (2, errno, "standard output"); + block += sizeof (unsigned char); + } +} + +static void +print_ascii (n_bytes, block, unused_fmt_string) + long unsigned int n_bytes; + const char *block; + const char *unused_fmt_string; +{ + int i; + for (i = n_bytes; i > 0; i--) + { + unsigned int c = *(unsigned char *) block; + const char *s; + char buf[5]; + + switch (c) + { + case '\0': + s = " \\0"; + break; + + case '\007': + s = " \\a"; + break; + + case '\b': + s = " \\b"; + break; + + case '\f': + s = " \\f"; + break; + + case '\n': + s = " \\n"; + break; + + case '\r': + s = " \\r"; + break; + + case '\t': + s = " \\t"; + break; + + case '\v': + s = " \\v"; + break; + + default: + sprintf (buf, (isprint (c) ? " %c" : "%03o"), c); + s = (const char *) buf; + } + + if (printf ("%3s%c", s, (i == 1 ? '\n' : ' ')) == EOF) + error (2, errno, "standard output"); + block += sizeof (unsigned char); + } +} + +/* Convert a null-terminated (possibly zero-length) string S to an + unsigned long integer value. If S points to a non-digit set *P to S, + *VAL to 0, and return 0. Otherwise, accumulate the integer value of + the string of digits. If the string of digits represents a value + larger than ULONG_MAX, don't modify *VAL or *P and return non-zero. 
+ Otherwise, advance *P to the first non-digit after S, set *VAL to + the result of the conversion and return zero. */ + +static int +simple_strtoul (s, p, val) + const char *s; + const char **p; + long unsigned int *val; +{ + unsigned long int sum; + + sum = 0; + while (isdigit (*s)) + { + unsigned int c = *s++ - '0'; + if (sum > (ULONG_MAX - c) / 10) + return 1; + sum = sum * 10 + c; + } + *p = s; + *val = sum; + return 0; +} + +/* If S points to a single valid POSIX-style od format string, put a + description of that format in *TSPEC, make *NEXT point at the character + following the just-decoded format (if *NEXT is non-NULL), and return + zero. If S is not valid, don't modify *NEXT or *TSPEC and return + non-zero. For example, if S were "d4afL" *NEXT would be set to "afL" + and *TSPEC would be + { + fmt = SIGNED_DECIMAL; + size = INT or LONG; (whichever integral_type_size[4] resolves to) + print_function = print_int; (assuming size == INT) + fmt_string = "%011d%c"; + } +*/ + +static int +decode_one_format (s, next, tspec) + const char *s; + const char **next; + struct tspec *tspec; +{ + enum size_spec size_spec; + unsigned long int size; + enum output_format fmt; + const char *pre_fmt_string; + char *fmt_string; + void (*print_function) (); + const char *p; + unsigned int c; + + assert (tspec != NULL); + + switch (*s) + { + case 'd': + case 'o': + case 'u': + case 'x': + c = *s; + ++s; + switch (*s) + { + case 'C': + ++s; + size = sizeof (char); + break; + + case 'S': + ++s; + size = sizeof (short); + break; + + case 'I': + ++s; + size = sizeof (int); + break; + + case 'L': + ++s; + size = sizeof (long int); + break; + + default: + if (simple_strtoul (s, &p, &size) != 0) + return 1; + if (p == s) + size = sizeof (int); + else + { + if (size > MAX_INTEGRAL_TYPE_SIZE + || integral_type_size[size] == NO_SIZE) + return 1; + s = p; + } + break; + } + +#define FMT_BYTES_ALLOCATED 9 + fmt_string = xmalloc (FMT_BYTES_ALLOCATED); + + size_spec = 
integral_type_size[size]; + + switch (c) + { + case 'd': + fmt = SIGNED_DECIMAL; + sprintf (fmt_string, "%%0%u%sd%%c", + bytes_to_signed_dec_digits[size], + (size_spec == LONG ? "l" : "")); + break; + + case 'o': + fmt = OCTAL; + sprintf (fmt_string, "%%0%u%so%%c", + bytes_to_oct_digits[size], + (size_spec == LONG ? "l" : "")); + break; + + case 'u': + fmt = UNSIGNED_DECIMAL; + sprintf (fmt_string, "%%0%u%su%%c", + bytes_to_unsigned_dec_digits[size], + (size_spec == LONG ? "l" : "")); + break; + + case 'x': + fmt = HEXADECIMAL; + sprintf (fmt_string, "%%0%u%sx%%c", + bytes_to_hex_digits[size], + (size_spec == LONG ? "l" : "")); + break; + + default: + abort (); + } + + assert (strlen (fmt_string) < FMT_BYTES_ALLOCATED); + + switch (size_spec) + { + case CHAR: + print_function = (fmt == SIGNED_DECIMAL + ? print_s_char + : print_char); + break; + + case SHORT: + print_function = (fmt == SIGNED_DECIMAL + ? print_s_short + : print_short);; + break; + + case INT: + print_function = print_int; + break; + + case LONG: + print_function = print_long; + break; + + default: + abort (); + } + break; + + case 'f': + fmt = FLOATING_POINT; + ++s; + switch (*s) + { + case 'F': + ++s; + size = sizeof (float); + break; + + case 'D': + ++s; + size = sizeof (double); + break; + + case 'L': + ++s; + size = sizeof (LONG_DOUBLE); + break; + + default: + if (simple_strtoul (s, &p, &size) != 0) + return 1; + if (p == s) + size = sizeof (double); + else + { + if (size > MAX_FP_TYPE_SIZE + || fp_type_size[size] == NO_SIZE) + return 1; + s = p; + } + break; + } + size_spec = fp_type_size[size]; + + switch (size_spec) + { + case FP_SINGLE: + print_function = print_float; + pre_fmt_string = "%%%d.%d#e%%c"; + fmt_string = xmalloc (strlen (pre_fmt_string)); + sprintf (fmt_string, pre_fmt_string, + FLT_DIG + 8, FLT_DIG); + break; + + case FP_DOUBLE: + print_function = print_double; + pre_fmt_string = "%%%d.%d#e%%c"; + fmt_string = xmalloc (strlen (pre_fmt_string)); + sprintf (fmt_string, 
pre_fmt_string, + DBL_DIG + 8, DBL_DIG); + break; + +#ifdef __GNUC__ + case FP_LONG_DOUBLE: + print_function = print_long_double; + pre_fmt_string = "%%%d.%d#le%%c"; + fmt_string = xmalloc (strlen (pre_fmt_string)); + sprintf (fmt_string, pre_fmt_string, + LDBL_DIG + 8, LDBL_DIG); + break; +#endif + + default: + abort (); + } + break; + + case 'a': + ++s; + fmt = NAMED_CHARACTER; + size_spec = CHAR; + fmt_string = NULL; + print_function = print_named_ascii; + break; + + case 'c': + ++s; + fmt = CHARACTER; + size_spec = CHAR; + fmt_string = NULL; + print_function = print_ascii; + break; + + default: + return 1; + } + + tspec->size = size_spec; + tspec->fmt = fmt; + tspec->print_function = print_function; + tspec->fmt_string = fmt_string; + + if (next != NULL) + *next = s; + + return 0; +} + +/* Decode the POSIX-style od format string S. Append the decoded + representation to the global array SPEC, reallocating SPEC if + necessary. Return zero if S is valid, non-zero otherwise. */ + +static int +decode_format_string (s) + const char *s; +{ + assert (s != NULL); + + while (*s != '\0') + { + struct tspec tspec; + const char *next; + + if (decode_one_format (s, &next, &tspec)) + return 1; + + assert (s != next); + s = next; + + if (n_specs >= n_specs_allocated) + { + n_specs_allocated = 1 + (3 * n_specs_allocated) / 2; + spec = (struct tspec *) xrealloc (spec, (n_specs_allocated + * sizeof (struct tspec))); + } + + bcopy ((char *) &tspec, (char *) &spec[n_specs], sizeof (struct tspec)); + ++n_specs; + } + + return 0; +} + +/* Given a list of one or more input filenames FILE_LIST, set the global + file pointer IN_STREAM to position N_SKIP in the concatenation of + those files. If any file operation fails or if there are fewer than + N_SKIP bytes in the combined input, give an error message and exit. + When possible, use seek- rather than read operations to advance + IN_STREAM. A file name of "-" is interpreted as standard input. 
*/ + +static void +skip (n_skip) + long unsigned int n_skip; +{ + for ( /*empty */ ; *file_list != NULL; ++file_list) + { + struct stat file_stats; + int j; + + if (STREQ (*file_list, "-")) + { + input_filename = "standard input"; + in_stream = stdin; + } + else + { + input_filename = *file_list; + in_stream = fopen (input_filename, "r"); + if (in_stream == NULL) + error (2, errno, "%s", input_filename); + } + + if (n_skip == 0) + break; + + /* First try using fseek. For large offsets, all this work is + worthwhile. If the offset is below some threshold it may be + more efficient to move the pointer by reading. There are two + issues when trying to use fseek: + - the file must be seekable. + - before seeking to the specified position, make sure + that the new position is in the current file. + Try to do that by getting file's size using stat(). + But that will work only for regular files and dirs. */ + + if (fstat (fileno (in_stream), &file_stats)) + error (2, errno, "%s", input_filename); + + /* The st_size field is valid only for regular files and + directories. FIXME: is the preceding true? + If the number of bytes left to skip is at least as large as + the size of the current file, we can decrement + n_skip and go on to the next file. */ + if (S_ISREG (file_stats.st_mode) || S_ISDIR (file_stats.st_mode)) + { + if (n_skip >= file_stats.st_size) + { + n_skip -= file_stats.st_size; + if (in_stream != stdin) + { + if (fclose (in_stream)) + error (2, errno, "%s", input_filename); + } + continue; + } + else + { + if (fseek (in_stream, n_skip, SEEK_SET) == 0) + { + n_skip = 0; + break; + } + } + } + + /* fseek didn't work or wasn't attempted; do it the slow way. */ + + for (j = n_skip / BUFSIZ; j >= 0; j--) + { + char buf[BUFSIZ]; + size_t n_bytes_to_read = (j > 0 + ? 
BUFSIZ + : n_skip % BUFSIZ); + size_t n_bytes_read; + n_bytes_read = fread (buf, 1, n_bytes_to_read, in_stream); + n_skip -= n_bytes_read; + if (n_bytes_read != n_bytes_to_read) + { + if (ferror (in_stream)) + error (2, errno, "%s", input_filename); + else + break; + } + } + + if (n_skip == 0) + break; + } + + if (n_skip != 0) + error (2, 0, "cannot skip past end of combined input"); +} + +static const char * +format_address (address) + long unsigned int address; +{ + const char *address_string; + + if (output_address_fmt_string == NULL) + address_string = ""; + else + { + sprintf (address_fmt_buffer, output_address_fmt_string, address); + address_string = address_fmt_buffer; + } + return address_string; +} + +/* Write N_BYTES bytes from CURR_BLOCK to standard output once for each + of the N_SPEC format specs. CURRENT_OFFSET is the byte address of + CURR_BLOCK in the concatenation of input files, and it is printed + (optionally) only before the output line associated with the first + format spec. When duplicate blocks are being abbreviated, the output + for a sequence of identical input blocks is the output for the first + block followed by an asterisk alone on a line. It is valid to compare + the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK. + That condition may be false only for the last input block -- and then + only when it has not been padded to length BYTES_PER_BLOCK. 
*/ + +static void +write_block (current_offset, n_bytes, prev_block, curr_block) + long unsigned int current_offset; + long unsigned int n_bytes; + const char *prev_block; + const char *curr_block; +{ + static int first = 1; + static int prev_pair_equal = 0; + +#define EQUAL_BLOCKS(b1, b2) (bcmp ((b1), (b2), bytes_per_block) == 0) + + if (abbreviate_duplicate_blocks + && !first && n_bytes == bytes_per_block + && EQUAL_BLOCKS (prev_block, curr_block)) + { + if (prev_pair_equal) + { + /* The two preceding blocks were equal, and the current + block is the same as the last one, so print nothing. */ + } + else + { + printf ("*\n"); + prev_pair_equal = 1; + } + } + else + { + int i; + + prev_pair_equal = 0; + for (i = 0; i < n_specs; i++) + { + if (printf ("%s ", (i == 0 + ? format_address (current_offset) + : address_pad)) + == EOF) + error (2, errno, "standard output"); + (*spec[i].print_function) (n_bytes, curr_block, spec[i].fmt_string); + } + } + first = 0; +} + +/* Read and return a single byte from the concatenation of the input + files named in the global array FILE_LIST. On the first call to this + function, the global variable IN_STREAM is expected to be an open + stream associated with the input file *FILE_LIST. If IN_STREAM is + at end-of-file, close it and update the global variables IN_STREAM, + FILE_LIST, and INPUT_FILENAME so they correspond to the next file in + the list. Then try to read a byte from the newly opened file. + Repeat if necessary until *FILE_LIST is NULL. Upon any read-, open-, + or close error give a message and exit. When EOF is reached for the + last file in FILE_LIST, return EOF. Any subsequent calls return EOF. 
*/ + +static int +read_char () +{ + if (*file_list == NULL) + return EOF; + + while (1) + { + int c; + + c = fgetc (in_stream); + + if (c != EOF) + return c; + + if (errno != 0) + error (2, errno, "%s", input_filename); + + if (in_stream != stdin) + if (fclose (in_stream) == EOF) + error (2, errno, "%s", input_filename); + + ++file_list; + if (*file_list == NULL) + return EOF; + + if (STREQ (*file_list, "-")) + { + input_filename = "standard input"; + in_stream = stdin; + } + else + { + input_filename = *file_list; + in_stream = fopen (input_filename, "r"); + if (in_stream == NULL) + error (2, errno, "%s", input_filename); + } + } +} + +/* Read N bytes into BLOCK from the concatenation of the input files + named in the global array FILE_LIST. On the first call to this + function, the global variable IN_STREAM is expected to be an open + stream associated with the input file *FILE_LIST. On subsequent + calls, if *FILE_LIST is NULL, don't modify BLOCK and return zero. + If all N bytes cannot be read from IN_STREAM, close IN_STREAM and + update the global variables IN_STREAM, FILE_LIST, and INPUT_FILENAME. + Then try to read the remaining bytes from the newly opened file. + Repeat if necessary until *FILE_LIST is NULL. Upon any read-, open-, + or close error give a message and exit. Otherwise, return the number + of bytes read. */ + +static unsigned long int +read_block (n, block) + size_t n; + char *block; +{ + unsigned long int n_bytes_in_buffer; + + assert (n > 0 && n <= bytes_per_block); + if (n == 0) + return 0; + + n_bytes_in_buffer = 0; + + if (*file_list == NULL) + return 0; /* EOF. 
*/ + + while (1) + { + size_t n_needed; + size_t n_read; + + n_needed = n - n_bytes_in_buffer; + n_read = fread (block + n_bytes_in_buffer, 1, n_needed, in_stream); + + if (ferror (in_stream)) + error (2, errno, "%s", input_filename); + + if (n_read == n_needed) + return n; + + n_bytes_in_buffer += n_read; + + if (in_stream != stdin) + if (fclose (in_stream) == EOF) + error (2, errno, "%s", input_filename); + + ++file_list; + if (*file_list == NULL) + return n_bytes_in_buffer; + + if (STREQ (*file_list, "-")) + { + input_filename = "standard input"; + in_stream = stdin; + } + else + { + input_filename = *file_list; + in_stream = fopen (input_filename, "r"); + if (in_stream == NULL) + error (2, errno, "%s", input_filename); + } + } +} + +/* Return the least common multiple of the sizes associated + with the format specs. */ + +static int +get_lcm () +{ + int i; + int l_c_m = 1; + + for (i = 0; i < n_specs; i++) + l_c_m = lcm (l_c_m, width_bytes[(int) spec[i].size]); + return l_c_m; +} + +/* Read chunks of size BYTES_PER_BLOCK from the input files, write the + formatted block to standard output, and repeat until the specified + maximum number of bytes has been read or until all input has been + processed. If the last block read is smaller than BYTES_PER_BLOCK + and its size is not a multiple of the size associated with a format + spec, extend the input block with zero bytes until its length is a + multiple of all format spec sizes. Write the final block. Finally, + write on a line by itself the offset of the byte after the last byte + read. 
*/

/* Implementation notes for dump: two buffers are used alternately so
   that write_block can compare each block with the one before it.
   After either read loop, N_BYTES_READ is the size of a final short
   block that still has to be written, or zero when there is none.  */

static void
dump ()
{
  char *block[2];
  unsigned long int current_offset;
  int idx = 0;
  size_t n_bytes_read;

  block[0] = (char *) alloca (bytes_per_block);
  block[1] = (char *) alloca (bytes_per_block);

  current_offset = n_bytes_to_skip;

  if (limit_bytes_to_format)
    {
      size_t end_offset = n_bytes_to_skip + max_bytes_to_format;

      while (1)
        {
          size_t n_needed;

          if (current_offset >= end_offset)
            {
              /* The limit was reached exactly on a block boundary (or
                 is zero).  Nothing is left over; without this reset the
                 size of the last full block would make the code below
                 print a stale buffer a second time.  */
              n_bytes_read = 0;
              break;
            }

          n_needed = MIN (end_offset - current_offset, bytes_per_block);
          n_bytes_read = read_block (n_needed, block[idx]);
          if (n_bytes_read < bytes_per_block)
            break;
          assert (n_bytes_read == bytes_per_block);
          write_block (current_offset, n_bytes_read,
                       block[!idx], block[idx]);
          current_offset += n_bytes_read;
          idx = !idx;
        }
    }
  else
    {
      while (1)
        {
          n_bytes_read = read_block (bytes_per_block, block[idx]);
          if (n_bytes_read < bytes_per_block)
            break;
          assert (n_bytes_read == bytes_per_block);
          write_block (current_offset, n_bytes_read,
                       block[!idx], block[idx]);
          current_offset += n_bytes_read;
          idx = !idx;
        }
    }

  if (n_bytes_read > 0)
    {
      int l_c_m;
      size_t bytes_to_write;

      l_c_m = get_lcm ();

      /* Make bytes_to_write the smallest multiple of l_c_m that
         is at least as large as n_bytes_read.  */
      bytes_to_write = l_c_m * (int) ((n_bytes_read + l_c_m - 1) / l_c_m);

      /* Zero the padding so the last, partial object prints cleanly.  */
      bzero (block[idx] + n_bytes_read, bytes_to_write - n_bytes_read);
      write_block (current_offset, bytes_to_write,
                   block[!idx], block[idx]);
      /* Only the bytes actually read count towards the final offset.  */
      current_offset += n_bytes_read;
    }

  if (output_address_fmt_string != NULL)
    {
      if (printf ("%s\n", format_address (current_offset)) < 0)
        error (2, errno, "standard output");
    }
}

/* STRINGS mode.  Find each "string constant" in the file.
   A string constant is a run of at least `string_min' ASCII graphic
   (or formatting) characters terminated by a null.  Based on a
   function written by Richard Stallman for a pre-POSIX
   version of od.
*/ + +static void +dump_strings () +{ + int bufsize = MAX (100, string_min); + char *buf = xmalloc (bufsize); + unsigned long address = n_bytes_to_skip; + + while (1) + { + int i; + int c; + + /* See if the next `string_min' chars are all printing chars. */ + tryline: + + if (limit_bytes_to_format + && address >= (n_bytes_to_skip + max_bytes_to_format - string_min)) + break; + + for (i = 0; i < string_min; i++) + { + c = read_char (); + address++; + if (c < 0) + return; + if (!isprint (c)) + /* Found a non-printing. Try again starting with next char. */ + goto tryline; + buf[i] = c; + } + + /* We found a run of `string_min' printable characters. + Now see if it is terminated with a null byte. */ + while (!limit_bytes_to_format + || address < n_bytes_to_skip + max_bytes_to_format) + { + if (i == bufsize) + { + bufsize = 1 + 3 * bufsize / 2; + buf = xrealloc (buf, bufsize); + } + c = read_char (); + address++; + if (c < 0) + return; + if (c == '\0') + break; /* It is; print this string. */ + if (!isprint (c)) + goto tryline; /* It isn't; give up on this string. */ + buf[i++] = c; /* String continues; store it all. */ + } + + /* If we get here, the string is all printable and null-terminated, + so print it. It is all in `buf' and `i' is its length. 
*/ + buf[i] = 0; + if (output_address_fmt_string != NULL) + { + if (printf ("%s ", format_address (address - i - 1)) == EOF) + error (2, errno, "standard output"); + } + for (i = 0; (c = buf[i]); i++) + { + int err; + switch (c) + { + case '\007': + err = fputs ("\\a", stdout); + break; + + case '\b': + err = fputs ("\\b", stdout); + break; + + case '\f': + err = fputs ("\\f", stdout); + break; + + case '\n': + err = fputs ("\\n", stdout); + break; + + case '\r': + err = fputs ("\\r", stdout); + break; + + case '\t': + err = fputs ("\\t", stdout); + break; + + case '\v': + err = fputs ("\\v", stdout); + break; + + default: + err = putchar (c); + } + if (err == EOF) + error (2, errno, "standard output"); + } + if (putchar ('\n') == EOF) + error (2, errno, "standard output"); + } + free (buf); +} + +void +main (argc, argv) + int argc; + char **argv; +{ + int c; + int n_files; + int i; + unsigned int l_c_m; + unsigned int address_pad_len; + unsigned long int desired_width; + int width_specified = 0; + + program_name = argv[0]; + + for (i = 0; i <= MAX_INTEGRAL_TYPE_SIZE; i++) + integral_type_size[i] = NO_SIZE; + + integral_type_size[sizeof (char)] = CHAR; + integral_type_size[sizeof (short int)] = SHORT; + integral_type_size[sizeof (int)] = INT; + integral_type_size[sizeof (long int)] = LONG; + + for (i = 0; i <= MAX_FP_TYPE_SIZE; i++) + fp_type_size[i] = NO_SIZE; + + fp_type_size[sizeof (float)] = FP_SINGLE; + /* The array entry for `double' is filled in after that for LONG_DOUBLE + so that if `long double' is the same type or if long double isn't + supported FP_LONG_DOUBLE will never be used. 
*/ + fp_type_size[sizeof (LONG_DOUBLE)] = FP_LONG_DOUBLE; + fp_type_size[sizeof (double)] = FP_DOUBLE; + + n_specs = 0; + n_specs_allocated = 5; + spec = (struct tspec *) xmalloc (n_specs_allocated * sizeof (struct tspec)); + + output_address_fmt_string = "%07o"; + address_pad_len = 7; + flag_dump_strings = 0; + + while ((c = getopt_long (argc, argv, "abcdfhilos::xw::A:j:N:t:v", + long_options, (int *) 0)) + != EOF) + { + strtoul_error err; + + switch (c) + { + case 'A': + switch (optarg[0]) + { + case 'd': + output_address_fmt_string = "%07d"; + address_pad_len = 7; + break; + case 'o': + output_address_fmt_string = "%07o"; + address_pad_len = 7; + break; + case 'x': + output_address_fmt_string = "%06x"; + address_pad_len = 6; + break; + case 'n': + output_address_fmt_string = NULL; + address_pad_len = 0; + break; + default: + error (2, 0, + "invalid output address radix `%c'; it must be one character from [doxn]", + optarg[0]); + break; + } + break; + + case 'j': + err = my_strtoul (optarg, 0, &n_bytes_to_skip, 1); + if (err != UINT_OK) + uint_fatal_error (optarg, "skip argument", err); + break; + + case 'N': + limit_bytes_to_format = 1; + + err = my_strtoul (optarg, 0, &max_bytes_to_format, 1); + if (err != UINT_OK) + uint_fatal_error (optarg, "limit argument", err); + break; + + case 's': + if (optarg == NULL) + string_min = 3; + else + { + err = my_strtoul (optarg, 0, &string_min, 1); + if (err != UINT_OK) + uint_fatal_error (optarg, "minimum string length", err); + } + ++flag_dump_strings; + break; + + case 't': + if (decode_format_string (optarg)) + error (2, 0, "invalid type string `%s'", optarg); + break; + + case 'v': + abbreviate_duplicate_blocks = 0; + break; + + /* The next several cases map the old, pre-POSIX format + specification options to the corresponding POSIX format + specs. GNU od accepts any combination of old- and + new-style options. If only POSIX format specs are used + and more than one is used, they are accumulated. 
If only + old-style options are used, all but the last are ignored. + If both types of specs are used in the same command, the + last old-style option and any POSIX specs following it + are accumulated. To illustrate, `od -c -t a' is the same + as `od -t ca', but `od -t a -c' is the same as `od -c'. */ + +#define CASE_OLD_ARG(old_char,new_string) \ + case old_char: \ + { \ + const char *next; \ + int tmp; \ + assert (n_specs_allocated >= 1); \ + tmp = decode_one_format (new_string, &next, &(spec[0])); \ + n_specs = 1; \ + assert (tmp == 0); \ + assert (*next == '\0'); \ + } \ + break + + CASE_OLD_ARG ('a', "a"); + CASE_OLD_ARG ('b', "oC"); + CASE_OLD_ARG ('c', "c"); + CASE_OLD_ARG ('d', "u2"); + CASE_OLD_ARG ('f', "fF"); + CASE_OLD_ARG ('h', "x2"); + CASE_OLD_ARG ('i', "d2"); + CASE_OLD_ARG ('l', "d4"); + CASE_OLD_ARG ('o', "o2"); + CASE_OLD_ARG ('x', "x2"); + +#undef CASE_OLD_ARG + + case 'w': + width_specified = 1; + if (optarg == NULL) + { + desired_width = 32; + } + else + { + err = my_strtoul (optarg, 10, &desired_width, 0); + if (err != UINT_OK) + error (2, 0, "invalid width specification `%s'", optarg); + } + break; + + default: + usage (); + break; + } + } + + if (flag_dump_strings && n_specs > 0) + error (2, 0, "no type may be specified when dumping strings"); + + assert (address_pad_len <= MAX_ADDRESS_LENGTH); + for (i = 0; i < address_pad_len; i++) + address_pad[i] = ' '; + address_pad[address_pad_len] = '\0'; + + if (n_specs == 0) + { + int err = decode_one_format ("o2", NULL, &(spec[0])); + + assert (err == 0); + n_specs = 1; + } + + n_files = argc - optind; + if (n_files > 0) + file_list = (char const *const *) &argv[optind]; + else + { + /* If no files were listed on the command line, set up the + global array FILE_LIST so that it contains the null-terminated + list of one name: "-". */ + static char const * const default_file_list[] = {"-", NULL}; + + file_list = default_file_list; + } + + skip (n_bytes_to_skip); + + /* Compute output block length. 
*/ + l_c_m = get_lcm (); + + if (width_specified) + { + if (desired_width != 0 && desired_width % l_c_m == 0) + bytes_per_block = desired_width; + else + { + error (0, 0, "warning: invalid width %d; using %d instead", + desired_width, l_c_m); + bytes_per_block = l_c_m; + } + } + else + { + if (l_c_m < DEFAULT_BYTES_PER_BLOCK) + bytes_per_block = l_c_m * (int) (DEFAULT_BYTES_PER_BLOCK / l_c_m); + else + bytes_per_block = l_c_m; + } + +#ifdef DEBUG + for (i = 0; i < n_specs; i++) + { + printf ("%d: fmt=\"%s\" width=%d\n", + i, spec[i].fmt_string, width_bytes[spec[i].size]); + } +#endif + + if (flag_dump_strings) + { + dump_strings (); + } + else + { + dump (); + } + + exit (0); +} diff --git a/src/paste.c b/src/paste.c new file mode 100644 index 000000000..c7058a63c --- /dev/null +++ b/src/paste.c @@ -0,0 +1,458 @@ +/* paste - merge lines of files + Copyright (C) 1984 by David M. Ihnat + + This program is a total rewrite of the Bell Laboratories Unix(Tm) + command of the same name, as of System V. It contains no proprietary + code, and therefore may be used without violation of any proprietary + agreements whatsoever. However, you will notice that the program is + copyrighted by me. This is to assure the program does *not* fall + into the public domain. Thus, I may specify just what I am now: + This program may be freely copied and distributed, provided this notice + remains; it may not be sold for profit without express written consent of + the author. + Please note that I recreated the behavior of the Unix(Tm) 'paste' command + as faithfully as possible, with minor exceptions; however, + I haven't run a full set of regression tests. Thus, the user of + this program accepts full responsibility for any effects or loss; + in particular, the author is not responsible for any losses, + explicit or incidental, that may be incurred through use of this program. + + I ask that any bugs (and, if possible, fixes) be reported to me when + possible. 
-David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us + + The list of valid escape sequences has been expanded over the Unix + version, to include \b, \f, \r, and \v. + + POSIX changes, bug fixes, long-named options, and cleanup + by David MacKenzie <djm@ai.mit.edu>. + + Options: + --serial + -s Paste one file at a time rather than + one line from each file. + --delimiters=delim-list + -d delim-list Consecutively use the characters in + DELIM-LIST instead of tab to separate + merged lines. When DELIM-LIST is exhausted, + start again at its beginning. + A FILE of `-' means standard input. + If no FILEs are given, standard input is used. */ + +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" + +char *collapse_escapes (); +char *xmalloc (); +char *xrealloc (); +int paste_parallel (); +int paste_serial (); +void error (); +void usage (); + +/* Indicates that no delimiter should be added in the current position. */ +#define EMPTY_DELIM '\0' + +/* Element marking a file that has reached EOF and been closed. */ +#define CLOSED ((FILE *) -1) + +/* Element marking end of list of open files. */ +#define ENDLIST ((FILE *) -2) + +/* Name this program was run with. */ +char *program_name; + +/* If nonzero, we have read standard input at some point. */ +int have_read_stdin; + +/* If nonzero, merge subsequent lines of each file rather than + corresponding lines from each file in parallel. */ +int serial_merge; + +/* The delimeters between lines of input files (used cyclically). */ +char *delims; + +/* A pointer to the character after the end of `delims'. 
*/ +char *delim_end; + +struct option longopts[] = +{ + {"serial", 0, 0, 's'}, + {"delimiters", 1, 0, 'd'}, + {0, 0, 0, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + int optc, exit_status; + char default_delims[2], zero_delims[3]; + + program_name = argv[0]; + have_read_stdin = 0; + serial_merge = 0; + delims = default_delims; + strcpy (delims, "\t"); + strcpy (zero_delims, "\\0"); + + while ((optc = getopt_long (argc, argv, "d:s", longopts, (int *) 0)) + != EOF) + { + switch (optc) + { + case 'd': + /* Delimiter character(s). */ + if (optarg[0] == '\0') + optarg = zero_delims; + delims = optarg; + break; + + case 's': + serial_merge++; + break; + + default: + usage (); + } + } + + if (optind == argc) + argv[argc++] = "-"; + + delim_end = collapse_escapes (delims); + + if (!serial_merge) + exit_status = paste_parallel (argc - optind, &argv[optind]); + else + exit_status = paste_serial (argc - optind, &argv[optind]); + if (have_read_stdin && fclose (stdin) == EOF) + error (1, errno, "-"); + if (ferror (stdout) || fclose (stdout) == EOF) + error (1, errno, "write error"); + exit (exit_status); +} + +/* Replace backslash representations of special characters in + STRPTR with their actual values. + The set of possible backslash characters has been expanded beyond + that recognized by the Unix version. + + Return a pointer to the character after the new end of STRPTR. */ + +char * +collapse_escapes (strptr) + char *strptr; +{ + register char *strout; + + strout = strptr; /* Start at the same place, anyway. */ + + while (*strptr) + { + if (*strptr != '\\') /* Is it an escape character? */ + *strout++ = *strptr++; /* No, just transfer it. 
*/ + else + { + switch (*++strptr) + { + case '0': + *strout++ = EMPTY_DELIM; + break; + + case 'b': + *strout++ = '\b'; + break; + + case 'f': + *strout++ = '\f'; + break; + + case 'n': + *strout++ = '\n'; + break; + + case 'r': + *strout++ = '\r'; + break; + + case 't': + *strout++ = '\t'; + break; + + case 'v': + *strout++ = '\v'; + break; + + default: + *strout++ = *strptr; + break; + } + strptr++; + } + } + return strout; +} + +/* Perform column paste on the NFILES files named in FNAMPTR. + Return 0 if no errors, 1 if one or more files could not be + opened or read. */ + +int +paste_parallel (nfiles, fnamptr) + int nfiles; + char **fnamptr; +{ + int errors = 0; /* 1 if open or read errors occur. */ + /* Number of files for which space is allocated in `delbuf' and `fileptr'. + Enlarged as necessary. */ + int file_list_size = 12; + int chr; /* Input character. */ + int line_length; /* Number of chars in line. */ + int somedone; /* 0 if all files empty for this line. */ + /* If all files are just ready to be closed, or will be on this + round, the string of delimiters must be preserved. + delbuf[0] through delbuf[file_list_size] + store the delimiters for closed files. */ + char *delbuf; + int delims_saved; /* Number of delims saved in `delbuf'. */ + register char *delimptr; /* Cycling pointer into `delims'. */ + FILE **fileptr; /* Streams open to the files to process. */ + int files_open; /* Number of files still open to process. */ + int i; /* Loop index. */ + int opened_stdin = 0; /* Nonzero if any fopen got fd 0. */ + + delbuf = (char *) xmalloc (file_list_size + 2); + fileptr = (FILE **) xmalloc ((file_list_size + 1) * sizeof (FILE *)); + + /* Attempt to open all files. This could be expanded to an infinite + number of files, but at the (considerable) expense of remembering + each file and its current offset, then opening/reading/closing. 
*/ + + for (files_open = 0; files_open < nfiles; ++files_open) + { + if (files_open == file_list_size - 2) + { + file_list_size += 12; + delbuf = (char *) xrealloc (delbuf, file_list_size + 2); + fileptr = (FILE **) xrealloc (fileptr, (file_list_size + 1) + * sizeof (FILE *)); + } + if (!strcmp (fnamptr[files_open], "-")) + { + have_read_stdin = 1; + fileptr[files_open] = stdin; + } + else + { + fileptr[files_open] = fopen (fnamptr[files_open], "r"); + if (fileptr[files_open] == NULL) + error (1, errno, "%s", fnamptr[files_open]); + else if (fileno (fileptr[files_open]) == 0) + opened_stdin = 1; + } + } + + fileptr[files_open] = ENDLIST; + + if (opened_stdin && have_read_stdin) + error (1, 0, "standard input is closed"); + + /* Read a line from each file and output it to stdout separated by a + delimiter, until we go through the loop without successfully + reading from any of the files. */ + + while (files_open) + { + /* Set up for the next line. */ + somedone = 0; + delimptr = delims; + delims_saved = 0; + + for (i = 0; fileptr[i] != ENDLIST && files_open; i++) + { + line_length = 0; /* Clear so we can easily detect EOF. */ + if (fileptr[i] != CLOSED) + { + chr = getc (fileptr[i]); + if (chr != EOF && delims_saved) + { + fwrite (delbuf, sizeof (char), delims_saved, stdout); + delims_saved = 0; + } + + while (chr != EOF) + { + line_length++; + if (chr == '\n') + break; + putc (chr, stdout); + chr = getc (fileptr[i]); + } + } + + if (line_length == 0) + { + /* EOF, read error, or closed file. + If an EOF or error, close the file and mark it in the list. */ + if (fileptr[i] != CLOSED) + { + if (ferror (fileptr[i])) + { + error (0, errno, "%s", fnamptr[i]); + errors = 1; + } + if (fileptr[i] == stdin) + clearerr (fileptr[i]); /* Also clear EOF. */ + else if (fclose (fileptr[i]) == EOF) + { + error (0, errno, "%s", fnamptr[i]); + errors = 1; + } + + fileptr[i] = CLOSED; + files_open--; + } + + if (fileptr[i + 1] == ENDLIST) + { + /* End of this output line. 
+ Is this the end of the whole thing? */ + if (somedone) + { + /* No. Some files were not closed for this line. */ + if (delims_saved) + { + fwrite (delbuf, sizeof (char), delims_saved, stdout); + delims_saved = 0; + } + putc ('\n', stdout); + } + continue; /* Next read of files, or exit. */ + } + else + { + /* Closed file; add delimiter to `delbuf'. */ + if (*delimptr != EMPTY_DELIM) + delbuf[delims_saved++] = *delimptr; + if (++delimptr == delim_end) + delimptr = delims; + } + } + else + { + /* Some data read. */ + somedone++; + + /* Except for last file, replace last newline with delim. */ + if (fileptr[i + 1] != ENDLIST) + { + if (chr != '\n') + putc (chr, stdout); + if (*delimptr != EMPTY_DELIM) + putc (*delimptr, stdout); + if (++delimptr == delim_end) + delimptr = delims; + } + else + putc (chr, stdout); + } + } + } + return errors; +} + +/* Perform serial paste on the NFILES files named in FNAMPTR. + Return 0 if no errors, 1 if one or more files could not be + opened or read. */ + +int +paste_serial (nfiles, fnamptr) + int nfiles; + char **fnamptr; +{ + int errors = 0; /* 1 if open or read errors occur. */ + register int charnew, charold; /* Current and previous char read. */ + register char *delimptr; /* Current delimiter char. */ + register FILE *fileptr; /* Open for reading current file. */ + + for (; nfiles; nfiles--, fnamptr++) + { + if (!strcmp (*fnamptr, "-")) + { + have_read_stdin = 1; + fileptr = stdin; + } + else + { + fileptr = fopen (*fnamptr, "r"); + if (fileptr == NULL) + { + error (0, errno, "%s", *fnamptr); + errors = 1; + continue; + } + } + + delimptr = delims; /* Set up for delimiter string. */ + + charold = getc (fileptr); + if (charold != EOF) + { + /* `charold' is set up. Hit it! + Keep reading characters, stashing them in `charnew'; + output `charold', converting to the appropriate delimiter + character if needed. After the EOF, output `charold' + if it's a newline; otherwise, output it and then a newline. 
*/ + + while ((charnew = getc (fileptr)) != EOF) + { + /* Process the old character. */ + if (charold == '\n') + { + if (*delimptr != EMPTY_DELIM) + putc (*delimptr, stdout); + + if (++delimptr == delim_end) + delimptr = delims; + } + else + putc (charold, stdout); + + charold = charnew; + } + + /* Hit EOF. Process that last character. */ + putc (charold, stdout); + } + + if (charold != '\n') + putc ('\n', stdout); + + if (ferror (fileptr)) + { + error (0, errno, "%s", *fnamptr); + errors = 1; + } + if (fileptr == stdin) + clearerr (fileptr); /* Also clear EOF. */ + else if (fclose (fileptr) == EOF) + { + error (0, errno, "%s", *fnamptr); + errors = 1; + } + } + return errors; +} + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-s] [-d delim-list] [--serial] [--delimiters=delim-list]\n\ + [file...]\n", + program_name); + exit (1); +} diff --git a/src/pr.c b/src/pr.c new file mode 100644 index 000000000..10595ad73 --- /dev/null +++ b/src/pr.c @@ -0,0 +1,1844 @@ +/* pr -- convert text files for printing. + Copyright (C) 1988, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Author: Pete TerMaat. */ + +/* Things to watch: Sys V screws up on ... 
+ pr -n -3 -s: /usr/dict/words + pr -m -o10 -n /usr/dict/words{,,,} + pr -6 -a -n -o5 /usr/dict/words + + Ideas: + + Keep a things_to_do list of functions to call when we know we have + something to print. Cleaner than current series of checks. + + Improve the printing of control prefixes. + + + Options: + + +PAGE Begin output at page PAGE of the output. + + -COLUMN Produce output that is COLUMN columns wide and print + columns down. + + -a Print columns across rather than down. The input + one + two + three + four + will be printed as + one two three + four + + -b Balance columns on the last page. + + -c Print unprintable characters as control prefixes. + Control-g is printed as ^G. + + -d Double space the output. + + -e[c[k]] Expand tabs to spaces on input. Optional argument C + is the input tab character. (Default is `\t'.) Optional + argument K is the input tab character's width. (Default is 8.) + + -F + -f Use formfeeds instead of newlines to separate pages. + + -h header Replace the filename in the header with the string HEADER. + + -i[c[k]] Replace spaces with tabs on output. Optional argument + C is the output tab character. (Default is `\t'.) Optional + argument K is the output tab character's width. (Default + is 8.) + + -l lines Set the page length to LINES. Default is 66. + + -m Print files in parallel. + + -n[c[k]] Precede each column with a line number. + (With parallel files, precede each line with a line + number.) Optional argument C is the character to print + after each number. (Default `\t'.) Optional argument + K is the number of digits per line number. (Default 5.) + + -o offset Offset each line with a margin OFFSET spaces wide. + Total page width is the size of this offset plus the + width set with `-w'. + + -r Ignore files that can't be opened. + + -s[c] Separate each line with a character. Optional argument C is + the character to be used. Default is `\t'. + + -t Do not print headers or footers. 
+ + -v Print unprintable characters as escape sequences. + Control-G becomes \007. + + -w width Set the page width to WIDTH characters. */ + +#include <stdio.h> +#include <getopt.h> +#include <ctype.h> +#include <sys/types.h> +#include <time.h> +#include "system.h" + +#ifdef isascii +#define ISPRINT(c) (isascii (c) && isprint (c)) +#define ISDIGIT(c) (isascii (c) && isdigit (c)) +#else +#define ISPRINT(c) isprint (c) +#define ISDIGIT(c) isdigit (c) +#endif + +int char_to_clump (); +int read_line (); +int print_page (); +int print_stored (); +char *xmalloc (); +char *xrealloc (); +int open_file (); +int skip_to_page (); +void error (); +void getoptarg (); +void usage (); +void print_files (); +void init_header (); +void init_store_cols (); +void store_columns (); +void balance (); +void store_char (); +void pad_down (); +void read_rest_of_line (); +void print_char (); +void cleanup (); + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + +/* Used with start_position in the struct COLUMN described below. + If start_position == ANYWHERE, we aren't truncating columns and + can begin printing a column anywhere. Otherwise we must pad to + the horizontal position start_position. */ +#define ANYWHERE 0 + +/* Each column has one of these structures allocated for it. + If we're only dealing with one file, fp is the same for all + columns. + + The general strategy is to spend time setting up these column + structures (storing columns if necessary), after which printing + is a matter of flitting from column to column and calling + print_func. + + Parallel files, single files printing across in multiple + columns, and single files printing down in multiple columns all + fit the same printing loop. + + print_func Function used to print lines in this column. + If we're storing this column it will be + print_stored(), Otherwise it will be read_line(). + + char_func Function used to process characters in this column. 
+ If we're storing this column it will be store_char(), + otherwise it will be print_char(). + + current_line Index of the current entry in line_vector, which + contains the index of the first character of the + current line in buff[]. + + lines_stored Number of lines in this column which are stored in + buff. + + lines_to_print If we're storing this column, lines_to_print is + the number of stored_lines which remain to be + printed. Otherwise it is the number of lines + we can print without exceeding lines_per_body. + + start_position The horizontal position we want to be in before we + print the first character in this column. + + numbered True means precede this column with a line number. */ + +struct COLUMN +{ + FILE *fp; /* Input stream for this column. */ + char *name; /* File name. */ + enum + { + OPEN, + ON_HOLD, /* Hit a form feed. */ + CLOSED + } status; /* Status of the file pointer. */ + int (*print_func) (); /* Func to print lines in this col. */ + void (*char_func) (); /* Func to print/store chars in this col. */ + int current_line; /* Index of current place in line_vector. */ + int lines_stored; /* Number of lines stored in buff. */ + int lines_to_print; /* No. lines stored or space left on page. */ + int start_position; /* Horizontal position of first char. */ + int numbered; +}; + +typedef struct COLUMN COLUMN; + +#define NULLCOL (COLUMN *)0 + +/* All of the columns to print. */ +COLUMN *column_vector; + +/* When printing a single file in multiple downward columns, + we store the leftmost columns contiguously in buff. + To print a line from buff, get the index of the first char + from line_vector[i], and print up to line_vector[i + 1]. */ +char *buff; + +/* Index of the position in buff where the next character + will be stored. */ +int buff_current; + +/* The number of characters in buff. + Used for allocation of buff and to detect overflow of buff. */ +int buff_allocated; + +/* Array of indices into buff. 
+ Each entry is an index of the first character of a line. + This is used when storing lines to facilitate shuffling when + we do column balancing on the last page. */ +int *line_vector; + +/* Array of horizontal positions. + For each line in line_vector, end_vector[line] is the horizontal + position we are in after printing that line. We keep track of this + so that we know how much we need to pad to prepare for the next + column. */ +int *end_vector; + +/* (-m) True means we're printing multiple files in parallel. */ +int parallel_files = FALSE; + +/* (-[0-9]+) True means we're given an option explicitly specifying + number of columns. Used to detect when this option is used with -m. */ +int explicit_columns = FALSE; + +/* (-t) True means we're printing headers and footers. */ +int extremities = TRUE; + +/* True means we need to print a header as soon as we know we've got input + to print after it. */ +int print_a_header; + +/* (-h) True means we're using the standard header rather than a + customized one specified by the -h flag. */ +int standard_header = TRUE; + +/* (-f) True means use formfeeds instead of newlines to separate pages. */ +int use_form_feed = FALSE; + +/* True means we haven't encountered any filenames in the argument list. */ +int input_is_stdin = TRUE; + +/* True means we have read the standard input. */ +int have_read_stdin = FALSE; + +/* True means the -a flag has been given. */ +int print_across_flag = FALSE; + +/* True means we're printing one file in multiple (>1) downward columns. */ +int storing_columns = TRUE; + +/* (-b) True means balance columns on the last page as Sys V does. */ +int balance_columns = FALSE; + +/* (-l) Number of lines on a page, including header and footer lines. */ +int lines_per_page = 66; + +/* Number of lines in the header and footer can be reset to 0 using + the -t flag. */ +int lines_per_header = 5; +int lines_per_body; +int lines_per_footer = 5; + +/* (-w) Width in characters of the page. 
Does not include the width of + the margin. */ +int chars_per_line = 72; + +/* Number of characters in a column. Based on the gutter and page widths. */ +int chars_per_column; + +/* (-e) True means convert tabs to spaces on input. */ +int untabify_input = FALSE; + +/* (-e) The input tab character. */ +char input_tab_char = '\t'; + +/* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... + where the leftmost column is 1. */ +int chars_per_input_tab = 8; + +/* (-i) True means convert spaces to tabs on output. */ +int tabify_output = FALSE; + +/* (-i) The output tab character. */ +char output_tab_char = '\t'; + +/* (-i) The width of the output tab. */ +int chars_per_output_tab = 8; + +/* Keeps track of pending white space. When we hit a nonspace + character after some whitespace, we print whitespace, tabbing + if necessary to get to output_position + spaces_not_printed. */ +int spaces_not_printed; + +/* Number of spaces between columns (though tabs can be used when possible to + use up the equivalent amount of space). Not sure if this is worth making + a flag for. BSD uses 0, Sys V uses 1. Sys V looks better. */ +int chars_per_gutter = 1; + +/* (-o) Number of spaces in the left margin (tabs used when possible). */ +int chars_per_margin = 0; + +/* Position where the next character will fall. + Leftmost position is 0 + chars_per_margin. + Rightmost position is chars_per_margin + chars_per_line - 1. + This is important for converting spaces to tabs on output. */ +int output_position; + +/* Horizontal position relative to the current file. + (output_position depends on where we are on the page; + input_position depends on where we are in the file.) + Important for converting tabs to spaces on input. */ +int input_position; + +/* Count number of failed opens so we can exit with non-zero + status if there were any. */ +int failed_opens = 0; + +/* The horizontal position we'll be at after printing a tab character + of width c_ from the position h_. 
*/ +#define pos_after_tab(c_, h_) h_ - h_ % c_ + c_ + +/* The number of spaces taken up if we print a tab character with width + c_ from position h_. */ +#define tab_width(c_, h_) - h_ % c_ + c_ + +/* (-NNN) Number of columns of text to print. */ +int columns = 1; + +/* (+NNN) Page number on which to begin printing. */ +int first_page_number = 1; + +/* Number of files open (not closed, not on hold). */ +int files_ready_to_read = 0; + +/* Number of columns with either an open file or stored lines. */ +int cols_ready_to_print = 0; + +/* Current page number. Displayed in header. */ +int page_number; + +/* Current line number. Displayed when -n flag is specified. + + When printing files in parallel (-m flag), line numbering is as follows: + 1 foo goo moo + 2 hoo too zoo + + When printing files across (-a flag), ... + 1 foo 2 moo 3 goo + 4 hoo 5 too 6 zoo + + Otherwise, line numbering is as follows: + 1 foo 3 goo 5 too + 2 moo 4 hoo 6 zoo */ +int line_number; + +/* (-n) True means lines should be preceded by numbers. */ +int numbered_lines = FALSE; + +/* True means print a number as soon as we know we'll be printing + from the current column. */ +int print_a_number; + +/* (-n) Character which follows each line number. */ +char number_separator = '\t'; + +/* (-n) Width in characters of a line number. */ +int chars_per_number = 5; + +/* Used when widening the first column to accommodate numbers -- only + needed when printing files in parallel. Includes width of both the + number and the number_separator. */ +int number_width; + +/* Buffer sprintf uses to format a line number. */ +char *number_buff; + +/* (-v) True means unprintable characters are printed as escape sequences. + control-g becomes \007. */ +int use_esc_sequence = FALSE; + +/* (-c) True means unprintable characters are printed as control prefixes. + control-g becomes ^G. */ +int use_cntrl_prefix = FALSE; + +/* (-d) True means output is double spaced. 
*/ +int double_space = FALSE; + +/* Number of files opened initially in init_files. Should be 1 + unless we're printing multiple files in parallel. */ +int total_files = 0; + +/* (-r) True means don't complain if we can't open a file. */ +int ignore_failed_opens = FALSE; + +/* (-s) True means we separate columns with a specified character. */ +int use_column_separator = FALSE; + +/* Character used to separate columns if the -s flag has been specified. */ +char column_separator = '\t'; + +/* Number of separator characters waiting to be printed as soon as we + know that we have any input remaining to be printed. */ +int separators_not_printed; + +/* Position we need to pad to, as soon as we know that we have input + remaining to be printed. */ +int padding_not_printed; + +/* True means we should pad the end of the page. Remains false until we + know we have a page to print. */ +int pad_vertically; + +/* (-h) String of characters used in place of the filename in the header. */ +char *custom_header; + +/* String containing the date, filename or custom header, and "Page ". */ +char *header; + +int *clump_buff; + +/* True means we truncate lines longer than chars_per_column. */ +int truncate_lines = FALSE; + +/* The name under which this program was invoked. */ +char *program_name; + +void +main (argc, argv) + int argc; + char **argv; +{ + int c; + char *s; + int files = 0; + char **file_names, **file_name_vector; + int accum = 0; + + program_name = argv[0]; + + file_name_vector = (char **) xmalloc (argc * sizeof (char **)); + file_names = file_name_vector; + + for (;;) + { + c = getopt (argc, argv, "-0123456789abcde::fFh:i::l:mn::o:rs::tvw:"); + + if (c == 1) /* Non-option argument. 
*/ + { + s = optarg; + if (*s == '+') + { + if (!ISDIGIT (*++s)) + usage ("`+' requires a numeric argument"); + first_page_number = atoi (s); + } + else + { + *file_names++ = optarg; + ++files; + } + } + else if (files > 0) + { + if (parallel_files && explicit_columns) + error (1, 0, +"Cannot specify number of columns when printing in parallel."); + + if (parallel_files && print_across_flag) + error (1, 0, +"Cannot specify both printing across and printing in parallel."); + + if (parallel_files) + print_files (files, file_name_vector); + else + { + file_names = file_name_vector; + while (files--) + print_files (1, file_names++); + } + + input_is_stdin = FALSE; + file_names = file_name_vector; + files = 0; + cleanup (); + } + + if (ISDIGIT (c)) + { + accum = accum * 10 + c - '0'; + continue; + } + else + { + if (accum > 0) + { + columns = accum; + explicit_columns = TRUE; + } + accum = 0; + } + + switch (c) + { + case 'a': + print_across_flag = TRUE; + storing_columns = FALSE; + break; + case 'b': + balance_columns = TRUE; + break; + case 'c': + use_cntrl_prefix = TRUE; + break; + case 'd': + double_space = TRUE; + break; + case 'e': + if (optarg) + getoptarg (optarg, 'e', &input_tab_char, + &chars_per_input_tab); + /* Could check tab width > 0. */ + untabify_input = TRUE; + break; + case 'f': + case 'F': + use_form_feed = TRUE; + break; + case 'h': + custom_header = optarg; + standard_header = FALSE; + break; + case 'i': + if (optarg) + getoptarg (optarg, 'i', &output_tab_char, + &chars_per_output_tab); + /* Could check tab width > 0. 
*/ + tabify_output = TRUE; + break; + case 'l': + lines_per_page = atoi (optarg); + break; + case 'm': + parallel_files = TRUE; + storing_columns = FALSE; + break; + case 'n': + numbered_lines = TRUE; + if (optarg) + getoptarg (optarg, 'n', &number_separator, + &chars_per_number); + break; + case 'o': + chars_per_margin = atoi (optarg); + break; + case 'r': + ignore_failed_opens = TRUE; + break; + case 's': + use_column_separator = TRUE; + if (optarg) + { + s = optarg; + column_separator = *s; + if (*++s) + { + fprintf (stderr, "\ +%s: extra characters in the argument to the `-s' option: `%s'\n", + program_name, s); + usage ((char *) 0); + } + } + break; + case 't': + extremities = FALSE; + break; + case 'v': + use_esc_sequence = TRUE; + break; + case 'w': + chars_per_line = atoi (optarg); + break; + case '?': + usage ((char *) 0); + break; + } + + if (c == EOF) + break; + } + + if (input_is_stdin) + print_files (0, (char **) 0); + + if (have_read_stdin && fclose (stdin) == EOF) + error (1, errno, "standard input"); + if (ferror (stdout) || fclose (stdout) == EOF) + error (1, errno, "write error"); + if (failed_opens > 0) + exit(1); + exit (0); +} + +/* Parse options of the form -scNNN. + + Example: -nck, where 'n' is the option, c is the optional number + separator, and k is the optional width of the field used when printing + a number. */ + +void +getoptarg (arg, switch_char, character, number) + char *arg, switch_char, *character; + int *number; +{ + if (!ISDIGIT (*arg)) + *character = *arg++; + if (*arg) + { + if (ISDIGIT (*arg)) + *number = atoi (arg); + else + { + fprintf (stderr, "\ +%s: extra characters in the argument to the `-%c' option: `%s'\n", + program_name, switch_char, arg); + usage ((char *) 0); + } + } +} + +/* Set parameters related to formatting. 
*/ + +void +init_parameters (number_of_files) + int number_of_files; +{ + int chars_used_by_number = 0; + + lines_per_body = lines_per_page - lines_per_header - lines_per_footer; + if (lines_per_body <= 0) + extremities = FALSE; + if (extremities == FALSE) + lines_per_body = lines_per_page; + + if (double_space) + lines_per_body = lines_per_body / 2; + + /* If input is stdin, cannot print parallel files. BSD dumps core + on this. */ + if (number_of_files == 0) + parallel_files = FALSE; + + if (parallel_files) + columns = number_of_files; + + /* Tabification is assumed for multiple columns. */ + if (columns > 1) + { + if (!use_column_separator) + truncate_lines = TRUE; + + untabify_input = TRUE; + tabify_output = TRUE; + } + else + storing_columns = FALSE; + + if (numbered_lines) + { + if (number_separator == input_tab_char) + { + number_width = chars_per_number + + tab_width (chars_per_input_tab, + (chars_per_margin + chars_per_number)); + } + else + number_width = chars_per_number + 1; + /* The number is part of the column width unless we are + printing files in parallel. */ + if (parallel_files) + chars_used_by_number = number_width; + } + + chars_per_column = (chars_per_line - chars_used_by_number - + (columns - 1) * chars_per_gutter) / columns; + + if (chars_per_column < 1) + error (1, 0, "page width too narrow"); + + if (numbered_lines) + { + if (number_buff != (char *) 0) + free (number_buff); + number_buff = (char *) + xmalloc (2 * chars_per_number * sizeof (char)); + } + + /* Pick the maximum between the tab width and the width of an + escape sequence. */ + if (clump_buff != (int *) 0) + free (clump_buff); + clump_buff = (int *) xmalloc ((chars_per_input_tab > 4 + ? chars_per_input_tab : 4) * sizeof (int)); +} + +/* Open the necessary files, + maintaining a COLUMN structure for each column. + + With multiple files, each column p has a different p->fp. + With single files, each column p has the same p->fp. 
+ Return 1 if (number_of_files > 0) and no files can be opened, + 0 otherwise. */ + +int +init_fps (number_of_files, av) + int number_of_files; + char **av; +{ + int i, files_left; + COLUMN *p; + FILE *firstfp; + char *firstname; + + total_files = 0; + + if (column_vector != NULLCOL) + free ((char *) column_vector); + column_vector = (COLUMN *) xmalloc (columns * sizeof (COLUMN)); + + if (parallel_files) + { + files_left = number_of_files; + for (p = column_vector; files_left--; ++p, ++av) + { + if (open_file (*av, p) == 0) + { + --p; + --columns; + } + } + if (columns == 0) + return 1; + init_header ("", -1); + } + else + { + p = column_vector; + if (number_of_files > 0) + { + if (open_file (*av, p) == 0) + return 1; + init_header (*av, fileno (p->fp)); + } + else + { + p->name = "standard input"; + p->fp = stdin; + have_read_stdin = TRUE; + p->status = OPEN; + ++total_files; + init_header ("", -1); + } + + firstname = p->name; + firstfp = p->fp; + for (i = columns - 1, ++p; i; --i, ++p) + { + p->name = firstname; + p->fp = firstfp; + p->status = OPEN; + } + } + files_ready_to_read = total_files; + return 0; +} + +/* Determine print_func and char_func, the functions + used by each column for printing and/or storing. + + Determine the horizontal position desired when we begin + printing a column (p->start_position). */ + +void +init_funcs () +{ + int i, h, h_next; + COLUMN *p; + + h = chars_per_margin; + + if (use_column_separator) + h_next = ANYWHERE; + else + { + /* When numbering lines of parallel files, we enlarge the + first column to accommodate the number. Looks better than + the Sys V approach. */ + if (parallel_files && numbered_lines) + h_next = h + chars_per_column + number_width; + else + h_next = h + chars_per_column; + } + + /* This loop takes care of all but the rightmost column. */ + + for (p = column_vector, i = 1; i < columns; ++p, ++i) + { + if (storing_columns) /* One file, multi columns down. 
*/ + { + p->char_func = store_char; + p->print_func = print_stored; + } + else + /* One file, multi columns across; or parallel files. */ + { + p->char_func = print_char; + p->print_func = read_line; + } + + /* Number only the first column when printing files in + parallel. */ + p->numbered = numbered_lines && (!parallel_files || i == 1); + p->start_position = h; + + /* If we're using separators, all start_positions are + ANYWHERE, except the first column's start_position when + using a margin. */ + + if (use_column_separator) + { + h = ANYWHERE; + h_next = ANYWHERE; + } + else + { + h = h_next + chars_per_gutter; + h_next = h + chars_per_column; + } + } + + /* The rightmost column. + + Doesn't need to be stored unless we intend to balance + columns on the last page. */ + if (storing_columns && balance_columns) + { + p->char_func = store_char; + p->print_func = print_stored; + } + else + { + p->char_func = print_char; + p->print_func = read_line; + } + + p->numbered = numbered_lines && (!parallel_files || i == 1); + p->start_position = h; +} + +/* Open a file. Return nonzero if successful, zero if failed. */ + +int +open_file (name, p) + char *name; + COLUMN *p; +{ + if (!strcmp (name, "-")) + { + p->name = "standard input"; + p->fp = stdin; + have_read_stdin = 1; + } + else + { + p->name = name; + p->fp = fopen (name, "r"); + } + if (p->fp == NULL) + { + ++failed_opens; + if (!ignore_failed_opens) + error (0, errno, "%s", name); + return 0; + } + p->status = OPEN; + ++total_files; + return 1; +} + +/* Close the file in P. + + If we aren't dealing with multiple files in parallel, we change + the status of all columns in the column list to reflect the close. 
*/ + +void +close_file (p) + COLUMN *p; +{ + COLUMN *q; + int i; + + if (p->status == CLOSED) + return; + if (ferror (p->fp)) + error (1, errno, "%s", p->name); + if (p->fp != stdin && fclose (p->fp) == EOF) + error (1, errno, "%s", p->name); + + if (!parallel_files) + { + for (q = column_vector, i = columns; i; ++q, --i) + { + q->status = CLOSED; + if (q->lines_stored == 0) + { +#if 0 + if (cols_ready_to_print > 0) + --cols_ready_to_print; +#endif + q->lines_to_print = 0; + } + } + } + else + { + p->status = CLOSED; + p->lines_to_print = 0; + } + + --files_ready_to_read; +} + +/* Put a file on hold until we start a new page, + since we've hit a form feed. + + If we aren't dealing with parallel files, we must change the + status of all columns in the column list. */ + +void +hold_file (p) + COLUMN *p; +{ + COLUMN *q; + int i; + + if (!parallel_files) + for (q = column_vector, i = columns; i; ++q, --i) + q->status = ON_HOLD; + else + p->status = ON_HOLD; + p->lines_to_print = 0; + --files_ready_to_read; +} + +/* Undo hold_file -- go through the column list and change any + ON_HOLD columns to OPEN. Used at the end of each page. */ + +void +reset_status () +{ + int i = columns; + COLUMN *p; + + for (p = column_vector; i; --i, ++p) + if (p->status == ON_HOLD) + { + p->status = OPEN; + files_ready_to_read++; + } +} + +/* Print a single file, or multiple files in parallel. + + Set up the list of columns, opening the necessary files. + Allocate space for storing columns, if necessary. + Skip to first_page_number, if user has asked to skip leading pages. + Determine which functions are appropriate to store/print lines + in each column. + Print the file(s). 
*/ + +void +print_files (number_of_files, av) + int number_of_files; + char **av; +{ + init_parameters (number_of_files); + if (init_fps (number_of_files, av)) + return; + if (storing_columns) + init_store_cols (); + + if (first_page_number > 1) + { + if (!skip_to_page (first_page_number)) + return; + else + page_number = first_page_number; + } + else + page_number = 1; + + init_funcs (); + + line_number = 1; + while (print_page ()) + ; +} + +/* Generous estimate of number of characters taken up by "Jun 7 00:08 " and + "Page NNNNN". */ +#define CHARS_FOR_DATE_AND_PAGE 50 + +/* Initialize header information. + If DESC is non-negative, it is a file descriptor open to + FILENAME for reading. + + Allocate space for a header string, + Determine the time, insert file name or user-specified string. + + It might be nice to have a "blank headers" option, since + pr -h "" still prints the date and page number. */ + +void +init_header (filename, desc) + char *filename; + int desc; +{ + int chars_per_header; + char *f = filename; + char *t, *middle; + struct stat st; + + if (filename == 0) + f = ""; + + /* If parallel files or standard input, use current time. */ + if (desc < 0 || !strcmp (filename, "-") || fstat (desc, &st)) + st.st_mtime = time ((time_t *) 0); + t = ctime (&st.st_mtime); + + t[16] = '\0'; /* Mark end of month and time string. */ + t[24] = '\0'; /* Mark end of year string. */ + + middle = standard_header ? f : custom_header; + + chars_per_header = strlen (middle) + CHARS_FOR_DATE_AND_PAGE + 1; + if (header != (char *) 0) + free (header); + header = (char *) xmalloc (chars_per_header * sizeof (char)); + + sprintf (header, "%s %s %s Page", &t[4], &t[20], middle); +} + +/* Set things up for printing a page + + Scan through the columns ... 
+ Determine which are ready to print + (i.e., which have lines stored or open files) + Set p->lines_to_print appropriately + (to p->lines_stored if we're storing, or lines_per_body + if we're reading straight from the file) + Keep track of this total so we know when to stop printing */ + +void +init_page () +{ + int j; + COLUMN *p; + + cols_ready_to_print = 0; + + if (storing_columns) + { + store_columns (); + for (j = columns - 1, p = column_vector; j; --j, ++p) + { + p->lines_to_print = p->lines_stored; + if (p->lines_to_print != 0) + ++cols_ready_to_print; + } + + /* Last column. */ + if (balance_columns) + { + p->lines_to_print = p->lines_stored; + if (p->lines_to_print != 0) + ++cols_ready_to_print; + } + /* Since we're not balancing columns, we don't need to store + the rightmost column. Read it straight from the file. */ + else + { + if (p->status == OPEN) + { + p->lines_to_print = lines_per_body; + ++cols_ready_to_print; + } + else + p->lines_to_print = 0; + } + } + else + for (j = columns, p = column_vector; j; --j, ++p) + if (p->status == OPEN) + { + p->lines_to_print = lines_per_body; + ++cols_ready_to_print; + } + else + p->lines_to_print = 0; +} + +/* Print one page. + + As long as there are lines left on the page and columns ready to print, + Scan across the column list + if the column has stored lines or the file is open + pad to the appropriate spot + print the column + pad the remainder of the page with \n or \f as requested + reset the status of all files -- any files which where on hold because + of formfeeds are now put back into the lineup. */ + +int +print_page () +{ + int j; + int lines_left_on_page; + COLUMN *p; + + /* Used as an accumulator (with | operator) of successive values of + pad_vertically. The trick is to set pad_vertically + to zero before each run through the inner loop, then after that + loop, it tells us whether a line was actually printed (whether a + newline needs to be output -- or two for double spacing). 
But those + values have to be accumulated (in pv) so we can invoke pad_down + properly after the outer loop completes. */ + int pv; + + init_page (); + + if (cols_ready_to_print == 0) + return FALSE; + + if (extremities) + print_a_header = TRUE; + + /* Don't pad unless we know a page was printed. */ + pad_vertically = FALSE; + pv = FALSE; + + lines_left_on_page = lines_per_body; + if (double_space) + lines_left_on_page *= 2; + + while (lines_left_on_page > 0 && cols_ready_to_print > 0) + { + output_position = 0; + spaces_not_printed = 0; + separators_not_printed = 0; + pad_vertically = FALSE; + + for (j = 1, p = column_vector; j <= columns; ++j, ++p) + { + input_position = 0; + if (p->lines_to_print > 0) + { + padding_not_printed = p->start_position; + + if (!(p->print_func) (p)) + read_rest_of_line (p); + pv |= pad_vertically; + + if (use_column_separator) + ++separators_not_printed; + + if (--p->lines_to_print <= 0 && --cols_ready_to_print <= 0) + break; + } + } + + if (pad_vertically) + { + putchar ('\n'); + --lines_left_on_page; + } + + if (double_space && pv && extremities) + { + putchar ('\n'); + --lines_left_on_page; + } + } + + pad_vertically = pv; + + if (pad_vertically && extremities) + pad_down (lines_left_on_page + lines_per_footer); + + reset_status (); /* Change ON_HOLD to OPEN. */ + + return TRUE; /* More pages to go. */ +} + +/* Allocate space for storing columns. + + This is necessary when printing multiple columns from a single file. + Lines are stored consecutively in buff, separated by '\0'. + (We can't use a fixed offset since with the '-s' flag lines aren't + truncated.) + + We maintain a list (line_vector) of pointers to the beginnings + of lines in buff. We allocate one more than the number of lines + because the last entry tells us the index of the last character, + which we need to know in order to print the last line in buff. 
*/ + +void +init_store_cols () +{ + int total_lines = lines_per_body * columns; + int chars_if_truncate = total_lines * (chars_per_column + 1); + + if (line_vector != (int *) 0) + free ((int *) line_vector); + line_vector = (int *) xmalloc ((total_lines + 1) * sizeof (int *)); + + if (end_vector != (int *) 0) + free ((int *) end_vector); + end_vector = (int *) xmalloc (total_lines * sizeof (int *)); + + if (buff != (char *) 0) + free (buff); + buff_allocated = use_column_separator ? 2 * chars_if_truncate + : chars_if_truncate; /* Tune this. */ + buff = (char *) xmalloc (buff_allocated * sizeof (char)); +} + +/* Store all but the rightmost column. + (Used when printing a single file in multiple downward columns) + + For each column + set p->current_line to be the index in line_vector of the + first line in the column + For each line in the column + store the line in buff + add to line_vector the index of the line's first char + buff_start is the index in buff of the first character in the + current line. */ + +void +store_columns () +{ + int i, j; + int line = 0; + int buff_start; + int last_col; /* The rightmost column which will be saved in buff */ + COLUMN *p; + + buff_current = 0; + buff_start = 0; + + if (balance_columns) + last_col = columns; + else + last_col = columns - 1; + + for (i = 1, p = column_vector; i <= last_col; ++i, ++p) + p->lines_stored = 0; + + for (i = 1, p = column_vector; i <= last_col && files_ready_to_read; + ++i, ++p) + { + p->current_line = line; + for (j = lines_per_body; j && files_ready_to_read; --j) + + if (p->status == OPEN) /* Redundant. Clean up. */ + { + input_position = 0; + + if (!read_line (p, i)) + read_rest_of_line (p); + + if (p->status == OPEN + || buff_start != buff_current) + { + ++p->lines_stored; + line_vector[line] = buff_start; + end_vector[line++] = input_position; + buff_start = buff_current; + } + } + } + + /* Keep track of the location of the last char in buff. 
*/ + line_vector[line] = buff_start; + + if (balance_columns && p->lines_stored != lines_per_body) + balance (line); +} + +void +balance (total_stored) + int total_stored; +{ + COLUMN *p; + int i, lines; + int first_line = 0; + + for (i = 1, p = column_vector; i <= columns; ++i, ++p) + { + lines = total_stored / columns; + if (i <= total_stored % columns) + ++lines; + + p->lines_stored = lines; + p->current_line = first_line; + + first_line += lines; + } +} + +/* Store a character in the buffer. */ + +void +store_char (c) + int c; +{ + if (buff_current >= buff_allocated) + { + /* May be too generous. */ + buff_allocated = 2 * buff_allocated; + buff = (char *) xrealloc (buff, buff_allocated * sizeof (char)); + } + buff[buff_current++] = (char) c; +} + +void +number (p) + COLUMN *p; +{ + int i; + char *s; + + sprintf (number_buff, "%*d", chars_per_number, line_number++); + s = number_buff; + for (i = chars_per_number; i > 0; i--) + (p->char_func) ((int) *s++); + + if (number_separator == input_tab_char) + { + i = number_width - chars_per_number; + while (i-- > 0) + (p->char_func) ((int) ' '); + } + else + (p->char_func) ((int) number_separator); + + if (truncate_lines && !parallel_files) + input_position += number_width; +} + +/* Print (or store) padding until the current horizontal position + is position. */ + +void +pad_across_to (position) + int position; +{ + register int h = output_position; + + if (tabify_output) + spaces_not_printed = position - output_position; + else + { + while (++h <= position) + putchar (' '); + output_position = position; + } +} + +/* Pad to the bottom of the page. + + If the user has requested a formfeed, use one. + Otherwise, use newlines. */ + +void +pad_down (lines) + int lines; +{ + register int i; + + if (use_form_feed) + putchar ('\f'); + else + for (i = lines; i; --i) + putchar ('\n'); +} + +/* Read the rest of the line. + + Read from the current column's file until an end of line is + hit. 
Used when we've truncated a line and we no longer need + to print or store its characters. */ + +void +read_rest_of_line (p) + COLUMN *p; +{ + register int c; + FILE *f = p->fp; + + while ((c = getc (f)) != '\n') + { + if (c == '\f') + { + hold_file (p); + break; + } + else if (c == EOF) + { + close_file (p); + break; + } + } +} + +/* If we're tabifying output, + + When print_char encounters white space it keeps track + of our desired horizontal position and delays printing + until this function is called. */ + +void +print_white_space () +{ + register int h_new; + register int h_old = output_position; + register int goal = h_old + spaces_not_printed; + + while (goal - h_old > 1 + && (h_new = pos_after_tab (chars_per_output_tab, h_old)) <= goal) + { + putchar (output_tab_char); + h_old = h_new; + } + while (++h_old <= goal) + putchar (' '); + + output_position = goal; + spaces_not_printed = 0; +} + +/* Print column separators. + + We keep a count until we know that we'll be printing a line, + then print_separators() is called. */ + +void +print_separators () +{ + for (; separators_not_printed > 0; --separators_not_printed) + print_char (column_separator); +} + +/* Print (or store, depending on p->char_func) a clump of N + characters. */ + +void +print_clump (p, n, clump) + COLUMN *p; + int n; + int *clump; +{ + while (n--) + (p->char_func) (*clump++); +} + +/* Print a character. + + If we're tabifying, all tabs have been converted to spaces by + process_char(). Keep a count of consecutive spaces, and when + a nonspace is encountered, call print_white_space() to print the + required number of tabs and spaces. */ + +void +print_char (c) + int c; +{ + if (tabify_output) + { + if (c == ' ') + { + ++spaces_not_printed; + return; + } + else if (spaces_not_printed > 0) + print_white_space (); + + /* Nonprintables are assumed to have width 0, except '\b'. 
*/ + if (!ISPRINT (c)) + { + if (c == '\b') + --output_position; + } + else + ++output_position; + } + putchar (c); +} + +/* Skip to page PAGE before printing. */ + +int +skip_to_page (page) + int page; +{ + int n, i, j; + COLUMN *p; + + for (n = 1; n < page; ++n) + { + for (i = 1; i <= lines_per_body; ++i) + { + for (j = 1, p = column_vector; j <= columns; ++j, ++p) + read_rest_of_line (p); + } + reset_status (); + } + return files_ready_to_read > 0; +} + +/* Print a header. + + Formfeeds are assumed to use up two lines at the beginning of + the page. */ + +void +print_header () +{ + if (!use_form_feed) + fprintf (stdout, "\n\n"); + + output_position = 0; + pad_across_to (chars_per_margin); + print_white_space (); + + fprintf (stdout, "%s %d\n\n\n", header, page_number++); + + print_a_header = FALSE; + output_position = 0; +} + +/* Print (or store, if p->char_func is store_char()) a line. + + Read a character to determine whether we have a line or not. + (We may hit EOF, \n, or \f) + + Once we know we have a line, + set pad_vertically = TRUE, meaning it's safe + to pad down at the end of the page, since we do have a page. + print a header if needed. + pad across to padding_not_printed if needed. + print any separators which need to be printed. + print a line number if it needs to be printed. + + Print the clump which corresponds to the first character. + + Enter a loop and keep printing until an end of line condition + exists, or until we exceed chars_per_column. + + Return FALSE if we exceed chars_per_column before reading + an end of line character, TRUE otherwise. 
*/ + +int +read_line (p) + COLUMN *p; +{ + register int c, chars; + int last_input_position; + + c = getc (p->fp); + + last_input_position = input_position; + switch (c) + { + case '\f': + hold_file (p); + return TRUE; + case EOF: + close_file (p); + return TRUE; + case '\n': + break; + default: + chars = char_to_clump (c); + } + + if (truncate_lines && input_position > chars_per_column) + { + input_position = last_input_position; + return FALSE; + } + + if (p->char_func != store_char) + { + pad_vertically = TRUE; + + if (print_a_header) + print_header (); + + if (padding_not_printed != ANYWHERE) + { + pad_across_to (padding_not_printed); + padding_not_printed = ANYWHERE; + } + + if (use_column_separator) + print_separators (); + } + + if (p->numbered) + number (p); + + if (c == '\n') + return TRUE; + + print_clump (p, chars, clump_buff); + + for (;;) + { + c = getc (p->fp); + + switch (c) + { + case '\n': + return TRUE; + case '\f': + hold_file (p); + return TRUE; + case EOF: + close_file (p); + return TRUE; + } + + last_input_position = input_position; + chars = char_to_clump (c); + if (truncate_lines && input_position > chars_per_column) + { + input_position = last_input_position; + return FALSE; + } + + print_clump (p, chars, clump_buff); + } +} + +/* Print a line from buff. + + If this function has been called, we know we have something to + print. Therefore we set pad_vertically to TRUE, print + a header if necessary, pad across if necessary, and print + separators if necessary. + + Return TRUE, meaning there is no need to call read_rest_of_line. 
*/ + +int +print_stored (p) + COLUMN *p; +{ + int line = p->current_line++; + register char *first = &buff[line_vector[line]]; + register char *last = &buff[line_vector[line + 1]]; + + pad_vertically = TRUE; + + if (print_a_header) + print_header (); + + if (padding_not_printed != ANYWHERE) + { + pad_across_to (padding_not_printed); + padding_not_printed = ANYWHERE; + } + + if (use_column_separator) + print_separators (); + + while (first != last) + print_char (*first++); + + if (spaces_not_printed == 0) + output_position = p->start_position + end_vector[line]; + + return TRUE; +} + +/* Convert a character to the proper format and return the number of + characters in the resulting clump. Increment input_position by + the width of the clump. + + Tabs are converted to clumps of spaces. + Nonprintable characters may be converted to clumps of escape + sequences or control prefixes. + + Note: the width of a clump is not necessarily equal to the number of + characters in clump_buff. (e.g, the width of '\b' is -1, while the + number of characters is 1.) 
*/ + +int +char_to_clump (c) + int c; +{ + register int *s = clump_buff; + register int i; + char esc_buff[4]; + int width; + int chars; + + if (c == input_tab_char) + { + width = tab_width (chars_per_input_tab, input_position); + + if (untabify_input) + { + for (i = width; i; --i) + *s++ = ' '; + chars = width; + } + else + { + *s = c; + chars = 1; + } + + } + else if (!ISPRINT (c)) + { + if (use_esc_sequence) + { + width = 4; + chars = 4; + *s++ = '\\'; + sprintf (esc_buff, "%03o", c); + for (i = 0; i <= 2; ++i) + *s++ = (int) esc_buff[i]; + } + else if (use_cntrl_prefix) + { + if (c < 0200) + { + width = 2; + chars = 2; + *s++ = '^'; + *s++ = c ^ 0100; + } + else + { + width = 4; + chars = 4; + *s++ = '\\'; + sprintf (esc_buff, "%03o", c); + for (i = 0; i <= 2; ++i) + *s++ = (int) esc_buff[i]; + } + } + else if (c == '\b') + { + width = -1; + chars = 1; + *s = c; + } + else + { + width = 0; + chars = 1; + *s = c; + } + } + else + { + width = 1; + chars = 1; + *s = c; + } + + input_position += width; + return chars; +} + +/* We've just printed some files and need to clean up things before + looking for more options and printing the next batch of files. + + Free everything we've xmalloc'ed, except `header'. */ + +void +cleanup () +{ + if (number_buff) + free (number_buff); + if (clump_buff) + free (clump_buff); + if (column_vector) + free (column_vector); + if (line_vector) + free (line_vector); + if (end_vector) + free (end_vector); + if (buff) + free (buff); +} + +/* Complain, print a usage message, and die. 
*/ + +void +usage (reason) + char *reason; +{ + if (reason) + fprintf (stderr, "%s: %s\n", program_name, reason); + + fprintf (stderr, "\ +Usage: %s [+PAGE] [-COLUMN] [-abcdfFmrtv] [-e[in-tab-char[in-tab-width]]]\n\ + [-h header] [-i[out-tab-char[out-tab-width]]] [-l page-length]\n\ + [-n[number-separator[digits]]] [-o left-margin]\n\ + [-s[column-separator]] [-w page-width] [file...]\n", + program_name); + exit (2); +} diff --git a/src/sort.c b/src/sort.c new file mode 100644 index 000000000..de8b937e2 --- /dev/null +++ b/src/sort.c @@ -0,0 +1,1746 @@ +/* sort - sort lines of text (with all kinds of options). + Copyright (C) 1988, 1991 Free Software Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Written December 1988 by Mike Haertel. + The author may be reached (Email) at the address mike@ai.mit.edu, + or (US mail) as Mike Haertel c/o Free Software Foundation. */ + +#define _GNU_SOURCE +#include <ctype.h> +#ifndef isblank +#define isblank(c) ((c) == ' ' || (c) == '\t') +#endif +#include <sys/types.h> +#include <signal.h> +#include <stdio.h> +#include "system.h" +#ifdef _POSIX_VERSION +#include <limits.h> +#else +#ifndef UCHAR_MAX +#define UCHAR_MAX 255 +#endif +#endif +#ifndef STDC_HEADERS +char *malloc (); +char *realloc (); +void free (); +#endif + +void error (); +static void usage (); + +#define min(a, b) ((a) < (b) ? 
(a) : (b)) +#define UCHAR_LIM (UCHAR_MAX + 1) +#define UCHAR(c) ((unsigned char) (c)) + +#ifdef isascii +#define ISALNUM(c) (isascii(c) && isalnum(c)) +#define ISDIGIT(c) (isascii(c) && isdigit(c)) +#define ISPRINT(c) (isascii(c) && isprint(c)) +#define ISLOWER(c) (isascii(c) && islower(c)) +#else +#define ISALNUM(c) isalnum(c) +#define ISDIGIT(c) isdigit(c) +#define ISPRINT(c) isprint(c) +#define ISLOWER(c) islower(c) +#endif + +/* The kind of blanks for '-b' to skip in various options. */ +enum blanktype { bl_start, bl_end, bl_both }; + +/* The name this program was run with. */ +char *program_name; + +/* Table of digits. */ +static int digits[UCHAR_LIM]; + +/* Table of white space. */ +static int blanks[UCHAR_LIM]; + +/* Table of non-printing characters. */ +static int nonprinting[UCHAR_LIM]; + +/* Table of non-dictionary characters (not letters, digits, or blanks). */ +static int nondictionary[UCHAR_LIM]; + +/* Translation table folding lower case to upper. */ +static char fold_toupper[UCHAR_LIM]; + +/* Table mapping 3-letter month names to integers. + Alphabetic order allows binary search. */ +static struct month +{ + char *name; + int val; +} monthtab[] = +{ + "APR", 4, + "AUG", 8, + "DEC", 12, + "FEB", 2, + "JAN", 1, + "JUL", 7, + "JUN", 6, + "MAR", 3, + "MAY", 5, + "NOV", 11, + "OCT", 10, + "SEP", 9 +}; + +/* During the merge phase, the number of files to merge at once. */ +#define NMERGE 16 + +/* Initial buffer size for in core sorting. Will not grow unless a + line longer than this is seen. */ +static int sortalloc = 524288; + +/* Initial buffer size for in core merge buffers. Bear in mind that + up to NMERGE * mergealloc bytes may be allocated for merge buffers. */ +static int mergealloc = 16384; + +/* Guess of average line length. */ +static int linelength = 30; + +/* Maximum number of elements for the array(s) of struct line's, in bytes. */ +#define LINEALLOC 262144 + +/* Prefix for temporary file names. 
*/ +static char *prefix; + +/* Flag to reverse the order of all comparisons. */ +static int reverse; + +/* Flag for stable sort. This turns off the last ditch bytewise + comparison of lines, and instead leaves lines in the same order + they were read if all keys compare equal. */ +static int stable; + +/* Tab character separating fields. If NUL, then fields are separated + by the empty string between a non-whitespace character and a whitespace + character. */ +static char tab; + +/* Flag to remove consecutive duplicate lines from the output. + Only the last of a sequence of equal lines will be output. */ +static int unique; + +/* Nonzero if any of the input files are the standard input. */ +static int have_read_stdin; + +/* Lines are held in core as counted strings. */ +struct line +{ + char *text; /* Text of the line. */ + int length; /* Length not including final newline. */ + char *keybeg; /* Start of first key. */ + char *keylim; /* Limit of first key. */ +}; + +/* Arrays of lines. */ +struct lines +{ + struct line *lines; /* Dynamically allocated array of lines. */ + int used; /* Number of slots used. */ + int alloc; /* Number of slots allocated. */ + int limit; /* Max number of slots to allocate. */ +}; + +/* Input buffers. */ +struct buffer +{ + char *buf; /* Dynamically allocated buffer. */ + int used; /* Number of bytes used. */ + int alloc; /* Number of bytes allocated. */ + int left; /* Number of bytes left after line parsing. */ +}; + +/* Lists of key field comparisons to be tried. */ +static struct keyfield +{ + int sword; /* Zero-origin 'word' to start at. */ + int schar; /* Additional characters to skip. */ + int skipsblanks; /* Skip leading white space at start. */ + int eword; /* Zero-origin first word after field. */ + int echar; /* Additional characters in field. */ + int skipeblanks; /* Skip trailing white space at finish. */ + int *ignore; /* Boolean array of characters to ignore. */ + char *translate; /* Translation applied to characters. 
*/ + int numeric; /* Flag for numeric comparison. */ + int month; /* Flag for comparison by month name. */ + int reverse; /* Reverse the sense of comparison. */ + struct keyfield *next; /* Next keyfield to try. */ +} keyhead; + +/* The list of temporary files. */ +static struct tempnode +{ + char *name; + struct tempnode *next; +} temphead; + +/* Clean up any remaining temporary files. */ + +static void +cleanup () +{ + struct tempnode *node; + + for (node = temphead.next; node; node = node->next) + unlink (node->name); +} + +/* Allocate N bytes of memory dynamically, with error checking. */ + +char * +xmalloc (n) + unsigned n; +{ + char *p; + + p = malloc (n); + if (p == 0) + { + error (0, 0, "virtual memory exhausted"); + cleanup (); + exit (2); + } + return p; +} + +/* Change the size of an allocated block of memory P to N bytes, + with error checking. + If P is NULL, run xmalloc. + If N is 0, run free and return NULL. */ + +char * +xrealloc (p, n) + char *p; + unsigned n; +{ + if (p == 0) + return xmalloc (n); + if (n == 0) + { + free (p); + return 0; + } + p = realloc (p, n); + if (p == 0) + { + error (0, 0, "virtual memory exhausted"); + cleanup (); + exit (2); + } + return p; +} + +static FILE * +xfopen (file, how) + char *file, *how; +{ + FILE *fp = strcmp (file, "-") ? fopen (file, how) : stdin; + + if (fp == 0) + { + error (0, errno, "%s", file); + cleanup (); + exit (2); + } + if (fp == stdin) + have_read_stdin = 1; + return fp; +} + +static void +xfclose (fp) + FILE *fp; +{ + fflush (fp); + if (fp != stdin && fp != stdout) + { + if (fclose (fp) != 0) + { + error (0, errno, "error closing file"); + cleanup (); + exit (2); + } + } + else + /* Allow reading stdin from tty more than once. 
*/ + clearerr (fp); +} + +static void +xfwrite (buf, size, nelem, fp) + char *buf; + int size, nelem; + FILE *fp; +{ + if (fwrite (buf, size, nelem, fp) != nelem) + { + error (0, errno, "write error"); + cleanup (); + exit (2); + } +} + +/* Return a name for a temporary file. */ + +static char * +tempname () +{ + static int seq; + int len = strlen (prefix); + char *name = xmalloc (len + 16); + struct tempnode *node = + (struct tempnode *) xmalloc (sizeof (struct tempnode)); + + if (len && prefix[len - 1] != '/') + sprintf (name, "%s/sort%5.5d%5.5d", prefix, getpid (), ++seq); + else + sprintf (name, "%ssort%5.5d%5.5d", prefix, getpid (), ++seq); + node->name = name; + node->next = temphead.next; + temphead.next = node; + return name; +} + +/* Search through the list of temporary files for NAME; + remove it if it is found on the list. */ + +static void +zaptemp (name) + char *name; +{ + struct tempnode *node, *temp; + + for (node = &temphead; node->next; node = node->next) + if (!strcmp (name, node->next->name)) + break; + if (node->next) + { + temp = node->next; + unlink (temp->name); + free (temp->name); + node->next = temp->next; + free ((char *) temp); + } +} + +/* Initialize the character class tables. */ + +static void +inittables () +{ + int i; + + for (i = 0; i < UCHAR_LIM; ++i) + { + if (isblank (i)) + blanks[i] = 1; + if (ISDIGIT (i)) + digits[i] = 1; + if (!ISPRINT (i)) + nonprinting[i] = 1; + if (!ISALNUM (i) && !isblank (i)) + nondictionary[i] = 1; + if (ISLOWER (i)) + fold_toupper[i] = toupper (i); + else + fold_toupper[i] = i; + } +} + +/* Initialize BUF, allocating ALLOC bytes initially. */ + +static void +initbuf (buf, alloc) + struct buffer *buf; + int alloc; +{ + buf->alloc = alloc; + buf->buf = xmalloc (buf->alloc); + buf->used = buf->left = 0; +} + +/* Fill BUF reading from FP, moving buf->left bytes from the end + of buf->buf to the beginning first. If EOF is reached and the + file wasn't terminated by a newline, supply one. 
Return a count + of bytes buffered. */ + +static int +fillbuf (buf, fp) + struct buffer *buf; + FILE *fp; +{ + int cc; + + bcopy (buf->buf + buf->used - buf->left, buf->buf, buf->left); + buf->used = buf->left; + + while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, '\n', buf->used))) + { + if (buf->used == buf->alloc) + { + buf->alloc *= 2; + buf->buf = xrealloc (buf->buf, buf->alloc); + } + cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp); + if (ferror (fp)) + { + error (0, errno, "read error"); + cleanup (); + exit (2); + } + buf->used += cc; + } + + if (feof (fp) && buf->used && buf->buf[buf->used - 1] != '\n') + { + if (buf->used == buf->alloc) + { + buf->alloc *= 2; + buf->buf = xrealloc (buf->buf, buf->alloc); + } + buf->buf[buf->used++] = '\n'; + } + + return buf->used; +} + +/* Initialize LINES, allocating space for ALLOC lines initially. + LIMIT is the maximum possible number of lines to allocate space + for, ever. */ + +static void +initlines (lines, alloc, limit) + struct lines *lines; + int alloc; + int limit; +{ + lines->alloc = alloc; + lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line)); + lines->used = 0; + lines->limit = limit; +} + +/* Return a pointer to the first character of the field specified + by KEY in LINE. 
*/ + +static char * +begfield (line, key) + struct line *line; + struct keyfield *key; +{ + register char *ptr = line->text, *lim = ptr + line->length; + register int sword = key->sword, schar = key->schar; + + if (tab) + while (ptr < lim && sword--) + { + while (ptr < lim && *ptr != tab) + ++ptr; + if (ptr < lim) + ++ptr; + } + else + while (ptr < lim && sword--) + { + while (ptr < lim && blanks[UCHAR (*ptr)]) + ++ptr; + while (ptr < lim && !blanks[UCHAR (*ptr)]) + ++ptr; + } + + if (key->skipsblanks) + while (ptr < lim && blanks[UCHAR (*ptr)]) + ++ptr; + + while (ptr < lim && schar--) + ++ptr; + + return ptr; +} + +/* Return the limit of (a pointer to the first character after) the field + in LINE specified by KEY. */ + +static char * +limfield (line, key) + struct line *line; + struct keyfield *key; +{ + register char *ptr = line->text, *lim = ptr + line->length; + register int eword = key->eword, echar = key->echar; + + if (tab) + while (ptr < lim && eword--) + { + while (ptr < lim && *ptr != tab) + ++ptr; + if (ptr < lim && (eword || key->skipeblanks)) + ++ptr; + } + else + while (ptr < lim && eword--) + { + while (ptr < lim && blanks[UCHAR (*ptr)]) + ++ptr; + while (ptr < lim && !blanks[UCHAR (*ptr)]) + ++ptr; + } + + if (key->skipeblanks) + while (ptr < lim && blanks[UCHAR (*ptr)]) + ++ptr; + + while (ptr < lim && echar--) + ++ptr; + + return ptr; +} + +/* Find the lines in BUF, storing pointers and lengths in LINES. + Also replace newlines with NULs. */ + +static void +findlines (buf, lines) + struct buffer *buf; + struct lines *lines; +{ + register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr; + struct keyfield *key = keyhead.next; + + lines->used = 0; + + while (beg < lim && (ptr = memchr (beg, '\n', lim - beg)) + && lines->used < lines->limit) + { + /* There are various places in the code that rely on a NUL + being at the end of in-core lines; NULs inside the lines + will not cause trouble, though. 
*/ + *ptr = '\0'; + + if (lines->used == lines->alloc) + { + lines->alloc *= 2; + lines->lines = (struct line *) + xrealloc ((char *) lines->lines, + lines->alloc * sizeof (struct line)); + } + + lines->lines[lines->used].text = beg; + lines->lines[lines->used].length = ptr - beg; + + /* Precompute the position of the first key for efficiency. */ + if (key) + { + if (key->eword >= 0) + lines->lines[lines->used].keylim = + limfield (&lines->lines[lines->used], key); + else + lines->lines[lines->used].keylim = ptr; + + if (key->sword >= 0) + lines->lines[lines->used].keybeg = + begfield (&lines->lines[lines->used], key); + else + { + if (key->skipsblanks) + while (blanks[UCHAR (*beg)]) + ++beg; + lines->lines[lines->used].keybeg = beg; + } + } + + ++lines->used; + beg = ptr + 1; + } + + buf->left = lim - beg; +} + +/* Compare strings A and B containing decimal fractions < 1. Each string + should begin with a decimal point followed immediately by the digits + of the fraction. Strings not of this form are considered to be zero. */ + +static int +fraccompare (a, b) + register char *a, *b; +{ + register tmpa = UCHAR (*a), tmpb = UCHAR (*b); + + if (tmpa == '.' && tmpb == '.') + { + do + tmpa = UCHAR (*++a), tmpb = UCHAR (*++b); + while (tmpa == tmpb && digits[tmpa]); + if (digits[tmpa] && digits[tmpb]) + return tmpa - tmpb; + if (digits[tmpa]) + { + while (tmpa == '0') + tmpa = UCHAR (*++a); + if (digits[tmpa]) + return 1; + return 0; + } + if (digits[tmpb]) + { + while (tmpb == '0') + tmpb = UCHAR (*++b); + if (digits[tmpb]) + return -1; + return 0; + } + return 0; + } + else if (tmpa == '.') + { + do + tmpa = UCHAR (*++a); + while (tmpa == '0'); + if (digits[tmpa]) + return 1; + return 0; + } + else if (tmpb == '.') + { + do + tmpb = UCHAR (*++b); + while (tmpb == '0'); + if (digits[tmpb]) + return -1; + return 0; + } + return 0; +} + +/* Compare strings A and B as numbers without explicitly converting them to + machine numbers. 
Comparatively slow for short strings, but asymptotically + hideously fast. */ + +static int +numcompare (a, b) + register char *a, *b; +{ + register int tmpa, tmpb, loga, logb, tmp; + + tmpa = UCHAR (*a), tmpb = UCHAR (*b); + + if (tmpa == '-') + { + tmpa = UCHAR (*++a); + if (tmpb != '-') + { + if (digits[tmpa] && digits[tmpb]) + return -1; + return 0; + } + tmpb = UCHAR (*++b); + + while (tmpa == '0') + tmpa = UCHAR (*++a); + while (tmpb == '0') + tmpb = UCHAR (*++b); + + while (tmpa == tmpb && digits[tmpa]) + tmpa = UCHAR (*++a), tmpb = UCHAR (*++b); + + if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa])) + return -fraccompare (a, b); + + if (digits[tmpa]) + for (loga = 1; digits[UCHAR (*++a)]; ++loga) + ; + else + loga = 0; + + if (digits[tmpb]) + for (logb = 1; digits[UCHAR (*++b)]; ++logb) + ; + else + logb = 0; + + if (tmp = logb - loga) + return tmp; + + if (!loga) + return 0; + + return tmpb - tmpa; + } + else if (tmpb == '-') + { + if (digits[UCHAR (tmpa)] && digits[UCHAR (*++b)]) + return 1; + return 0; + } + else + { + while (tmpa == '0') + tmpa = UCHAR (*++a); + while (tmpb == '0') + tmpb = UCHAR (*++b); + + while (tmpa == tmpb && digits[tmpa]) + tmpa = UCHAR (*++a), tmpb = UCHAR (*++b); + + if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa])) + return fraccompare (a, b); + + if (digits[tmpa]) + for (loga = 1; digits[UCHAR (*++a)]; ++loga) + ; + else + loga = 0; + + if (digits[tmpb]) + for (logb = 1; digits[UCHAR (*++b)]; ++logb) + ; + else + logb = 0; + + if (tmp = loga - logb) + return tmp; + + if (!loga) + return 0; + + return tmpa - tmpb; + } +} + +/* Return an integer <= 12 associated with month name S with length LEN, + 0 if the name in S is not recognized. 
*/ + +static int +getmonth (s, len) + char *s; + int len; +{ + char month[4]; + register int i, lo = 0, hi = 12; + + if (len < 3) + return 0; + + for (i = 0; i < 3; ++i) + month[i] = fold_toupper[UCHAR (s[i])]; + month[3] = '\0'; + + while (hi - lo > 1) + if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0) + hi = (lo + hi) / 2; + else + lo = (lo + hi) / 2; + if (!strcmp (month, monthtab[lo].name)) + return monthtab[lo].val; + return 0; +} + +/* Compare two lines A and B trying every key in sequence until there + are no more keys or a difference is found. */ + +static int +keycompare (a, b) + struct line *a, *b; +{ + register char *texta, *textb, *lima, *limb, *translate; + register int *ignore; + struct keyfield *key; + int diff = 0, iter = 0, lena, lenb; + + for (key = keyhead.next; key; key = key->next, ++iter) + { + ignore = key->ignore; + translate = key->translate; + + /* Find the beginning and limit of each field. */ + if (iter || a->keybeg == NULL || b->keybeg == NULL) + { + if (key->eword >= 0) + lima = limfield (a, key), limb = limfield (b, key); + else + lima = a->text + a->length, limb = b->text + b->length; + + if (key->sword >= 0) + texta = begfield (a, key), textb = begfield (b, key); + else + { + texta = a->text, textb = b->text; + if (key->skipsblanks) + { + while (texta < lima && blanks[UCHAR (*texta)]) + ++texta; + while (textb < limb && blanks[UCHAR (*textb)]) + ++textb; + } + } + } + else + { + /* For the first iteration only, the key positions have + been precomputed for us. */ + texta = a->keybeg, lima = a->keylim; + textb = b->keybeg, limb = b->keylim; + } + + /* Find the lengths. */ + lena = lima - texta, lenb = limb - textb; + if (lena < 0) + lena = 0; + if (lenb < 0) + lenb = 0; + + /* Actually compare the fields. 
*/ + if (key->numeric) + { + if (*lima || *limb) + { + char savea = *lima, saveb = *limb; + + *lima = *limb = '\0'; + diff = numcompare (texta, textb); + *lima = savea, *limb = saveb; + } + else + diff = numcompare (texta, textb); + + if (diff) + return key->reverse ? -diff : diff; + continue; + } + else if (key->month) + { + diff = getmonth (texta, lena) - getmonth (textb, lenb); + if (diff) + return key->reverse ? -diff : diff; + continue; + } + else if (ignore && translate) + while (texta < lima && textb < limb) + { + while (texta < lima && ignore[UCHAR (*texta)]) + ++texta; + while (textb < limb && ignore[UCHAR (*textb)]) + ++textb; + if (texta < lima && textb < limb && + translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)]) + { + diff = translate[UCHAR (*--texta)] - translate[UCHAR (*--textb)]; + break; + } + } + else if (ignore) + while (texta < lima && textb < limb) + { + while (texta < lima && ignore[UCHAR (*texta)]) + ++texta; + while (textb < limb && ignore[UCHAR (*textb)]) + ++textb; + if (texta < lima && textb < limb && *texta++ != *textb++) + { + diff = *--texta - *--textb; + break; + } + } + else if (translate) + while (texta < lima && textb < limb) + { + if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)]) + { + diff = translate[UCHAR (*--texta)] - translate[UCHAR (*--textb)]; + break; + } + } + else + diff = memcmp (texta, textb, min (lena, lenb)); + + if (diff) + return key->reverse ? -diff : diff; + if (diff = lena - lenb) + return key->reverse ? -diff : diff; + } + + return 0; +} + +/* Compare two lines A and B, returning negative, zero, or positive + depending on whether A compares less than, equal to, or greater than B. 
*/ + +static int +compare (a, b) + register struct line *a, *b; +{ + int diff, tmpa, tmpb, mini; + + if (keyhead.next) + { + diff = keycompare (a, b); + if (diff) + return diff; + if (!unique && !stable) + { + tmpa = a->length, tmpb = b->length; + diff = memcmp (a->text, b->text, min (tmpa, tmpb)); + if (!diff) + diff = tmpa - tmpb; + } + } + else + { + tmpa = a->length, tmpb = b->length; + mini = min (tmpa, tmpb); + if (mini == 0) + diff = tmpa - tmpb; + else + { + char *ap = a->text, *bp = b->text; + + diff = *ap - *bp; + if (diff == 0) + { + diff = memcmp (ap, bp, mini); + if (diff == 0) + diff = tmpa - tmpb; + } + } + } + + return reverse ? -diff : diff; +} + +/* Check that the lines read from the given FP come in order. Return + 1 if they do and 0 if there is a disorder. */ + +static int +checkfp (fp) + FILE *fp; +{ + struct buffer buf; /* Input buffer. */ + struct lines lines; /* Lines scanned from the buffer. */ + struct line temp; /* Copy of previous line. */ + int cc; /* Character count. */ + int cmp; /* Result of calling compare. */ + int alloc, i, success = 1; + + initbuf (&buf, mergealloc); + initlines (&lines, mergealloc / linelength + 1, + LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line))); + alloc = linelength; + temp.text = xmalloc (alloc); + + cc = fillbuf (&buf, fp); + findlines (&buf, &lines); + + if (cc) + do + { + /* Compare each line in the buffer with its successor. */ + for (i = 0; i < lines.used - 1; ++i) + { + cmp = compare (&lines.lines[i], &lines.lines[i + 1]); + if ((unique && cmp >= 0) || (cmp > 0)) + { + success = 0; + goto finish; + } + } + + /* Save the last line of the buffer and refill the buffer. 
*/ + if (lines.lines[lines.used - 1].length > alloc) + { + while (lines.lines[lines.used - 1].length + 1 > alloc) + alloc *= 2; + temp.text = xrealloc (temp.text, alloc); + } + bcopy (lines.lines[lines.used - 1].text, temp.text, + lines.lines[lines.used - 1].length + 1); + temp.length = lines.lines[lines.used - 1].length; + + cc = fillbuf (&buf, fp); + if (cc) + { + findlines (&buf, &lines); + /* Make sure the line saved from the old buffer contents is + less than or equal to the first line of the new buffer. */ + cmp = compare (&temp, &lines.lines[0]); + if ((unique && cmp >= 0) || (cmp > 0)) + { + success = 0; + break; + } + } + } + while (cc); + +finish: + xfclose (fp); + free (buf.buf); + free ((char *) lines.lines); + free (temp.text); + return success; +} + +/* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE. + Close FPS before returning. */ + +static void +mergefps (fps, nfps, ofp) + FILE *fps[], *ofp; + register int nfps; +{ + struct buffer buffer[NMERGE]; /* Input buffers for each file. */ + struct lines lines[NMERGE]; /* Line tables for each buffer. */ + struct line saved; /* Saved line for unique check. */ + int savedflag = 0; /* True if there is a saved line. */ + int savealloc; /* Size allocated for the saved line. */ + int cur[NMERGE]; /* Current line in each line table. */ + int ord[NMERGE]; /* Table representing a permutation of fps, + such that lines[ord[0]].lines[cur[ord[0]]] + is the smallest line and will be next + output. */ + register int i, j, t; + + /* Allocate space for a saved line if necessary. */ + if (unique) + { + savealloc = linelength; + saved.text = xmalloc (savealloc); + } + + /* Read initial lines from each input file. */ + for (i = 0; i < nfps; ++i) + { + initbuf (&buffer[i], mergealloc); + /* If a file is empty, eliminate it from future consideration. 
*/ + while (i < nfps && !fillbuf (&buffer[i], fps[i])) + { + xfclose (fps[i]); + --nfps; + for (j = i; j < nfps; ++j) + fps[j] = fps[j + 1]; + } + if (i == nfps) + free (buffer[i].buf); + else + { + initlines (&lines[i], mergealloc / linelength + 1, + LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line))); + findlines (&buffer[i], &lines[i]); + cur[i] = 0; + } + } + + /* Set up the ord table according to comparisons among input lines. + Since this only reorders two items if one is strictly greater than + the other, it is stable. */ + for (i = 0; i < nfps; ++i) + ord[i] = i; + for (i = 1; i < nfps; ++i) + if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]], + &lines[ord[i]].lines[cur[ord[i]]]) > 0) + t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; + + /* Repeatedly output the smallest line until no input remains. */ + while (nfps) + { + /* If uniqified output is turned out, output only the first of + an identical series of lines. */ + if (unique) + { + if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]])) + { + xfwrite (saved.text, 1, saved.length, ofp); + putc ('\n', ofp); + savedflag = 0; + } + if (!savedflag) + { + if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1) + { + while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1) + savealloc *= 2; + saved.text = xrealloc (saved.text, savealloc); + } + saved.length = lines[ord[0]].lines[cur[ord[0]]].length; + bcopy (lines[ord[0]].lines[cur[ord[0]]].text, saved.text, + saved.length + 1); + savedflag = 1; + } + } + else + { + xfwrite (lines[ord[0]].lines[cur[ord[0]]].text, 1, + lines[ord[0]].lines[cur[ord[0]]].length, ofp); + putc ('\n', ofp); + } + + /* Check if we need to read more lines into core. */ + if (++cur[ord[0]] == lines[ord[0]].used) + if (fillbuf (&buffer[ord[0]], fps[ord[0]])) + { + findlines (&buffer[ord[0]], &lines[ord[0]]); + cur[ord[0]] = 0; + } + else + { + /* We reached EOF on fps[ord[0]]. 
*/ + for (i = 1; i < nfps; ++i) + if (ord[i] > ord[0]) + --ord[i]; + --nfps; + xfclose (fps[ord[0]]); + free (buffer[ord[0]].buf); + free ((char *) lines[ord[0]].lines); + for (i = ord[0]; i < nfps; ++i) + { + fps[i] = fps[i + 1]; + buffer[i] = buffer[i + 1]; + lines[i] = lines[i + 1]; + cur[i] = cur[i + 1]; + } + for (i = 0; i < nfps; ++i) + ord[i] = ord[i + 1]; + continue; + } + + /* The new line just read in may be larger than other lines + already in core; push it back in the queue until we encounter + a line larger than it. */ + for (i = 1; i < nfps; ++i) + { + t = compare (&lines[ord[0]].lines[cur[ord[0]]], + &lines[ord[i]].lines[cur[ord[i]]]); + if (!t) + t = ord[0] - ord[i]; + if (t < 0) + break; + } + t = ord[0]; + for (j = 1; j < i; ++j) + ord[j - 1] = ord[j]; + ord[i - 1] = t; + } + + if (unique && savedflag) + { + xfwrite (saved.text, 1, saved.length, ofp); + putc ('\n', ofp); + free (saved.text); + } +} + +/* Sort the array LINES with NLINES members, using TEMP for temporary space. */ + +static void +sortlines (lines, nlines, temp) + struct line *lines, *temp; + int nlines; +{ + register struct line *lo, *hi, *t; + register int nlo, nhi; + + if (nlines == 2) + { + if (compare (&lines[0], &lines[1]) > 0) + *temp = lines[0], lines[0] = lines[1], lines[1] = *temp; + return; + } + + nlo = nlines / 2; + lo = lines; + nhi = nlines - nlo; + hi = lines + nlo; + + if (nlo > 1) + sortlines (lo, nlo, temp); + + if (nhi > 1) + sortlines (hi, nhi, temp); + + t = temp; + + while (nlo && nhi) + if (compare (lo, hi) <= 0) + *t++ = *lo++, --nlo; + else + *t++ = *hi++, --nhi; + while (nlo--) + *t++ = *lo++; + + for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo) + *lo++ = *t++; +} + +/* Check that each of the NFILES FILES is ordered. + Return a count of disordered files. 
*/ + +static int +check (files, nfiles) + char *files[]; + int nfiles; +{ + int i, disorders = 0; + FILE *fp; + + for (i = 0; i < nfiles; ++i) + { + fp = xfopen (files[i], "r"); + if (!checkfp (fp)) + { + printf ("%s: disorder on %s\n", program_name, files[i]); + ++disorders; + } + } + return disorders; +} + +/* Merge NFILES FILES onto OFP. */ + +static void +merge (files, nfiles, ofp) + char *files[]; + int nfiles; + FILE *ofp; +{ + int i, j, t; + char *temp; + FILE *fps[NMERGE], *tfp; + + while (nfiles > NMERGE) + { + t = 0; + for (i = 0; i < nfiles / NMERGE; ++i) + { + for (j = 0; j < NMERGE; ++j) + fps[j] = xfopen (files[i * NMERGE + j], "r"); + tfp = xfopen (temp = tempname (), "w"); + mergefps (fps, NMERGE, tfp); + xfclose (tfp); + for (j = 0; j < NMERGE; ++j) + zaptemp (files[i * NMERGE + j]); + files[t++] = temp; + } + for (j = 0; j < nfiles % NMERGE; ++j) + fps[j] = xfopen (files[i * NMERGE + j], "r"); + tfp = xfopen (temp = tempname (), "w"); + mergefps (fps, nfiles % NMERGE, tfp); + xfclose (tfp); + for (j = 0; j < nfiles % NMERGE; ++j) + zaptemp (files[i * NMERGE + j]); + files[t++] = temp; + nfiles = t; + } + + for (i = 0; i < nfiles; ++i) + fps[i] = xfopen (files[i], "r"); + mergefps (fps, i, ofp); + for (i = 0; i < nfiles; ++i) + zaptemp (files[i]); +} + +/* Sort NFILES FILES onto OFP. 
*/ + +static void +sort (files, nfiles, ofp) + char **files; + int nfiles; + FILE *ofp; +{ + struct buffer buf; + struct lines lines; + struct line *tmp; + int i, ntmp; + FILE *fp, *tfp; + struct tempnode *node; + int ntemp = 0; + char **tempfiles; + + initbuf (&buf, sortalloc); + initlines (&lines, sortalloc / linelength + 1, + LINEALLOC / sizeof (struct line)); + ntmp = lines.alloc; + tmp = (struct line *) xmalloc (ntmp * sizeof (struct line)); + + while (nfiles--) + { + fp = xfopen (*files++, "r"); + while (fillbuf (&buf, fp)) + { + findlines (&buf, &lines); + if (lines.used > ntmp) + { + while (lines.used > ntmp) + ntmp *= 2; + tmp = (struct line *) + xrealloc ((char *) tmp, ntmp * sizeof (struct line)); + } + sortlines (lines.lines, lines.used, tmp); + if (feof (fp) && !nfiles && !ntemp && !buf.left) + tfp = ofp; + else + { + ++ntemp; + tfp = xfopen (tempname (), "w"); + } + for (i = 0; i < lines.used; ++i) + if (!unique || i == 0 + || compare (&lines.lines[i], &lines.lines[i - 1])) + { + xfwrite (lines.lines[i].text, 1, lines.lines[i].length, tfp); + putc ('\n', tfp); + } + if (tfp != ofp) + xfclose (tfp); + } + xfclose (fp); + } + + free (buf.buf); + free ((char *) lines.lines); + free ((char *) tmp); + + if (ntemp) + { + tempfiles = (char **) xmalloc (ntemp * sizeof (char *)); + i = ntemp; + for (node = temphead.next; node; node = node->next) + tempfiles[--i] = node->name; + merge (tempfiles, ntemp, ofp); + free ((char *) tempfiles); + } +} + +/* Insert key KEY at the end of the list (`keyhead'). */ + +static void +insertkey (key) + struct keyfield *key; +{ + struct keyfield *k = &keyhead; + + while (k->next) + k = k->next; + k->next = key; + key->next = NULL; +} + +static void +badfieldspec (s) + char *s; +{ + error (2, 0, "invalid field specification `%s'", s); +} + +/* Handle interrupts and hangups. 
*/ + +static void +sighandler (sig) + int sig; +{ +#ifdef _POSIX_VERSION + struct sigaction sigact; + + sigact.sa_handler = SIG_DFL; + sigemptyset (&sigact.sa_mask); + sigact.sa_flags = 0; + sigaction (sig, &sigact, NULL); +#else /* !_POSIX_VERSION */ + signal (sig, SIG_DFL); +#endif /* _POSIX_VERSION */ + cleanup (); + kill (getpid (), sig); +} + +/* Set the ordering options for KEY specified in S. + Return the address of the first character in S that + is not a valid ordering option. + BLANKTYPE is the kind of blanks that 'b' should skip. */ + +static char * +set_ordering (s, key, blanktype) + register char *s; + struct keyfield *key; + enum blanktype blanktype; +{ + while (*s) + { + switch (*s) + { + case 'b': + if (blanktype == bl_start || blanktype == bl_both) + key->skipsblanks = 1; + if (blanktype == bl_end || blanktype == bl_both) + key->skipeblanks = 1; + break; + case 'd': + key->ignore = nondictionary; + break; + case 'f': + key->translate = fold_toupper; + break; +#if 0 + case 'g': + /* Reserved for comparing floating-point numbers. 
*/ + break; +#endif + case 'i': + key->ignore = nonprinting; + break; + case 'M': + key->skipsblanks = key->skipeblanks = key->month = 1; + break; + case 'n': + key->skipsblanks = key->skipeblanks = key->numeric = 1; + break; + case 'r': + key->reverse = 1; + break; + default: + return s; + } + ++s; + } + return s; +} + +void +main (argc, argv) + int argc; + char *argv[]; +{ + struct keyfield *key = NULL, gkey; + char *s; + int i, t, t2; + int checkonly = 0, mergeonly = 0, nfiles = 0; + char *minus = "-", *outfile = minus, **files, *tmp; + FILE *ofp; +#ifdef _POSIX_VERSION + struct sigaction oldact, newact; +#endif /* _POSIX_VERSION */ + + program_name = argv[0]; + have_read_stdin = 0; + inittables (); + + prefix = getenv ("TMPDIR"); + if (prefix == NULL) + prefix = "/tmp"; + +#ifdef _POSIX_VERSION + newact.sa_handler = sighandler; + sigemptyset (&newact.sa_mask); + newact.sa_flags = 0; + + sigaction (SIGINT, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGINT, &newact, NULL); + sigaction (SIGHUP, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGHUP, &newact, NULL); + sigaction (SIGPIPE, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGPIPE, &newact, NULL); + sigaction (SIGTERM, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGTERM, &newact, NULL); +#else /* !_POSIX_VERSION */ + if (signal (SIGINT, SIG_IGN) != SIG_IGN) + signal (SIGINT, sighandler); + if (signal (SIGHUP, SIG_IGN) != SIG_IGN) + signal (SIGHUP, sighandler); + if (signal (SIGPIPE, SIG_IGN) != SIG_IGN) + signal (SIGPIPE, sighandler); + if (signal (SIGTERM, SIG_IGN) != SIG_IGN) + signal (SIGTERM, sighandler); +#endif /* !_POSIX_VERSION */ + + gkey.sword = gkey.eword = -1; + gkey.ignore = NULL; + gkey.translate = NULL; + gkey.numeric = gkey.month = gkey.reverse = 0; + gkey.skipsblanks = gkey.skipeblanks = 0; + + files = (char **) xmalloc (sizeof (char *) * argc); + + for (i = 1; i < argc; ++i) + { + if (argv[i][0] == '+') + { + 
if (key) + insertkey (key); + key = (struct keyfield *) xmalloc (sizeof (struct keyfield)); + key->eword = -1; + key->ignore = NULL; + key->translate = NULL; + key->skipsblanks = key->skipeblanks = 0; + key->numeric = key->month = key->reverse = 0; + s = argv[i] + 1; + if (!digits[UCHAR (*s)]) + badfieldspec (argv[i]); + for (t = 0; digits[UCHAR (*s)]; ++s) + t = 10 * t + *s - '0'; + t2 = 0; + if (*s == '.') + for (++s; digits[UCHAR (*s)]; ++s) + t2 = 10 * t2 + *s - '0'; + if (t2 || t) + { + key->sword = t; + key->schar = t2; + } + else + key->sword = -1; + s = set_ordering (s, key, bl_start); + if (*s) + badfieldspec (argv[i]); + } + else if (argv[i][0] == '-' && argv[i][1]) + { + s = argv[i] + 1; + if (digits[UCHAR (*s)]) + { + if (!key) + usage (); + for (t = 0; digits[UCHAR (*s)]; ++s) + t = t * 10 + *s - '0'; + t2 = 0; + if (*s == '.') + for (++s; digits[UCHAR (*s)]; ++s) + t2 = t2 * 10 + *s - '0'; + key->eword = t; + key->echar = t2; + s = set_ordering (s, key, bl_end); + if (*s) + badfieldspec (argv[i]); + insertkey (key); + key = NULL; + } + else + while (*s) + { + s = set_ordering (s, &gkey, bl_both); + switch (*s) + { + case '\0': + break; + case 'c': + checkonly = 1; + break; + case 'k': + if (s[1]) + ++s; + else + { + if (i == argc - 1) + error (2, 0, "option `-k' requires an argument"); + else + s = argv[++i]; + } + if (key) + insertkey (key); + key = (struct keyfield *) + xmalloc (sizeof (struct keyfield)); + key->eword = -1; + key->ignore = NULL; + key->translate = NULL; + key->skipsblanks = key->skipeblanks = 0; + key->numeric = key->month = key->reverse = 0; + /* Get POS1. 
*/ + if (!digits[UCHAR (*s)]) + badfieldspec (argv[i]); + for (t = 0; digits[UCHAR (*s)]; ++s) + t = 10 * t + *s - '0'; + if (t) + t--; + t2 = 0; + if (*s == '.') + { + for (++s; digits[UCHAR (*s)]; ++s) + t2 = 10 * t2 + *s - '0'; + if (t2) + t2--; + } + if (t2 || t) + { + key->sword = t; + key->schar = t2; + } + else + key->sword = -1; + s = set_ordering (s, key, bl_start); + if (*s && *s != ',') + badfieldspec (argv[i]); + else if (*s++) + { + /* Get POS2. */ + for (t = 0; digits[UCHAR (*s)]; ++s) + t = t * 10 + *s - '0'; + t2 = 0; + if (*s == '.') + { + for (++s; digits[UCHAR (*s)]; ++s) + t2 = t2 * 10 + *s - '0'; + if (t2) + t--; + } + key->eword = t; + key->echar = t2; + s = set_ordering (s, key, bl_end); + if (*s) + badfieldspec (argv[i]); + } + insertkey (key); + key = NULL; + goto outer; + case 'm': + mergeonly = 1; + break; + case 'o': + if (s[1]) + outfile = s + 1; + else + { + if (i == argc - 1) + error (2, 0, "option `-o' requires an argument"); + else + outfile = argv[++i]; + } + goto outer; + case 's': + stable = 1; + break; + case 't': + if (s[1]) + tab = *++s; + else if (i < argc - 1) + { + tab = *argv[++i]; + goto outer; + } + else + error (2, 0, "option `-t' requires an argument"); + break; + case 'u': + unique = 1; + break; + default: + fprintf (stderr, "%s: unrecognized option `-%c'\n", + argv[0], *s); + usage (); + } + if (*s) + ++s; + } + } + else /* Not an option. */ + { + files[nfiles++] = argv[i]; + } + outer:; + } + + if (key) + insertkey (key); + + /* Inheritance of global options to individual keys. 
*/ + for (key = keyhead.next; key; key = key->next) + if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse + && !key->skipeblanks && !key->month && !key->numeric) + { + key->ignore = gkey.ignore; + key->translate = gkey.translate; + key->skipsblanks = gkey.skipsblanks; + key->skipeblanks = gkey.skipeblanks; + key->month = gkey.month; + key->numeric = gkey.numeric; + key->reverse = gkey.reverse; + } + + if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks + || gkey.reverse || gkey.skipeblanks + || gkey.month || gkey.numeric)) + insertkey (&gkey); + + if (nfiles == 0) + { + nfiles = 1; + files = − + } + + if (checkonly) + exit (check (files, nfiles) != 0); + + if (strcmp (outfile, "-")) + { + for (i = 0; i < nfiles; ++i) + if (!strcmp (outfile, files[i])) + break; + if (i == nfiles) + ofp = xfopen (outfile, "w"); + else + { + char buf[8192]; + FILE *fp = xfopen (outfile, "r"); + int cc; + + tmp = tempname (); + ofp = xfopen (tmp, "w"); + while ((cc = fread (buf, 1, sizeof buf, fp)) > 0) + xfwrite (buf, 1, cc, ofp); + if (ferror (fp)) + { + error (0, errno, "%s", outfile); + cleanup (); + exit (2); + } + xfclose (ofp); + xfclose (fp); + files[i] = tmp; + ofp = xfopen (outfile, "w"); + } + } + else + ofp = stdout; + + if (mergeonly) + merge (files, nfiles, ofp); + else + sort (files, nfiles, ofp); + cleanup (); + + if (have_read_stdin && fclose (stdin) == EOF) + error (1, errno, "-"); + if (ferror (stdout) || fclose (stdout) == EOF) + error (1, 0, "write error"); + + exit (0); +} + +static void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-cmus] [-t separator] [-o output-file] [-bdfiMnr] [+POS1 [-POS2]]\n\ + [-k POS1[,POS2]] [file...]\n", + program_name); + exit (2); +} diff --git a/src/split.c b/src/split.c new file mode 100644 index 000000000..ccc4535c2 --- /dev/null +++ b/src/split.c @@ -0,0 +1,532 @@ +/* split.c -- split a file into pieces. + Copyright (C) 1988, 1991 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* By tege@sics.se, with rms. + + To do: + * Implement -t CHAR or -t REGEX to specify break characters other + than newline. */ + +#include <stdio.h> +#include <getopt.h> +#include <ctype.h> +#include <sys/types.h> +#include "system.h" + +char *xmalloc (); +void error (); + +int convint (); +int isdigits (); +int stdread (); +void line_bytes_split (); +void bytes_split (); +void cwrite (); +void lines_split (); +void next_file_name (); + +/* Name under which this program was invoked. */ +char *program_name; + +/* Base name of output files. */ +char *outfile; + +/* Pointer to the end of the prefix in OUTFILE. + Suffixes are inserted here. */ +char *outfile_mid; + +/* Pointer to the end of OUTFILE. */ +char *outfile_end; + +/* Status for outfile name generation. */ +unsigned outfile_count = -1; +unsigned outfile_name_limit = 25 * 26; +unsigned outfile_name_generation = 1; + +/* Name of input file. May be "-". */ +char *infile; + +/* Descriptor on which input file is open. */ +int input_desc; + +/* Descriptor on which output file is open. 
*/ +int output_desc; + +void +usage (reason) + char *reason; +{ + if (reason != NULL) + fprintf (stderr, "%s: %s\n", program_name, reason); + fprintf (stderr, "\ +Usage: %s [-lines] [-l lines] [-b bytes[bkm]] [-C bytes[bkm]]\n\ + [--lines=lines] [--bytes=bytes[bkm]] [--line-bytes=bytes[bkm]]\n\ + [infile [outfile-prefix]]\n", + program_name); + exit (2); +} + +struct option longopts[] = +{ + {"bytes", 1, NULL, 'b'}, + {"lines", 1, NULL, 'l'}, + {"line-bytes", 1, NULL, 'C'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char *argv[]; +{ + struct stat stat_buf; + int num; /* numeric argument from command line */ + enum + { + type_undef, type_bytes, type_byteslines, type_lines, type_digits + } split_type = type_undef; + int in_blk_size; /* optimal block size of input file device */ + char *buf; /* file i/o buffer */ + int accum = 0; + char *outbase; + int c; + int digits_optind = 0; + + program_name = argv[0]; + + /* Parse command line options. */ + + infile = "-"; + outbase = "x"; + + while (1) + { + /* This is the argv-index of the option we will read next. */ + int this_optind = optind ? 
optind : 1; + + c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0); + if (c == EOF) + break; + + switch (c) + { + case 'b': + if (split_type != type_undef) + usage ("cannot split in more than one way"); + split_type = type_bytes; + if (convint (optarg, &accum) == -1) + usage ("invalid number of bytes"); + break; + + case 'l': + if (split_type != type_undef) + usage ("cannot split in more than one way"); + split_type = type_lines; + if (!isdigits (optarg)) + usage ("invalid number of lines"); + accum = atoi (optarg); + break; + + case 'C': + if (split_type != type_undef) + usage ("cannot split in more than one way"); + split_type = type_byteslines; + if (convint (optarg, &accum) == -1) + usage ("invalid number of bytes"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (split_type != type_undef && split_type != type_digits) + usage ("cannot split in more than one way"); + if (digits_optind != 0 && digits_optind != this_optind) + accum = 0; /* More than one number given; ignore other. */ + digits_optind = this_optind; + split_type = type_digits; + accum = accum * 10 + c - '0'; + break; + + default: + usage ((char *)0); + } + } + + /* Handle default case. */ + if (split_type == type_undef) + { + split_type = type_lines; + accum = 1000; + } + + if (accum < 1) + usage ("invalid number"); + num = accum; + + /* Get out the filename arguments. */ + + if (optind < argc) + infile = argv[optind++]; + + if (optind < argc) + outbase = argv[optind++]; + + if (optind < argc) + usage ("too many arguments"); + + /* Open the input file. */ + if (!strcmp (infile, "-")) + input_desc = 0; + else + { + input_desc = open (infile, O_RDONLY); + if (input_desc < 0) + error (1, errno, "%s", infile); + } + + /* No output file is open now. */ + output_desc = -1; + + /* Copy the output file prefix so we can add suffixes to it. + 26**29 is certainly enough output files! 
*/ + + outfile = xmalloc (strlen (outbase) + 30); + strcpy (outfile, outbase); + outfile_mid = outfile + strlen (outfile); + outfile_end = outfile_mid + 2; + bzero (outfile_mid, 30); + outfile_mid[0] = 'a'; + outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */ + + /* Get the optimal block size of input device and make a buffer. */ + + if (fstat (input_desc, &stat_buf) < 0) + error (1, errno, "%s", infile); + in_blk_size = ST_BLKSIZE (stat_buf); + + buf = xmalloc (in_blk_size + 1); + + switch (split_type) + { + case type_digits: + case type_lines: + lines_split (num, buf, in_blk_size); + break; + + case type_bytes: + bytes_split (num, buf, in_blk_size); + break; + + case type_byteslines: + line_bytes_split (num); + break; + } + + if (close (input_desc) < 0) + error (1, errno, "%s", infile); + if (output_desc >= 0 && close (output_desc) < 0) + error (1, errno, "%s", outfile); + + exit (0); +} + +/* Return nonzero if the string STR is composed entirely of decimal digits. */ + +int +isdigits (str) + char *str; +{ + do + { + if (!isdigit (*str)) + return 0; + str++; + } + while (*str); + return 1; +} + +/* Put the value of the number in STR into *VAL. + STR can specify a positive integer, optionally ending in `k' + to mean kilo or `m' to mean mega. + Return 0 if STR is valid, -1 if not. */ + +int +convint (str, val) + char *str; + int *val; +{ + int multiplier = 1; + int arglen = strlen (str); + + if (arglen > 1) + { + switch (str[arglen - 1]) + { + case 'b': + multiplier = 512; + str[arglen - 1] = '\0'; + break; + case 'k': + multiplier = 1024; + str[arglen - 1] = '\0'; + break; + case 'm': + multiplier = 1048576; + str[arglen - 1] = '\0'; + break; + } + } + if (!isdigits (str)) + return -1; + *val = atoi (str) * multiplier; + return 0; +} + +/* Split into pieces of exactly NCHARS bytes. + Use buffer BUF, whose size is BUFSIZE. 
*/ + +void +bytes_split (nchars, buf, bufsize) + int nchars; + char *buf; + int bufsize; +{ + int n_read; + int new_file_flag = 1; + int to_read; + int to_write = nchars; + char *bp_out; + + do + { + n_read = stdread (buf, bufsize); + if (n_read < 0) + error (1, errno, "%s", infile); + bp_out = buf; + to_read = n_read; + for (;;) + { + if (to_read < to_write) + { + if (to_read) /* do not write 0 bytes! */ + { + cwrite (new_file_flag, bp_out, to_read); + to_write -= to_read; + new_file_flag = 0; + } + break; + } + else + { + cwrite (new_file_flag, bp_out, to_write); + bp_out += to_write; + to_read -= to_write; + new_file_flag = 1; + to_write = nchars; + } + } + } + while (n_read == bufsize); +} + +/* Split into pieces of exactly NLINES lines. + Use buffer BUF, whose size is BUFSIZE. */ + +void +lines_split (nlines, buf, bufsize) + int nlines; + char *buf; + int bufsize; +{ + int n_read; + char *bp, *bp_out, *eob; + int new_file_flag = 1; + int n = 0; + + do + { + n_read = stdread (buf, bufsize); + if (n_read < 0) + error (1, errno, "%s", infile); + bp = bp_out = buf; + eob = bp + n_read; + *eob = '\n'; + for (;;) + { + while (*bp++ != '\n') + ; /* this semicolon takes most of the time */ + if (bp > eob) + { + if (eob != bp_out) /* do not write 0 bytes! */ + { + cwrite (new_file_flag, bp_out, eob - bp_out); + new_file_flag = 0; + } + break; + } + else + if (++n >= nlines) + { + cwrite (new_file_flag, bp_out, bp - bp_out); + bp_out = bp; + new_file_flag = 1; + n = 0; + } + } + } + while (n_read == bufsize); +} + +/* Split into pieces that are as large as possible while still not more + than NCHARS bytes, and are split on line boundaries except + where lines longer than NCHARS bytes occur. */ + +void +line_bytes_split (nchars) + int nchars; +{ + int n_read; + char *bp; + int eof = 0; + int n_buffered = 0; + char *buf = (char *) xmalloc (nchars); + + do + { + /* Fill up the full buffer size from the input file. 
*/ + + n_read = stdread (buf + n_buffered, nchars - n_buffered); + if (n_read < 0) + error (1, errno, "%s", infile); + + n_buffered += n_read; + if (n_buffered != nchars) + eof = 1; + + /* Find where to end this chunk. */ + bp = buf + n_buffered; + if (n_buffered == nchars) + { + while (bp > buf && bp[-1] != '\n') + bp--; + } + + /* If chunk has no newlines, use all the chunk. */ + if (bp == buf) + bp = buf + n_buffered; + + /* Output the chars as one output file. */ + cwrite (1, buf, bp - buf); + + /* Discard the chars we just output; move rest of chunk + down to be the start of the next chunk. */ + n_buffered -= bp - buf; + if (n_buffered > 0) + bcopy (bp, buf, n_buffered); + } + while (!eof); + free (buf); +} + +/* Write BYTES bytes at BP to an output file. + If NEW_FILE_FLAG is nonzero, open the next output file. + Otherwise add to the same output file already in use. */ + +void +cwrite (new_file_flag, bp, bytes) + int new_file_flag; + char *bp; + int bytes; +{ + if (new_file_flag) + { + if (output_desc >= 0 && close (output_desc) < 0) + error (1, errno, "%s", outfile); + + next_file_name (); + output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (output_desc < 0) + error (1, errno, "%s", outfile); + } + if (write (output_desc, bp, bytes) < 0) + error (1, errno, "%s", outfile); +} + +/* Read NCHARS bytes from the input file into BUF. + Return the number of bytes successfully read. + If this is less than NCHARS, do not call `stdread' again. */ + +int +stdread (buf, nchars) + char *buf; + int nchars; +{ + int n_read; + int to_be_read = nchars; + + while (to_be_read) + { + n_read = read (input_desc, buf, to_be_read); + if (n_read < 0) + return -1; + if (n_read == 0) + break; + to_be_read -= n_read; + buf += n_read; + } + return nchars - to_be_read; +} + +/* Compute the next sequential output file name suffix and store it + into the string `outfile' at the position pointed to by `outfile_mid'. 
*/ + +void +next_file_name () +{ + int x; + char *ne; + + outfile_count++; + if (outfile_count < outfile_name_limit) + { + for (ne = outfile_end - 1; ; ne--) + { + x = *ne; + if (x != 'z') + break; + *ne = 'a'; + } + *ne = x + 1; + return; + } + + outfile_count = 0; + outfile_name_limit *= 26; + outfile_name_generation++; + *outfile_mid++ = 'z'; + for (x = 0; x <= outfile_name_generation; x++) + outfile_mid[x] = 'a'; + outfile_end += 2; +} diff --git a/src/sum.c b/src/sum.c new file mode 100644 index 000000000..9236614ca --- /dev/null +++ b/src/sum.c @@ -0,0 +1,217 @@ +/* sum -- checksum and count the blocks in a file + Copyright (C) 1986, 1989, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Like BSD sum or SysV sum -r, except like SysV sum if -s option is given. */ + +/* Written by Kayvan Aghaiepour and David MacKenzie. */ + +#include <stdio.h> +#include <sys/types.h> +#include <getopt.h> +#include "system.h" + +int bsd_sum_file (); +int sysv_sum_file (); +void error (); + +/* The name this program was run with. */ +char *program_name; + +/* Nonzero if any of the files read were the standard input. */ +int have_read_stdin; + +/* Right-rotate 32-bit integer variable C. 
*/ +#define ROTATE_RIGHT(c) if ((c) & 01) (c) = ((c) >>1) + 0x8000; else (c) >>= 1; + +struct option longopts[] = +{ + {"sysv", 0, NULL, 's'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + int errors = 0; + int optc; + int files_given; + int (*sum_func) () = bsd_sum_file; + + program_name = argv[0]; + have_read_stdin = 0; + + while ((optc = getopt_long (argc, argv, "rs", longopts, (int *) 0)) != -1) + { + switch (optc) + { + case 'r': /* For SysV compatibility. */ + sum_func = bsd_sum_file; + break; + + case 's': + sum_func = sysv_sum_file; + break; + + case '?': + fprintf (stderr, "\ +Usage: %s [-rs] [--sysv] [file...]\n", argv[0]); + exit (1); + } + } + + files_given = argc - optind; + if (files_given == 0) + { + if ((*sum_func) ("-", files_given) < 0) + errors = 1; + } + else + for (; optind < argc; optind++) + if ((*sum_func) (argv[optind], files_given) < 0) + errors = 1; + + if (have_read_stdin && fclose (stdin) == EOF) + error (1, errno, "-"); + exit (errors); +} + +/* Calculate and print the rotated checksum and the size in 1K blocks + of file FILE, or of the standard input if FILE is "-". + If PRINT_NAME is >1, print FILE next to the checksum and size. + The checksum varies depending on sizeof(int). + Return 0 if successful, -1 if an error occurs. */ + +int +bsd_sum_file (file, print_name) + char *file; + int print_name; +{ + register FILE *fp; + register unsigned long checksum = 0; /* The checksum mod 2^16. */ + register long total_bytes = 0; /* The number of bytes. */ + register int ch; /* Each character read. */ + + if (!strcmp (file, "-")) + { + fp = stdin; + have_read_stdin = 1; + } + else + { + fp = fopen (file, "r"); + if (fp == NULL) + { + error (0, errno, "%s", file); + return -1; + } + } + + /* This algorithm seems to depend on sign extension in `ch' in order to + give the right results. Ick. 
*/ + while ((ch = getc (fp)) != EOF) + { + total_bytes++; + ROTATE_RIGHT (checksum); + checksum += ch; + checksum &= 0xffff; /* Keep it within bounds. */ + } + + if (ferror (fp)) + { + error (0, errno, "%s", file); + if (strcmp (file, "-")) + fclose (fp); + return -1; + } + + if (strcmp (file, "-") && fclose (fp) == EOF) + { + error (0, errno, "%s", file); + return -1; + } + + printf ("%05lu %5ld", checksum, (total_bytes + 1024 - 1) / 1024); + if (print_name > 1) + printf (" %s", file); + putchar ('\n'); + + return 0; +} + +/* Calculate and print the checksum and the size in 512-byte blocks + of file FILE, or of the standard input if FILE is "-". + If PRINT_NAME is >0, print FILE next to the checksum and size. + Return 0 if successful, -1 if an error occurs. */ + +int +sysv_sum_file (file, print_name) + char *file; + int print_name; +{ + int fd; + unsigned char buf[8192]; + register int bytes_read; + register unsigned long checksum = 0; + long total_bytes = 0; + + if (!strcmp (file, "-")) + { + fd = 0; + have_read_stdin = 1; + } + else + { + fd = open (file, O_RDONLY); + if (fd == -1) + { + error (0, errno, "%s", file); + return -1; + } + } + + while ((bytes_read = read (fd, buf, sizeof buf)) > 0) + { + register int i; + + for (i = 0; i < bytes_read; i++) + checksum += buf[i]; + total_bytes += bytes_read; + } + + if (bytes_read < 0) + { + error (0, errno, "%s", file); + if (strcmp (file, "-")) + close (fd); + return -1; + } + + if (strcmp (file, "-") && close (fd) == -1) + { + error (0, errno, "%s", file); + return -1; + } + + printf ("%lu %ld", checksum % 0xffff, (total_bytes + 512 - 1) / 512); + if (print_name) + printf (" %s", file); + putchar ('\n'); + + return 0; +} diff --git a/src/tac.c b/src/tac.c new file mode 100644 index 000000000..78e18467b --- /dev/null +++ b/src/tac.c @@ -0,0 +1,628 @@ +/* tac - concatenate and print files in reverse + Copyright (C) 1988, 1989, 1990, 1991 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Jay Lepreau (lepreau@cs.utah.edu). + GNU enhancements by David MacKenzie (djm@ai.mit.edu). */ + +/* Copy each FILE, or the standard input if none are given or when a + FILE name of "-" is encountered, to the standard output with the + order of the records reversed. The records are separated by + instances of a string, or a newline if none is given. By default, the + separator string is attached to the end of the record that it + follows in the file. + + Options: + -b, --before The separator is attached to the beginning + of the record that it precedes in the file. + -r, --regex The separator is a regular expression. + -s, --separator=separator Use SEPARATOR as the record separator. + + To reverse a file byte by byte, use (in bash, ksh, or sh): +tac -r -s '.\| +' file */ + +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include <signal.h> +#include <regex.h> +#include "system.h" + +#ifndef STDC_HEADERS +char *malloc (); +char *realloc (); +#endif + +/* The number of bytes per atomic read. */ +#define INITIAL_READSIZE 8192 + +/* The number of bytes per atomic write. 
*/ +#define WRITESIZE 8192 + +char *mktemp (); + +RETSIGTYPE cleanup (); +int tac (); +int tac_file (); +int tac_stdin (); +char *xmalloc (); +char *xrealloc (); +void output (); +void error (); +void save_stdin (); +void xwrite (); + +/* The name this program was run with. */ +char *program_name; + +/* The string that separates the records of the file. */ +char *separator; + +/* If nonzero, print `separator' along with the record preceding it + in the file; otherwise with the record following it. */ +int separator_ends_record; + +/* 0 if `separator' is to be matched as a regular expression; + otherwise, the length of `separator', used as a sentinel to + stop the search. */ +int sentinel_length; + +/* The length of a match with `separator'. If `sentinel_length' is 0, + `match_length' is computed every time a match succeeds; + otherwise, it is simply the length of `separator'. */ +int match_length; + +/* The input buffer. */ +char *buffer; + +/* The number of bytes to read at once into `buffer'. */ +unsigned read_size; + +/* The size of `buffer'. This is read_size * 2 + sentinel_length + 2. + The extra 2 bytes allow `past_end' to have a value beyond the + end of `buffer' and `match_start' to run off the front of `buffer'. */ +unsigned buffer_size; + +/* The compiled regular expression representing `separator'. */ +static struct re_pattern_buffer compiled_separator; + +struct option longopts[] = +{ + {"before", 0, &separator_ends_record, 0}, + {"regex", 0, &sentinel_length, 0}, + {"separator", 1, NULL, 's'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + char *error_message; /* Return value from re_compile_pattern. 
*/ + int optc, errors; + int have_read_stdin = 0; + + program_name = argv[0]; + errors = 0; + separator = "\n"; + sentinel_length = 1; + separator_ends_record = 1; + + while ((optc = getopt_long (argc, argv, "brs:", longopts, (int *) 0)) + != EOF) + { + switch (optc) + { + case 0: + break; + case 'b': + separator_ends_record = 0; + break; + case 'r': + sentinel_length = 0; + break; + case 's': + separator = optarg; + if (*separator == 0) + error (1, 0, "separator cannot be empty"); + break; + default: + fprintf (stderr, "\ +Usage: %s [-br] [-s separator] [--before] [--regex] [--separator=separator]\n\ + [file...]\n", + program_name); + exit (1); + } + } + + if (sentinel_length == 0) + { + compiled_separator.allocated = 100; + compiled_separator.buffer = (unsigned char *) + xmalloc (compiled_separator.allocated); + compiled_separator.fastmap = xmalloc (256); + compiled_separator.translate = 0; + error_message = re_compile_pattern (separator, strlen (separator), + &compiled_separator); + if (error_message) + error (1, 0, "%s", error_message); + } + else + match_length = sentinel_length = strlen (separator); + + read_size = INITIAL_READSIZE; + /* A precaution that will probably never be needed. */ + while (sentinel_length * 2 >= read_size) + read_size *= 2; + buffer_size = read_size * 2 + sentinel_length + 2; + buffer = xmalloc (buffer_size); + if (sentinel_length) + { + strcpy (buffer, separator); + buffer += sentinel_length; + } + else + ++buffer; + + if (optind == argc) + { + have_read_stdin = 1; + errors = tac_stdin (); + } + else + for (; optind < argc; ++optind) + { + if (strcmp (argv[optind], "-") == 0) + { + have_read_stdin = 1; + errors |= tac_stdin (); + } + else + errors |= tac_file (argv[optind]); + } + + /* Flush the output buffer. 
*/ + output ((char *) NULL, (char *) NULL); + + if (have_read_stdin && close (0) < 0) + error (1, errno, "-"); + if (close (1) < 0) + error (1, errno, "write error"); + exit (errors); +} + +/* The name of a temporary file containing a copy of pipe input. */ +char *tempfile; + +/* Print the standard input in reverse, saving it to temporary + file `tempfile' first if it is a pipe. + Return 0 if ok, 1 if an error occurs. */ + +int +tac_stdin () +{ + /* Previous values of signal handlers. */ + RETSIGTYPE (*sigint) (), (*sighup) (), (*sigpipe) (), (*sigterm) (); + int errors; + struct stat stats; +#ifdef _POSIX_VERSION + struct sigaction oldact, newact; +#endif /* _POSIX_VERSION */ + + /* No tempfile is needed for "tac < file". + Use fstat instead of checking for errno == ESPIPE because + lseek doesn't work on some special files but doesn't return an + error, either. */ + if (fstat (0, &stats)) + { + error (0, errno, "standard input"); + return 1; + } + if (S_ISREG (stats.st_mode)) + return tac (0, "standard input"); + +#ifdef _POSIX_VERSION + newact.sa_handler = cleanup; + sigemptyset (&newact.sa_mask); + newact.sa_flags = 0; + + sigaction (SIGINT, NULL, &oldact); + sigint = oldact.sa_handler; + if (sigint != SIG_IGN) + sigaction (SIGINT, &newact, NULL); + + sigaction (SIGHUP, NULL, &oldact); + sighup = oldact.sa_handler; + if (sighup != SIG_IGN) + sigaction (SIGHUP, &newact, NULL); + + sigaction (SIGPIPE, NULL, &oldact); + sigpipe = oldact.sa_handler; + if (sigpipe != SIG_IGN) + sigaction (SIGPIPE, &newact, NULL); + + sigaction (SIGTERM, NULL, &oldact); + sigterm = oldact.sa_handler; + if (sigterm != SIG_IGN) + sigaction (SIGTERM, &newact, NULL); +#else /* !_POSIX_VERSION */ + sigint = signal (SIGINT, SIG_IGN); + if (sigint != SIG_IGN) + signal (SIGINT, cleanup); + + sighup = signal (SIGHUP, SIG_IGN); + if (sighup != SIG_IGN) + signal (SIGHUP, cleanup); + + sigpipe = signal (SIGPIPE, SIG_IGN); + if (sigpipe != SIG_IGN) + signal (SIGPIPE, cleanup); + + sigterm = signal 
(SIGTERM, SIG_IGN); + if (sigterm != SIG_IGN) + signal (SIGTERM, cleanup); +#endif /* _POSIX_VERSION */ + + save_stdin (); + + errors = tac_file (tempfile); + + unlink (tempfile); + +#ifdef _POSIX_VERSION + newact.sa_handler = sigint; + sigaction (SIGINT, &newact, NULL); + newact.sa_handler = sighup; + sigaction (SIGHUP, &newact, NULL); + newact.sa_handler = sigterm; + sigaction (SIGTERM, &newact, NULL); + newact.sa_handler = sigpipe; + sigaction (SIGPIPE, &newact, NULL); +#else /* !_POSIX_VERSION */ + signal (SIGINT, sigint); + signal (SIGHUP, sighup); + signal (SIGTERM, sigterm); + signal (SIGPIPE, sigpipe); +#endif /* _POSIX_VERSION */ + + return errors; +} + +/* Make a copy of the standard input in `tempfile'. */ + +void +save_stdin () +{ + static char *template = NULL; + static char *tempdir; + int fd; + int bytes_read; + + if (template == NULL) + { + tempdir = getenv ("TMPDIR"); + if (tempdir == NULL) + tempdir = "/tmp"; + template = xmalloc (strlen (tempdir) + 11); + } + sprintf (template, "%s/tacXXXXXX", tempdir); + tempfile = mktemp (template); + + fd = creat (tempfile, 0600); + if (fd == -1) + { + error (0, errno, "%s", tempfile); + cleanup (); + } + while ((bytes_read = read (0, buffer, read_size)) > 0) + if (write (fd, buffer, bytes_read) != bytes_read) + { + error (0, errno, "%s", tempfile); + cleanup (); + } + if (close (fd) < 0) + { + error (0, errno, "%s", tempfile); + cleanup (); + } + if (bytes_read == -1) + { + error (0, errno, "read error"); + cleanup (); + } +} + +/* Print FILE in reverse. + Return 0 if ok, 1 if an error occurs. */ + +int +tac_file (file) + char *file; +{ + int fd, errors; + + fd = open (file, 0); + if (fd == -1) + { + error (0, errno, "%s", file); + return 1; + } + errors = tac (fd, file); + if (close (fd) < 0) + { + error (0, errno, "%s", file); + return 1; + } + return errors; +} + +/* Print in reverse the file open on descriptor FD for reading FILE. + Return 0 if ok, 1 if an error occurs. 
*/ + +int +tac (fd, file) + int fd; + char *file; +{ + /* Pointer to the location in `buffer' where the search for + the next separator will begin. */ + char *match_start; + /* Pointer to one past the rightmost character in `buffer' that + has not been printed yet. */ + char *past_end; + unsigned saved_record_size; /* Length of the record growing in `buffer'. */ + off_t file_pos; /* Offset in the file of the next read. */ + /* Nonzero if `output' has not been called yet for any file. + Only used when the separator is attached to the preceding record. */ + int first_time = 1; + char first_char = *separator; /* Speed optimization, non-regexp. */ + char *separator1 = separator + 1; /* Speed optimization, non-regexp. */ + int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */ + struct re_registers regs; + + /* Find the size of the input file. */ + file_pos = lseek (fd, (off_t) 0, SEEK_END); + if (file_pos < 1) + return 0; /* It's an empty file. */ + + /* Arrange for the first read to lop off enough to leave the rest of the + file a multiple of `read_size'. Since `read_size' can change, this may + not always hold during the program run, but since it usually will, leave + it here for i/o efficiency (page/sector boundaries and all that). + Note: the efficiency gain has not been verified. */ + saved_record_size = file_pos % read_size; + if (saved_record_size == 0) + saved_record_size = read_size; + file_pos -= saved_record_size; + /* `file_pos' now points to the start of the last (probably partial) block + in the input file. */ + + lseek (fd, file_pos, SEEK_SET); + if (read (fd, buffer, saved_record_size) != saved_record_size) + { + error (0, 1, "%s", file); + return 1; + } + + match_start = past_end = buffer + saved_record_size; + /* For non-regexp search, move past impossible positions for a match. 
*/ + if (sentinel_length) + match_start -= match_length1; + + for (;;) + { + /* Search backward from `match_start' - 1 to `buffer' for a match + with `separator'; for speed, use strncmp if `separator' contains no + metacharacters. + If the match succeeds, set `match_start' to point to the start of + the match and `match_length' to the length of the match. + Otherwise, make `match_start' < `buffer'. */ + if (sentinel_length == 0) + { + int i = match_start - buffer; + int ret; + + ret = re_search (&compiled_separator, buffer, i, i - 1, -i, ®s); + if (ret == -1) + match_start = buffer - 1; + else if (ret == -2) + { + error (0, 0, "error in regular expression search"); + cleanup (); + } + else + { + match_start = buffer + regs.start[0]; + match_length = regs.end[0] - regs.start[0]; + } + } + else + { + /* `match_length' is constant for non-regexp boundaries. */ + while (*--match_start != first_char + || (match_length1 && strncmp (match_start + 1, separator1, + match_length1))) + /* Do nothing. */ ; + } + + /* Check whether we backed off the front of `buffer' without finding + a match for `separator'. */ + if (match_start < buffer) + { + if (file_pos == 0) + { + /* Hit the beginning of the file; print the remaining record. */ + output (buffer, past_end); + return 0; + } + + saved_record_size = past_end - buffer; + if (saved_record_size > read_size) + { + /* `buffer_size' is about twice `read_size', so since + we want to read in another `read_size' bytes before + the data already in `buffer', we need to increase + `buffer_size'. */ + char *newbuffer; + int offset = sentinel_length ? sentinel_length : 1; + + read_size *= 2; + buffer_size = read_size * 2 + sentinel_length + 2; + newbuffer = xrealloc (buffer - offset, buffer_size) + offset; + /* Adjust the pointers for the new buffer location. */ + match_start += newbuffer - buffer; + past_end += newbuffer - buffer; + buffer = newbuffer; + } + + /* Back up to the start of the next bufferfull of the file. 
*/ + if (file_pos >= read_size) + file_pos -= read_size; + else + { + read_size = file_pos; + file_pos = 0; + } + lseek (fd, file_pos, SEEK_SET); + + /* Shift the pending record data right to make room for the new. */ + bcopy (buffer, buffer + read_size, saved_record_size); + past_end = buffer + read_size + saved_record_size; + /* For non-regexp searches, avoid unneccessary scanning. */ + if (sentinel_length) + match_start = buffer + read_size; + else + match_start = past_end; + + if (read (fd, buffer, read_size) != read_size) + { + error (0, errno, "%s", file); + return 1; + } + } + else + { + /* Found a match of `separator'. */ + if (separator_ends_record) + { + char *match_end = match_start + match_length; + + /* If this match of `separator' isn't at the end of the + file, print the record. */ + if (first_time == 0 || match_end != past_end) + output (match_end, past_end); + past_end = match_end; + first_time = 0; + } + else + { + output (match_start, past_end); + past_end = match_start; + } + match_start -= match_length - 1; + } + } +} + +/* Print the characters from START to PAST_END - 1. + If START is NULL, just flush the buffer. */ + +void +output (start, past_end) + char *start; + char *past_end; +{ + static char buffer[WRITESIZE]; + static int bytes_in_buffer = 0; + int bytes_to_add = past_end - start; + int bytes_available = WRITESIZE - bytes_in_buffer; + + if (start == 0) + { + xwrite (1, buffer, bytes_in_buffer); + bytes_in_buffer = 0; + return; + } + + /* Write out as many full buffers as possible. 
*/ + while (bytes_to_add >= bytes_available) + { + bcopy (start, buffer + bytes_in_buffer, bytes_available); + bytes_to_add -= bytes_available; + start += bytes_available; + xwrite (1, buffer, WRITESIZE); + bytes_in_buffer = 0; + bytes_available = WRITESIZE; + } + + bcopy (start, buffer + bytes_in_buffer, bytes_to_add); + bytes_in_buffer += bytes_to_add; +} + +RETSIGTYPE +cleanup () +{ + unlink (tempfile); + exit (1); +} + +void +xwrite (desc, buffer, size) + int desc; + char *buffer; + int size; +{ + if (write (desc, buffer, size) != size) + { + error (0, errno, "write error"); + cleanup (); + } +} + +/* Allocate N bytes of memory dynamically, with error checking. */ + +char * +xmalloc (n) + unsigned n; +{ + char *p; + + p = malloc (n); + if (p == 0) + { + error (0, 0, "virtual memory exhausted"); + cleanup (); + } + return p; +} + +/* Change the size of memory area P to N bytes, with error checking. */ + +char * +xrealloc (p, n) + char *p; + unsigned n; +{ + p = realloc (p, n); + if (p == 0) + { + error (0, 0, "virtual memory exhausted"); + cleanup (); + } + return p; +} diff --git a/src/tail.c b/src/tail.c new file mode 100644 index 000000000..050c1936f --- /dev/null +++ b/src/tail.c @@ -0,0 +1,858 @@ +/* tail -- output last part of file(s) + Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +/* Can display any amount of data, unlike the Unix version, which uses + a fixed size buffer and therefore can only deliver a limited number + of lines. + + Options: + -b Tail by N 512-byte blocks. + -c, --bytes=N[bkm] Tail by N bytes + [or 512-byte blocks, kilobytes, or megabytes]. + -f, --follow Loop forever trying to read more characters at the + end of the file, on the assumption that the file + is growing. Ignored if reading from a pipe. + Cannot be used if more than one file is given. + -k Tail by N kilobytes. + -N, -l, -n, --lines=N Tail by N lines. + -m Tail by N megabytes. + -q, --quiet, --silent Never print filename headers. + -v, --verbose Always print filename headers. + + If a number (N) starts with a `+', begin printing with the Nth item + from the start of each file, instead of from the end. + + Reads from standard input if no files are given or when a filename of + ``-'' is encountered. + By default, filename headers are printed only if more than one file + is given. + By default, prints the last 10 lines (tail -n 10). + + Original version by Paul Rubin <phr@ocf.berkeley.edu>. + Extensions by David MacKenzie <djm@ai.mit.edu>. */ + +#include <stdio.h> +#include <getopt.h> +#include <ctype.h> +#include <sys/types.h> +#include "system.h" + +#ifdef isascii +#define ISDIGIT(c) (isascii ((c)) && isdigit ((c))) +#else +#define ISDIGIT(c) (isdigit ((c))) +#endif + +/* Number of items to tail. */ +#define DEFAULT_NUMBER 10 + +/* Size of atomic reads. */ +#define BUFSIZE (512 * 8) + +/* Number of bytes per item we are printing. + If 0, tail in lines. */ +int unit_size; + +/* If nonzero, read from end of file until killed. */ +int forever; + +/* If nonzero, count from start of file instead of end. */ +int from_start; + +/* If nonzero, print filename headers. */ +int print_headers; + +/* When to print the filename banners.
*/ +enum header_mode +{ + multiple_files, always, never +}; + +char *xmalloc (); +int file_lines (); +int pipe_bytes (); +int pipe_lines (); +int start_bytes (); +int start_lines (); +int tail (); +int tail_bytes (); +int tail_file (); +int tail_lines (); +long atou(); +void dump_remainder (); +void error (); +void parse_unit (); +void usage (); +void write_header (); +void xwrite (); + +/* The name this program was run with. */ +char *program_name; + +/* Nonzero if we have ever read standard input. */ +int have_read_stdin; + +struct option long_options[] = +{ + {"bytes", 1, NULL, 'c'}, + {"follow", 0, NULL, 'f'}, + {"lines", 1, NULL, 'n'}, + {"quiet", 0, NULL, 'q'}, + {"silent", 0, NULL, 'q'}, + {"verbose", 0, NULL, 'v'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + enum header_mode header_mode = multiple_files; + int exit_status = 0; + /* If from_start, the number of items to skip before printing; otherwise, + the number of items at the end of the file to print. Initially, -1 + means the value has not been set. */ + long number = -1; + int c; /* Option character. */ + + program_name = argv[0]; + have_read_stdin = 0; + unit_size = 0; + forever = from_start = print_headers = 0; + + if (argc > 1 + && ((argv[1][0] == '-' && ISDIGIT (argv[1][1])) + || (argv[1][0] == '+' && (ISDIGIT (argv[1][1]) || argv[1][1] == 0)))) + { + /* Old option syntax: a dash or plus, one or more digits (zero digits + are acceptable with a plus), and one or more option letters. */ + if (argv[1][0] == '+') + from_start = 1; + if (argv[1][1] != 0) + { + for (number = 0, ++argv[1]; ISDIGIT (*argv[1]); ++argv[1]) + number = number * 10 + *argv[1] - '0'; + /* Parse any appended option letters. 
*/ + while (*argv[1]) + { + switch (*argv[1]) + { + case 'b': + unit_size = 512; + break; + + case 'c': + unit_size = 1; + break; + + case 'f': + forever = 1; + break; + + case 'k': + unit_size = 1024; + break; + + case 'l': + unit_size = 0; + break; + + case 'm': + unit_size = 1048576; + break; + + case 'q': + header_mode = never; + break; + + case 'v': + header_mode = always; + break; + + default: + error (0, 0, "unrecognized option `-%c'", *argv[1]); + usage (); + } + ++argv[1]; + } + } + /* Make the options we just parsed invisible to getopt. */ + argv[1] = argv[0]; + argv++; + argc--; + } + + while ((c = getopt_long (argc, argv, "c:n:fqv", long_options, (int *) 0)) + != EOF) + { + switch (c) + { + case 'c': + unit_size = 1; + parse_unit (optarg); + goto getnum; + case 'n': + unit_size = 0; + getnum: + if (*optarg == '+') + { + from_start = 1; + ++optarg; + } + else if (*optarg == '-') + ++optarg; + number = atou (optarg); + if (number == -1) + error (1, 0, "invalid number `%s'", optarg); + break; + + case 'f': + forever = 1; + break; + + case 'q': + header_mode = never; + break; + + case 'v': + header_mode = always; + break; + + default: + usage (); + } + } + + if (number == -1) + number = DEFAULT_NUMBER; + + /* To start printing with item `number' from the start of the file, skip + `number' - 1 items. `tail +0' is actually meaningless, but for Unix + compatibility it's treated the same as `tail +1'. 
*/ + if (from_start) + { + if (number) + --number; + } + + if (unit_size > 1) + number *= unit_size; + + if (optind < argc - 1 && forever) + error (1, 0, "cannot follow the ends of multiple files"); + + if (header_mode == always + || (header_mode == multiple_files && optind < argc - 1)) + print_headers = 1; + + if (optind == argc) + exit_status |= tail_file ("-", number); + + for (; optind < argc; ++optind) + exit_status |= tail_file (argv[optind], number); + + if (have_read_stdin && close (0) < 0) + error (1, errno, "-"); + if (close (1) < 0) + error (1, errno, "write error"); + exit (exit_status); +} + +/* Display the last NUMBER units of file FILENAME. + "-" for FILENAME means the standard input. + Return 0 if successful, 1 if an error occurred. */ + +int +tail_file (filename, number) + char *filename; + long number; +{ + int fd; + + if (!strcmp (filename, "-")) + { + have_read_stdin = 1; + filename = "standard input"; + if (print_headers) + write_header (filename); + return tail (filename, 0, number); + } + else + { + fd = open (filename, O_RDONLY); + if (fd >= 0) + { + int errors; + + if (print_headers) + write_header (filename); + errors = tail (filename, fd, number); + if (close (fd) == 0) + return errors; + } + error (0, errno, "%s", filename); + return 1; + } +} + +void +write_header (filename) + char *filename; +{ + static int first_file = 1; + + if (first_file) + { + xwrite (1, "==> ", 4); + first_file = 0; + } + else + xwrite (1, "\n==> ", 5); + xwrite (1, filename, strlen (filename)); + xwrite (1, " <==\n", 5); +} + +/* Display the last NUMBER units of file FILENAME, open for reading + in FD. + Return 0 if successful, 1 if an error occurred. */ + +int +tail (filename, fd, number) + char *filename; + int fd; + long number; +{ + if (unit_size) + return tail_bytes (filename, fd, number); + else + return tail_lines (filename, fd, number); +} + +/* Display the last part of file FILENAME, open for reading in FD, + using NUMBER characters. 
+ Return 0 if successful, 1 if an error occurred. */ + +int +tail_bytes (filename, fd, number) + char *filename; + int fd; + long number; +{ + struct stat stats; + + /* Use fstat instead of checking for errno == ESPIPE because + lseek doesn't work on some special files but doesn't return an + error, either. */ + if (fstat (fd, &stats)) + { + error (0, errno, "%s", filename); + return 1; + } + + if (from_start) + { + if (S_ISREG (stats.st_mode)) + lseek (fd, number, SEEK_SET); + else if (start_bytes (filename, fd, number)) + return 1; + dump_remainder (filename, fd); + } + else + { + if (S_ISREG (stats.st_mode)) + { + if (lseek (fd, 0L, SEEK_END) <= number) + /* The file is shorter than we want, or just the right size, so + print the whole file. */ + lseek (fd, 0L, SEEK_SET); + else + /* The file is longer than we want, so go back. */ + lseek (fd, -number, SEEK_END); + dump_remainder (filename, fd); + } + else + return pipe_bytes (filename, fd, number); + } + return 0; +} + +/* Display the last part of file FILENAME, open for reading on FD, + using NUMBER lines. + Return 0 if successful, 1 if an error occurred. */ + +int +tail_lines (filename, fd, number) + char *filename; + int fd; + long number; +{ + struct stat stats; + long length; + + if (fstat (fd, &stats)) + { + error (0, errno, "%s", filename); + return 1; + } + + if (from_start) + { + if (start_lines (filename, fd, number)) + return 1; + dump_remainder (filename, fd); + } + else + { + if (S_ISREG (stats.st_mode)) + { + length = lseek (fd, 0L, SEEK_END); + if (length != 0 && file_lines (filename, fd, number, length)) + return 1; + dump_remainder (filename, fd); + } + else + return pipe_lines (filename, fd, number); + } + return 0; +} + +/* Print the last NUMBER lines from the end of file FD. + Go backward through the file, reading `BUFSIZE' bytes at a time (except + probably the first), until we hit the start of the file or have + read NUMBER newlines. 
+ POS starts out as the length of the file (the offset of the last + byte of the file + 1). + Return 0 if successful, 1 if an error occurred. */ + +int +file_lines (filename, fd, number, pos) + char *filename; + int fd; + long number; + long pos; +{ + char buffer[BUFSIZE]; + int bytes_read; + int i; /* Index into `buffer' for scanning. */ + + if (number == 0) + return 0; + + /* Set `bytes_read' to the size of the last, probably partial, buffer; + 0 < `bytes_read' <= `BUFSIZE'. */ + bytes_read = pos % BUFSIZE; + if (bytes_read == 0) + bytes_read = BUFSIZE; + /* Make `pos' a multiple of `BUFSIZE' (0 if the file is short), so that all + reads will be on block boundaries, which might increase efficiency. */ + pos -= bytes_read; + lseek (fd, pos, SEEK_SET); + bytes_read = read (fd, buffer, bytes_read); + if (bytes_read == -1) + { + error (0, errno, "%s", filename); + return 1; + } + + /* Count the incomplete line on files that don't end with a newline. */ + if (bytes_read && buffer[bytes_read - 1] != '\n') + --number; + + do + { + /* Scan backward, counting the newlines in this bufferfull. */ + for (i = bytes_read - 1; i >= 0; i--) + { + /* Have we counted the requested number of newlines yet? */ + if (buffer[i] == '\n' && number-- == 0) + { + /* If this newline wasn't the last character in the buffer, + print the text after it. */ + if (i != bytes_read - 1) + xwrite (1, &buffer[i + 1], bytes_read - (i + 1)); + return 0; + } + } + /* Not enough newlines in that bufferfull. */ + if (pos == 0) + { + /* Not enough lines in the file; print the entire file. */ + lseek (fd, 0L, SEEK_SET); + return 0; + } + pos -= BUFSIZE; + lseek (fd, pos, SEEK_SET); + } + while ((bytes_read = read (fd, buffer, BUFSIZE)) > 0); + if (bytes_read == -1) + { + error (0, errno, "%s", filename); + return 1; + } + return 0; +} + +/* Print the last NUMBER lines from the end of the standard input, + open for reading as pipe FD. + Buffer the text as a linked list of LBUFFERs, adding them as needed. 
+ Return 0 if successful, 1 if an error occured. */ + +int +pipe_lines (filename, fd, number) + char *filename; + int fd; + long number; +{ + struct linebuffer + { + int nbytes, nlines; + char buffer[BUFSIZE]; + struct linebuffer *next; + }; + typedef struct linebuffer LBUFFER; + LBUFFER *first, *last, *tmp; + int i; /* Index into buffers. */ + int total_lines = 0; /* Total number of newlines in all buffers. */ + int errors = 0; + + first = last = (LBUFFER *) xmalloc (sizeof (LBUFFER)); + first->nbytes = first->nlines = 0; + first->next = NULL; + tmp = (LBUFFER *) xmalloc (sizeof (LBUFFER)); + + /* Input is always read into a fresh buffer. */ + while ((tmp->nbytes = read (fd, tmp->buffer, BUFSIZE)) > 0) + { + tmp->nlines = 0; + tmp->next = NULL; + + /* Count the number of newlines just read. */ + for (i = 0; i < tmp->nbytes; i++) + if (tmp->buffer[i] == '\n') + ++tmp->nlines; + total_lines += tmp->nlines; + + /* If there is enough room in the last buffer read, just append the new + one to it. This is because when reading from a pipe, `nbytes' can + often be very small. */ + if (tmp->nbytes + last->nbytes < BUFSIZE) + { + bcopy (tmp->buffer, &last->buffer[last->nbytes], tmp->nbytes); + last->nbytes += tmp->nbytes; + last->nlines += tmp->nlines; + } + else + { + /* If there's not enough room, link the new buffer onto the end of + the list, then either free up the oldest buffer for the next + read if that would leave enough lines, or else malloc a new one. + Some compaction mechanism is possible but probably not + worthwhile. */ + last = last->next = tmp; + if (total_lines - first->nlines > number) + { + tmp = first; + total_lines -= first->nlines; + first = first->next; + } + else + tmp = (LBUFFER *) xmalloc (sizeof (LBUFFER)); + } + } + if (tmp->nbytes == -1) + { + error (0, errno, "%s", filename); + errors = 1; + free ((char *) tmp); + goto free_lbuffers; + } + + free ((char *) tmp); + + /* This prevents a core dump when the pipe contains no newlines. 
*/ + if (number == 0) + goto free_lbuffers; + + /* Count the incomplete line on files that don't end with a newline. */ + if (last->buffer[last->nbytes - 1] != '\n') + { + ++last->nlines; + ++total_lines; + } + + /* Run through the list, printing lines. First, skip over unneeded + buffers. */ + for (tmp = first; total_lines - tmp->nlines > number; tmp = tmp->next) + total_lines -= tmp->nlines; + + /* Find the correct beginning, then print the rest of the file. */ + if (total_lines > number) + { + char *cp; + + /* Skip `total_lines' - `number' newlines. We made sure that + `total_lines' - `number' <= `tmp->nlines'. */ + cp = tmp->buffer; + for (i = total_lines - number; i; --i) + while (*cp++ != '\n') + /* Do nothing. */ ; + i = cp - tmp->buffer; + } + else + i = 0; + xwrite (1, &tmp->buffer[i], tmp->nbytes - i); + + for (tmp = tmp->next; tmp; tmp = tmp->next) + xwrite (1, tmp->buffer, tmp->nbytes); + +free_lbuffers: + while (first) + { + tmp = first->next; + free ((char *) first); + first = tmp; + } + return errors; +} + +/* Print the last NUMBER characters from the end of pipe FD. + This is a stripped down version of pipe_lines. + Return 0 if successful, 1 if an error occurred. */ + +int +pipe_bytes (filename, fd, number) + char *filename; + int fd; + long number; +{ + struct charbuffer + { + int nbytes; + char buffer[BUFSIZE]; + struct charbuffer *next; + }; + typedef struct charbuffer CBUFFER; + CBUFFER *first, *last, *tmp; + int i; /* Index into buffers. */ + int total_bytes = 0; /* Total characters in all buffers. */ + int errors = 0; + + first = last = (CBUFFER *) xmalloc (sizeof (CBUFFER)); + first->nbytes = 0; + first->next = NULL; + tmp = (CBUFFER *) xmalloc (sizeof (CBUFFER)); + + /* Input is always read into a fresh buffer. */ + while ((tmp->nbytes = read (fd, tmp->buffer, BUFSIZE)) > 0) + { + tmp->next = NULL; + + total_bytes += tmp->nbytes; + /* If there is enough room in the last buffer read, just append the new + one to it. 
This is because when reading from a pipe, `nbytes' can + often be very small. */ + if (tmp->nbytes + last->nbytes < BUFSIZE) + { + bcopy (tmp->buffer, &last->buffer[last->nbytes], tmp->nbytes); + last->nbytes += tmp->nbytes; + } + else + { + /* If there's not enough room, link the new buffer onto the end of + the list, then either free up the oldest buffer for the next + read if that would leave enough characters, or else malloc a new + one. Some compaction mechanism is possible but probably not + worthwhile. */ + last = last->next = tmp; + if (total_bytes - first->nbytes > number) + { + tmp = first; + total_bytes -= first->nbytes; + first = first->next; + } + else + { + tmp = (CBUFFER *) xmalloc (sizeof (CBUFFER)); + } + } + } + if (tmp->nbytes == -1) + { + error (0, errno, "%s", filename); + errors = 1; + free ((char *) tmp); + goto free_cbuffers; + } + + free ((char *) tmp); + + /* Run through the list, printing characters. First, skip over unneeded + buffers. */ + for (tmp = first; total_bytes - tmp->nbytes > number; tmp = tmp->next) + total_bytes -= tmp->nbytes; + + /* Find the correct beginning, then print the rest of the file. + We made sure that `total_bytes' - `number' <= `tmp->nbytes'. */ + if (total_bytes > number) + i = total_bytes - number; + else + i = 0; + xwrite (1, &tmp->buffer[i], tmp->nbytes - i); + + for (tmp = tmp->next; tmp; tmp = tmp->next) + xwrite (1, tmp->buffer, tmp->nbytes); + +free_cbuffers: + while (first) + { + tmp = first->next; + free ((char *) first); + first = tmp; + } + return errors; +} + +/* Skip NUMBER characters from the start of pipe FD, and print + any extra characters that were read beyond that. + Return 1 on error, 0 if ok. 
*/ + +int +start_bytes (filename, fd, number) + char *filename; + int fd; + long number; +{ + char buffer[BUFSIZE]; + int bytes_read = 0; + + while (number > 0 && (bytes_read = read (fd, buffer, BUFSIZE)) > 0) + number -= bytes_read; + if (bytes_read == -1) + { + error (0, errno, "%s", filename); + return 1; + } + else if (number < 0) + xwrite (1, &buffer[bytes_read + number], -number); + return 0; +} + +/* Skip NUMBER lines at the start of file or pipe FD, and print + any extra characters that were read beyond that. + Return 1 on error, 0 if ok. */ + +int +start_lines (filename, fd, number) + char *filename; + int fd; + long number; +{ + char buffer[BUFSIZE]; + int bytes_read = 0; + int bytes_to_skip = 0; + + while (number && (bytes_read = read (fd, buffer, BUFSIZE)) > 0) + { + bytes_to_skip = 0; + while (bytes_to_skip < bytes_read) + if (buffer[bytes_to_skip++] == '\n' && --number == 0) + break; + } + if (bytes_read == -1) + { + error (0, errno, "%s", filename); + return 1; + } + else if (bytes_to_skip < bytes_read) + xwrite (1, &buffer[bytes_to_skip], bytes_read - bytes_to_skip); + return 0; +} + +/* Display file FILENAME from the current position in FD + to the end. If `forever' is nonzero, keep reading from the + end of the file until killed. 
*/ + +void +dump_remainder (filename, fd) + char *filename; + int fd; +{ + char buffer[BUFSIZE]; + int bytes_read; + +output: + while ((bytes_read = read (fd, buffer, BUFSIZE)) > 0) + xwrite (1, buffer, bytes_read); + if (bytes_read == -1) + error (1, errno, "%s", filename); + if (forever) + { + sleep (1); + goto output; + } +} + +void +parse_unit (str) + char *str; +{ + int arglen = strlen (str); + + if (arglen == 0) + return; + + switch (str[arglen - 1]) + { + case 'b': + unit_size = 512; + str[arglen - 1] = '\0'; + break; + case 'k': + unit_size = 1024; + str[arglen - 1] = '\0'; + break; + case 'm': + unit_size = 1048576; + str[arglen - 1] = '\0'; + break; + } +} + +/* Convert STR, a string of ASCII digits, into an unsigned integer. + Return -1 if STR does not represent a valid unsigned integer. */ + +long +atou (str) + char *str; +{ + unsigned long value; + + for (value = 0; ISDIGIT (*str); ++str) + value = value * 10 + *str - '0'; + return *str ? -1 : value; +} + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-c [+]N[bkm]] [-n [+]N] [-fqv] [--bytes=[+]N[bkm]] [--lines=[+]N]\n\ + [--follow] [--quiet] [--silent] [--verbose] [file...]\n\ + %s [{-,+}Nbcfklmqv] [file...]\n", program_name, program_name); + exit (1); +} diff --git a/src/tr.c b/src/tr.c new file mode 100644 index 000000000..bd12f383f --- /dev/null +++ b/src/tr.c @@ -0,0 +1,1813 @@ +/* tr -- a filter to translate characters + Copyright (C) 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Jim Meyering. */ + +#define _GNU_SOURCE +#include <ctype.h> +#ifndef isblank +#define isblank(c) ((c) == ' ' || (c) == '\t') +#endif +#ifndef isgraph +#define isgraph(c) (isprint (c) && !isspace (c)) +#endif +#include <stdio.h> +#include <assert.h> +#include <errno.h> +#include <sys/types.h> +#include "getopt.h" +#include "system.h" + +#ifndef LONG_MAX +#define LONG_MAX 0x7FFFFFFF +#endif + +#ifndef UCHAR_MAX +#define UCHAR_MAX 0xFF +#endif + +#define N_CHARS (UCHAR_MAX + 1) + +/* A pointer to a function that returns an int. */ +typedef int (*PFI) (); + +/* Convert from character C to its index in the collating + sequence array. Just cast to an unsigned int to avoid + problems with sign-extension. */ +#define ORD(c) (unsigned int)(c) + +/* The inverse of ORD. */ +#define CHR(i) (unsigned char)(i) + +/* The value for Spec_list->state that indicates to + get_next that it should initialize the tail pointer. + Its value doesn't matter as long as it can't be + confused with a valid character code. */ +#define BEGIN_STATE (2 * N_CHARS) + +/* The value for Spec_list->state that indicates to + get_next that the element pointed to by Spec_list->tail is + being considered for the first time on this pass through the + list -- it indicates that get_next should make any necessary + initializations. */ +#define NEW_ELEMENT (BEGIN_STATE + 1) + +/* A value distinct from any character that may have been stored in a + buffer as the result of a block-read in the function squeeze_filter. */ +#define NOT_A_CHAR (unsigned int)(-1) + +/* The following (but not CC_NO_CLASS) are indices into the array of + valid character class strings. 
*/ +enum Char_class +{ + CC_ALNUM = 0, CC_ALPHA = 1, CC_BLANK = 2, CC_CNTRL = 3, + CC_DIGIT = 4, CC_GRAPH = 5, CC_LOWER = 6, CC_PRINT = 7, + CC_PUNCT = 8, CC_SPACE = 9, CC_UPPER = 10, CC_XDIGIT = 11, + CC_NO_CLASS = 9999 +}; + +/* Character class to which a character (returned by get_next) belonged; + but it is set only if the construct from which the character was obtained + was one of the character classes [:upper:] or [:lower:]. The value + is used only when translating and then, only to make sure that upper + and lower class constructs have the same relative positions in string1 + and string2. */ +enum Upper_Lower_class +{ + UL_LOWER = 0, + UL_UPPER = 1, + UL_NONE = 2 +}; + +/* A shortcut to ensure that when constructing the translation array, + one of the values returned by paired calls to get_next (from s1 and s2) is + from [:upper:] and the other is from [:lower:], or neither is + from upper or lower. In fact, no other character classes are allowed + when translating, but that condition is tested elsewhere. This array + is indexed by values of type enum Upper_Lower_class. */ +static int class_ok[3][3] = +{ + {0, 1, 0}, + {1, 0, 0}, + {0, 0, 1} +}; + +/* The type of a List_element. See build_spec_list for more details. */ +enum Range_element_type +{ + RE_NO_TYPE = 0, + RE_NORMAL_CHAR, + RE_RANGE, + RE_CHAR_CLASS, + RE_EQUIV_CLASS, + RE_REPEATED_CHAR +}; + +/* One construct in one of tr's argument strings. + For example, consider the POSIX version of the + classic tr command: + tr -cs 'a-zA-Z_' '[\n*]' + String1 has 3 constructs, two of which are ranges (a-z and A-Z), + and a single normal character, `_'. String2 has one construct. 
*/ +struct List_element +{ + enum Range_element_type type; + struct List_element *next; + union + { + int normal_char; + struct /* unnamed */ + { + unsigned int first_char; + unsigned int last_char; + } range; + enum Char_class char_class; + int equiv_code; + struct /* unnamed */ + { + unsigned int the_repeated_char; + long repeat_count; + } repeated_char; + } u; +}; + +/* Each of tr's argument strings is parsed into a form that is easier + to work with: a linked list of constructs (struct List_element). + Each Spec_list structure also encapsulates various attributes of + the corresponding argument string. The attributes are used mainly + to verify that the strings are legal in the context of any options + specified (like -s, -d, or -c). The main exception is the member + `tail', which is first used to construct the list. After construction, + it is used by get_next to save its state when traversing the list. + The member `state' serves a similar function. */ +struct Spec_list +{ + /* Points to the head of the list of range elements. + The first struct is a dummy; its members are never used. */ + struct List_element *head; + + /* When appending, points to the last element. When traversing via + get_next(), points to the element to process next. Setting + Spec_list.state to the value BEGIN_STATE before calling get_next + signals get_next to initialize tail to point to head->next. */ + struct List_element *tail; + + /* Used to save state between calls to get_next(). */ + unsigned int state; + + /* Length, in the sense that length('a-z[:digit:]123abc') + is 42 ( = 26 + 10 + 6). */ + int length; + + /* The number of [c*] and [c*0] constructs that appear in this spec. */ + int n_indefinite_repeats; + + /* Non-zero if this spec contains at least one equivalence + class construct e.g. [=c=]. */ + int has_equiv_class; + + /* Non-zero if this spec contains at least one of [:upper:] or + [:lower:] class constructs. 
*/ + int has_upper_or_lower; + + /* Non-zero if this spec contains at least one of the character class + constructs (all but upper and lower) that aren't allowed in s2. */ + int has_restricted_char_class; +}; + +char *xmalloc (); +char *stpcpy (); +void error (); + +/* The name by which this program was run. */ +char *program_name; + +/* When non-zero, each sequence in the input of a repeated character + (call it c) is replaced (in the output) by a single occurrence of c + for every c in the squeeze set. */ +static int squeeze_repeats = 0; + +/* When non-zero, removes characters in the delete set from input. */ +static int delete = 0; + +/* Use the complement of set1 in place of set1. */ +static int complement = 0; + +/* When non-zero, this flag causes GNU tr to provide strict + compliance with POSIX draft 1003.2.11.2. The POSIX spec + says that when -d is used without -s, string2 (if present) + must be ignored. Silently ignoring arguments is a bad idea. + The default GNU behavior is to give a usage message and exit. + Additionally, when this flag is non-zero, tr prints warnings + on stderr if it is being used in a manner that is not portable. + Applicable warnings are given by default, but are suppressed + if the environment variable `POSIXLY_CORRECT' is set, since + being POSIX conformant means we can't issue such messages. + Warnings on the following topics are suppressed when this + variable is non-zero: + 1. Ambiguous octal escapes. */ +static int posix_pedantic; + +/* When tr is performing translation and string1 is longer than string2, + POSIX says that the result is undefined. That gives the implementor + of a POSIX conforming version of tr two reasonable choices for the + semantics of this case. + + * The BSD tr pads string2 to the length of string1 by + repeating the last character in string2. + + * System V tr ignores characters in string1 that have no + corresponding character in string2. 
That is, string1 is effectively + truncated to the length of string2. + + When non-zero, this flag causes GNU tr to imitate the behavior + of System V tr when translating with string1 longer than string2. + The default is to emulate BSD tr. This flag is ignored in modes where + no translation is performed. Emulating the System V tr + in this exceptional case causes the relatively common BSD idiom: + + tr -cs A-Za-z0-9 '\012' + + to break (it would convert only zero bytes, rather than all + non-alphanumerics, to newlines). + + WARNING: This switch does not provide general BSD or System V + compatibility. For example, it doesn't disable the interpretation + of the POSIX constructs [:alpha:], [=c=], and [c*10], so if by + some unfortunate coincidence you use such constructs in scripts + expecting to use some other version of tr, the scripts will break. */ +static int truncate_set1 = 0; + +/* An alias for (!delete && non_option_args == 2). + It is set in main and used there and in validate(). */ +static int translating; + +#ifndef BUFSIZ +#define BUFSIZ 8192 +#endif + +#define IO_BUF_SIZE BUFSIZ +static unsigned char io_buf[IO_BUF_SIZE]; + +char *char_class_name[] = +{ + "alnum", "alpha", "blank", "cntrl", "digit", "graph", + "lower", "print", "punct", "space", "upper", "xdigit" +}; +#define N_CHAR_CLASSES (sizeof(char_class_name) / sizeof(char_class_name[0])) + +typedef char SET_TYPE; + +/* Array of boolean values. A character `c' is a member of the + squeeze set if and only if in_squeeze_set[c] is true. The squeeze + set is defined by the last (possibly, the only) string argument + on the command line when the squeeze option is given. */ +static SET_TYPE in_squeeze_set[N_CHARS]; + +/* Array of boolean values. A character `c' is a member of the + delete set if and only if in_delete_set[c] is true. The delete + set is defined by the first (or only) string argument on the + command line when the delete option is given. 
*/ +static SET_TYPE in_delete_set[N_CHARS]; + +/* Array of character values defining the translation (if any) that + tr is to perform. Translation is performed only when there are + two specification strings and the delete switch is not given. */ +static char xlate[N_CHARS]; + +static struct option long_options[] = +{ + {"complement", 0, NULL, 'c'}, + {"delete", 0, NULL, 'd'}, + {"squeeze-repeats", 0, NULL, 's'}, + {"truncate-set1", 0, NULL, 't'}, + {NULL, 0, NULL, 0} +}; + + +static void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-cdst] [--complement] [--delete] [--squeeze-repeats]\n\ + [--truncate-set1] string1 [string2]\n", + program_name); + exit (2); +} + +/* Return non-zero if the character C is a member of the + equivalence class containing the character EQUIV_CLASS. */ + +static int +is_equiv_class_member (equiv_class, c) + unsigned int equiv_class; + unsigned int c; +{ + return (equiv_class == c); +} + +/* Return non-zero if the character C is a member of the + character class CHAR_CLASS. */ + +static int +is_char_class_member (char_class, c) + enum Char_class char_class; + unsigned int c; +{ + switch (char_class) + { + case CC_ALNUM: + return isalnum (c); + break; + case CC_ALPHA: + return isalpha (c); + break; + case CC_BLANK: + return isblank (c); + break; + case CC_CNTRL: + return iscntrl (c); + break; + case CC_DIGIT: + return isdigit (c); + break; + case CC_GRAPH: + return isgraph (c); + break; + case CC_LOWER: + return islower (c); + break; + case CC_PRINT: + return isprint (c); + break; + case CC_PUNCT: + return ispunct (c); + break; + case CC_SPACE: + return isspace (c); + break; + case CC_UPPER: + return isupper (c); + break; + case CC_XDIGIT: + return isxdigit (c); + break; + case CC_NO_CLASS: + abort (); + return 0; + break; + } +} + +/* Perform the first pass over each range-spec argument S, + converting all \c and \ddd escapes to their one-byte representations. + The conversion is done in-place, so S must point to writable + storage. 
If an illegal quote sequence is found, an error message is + printed and the function returns non-zero. Otherwise the length of + the resulting string is returned through LEN and the function returns 0. + The resulting array of characters may contain zero-bytes; however, + on input, S is assumed to be null-terminated, and hence + cannot contain actual (non-escaped) zero bytes. */ + +static int +unquote (s, len) + unsigned char *s; + int *len; +{ + int i, j; + + j = 0; + for (i = 0; s[i]; i++) + { + switch (s[i]) + { + int c; + case '\\': + switch (s[i + 1]) + { + int oct_digit; + case '\\': + c = '\\'; + break; + case 'a': + c = '\007'; + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + c = s[i + 1] - '0'; + oct_digit = s[i + 2] - '0'; + if (0 <= oct_digit && oct_digit <= 7) + { + c = 8 * c + oct_digit; + ++i; + oct_digit = s[i + 2] - '0'; + if (0 <= oct_digit && oct_digit <= 7) + { + if (8 * c + oct_digit < N_CHARS) + { + c = 8 * c + oct_digit; + ++i; + } + else if (!posix_pedantic) + { + /* Any octal number larger than 0377 won't + fit in 8 bits. So we stop when adding the + next digit would put us over the limit and + give a warning about the ambiguity. POSIX + isn't clear on this, but one person has said + that in his interpretation, POSIX says tr + can't even give a warning. 
*/ + error (0, 0, "warning: the ambiguous octal escape \ +\\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, `%c'", + s[i], s[i + 1], s[i + 2], + s[i], s[i + 1], s[i + 2]); + } + } + } + break; + case '\0': + error (0, 0, "invalid backslash escape at end of string"); + return 1; + break; + default: + error (0, 0, "invalid backslash escape `\\%c'", s[i + 1]); + return 1; + break; + } + ++i; + s[j++] = c; + break; + default: + s[j++] = s[i]; + break; + } + } + *len = j; + return 0; +} + +/* If CLASS_STR is a valid character class string, return its index + in the global char_class_name array. Otherwise, return CC_NO_CLASS. */ + +static enum Char_class +look_up_char_class (class_str) + unsigned char *class_str; +{ + unsigned int i; + + for (i = 0; i < N_CHAR_CLASSES; i++) + if (strcmp (class_str, char_class_name[i]) == 0) + return (enum Char_class) i; + return CC_NO_CLASS; +} + +/* Return a newly allocated string with a printable version of C. + This function is used solely for formatting error messages. */ + +static char * +make_printable_char (c) + unsigned int c; +{ + char *buf = xmalloc (5); + + assert (c < N_CHARS); + if (isprint (c)) + { + buf[0] = c; + buf[1] = '\0'; + } + else + { + sprintf (buf, "\\%03o", c); + } + return buf; +} + +/* Return a newly allocated copy of S which is suitable for printing. + LEN is the number of characters in S. Most non-printing + (isprint) characters are represented by a backslash followed by + 3 octal digits. However, the characters represented by \c escapes + where c is one of [abfnrtv] are represented by their 2-character \c + sequences. This function is used solely for printing error messages. */ + +static char * +make_printable_str (s, len) + unsigned char *s; + int len; +{ + /* Worst case is that every character expands to a backslash + followed by a 3-character octal escape sequence. 
*/ + char *printable_buf = xmalloc (4 * len + 1); + char *p = printable_buf; + int i; + + for (i = 0; i < len; i++) + { + char buf[5]; + char *tmp = NULL; + + switch (s[i]) + { + case '\\': + tmp = "\\"; + break; + case '\007': + tmp = "\\a"; + break; + case '\b': + tmp = "\\b"; + break; + case '\f': + tmp = "\\f"; + break; + case '\n': + tmp = "\\n"; + break; + case '\r': + tmp = "\\r"; + break; + case '\t': + tmp = "\\t"; + break; + case '\v': + tmp = "\\v"; + break; + default: + if (isprint (s[i])) + { + buf[0] = s[i]; + buf[1] = '\0'; + } + else + sprintf (buf, "\\%03o", s[i]); + tmp = buf; + break; + } + p = stpcpy (p, tmp); + } + return printable_buf; +} + +/* Append a newly allocated structure representing a + character C to the specification list LIST. */ + +static void +append_normal_char (list, c) + struct Spec_list *list; + unsigned int c; +{ + struct List_element *new; + + new = (struct List_element *) xmalloc (sizeof (struct List_element)); + new->next = NULL; + new->type = RE_NORMAL_CHAR; + new->u.normal_char = c; + assert (list->tail); + list->tail->next = new; + list->tail = new; +} + +/* Append a newly allocated structure representing the range + of characters from FIRST to LAST to the specification list LIST. + Return non-zero if LAST precedes FIRST in the collating sequence, + zero otherwise. This means that '[c-c]' is acceptable. 
*/ + +static int +append_range (list, first, last) + struct Spec_list *list; + unsigned int first; + unsigned int last; +{ + struct List_element *new; + + if (ORD (first) > ORD (last)) + { + char *tmp1 = make_printable_char (first); + char *tmp2 = make_printable_char (last); + + error (0, 0, + "range-endpoints of `%s-%s' are in reverse collating sequence order", + tmp1, tmp2); + free (tmp1); + free (tmp2); + return 1; + } + new = (struct List_element *) xmalloc (sizeof (struct List_element)); + new->next = NULL; + new->type = RE_RANGE; + new->u.range.first_char = first; + new->u.range.last_char = last; + assert (list->tail); + list->tail->next = new; + list->tail = new; + return 0; +} + +/* If CHAR_CLASS_STR is a valid character class string, append a + newly allocated structure representing that character class to the end + of the specification list LIST and return 0. If CHAR_CLASS_STR is not + a valid string, give an error message and return non-zero. */ + +static int +append_char_class (list, char_class_str, len) + struct Spec_list *list; + unsigned char *char_class_str; + int len; +{ + enum Char_class char_class; + struct List_element *new; + + char_class = look_up_char_class (char_class_str); + if (char_class == CC_NO_CLASS) + { + char *tmp = make_printable_str (char_class_str, len); + + error (0, 0, "invalid character class `%s'", tmp); + free (tmp); + return 1; + } + new = (struct List_element *) xmalloc (sizeof (struct List_element)); + new->next = NULL; + new->type = RE_CHAR_CLASS; + new->u.char_class = char_class; + assert (list->tail); + list->tail->next = new; + list->tail = new; + return 0; +} + +/* Append a newly allocated structure representing a [c*n] + repeated character construct, to the specification list LIST. + THE_CHAR is the single character to be repeated, and REPEAT_COUNT + is non-negative repeat count. 
*/ + +static void +append_repeated_char (list, the_char, repeat_count) + struct Spec_list *list; + unsigned int the_char; + long int repeat_count; +{ + struct List_element *new; + + new = (struct List_element *) xmalloc (sizeof (struct List_element)); + new->next = NULL; + new->type = RE_REPEATED_CHAR; + new->u.repeated_char.the_repeated_char = the_char; + new->u.repeated_char.repeat_count = repeat_count; + assert (list->tail); + list->tail->next = new; + list->tail = new; +} + +/* Given a string, EQUIV_CLASS_STR, from a [=str=] context and + the length of that string, LEN, if LEN is exactly one, append + a newly allocated structure representing the specified + equivalence class to the specification list, LIST and return zero. + If LEN is not 1, issue an error message and return non-zero. */ + +static int +append_equiv_class (list, equiv_class_str, len) + struct Spec_list *list; + unsigned char *equiv_class_str; + int len; +{ + struct List_element *new; + + if (len != 1) + { + char *tmp = make_printable_str (equiv_class_str, len); + + error (0, 0, "%s: equivalence class operand must be a single character", + tmp); + free (tmp); + return 1; + } + new = (struct List_element *) xmalloc (sizeof (struct List_element)); + new->next = NULL; + new->type = RE_EQUIV_CLASS; + new->u.equiv_code = *equiv_class_str; + assert (list->tail); + list->tail->next = new; + list->tail = new; + return 0; +} + +/* Return a newly allocated copy of P[FIRST_IDX..LAST_IDX]. */ + +static unsigned char * +substr (p, first_idx, last_idx) + unsigned char *p; + int first_idx; + int last_idx; +{ + int len = last_idx - first_idx + 1; + unsigned char *tmp = (unsigned char *) xmalloc (len); + + assert (first_idx <= last_idx); + /* We must use bcopy or memcopy rather than strncpy + because `p' may contain zero-bytes. 
*/ + bcopy (p + first_idx, tmp, len); + tmp[len] = '\0'; + return tmp; +} + +/* Search forward starting at START_IDX for the 2-char sequence + (PRE_BRACKET_CHAR,']') in the string P of length P_LEN. If such + a sequence is found, return the index of the first character, + otherwise return -1. P may contain zero bytes. */ + +static int +find_closing_delim (p, start_idx, p_len, pre_bracket_char) + unsigned char *p; + int start_idx; + int p_len; + unsigned int pre_bracket_char; +{ + int i; + + for (i = start_idx; i < p_len - 1; i++) + if (p[i] == pre_bracket_char && p[i + 1] == ']') + return i; + return -1; +} + +/* Convert a string S with explicit length LEN, possibly + containing embedded zero bytes, to a long integer value. + If the string represents a negative value, a value larger + than LONG_MAX, or if all LEN characters do not represent a + valid integer, return non-zero and do not modify *VAL. + Otherwise, return zero and set *VAL to the converted value. */ + +static int +non_neg_strtol (s, len, val) + unsigned char *s; + int len; + long int *val; +{ + int i; + long sum = 0; + unsigned int base; + + if (len <= 0) + return 1; + if (s[0] == '0') + base = 8; + else if (isdigit (s[0])) + base = 10; + else + return 1; + + for (i = 0; i < len; i++) + { + int c = s[i] - '0'; + + if (c >= base || c < 0) + return 1; + if (i > 8 && sum > (LONG_MAX - c) / base) + return 1; + sum = sum * base + c; + } + *val = sum; + return 0; +} + +/* Parse the bracketed repeat-char syntax. If the P_LEN characters + beginning with P[ START_IDX ] comprise a valid [c*n] construct, + return the character and the repeat count through the arg pointers, + CHAR_TO_REPEAT and N, and then return the index of the closing + bracket as the function value. If the second character following + the opening bracket is not `*' or if no closing bracket can be + found, return -1. 
If a closing bracket is found and the + second char is `*', but the string between the `*' and `]' isn't + empty, an octal number, or a decimal number, print an error message + and return -2. */ + +static int +find_bracketed_repeat (p, start_idx, p_len, char_to_repeat, n) + unsigned char *p; + int start_idx; + int p_len; + unsigned int *char_to_repeat; + long int *n; +{ + int i; + + assert (start_idx + 1 < p_len); + if (p[start_idx + 1] != '*') + return -1; + + for (i = start_idx + 2; i < p_len; i++) + { + if (p[i] == ']') + { + unsigned char *digit_str; + int digit_str_len = i - start_idx - 2; + + *char_to_repeat = p[start_idx]; + if (digit_str_len == 0) + { + /* We've matched [c*] -- no explicit repeat count. */ + *n = 0; + return i; + } + + /* Here, we have found [c*s] where s should be a string + of octal or decimal digits. */ + digit_str = &p[start_idx + 2]; + if (non_neg_strtol (digit_str, digit_str_len, n)) + { + char *tmp = make_printable_str (digit_str, digit_str_len); + error (0, 0, "invalid repeat count `%s' in [c*n] construct", tmp); + free (tmp); + return -2; + } + return i; + } + } + return -1; /* No bracket found. */ +} + +/* Convert string UNESACPED_STRING (which has been preprocessed to + convert backslash-escape sequences) of length LEN characters into + a linked list of the following 5 types of constructs: + - [:str:] Character class where `str' is one of the 12 valid strings. + - [=c=] Equivalence class where `c' is any single character. + - [c*n] Repeat the single character `c' `n' times. n may be omitted. + However, if `n' is present, it must be a non-negative octal or + decimal integer. + - r-s Range of characters from `r' to `s'. The second endpoint must + not precede the first in the current collating sequence. + - c Any other character is interpreted as itself. 
/* Convert string UNESCAPED_STRING (which has been preprocessed to
   convert backslash-escape sequences) of length LEN characters into
   a linked list of the following 5 types of constructs:
      - [:str:] Character class where `str' is one of the 12 valid strings.
      - [=c=] Equivalence class where `c' is any single character.
      - [c*n] Repeat the single character `c' `n' times.  n may be omitted.
	  However, if `n' is present, it must be a non-negative octal or
	  decimal integer.
      - r-s Range of characters from `r' to `s'.  The second endpoint must
	  not precede the first in the current collating sequence.
      - c Any other character is interpreted as itself.
   Return non-zero if one of the constructs is malformed, zero otherwise.  */

static int
build_spec_list (unescaped_string, len, result)
     unsigned char *unescaped_string;
     int len;
     struct Spec_list *result;
{
  unsigned char *p = unescaped_string;
  int i = 0;

  /* The main loop below recognizes the 4 multi-character constructs.
     A character that matches (in its context) none of the multi-character
     constructs is classified as `normal'.  Since all multi-character
     constructs have at least 3 characters, any strings of length 2 or
     less are composed solely of normal characters.  Hence, the index of
     the loop runs only as far as LEN-2.  */

  while (i < len - 2)
    {
      /* Set once a bracketed construct starting at p[i] has been
	 appended and I has been moved past it.  */
      int consumed = 0;

      if (p[i] == '[')
	{
	  /* First try [:str:] and [=c=].  */
	  if (p[i + 1] == ':' || p[i + 1] == '=')
	    {
	      int closing_delim_idx
		= find_closing_delim (p, i + 2, len, p[i + 1]);

	      if (closing_delim_idx >= 0)
		{
		  int opnd_len = (closing_delim_idx - 1) - (i + 2) + 1;
		  unsigned char *opnd_str
		    = substr (p, i + 2, closing_delim_idx - 1);
		  int parse_failed;

		  if (p[i + 1] == ':')
		    parse_failed = append_char_class (result, opnd_str,
						      opnd_len);
		  else
		    parse_failed = append_equiv_class (result, opnd_str,
						       opnd_len);
		  free (opnd_str);

		  /* Return non-zero if append_*_class reports a problem.  */
		  if (parse_failed)
		    return 1;
		  i = closing_delim_idx + 2;
		  consumed = 1;
		}
	      /* Else keep going.  This could be [:*] or [=*].  */
	    }

	  if (!consumed)
	    {
	      unsigned int char_to_repeat;
	      long repeat_count;

	      /* Determine whether this is a bracketed repeat range
		 matching the RE \[.\*(dec_or_oct_number)?\].  */
	      int closing_bracket_idx
		= find_bracketed_repeat (p, i + 1, len,
					 &char_to_repeat, &repeat_count);

	      if (closing_bracket_idx >= 0)
		{
		  append_repeated_char (result, char_to_repeat, repeat_count);
		  i = closing_bracket_idx + 1;
		  consumed = 1;
		}
	      else if (closing_bracket_idx != -1)
		{
		  /* Found a string that looked like [c*n] but the
		     numeric part was invalid.  */
		  return 1;
		}
	    }
	}

      if (consumed)
	continue;

      /* Here if p[i] is not `[', or if we've tried to match [c*n],
	 [:str:], and [=c=] and none of them fit.  So we still have to
	 consider the range `[-c' (from `[' to `c').
	 Look ahead one char for ranges like a-z.  */
      if (p[i + 1] == '-')
	{
	  if (append_range (result, p[i], p[i + 2]))
	    return 1;
	  i += 3;
	}
      else
	{
	  append_normal_char (result, p[i]);
	  ++i;
	}
    }

  /* Now handle the (2 or fewer) remaining characters p[i]..p[len - 1].  */
  while (i < len)
    {
      append_normal_char (result, p[i]);
      ++i;
    }

  return 0;
}
*/ + +static int +get_next (s, class) + struct Spec_list *s; + enum Upper_Lower_class *class; +{ + struct List_element *p; + int return_val; + int i; + + if (class) + *class = UL_NONE; + + if (s->state == BEGIN_STATE) + { + s->tail = s->head->next; + s->state = NEW_ELEMENT; + } + + p = s->tail; + if (p == NULL) + return -1; + + switch (p->type) + { + case RE_NORMAL_CHAR: + return_val = p->u.normal_char; + s->state = NEW_ELEMENT; + s->tail = p->next; + break; + + case RE_RANGE: + if (s->state == NEW_ELEMENT) + s->state = ORD (p->u.range.first_char); + else + ++(s->state); + return_val = CHR (s->state); + if (s->state == ORD (p->u.range.last_char)) + { + s->tail = p->next; + s->state = NEW_ELEMENT; + } + break; + + case RE_CHAR_CLASS: + if (s->state == NEW_ELEMENT) + { + for (i = 0; i < N_CHARS; i++) + if (is_char_class_member (p->u.char_class, i)) + break; + assert (i < N_CHARS); + s->state = i; + } + assert (is_char_class_member (p->u.char_class, s->state)); + return_val = CHR (s->state); + for (i = s->state + 1; i < N_CHARS; i++) + if (is_char_class_member (p->u.char_class, i)) + break; + if (i < N_CHARS) + s->state = i; + else + { + s->tail = p->next; + s->state = NEW_ELEMENT; + } + if (class) + { + switch (p->u.char_class) + { + case CC_LOWER: + *class = UL_LOWER; + break; + case CC_UPPER: + *class = UL_UPPER; + break; + default: + /* empty */ + break; + } + } + break; + + case RE_EQUIV_CLASS: + /* FIXME: this assumes that each character is alone in its own + equivalence class (which appears to be correct for my + LC_COLLATE. But I don't know of any function that allows + one to determine a character's equivalence class. */ + + return_val = p->u.equiv_code; + s->state = NEW_ELEMENT; + s->tail = p->next; + break; + + case RE_REPEATED_CHAR: + /* Here, a repeat count of n == 0 means don't repeat at all. 
*/ + assert (p->u.repeated_char.repeat_count >= 0); + if (p->u.repeated_char.repeat_count == 0) + { + s->tail = p->next; + s->state = NEW_ELEMENT; + return_val = get_next (s, class); + } + else + { + if (s->state == NEW_ELEMENT) + { + s->state = 0; + } + ++(s->state); + return_val = p->u.repeated_char.the_repeated_char; + if (p->u.repeated_char.repeat_count > 0 + && s->state == p->u.repeated_char.repeat_count) + { + s->tail = p->next; + s->state = NEW_ELEMENT; + } + } + break; + + case RE_NO_TYPE: + abort (); + break; + } + return return_val; +} + +/* This is a minor kludge. This function is called from + get_spec_stats to determine the cardinality of a set derived + from a complemented string. It's a kludge in that some of + the same operations are (duplicated) performed in set_initialize. */ + +static int +card_of_complement (s) + struct Spec_list *s; +{ + int c; + int cardinality = N_CHARS; + SET_TYPE in_set[N_CHARS]; + + bzero (in_set, N_CHARS * sizeof (in_set[0])); + s->state = BEGIN_STATE; + while ((c = get_next (s, NULL)) != -1) + if (!in_set[c]++) + --cardinality; + return cardinality; +} + +/* Gather statistics about the spec-list S in preparation for the tests + in validate that determine the legality of the specs. This function + is called at most twice; once for string1, and again for any string2. + LEN_S1 < 0 indicates that this is the first call and that S represents + string1. When LEN_S1 >= 0, it is the length of the expansion of the + constructs in string1, and we can use its value to resolve any + indefinite repeat construct in S (which represents string2). Hence, + this function has the side-effect that it converts a valid [c*] + construct in string2 to [c*n] where n is large enough (or 0) to give + string2 the same length as string1. For example, with the command + tr a-z 'A[\n*]Z' on the second call to get_spec_stats, LEN_S1 would + be 26 and S (representing string2) would be converted to 'A[\n*24]Z'. 
*/ + +static void +get_spec_stats (s, len_s1) + struct Spec_list *s; + int len_s1; +{ + struct List_element *p; + struct List_element *indefinite_repeat_element = NULL; + int len = 0; + + s->n_indefinite_repeats = 0; + s->has_equiv_class = 0; + s->has_restricted_char_class = 0; + s->has_upper_or_lower = 0; + for (p = s->head->next; p; p = p->next) + { + switch (p->type) + { + int i; + case RE_NORMAL_CHAR: + ++len; + break; + + case RE_RANGE: + assert (p->u.range.last_char >= p->u.range.first_char); + len += p->u.range.last_char - p->u.range.first_char + 1; + break; + + case RE_CHAR_CLASS: + for (i = 0; i < N_CHARS; i++) + if (is_char_class_member (p->u.char_class, i)) + ++len; + switch (p->u.char_class) + { + case CC_UPPER: + case CC_LOWER: + s->has_upper_or_lower = 1; + break; + default: + s->has_restricted_char_class = 1; + break; + } + break; + + case RE_EQUIV_CLASS: + for (i = 0; i < N_CHARS; i++) + if (is_equiv_class_member (p->u.equiv_code, i)) + ++len; + s->has_equiv_class = 1; + break; + + case RE_REPEATED_CHAR: + if (p->u.repeated_char.repeat_count > 0) + len += p->u.repeated_char.repeat_count; + else if (p->u.repeated_char.repeat_count == 0) + { + indefinite_repeat_element = p; + ++(s->n_indefinite_repeats); + } + break; + + case RE_NO_TYPE: + assert (0); + break; + } + } + + if (len_s1 >= len && s->n_indefinite_repeats == 1) + { + indefinite_repeat_element->u.repeated_char.repeat_count = len_s1 - len; + len = len_s1; + } + if (complement && len_s1 < 0) + s->length = card_of_complement (s); + else + s->length = len; + return; +} + +static void +spec_init (spec_list) + struct Spec_list *spec_list; +{ + spec_list->head = spec_list->tail = + (struct List_element *) xmalloc (sizeof (struct List_element)); + spec_list->head->next = NULL; +} + +/* This function makes two passes over the argument string S. The first + one converts all \c and \ddd escapes to their one-byte representations. 
+ The second constructs a linked specification list, SPEC_LIST, of the + characters and constructs that comprise the argument string. If either + of these passes detects an error, this function returns non-zero. */ + +static int +parse_str (s, spec_list) + unsigned char *s; + struct Spec_list *spec_list; +{ + int len; + + if (unquote (s, &len)) + return 1; + if (build_spec_list (s, len, spec_list)) + return 1; + return 0; +} + +/* Given two specification lists, S1 and S2, and assuming that + S1->length > S2->length, append a single [c*n] element to S2 where c + is the last character in the expansion of S2 and n is the difference + between the two lengths. + Upon successful completion, S2->length is set to S1->length. The only + way this function can fail to make S2 as long as S1 is when S2 has + zero-length, since in that case, there is no last character to repeat. + So S2->length is required to be at least 1. + + Providing this functionality allows the user to do some pretty + non-BSD (and non-portable) things: For example, the command + tr -cs '[:upper:]0-9' '[:lower:]' + is almost guaranteed to give results that depend on your collating + sequence. */ + +static void +string2_extend (s1, s2) + struct Spec_list *s1; + struct Spec_list *s2; +{ + struct List_element *p; + int char_to_repeat; + int i; + + assert (translating); + assert (s1->length > s2->length); + assert (s2->length > 0); + + p = s2->tail; + switch (p->type) + { + case RE_NORMAL_CHAR: + char_to_repeat = p->u.normal_char; + break; + case RE_RANGE: + char_to_repeat = p->u.range.last_char; + break; + case RE_CHAR_CLASS: + for (i = N_CHARS; i >= 0; i--) + if (is_char_class_member (p->u.char_class, i)) + break; + assert (i >= 0); + char_to_repeat = CHR (i); + break; + + case RE_REPEATED_CHAR: + char_to_repeat = p->u.repeated_char.the_repeated_char; + break; + + case RE_EQUIV_CLASS: + /* This shouldn't happen, because validate exits with an error + if it finds an equiv class in string2 when translating. 
*/ + abort (); + break; + + case RE_NO_TYPE: + abort (); + break; + } + append_repeated_char (s2, char_to_repeat, s1->length - s2->length); + s2->length = s1->length; + return; +} + +/* Die with an error message if S1 and S2 describe strings that + are not valid with the given command line switches. + A side effect of this function is that if a legal [c*] or + [c*0] construct appears in string2, it is converted to [c*n] + with a value for n that makes s2->length == s1->length. By + the same token, if the --truncate-set1 option is not + given, S2 may be extended. */ + +static void +validate (s1, s2) + struct Spec_list *s1; + struct Spec_list *s2; +{ + get_spec_stats (s1, -1); + if (s1->n_indefinite_repeats > 0) + { + error (1, 0, "the [c*] repeat construct may not appear in string1"); + } + + /* FIXME: it isn't clear from the POSIX spec that this is illegal, + but in the spirit of the other restrictions put on translation + with character classes, this seems a logical interpretation. */ + if (complement && s1->has_upper_or_lower) + { + error (1, 0, + "character classes may not be used when translating and complementing"); + } + + if (s2) + { + get_spec_stats (s2, s1->length); + if (s2->has_restricted_char_class) + { + error (1, 0, + "when translating, the only character classes that may appear in\n\ +\tstring2 are `upper' and `lower'"); + } + + if (s2->n_indefinite_repeats > 1) + { + error (1, 0, "only one [c*] repeat construct may appear in string2"); + } + + if (translating) + { + if (s2->has_equiv_class) + { + error (1, 0, + "[=c=] expressions may not appear in string2 when translating"); + } + + if (s1->length > s2->length) + { + if (!truncate_set1) + { + /* string2 must be non-empty unless --truncate-set1 is + given or string1 is empty. 
*/ + + if (s2->length == 0) + error (1, 0, + "when not truncating set1, string2 must be non-empty"); + string2_extend (s1, s2); + } + } + + if (complement && s2->has_upper_or_lower) + error (1, 0, + "character classes may not be used when translating and complementing"); + } + else + /* Not translating. */ + { + if (s2->n_indefinite_repeats > 0) + error (1, 0, + "the [c*] construct may appear in string2 only when translating"); + } + } +} + +/* Read buffers of SIZE bytes via the function READER (if READER is + NULL, read from stdin) until EOF. When non-NULL, READER is either + read_and_delete or read_and_xlate. After each buffer is read, it is + processed and written to stdout. The buffers are processed so that + multiple consecutive occurrences of the same character in the input + stream are replaced by a single occurrence of that character if the + character is in the squeeze set. */ + +static void +squeeze_filter (buf, size, reader) + unsigned char *buf; + long int size; + PFI reader; +{ + unsigned int char_to_squeeze = NOT_A_CHAR; + int i = 0; + int nr = 0; + + for (;;) + { + int begin; + + if (i >= nr) + { + if (reader == NULL) + nr = read (0, (char *) buf, size); + else + nr = (*reader) (buf, size, NULL); + + if (nr < 0) + error (1, errno, "read error"); + if (nr == 0) + break; + i = 0; + } + + begin = i; + + if (char_to_squeeze == NOT_A_CHAR) + { + int out_len; + /* Here, by being a little tricky, we can get a significant + performance increase in most cases when the input is + reasonably large. Since tr will modify the input only + if two consecutive (and identical) input characters are + in the squeeze set, we can step by two through the data + when searching for a character in the squeeze set. This + means there may be a little more work in a few cases and + perhaps twice as much work in the worst cases where most + of the input is removed by squeezing repeats. But most + uses of this functionality seem to remove less than 20-30% + of the input. 
*/ + for (; i < nr && !in_squeeze_set[buf[i]]; i += 2) + ; /* empty */ + + /* There is a special case when i == nr and we've just + skipped a character (the last one in buf) that is in + the squeeze set. */ + if (i == nr && in_squeeze_set[buf[i - 1]]) + --i; + + if (i >= nr) + out_len = nr - begin; + else + { + char_to_squeeze = buf[i]; + /* We're about to output buf[begin..i]. */ + out_len = i - begin + 1; + + /* But since we stepped by 2 in the loop above, + out_len may be one too large. */ + if (i > 0 && buf[i - 1] == char_to_squeeze) + --out_len; + + /* Advance i to the index of first character to be + considered when looking for a char different from + char_to_squeeze. */ + ++i; + } + if (out_len > 0 + && fwrite ((char *) &buf[begin], 1, out_len, stdout) == 0) + error (1, errno, "write error"); + } + + if (char_to_squeeze != NOT_A_CHAR) + { + /* Advance i to index of first char != char_to_squeeze + (or to nr if all the rest of the characters in this + buffer are the same as char_to_squeeze). */ + for (; i < nr && buf[i] == char_to_squeeze; i++) + ; /* empty */ + if (i < nr) + char_to_squeeze = NOT_A_CHAR; + /* If (i >= nr) we've squeezed the last character in this buffer. + So now we have to read a new buffer and continue comparing + characters against char_to_squeeze. */ + } + } +} + +/* Read buffers of SIZE bytes from stdin until one is found that + contains at least one character not in the delete set. Store + in the array BUF, all characters from that buffer that are not + in the delete set, and return the number of characters saved + or 0 upon EOF. */ + +static long +read_and_delete (buf, size, not_used) + unsigned char *buf; + long int size; + PFI not_used; +{ + long n_saved; + static int hit_eof = 0; + + assert (not_used == NULL); + assert (size > 0); + + if (hit_eof) + return 0; + + /* This enclosing do-while loop is to make sure that + we don't return zero (indicating EOF) when we've + just deleted all the characters in a buffer. 
*/ + do + { + int i; + int nr = read (0, (char *) buf, size); + + if (nr < 0) + error (1, errno, "read error"); + if (nr == 0) + { + hit_eof = 1; + return 0; + } + + /* This first loop may be a waste of code, but gives much + better performance when no characters are deleted in + the beginning of a buffer. It just avoids the copying + of buf[i] into buf[n_saved] when it would be a NOP. */ + + for (i = 0; i < nr && !in_delete_set[buf[i]]; i++) + /* empty */ ; + n_saved = i; + + for (++i; i < nr; i++) + if (!in_delete_set[buf[i]]) + buf[n_saved++] = buf[i]; + } + while (n_saved == 0); + + return n_saved; +} + +/* Read at most SIZE bytes from stdin into the array BUF. Then + perform the in-place and one-to-one mapping specified by the global + array `xlate'. Return the number of characters read, or 0 upon EOF. */ + +static long +read_and_xlate (buf, size, not_used) + unsigned char *buf; + long int size; + PFI not_used; +{ + long chars_read = 0; + static int hit_eof = 0; + int i; + + assert (not_used == NULL); + assert (size > 0); + + if (hit_eof) + return 0; + + chars_read = read (0, (char *) buf, size); + if (chars_read < 0) + error (1, errno, "read error"); + if (chars_read == 0) + { + hit_eof = 1; + return 0; + } + + for (i = 0; i < chars_read; i++) + buf[i] = xlate[buf[i]]; + + return chars_read; +} + +/* Initialize a boolean membership set IN_SET with the character + values obtained by traversing the linked list of constructs S + using the function `get_next'. If COMPLEMENT_THIS_SET is + non-zero the resulting set is complemented. 
*/ + +static void +set_initialize (s, complement_this_set, in_set) + struct Spec_list *s; + int complement_this_set; + SET_TYPE *in_set; +{ + int c; + int i; + + bzero (in_set, N_CHARS * sizeof (in_set[0])); + s->state = BEGIN_STATE; + while ((c = get_next (s, NULL)) != -1) + in_set[c] = 1; + if (complement_this_set) + for (i = 0; i < N_CHARS; i++) + in_set[i] = (!in_set[i]); +} + +void +main (argc, argv) + int argc; + char **argv; +{ + int c; + int non_option_args; + struct Spec_list buf1, buf2; + struct Spec_list *s1 = &buf1; + struct Spec_list *s2 = &buf2; + + program_name = argv[0]; + + while ((c = getopt_long (argc, argv, "cdst", long_options, + (int *) 0)) != EOF) + { + switch (c) + { + case 0: + break; + + case 'c': + complement = 1; + break; + + case 'd': + delete = 1; + break; + + case 's': + squeeze_repeats = 1; + break; + + case 't': + truncate_set1 = 1; + break; + + default: + usage (); + break; + } + } + + posix_pedantic = (getenv ("POSIXLY_CORRECT") != 0); + + non_option_args = argc - optind; + translating = (non_option_args == 2 && !delete); + + /* Change this test if it is legal to give tr no options and + no args at all. POSIX doesn't specifically say anything + either way, but it looks like they implied it's illegal + by omission. If you want to make tr do a slow imitation + of `cat' use `tr a a'. */ + if (non_option_args > 2) + usage (); + + if (!delete && !squeeze_repeats && non_option_args != 2) + error (1, 0, "two strings must be given when translating"); + + if (delete && squeeze_repeats && non_option_args != 2) + error (1, 0, "two strings must be given when both \ +deleting and squeezing repeats"); + + /* If --delete is given without --squeeze-repeats, then + only one string argument may be specified. But POSIX + says to ignore any string2 in this case, so if POSIXLY_CORRECT + is set, pretend we never saw string2. But I think + this deserves a fatal error, so that's the default. 
*/ + if ((delete && !squeeze_repeats) && non_option_args != 1) + { + if (posix_pedantic && non_option_args == 2) + --non_option_args; + else + error (1, 0, + "only one string may be given when deleting without squeezing repeats"); + } + + spec_init (s1); + if (parse_str ((unsigned char *) argv[optind], s1)) + exit (1); + + if (non_option_args == 2) + { + spec_init (s2); + if (parse_str ((unsigned char *) argv[optind + 1], s2)) + exit (1); + } + else + s2 = NULL; + + validate (s1, s2); + + if (squeeze_repeats && non_option_args == 1) + { + set_initialize (s1, complement, in_squeeze_set); + squeeze_filter (io_buf, IO_BUF_SIZE, NULL); + } + else if (delete && non_option_args == 1) + { + int nr; + + set_initialize (s1, complement, in_delete_set); + do + { + nr = read_and_delete (io_buf, IO_BUF_SIZE, NULL); + if (nr > 0 && fwrite ((char *) io_buf, 1, nr, stdout) == 0) + error (1, errno, "write error"); + } + while (nr > 0); + } + else if (squeeze_repeats && delete && non_option_args == 2) + { + set_initialize (s1, complement, in_delete_set); + set_initialize (s2, 0, in_squeeze_set); + squeeze_filter (io_buf, IO_BUF_SIZE, (PFI) read_and_delete); + } + else if (translating) + { + if (complement) + { + int i; + SET_TYPE *in_s1 = in_delete_set; + + set_initialize (s1, 0, in_s1); + s2->state = BEGIN_STATE; + for (i = 0; i < N_CHARS; i++) + xlate[i] = i; + for (i = 0; i < N_CHARS; i++) + { + if (!in_s1[i]) + { + int c = get_next (s2, NULL); + assert (c != -1 || truncate_set1); + if (c == -1) + { + /* This will happen when tr is invoked like e.g. + tr -cs A-Za-z0-9 '\012'. 
*/ + break; + } + xlate[i] = c; + } + } + assert (get_next (s2, NULL) == -1 || truncate_set1); + } + else + { + int c1, c2; + int i; + enum Upper_Lower_class class_s1; + enum Upper_Lower_class class_s2; + + for (i = 0; i < N_CHARS; i++) + xlate[i] = i; + s1->state = BEGIN_STATE; + s2->state = BEGIN_STATE; + for (;;) + { + c1 = get_next (s1, &class_s1); + c2 = get_next (s2, &class_s2); + if (!class_ok[(int) class_s1][(int) class_s2]) + error (1, 0, + "misaligned or mismatched upper and/or lower classes"); + /* The following should have been checked by validate... */ + if (c2 == -1) + break; + xlate[c1] = c2; + } + assert (c1 == -1 || truncate_set1); + } + if (squeeze_repeats) + { + set_initialize (s2, 0, in_squeeze_set); + squeeze_filter (io_buf, IO_BUF_SIZE, (PFI) read_and_xlate); + } + else + { + int chars_read; + + do + { + chars_read = read_and_xlate (io_buf, IO_BUF_SIZE, NULL); + if (chars_read > 0 + && fwrite ((char *) io_buf, 1, chars_read, stdout) == 0) + error (1, errno, "write error"); + } + while (chars_read > 0); + } + } + + exit (0); +} + diff --git a/src/unexpand.c b/src/unexpand.c new file mode 100644 index 000000000..2733ef77a --- /dev/null +++ b/src/unexpand.c @@ -0,0 +1,432 @@ +/* unexpand - convert spaces to tabs + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +/* By default, convert only maximal strings of initial blanks and tabs + into tabs. + Preserves backspace characters in the output; they decrement the + column count for tab calculations. + The default action is equivalent to -8. + + Options: + --tabs=tab1[,tab2[,...]] + -t tab1[,tab2[,...]] + -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1 + spaces apart instead of the default 8. Otherwise, + set the tabs at columns tab1, tab2, etc. (numbered from + 0); replace any tabs beyond the tabstops given with + single spaces. + --all + -a Use tabs wherever they would replace 2 or more spaces, + not just at the beginnings of lines. + + David MacKenzie <djm@ai.mit.edu> */ + +#define _GNU_SOURCE +#include <ctype.h> +#ifndef isblank +#define isblank(c) ((c) == ' ' || (c) == '\t') +#endif +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" + +#ifdef isascii +#define ISDIGIT(c) (isascii((c)) && isdigit((c))) +#else +#define ISDIGIT(c) (isdigit((c))) +#endif + +/* The number of bytes added at a time to the amount of memory + allocated for the output line. */ +#define OUTPUT_BLOCK 256 + +/* The number of bytes added at a time to the amount of memory + allocated for the list of tabstops. */ +#define TABLIST_BLOCK 256 + +char *xmalloc (); +char *xrealloc (); +void error (); + +FILE *next_file (); +void add_tabstop (); +void parse_tabstops (); +void unexpand (); +void usage (); +void validate_tabstops (); + +/* If nonzero, convert blanks even after nonblank characters have been + read on the line. */ +int convert_entire_line; + +/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */ +int tab_size; + +/* Array of the explicit column numbers of the tab stops; + after `tab_list' is exhausted, the rest of the line is printed + unchanged. The first column is column 0. */ +int *tab_list; + +/* The index of the first invalid element of `tab_list', + where the next element can be added. 
*/ +int first_free_tab; + +/* Null-terminated array of input filenames. */ +char **file_list; + +/* Default for `file_list' if no files are given on the command line. */ +char *stdin_argv[] = +{ + "-", NULL +}; + +/* Nonzero if we have ever read standard input. */ +int have_read_stdin; + +/* Status to return to the system. */ +int exit_status; + +/* The name this program was run with. */ +char *program_name; + +struct option longopts[] = +{ + {"tabs", 1, NULL, 't'}, + {"all", 0, NULL, 'a'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + int tabval = -1; /* Value of tabstop being read, or -1. */ + int c; /* Option character. */ + + have_read_stdin = 0; + exit_status = 0; + convert_entire_line = 0; + tab_list = NULL; + first_free_tab = 0; + program_name = argv[0]; + + while ((c = getopt_long (argc, argv, "at:,0123456789", longopts, (int *) 0)) + != EOF) + { + switch (c) + { + case '?': + usage (); + case 'a': + convert_entire_line = 1; + break; + case 't': + convert_entire_line = 1; + parse_tabstops (optarg); + break; + case ',': + add_tabstop (tabval); + tabval = -1; + break; + default: + if (tabval == -1) + tabval = 0; + tabval = tabval * 10 + c - '0'; + break; + } + } + + add_tabstop (tabval); + + validate_tabstops (tab_list, first_free_tab); + + if (first_free_tab == 0) + tab_size = 8; + else if (first_free_tab == 1) + tab_size = tab_list[0]; + else + tab_size = 0; + + if (optind == argc) + file_list = stdin_argv; + else + file_list = &argv[optind]; + + unexpand (); + + if (have_read_stdin && fclose (stdin) == EOF) + error (1, errno, "-"); + if (fclose (stdout) == EOF) + error (1, errno, "write error"); + exit (exit_status); +} + +/* Add the comma or blank separated list of tabstops STOPS + to the list of tabstops. 
*/ + +void +parse_tabstops (stops) + char *stops; +{ + int tabval = -1; + + for (; *stops; stops++) + { + if (*stops == ',' || isblank (*stops)) + { + add_tabstop (tabval); + tabval = -1; + } + else if (ISDIGIT (*stops)) + { + if (tabval == -1) + tabval = 0; + tabval = tabval * 10 + *stops - '0'; + } + else + error (1, 0, "tab size contains an invalid character"); + } + + add_tabstop (tabval); +} + +/* Add tab stop TABVAL to the end of `tab_list', except + if TABVAL is -1, do nothing. */ + +void +add_tabstop (tabval) + int tabval; +{ + if (tabval == -1) + return; + if (first_free_tab % TABLIST_BLOCK == 0) + tab_list = (int *) xrealloc (tab_list, first_free_tab + TABLIST_BLOCK); + tab_list[first_free_tab++] = tabval; +} + +/* Check that the list of tabstops TABS, with ENTRIES entries, + contains only nonzero, ascending values. */ + +void +validate_tabstops (tabs, entries) + int *tabs; + int entries; +{ + int prev_tab = 0; + int i; + + for (i = 0; i < entries; i++) + { + if (tabs[i] == 0) + error (1, 0, "tab size cannot be 0"); + if (tabs[i] <= prev_tab) + error (1, 0, "tab sizes must be ascending"); + prev_tab = tabs[i]; + } +} + +/* Change spaces to tabs, writing to stdout. + Read each file in `file_list', in order. */ + +void +unexpand () +{ + FILE *fp; /* Input stream. */ + int c; /* Each input character. */ + /* Index in `tab_list' of next tabstop: */ + int tab_index = 0; /* For calculating width of pending tabs. */ + int print_tab_index = 0; /* For printing as many tabs as possible. */ + int column = 0; /* Column on screen of next char. */ + int next_tab_column; /* Column the next tab stop is on. */ + int convert = 1; /* If nonzero, perform translations. */ + int pending = 0; /* Pending columns of blanks. */ + + fp = next_file ((FILE *) NULL); + for (;;) + { + c = getc (fp); + if (c == EOF) + { + fp = next_file (fp); + if (fp == NULL) + break; /* No more files. 
*/ + else + continue; + } + + if (c == ' ' && convert) + { + ++pending; + ++column; + } + else if (c == '\t' && convert) + { + if (tab_size == 0) + { + /* Do not let tab_index == first_free_tab; + stop when it is 1 less. */ + while (tab_index < first_free_tab - 1 + && column >= tab_list[tab_index]) + tab_index++; + next_tab_column = tab_list[tab_index]; + if (tab_index < first_free_tab - 1) + tab_index++; + if (column >= next_tab_column) + { + convert = 0; /* Ran out of tab stops. */ + goto flush_pend; + } + } + else + { + next_tab_column = column + tab_size - column % tab_size; + } + pending += next_tab_column - column; + column = next_tab_column; + } + else + { + flush_pend: + /* Flush pending spaces. Print as many tabs as possible, + then print the rest as spaces. */ + if (pending == 1) + { + putchar (' '); + pending = 0; + } + column -= pending; + while (pending != 0) + { + if (tab_size == 0) + { + /* Do not let tab_index == first_free_tab; + stop when it is 1 less. */ + while (tab_index < first_free_tab - 1 + && column >= tab_list[tab_index]) + print_tab_index++; + next_tab_column = tab_list[print_tab_index]; + if (print_tab_index < first_free_tab - 1) + print_tab_index++; + } + else + { + next_tab_column = column + tab_size - column % tab_size; + } + if (next_tab_column - column <= pending) + { + putchar ('\t'); + pending -= next_tab_column - column; + column = next_tab_column; + } + else + { + --print_tab_index; + column += pending; + while (pending != 0) + { + putchar (' '); + pending--; + } + } + } + + if (convert) + { + if (c == '\b') + { + if (column > 0) + --column; + } + else + { + ++column; + if (convert_entire_line == 0) + convert = 0; + } + } + + putchar (c); + + if (c == '\n') + { + tab_index = print_tab_index = 0; + column = pending = 0; + convert = 1; + } + } + } +} + +/* Close the old stream pointer FP if it is non-NULL, + and return a new one opened to read the next input file. + Open a filename of `-' as the standard input. 
+ Return NULL if there are no more input files. */ + +FILE * +next_file (fp) + FILE *fp; +{ + static char *prev_file; + char *file; + + if (fp) + { + if (ferror (fp)) + { + error (0, errno, "%s", prev_file); + exit_status = 1; + } + if (fp == stdin) + clearerr (fp); /* Also clear EOF. */ + else if (fclose (fp) == EOF) + { + error (0, errno, "%s", prev_file); + exit_status = 1; + } + } + + while ((file = *file_list++) != NULL) + { + if (file[0] == '-' && file[1] == '\0') + { + have_read_stdin = 1; + prev_file = file; + return stdin; + } + fp = fopen (file, "r"); + if (fp) + { + prev_file = file; + return fp; + } + error (0, errno, "%s", file); + exit_status = 1; + } + return NULL; +} + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-tab1[,tab2[,...]]] [-t tab1[,tab2[,...]]] [-a]\n\ + [--tabs=tab1[,tab2[,...]]] [--all] [file...]\n", + program_name); + exit (1); +} diff --git a/src/uniq.c b/src/uniq.c new file mode 100644 index 000000000..0968cbae4 --- /dev/null +++ b/src/uniq.c @@ -0,0 +1,321 @@ +/* uniq -- remove duplicate lines from a sorted file + Copyright (C) 1986, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Richard Stallman and David MacKenzie. 
*/ + +#define _GNU_SOURCE +#include <ctype.h> +#ifndef isblank +#define isblank(c) ((c) == ' ' || (c) == '\t') +#endif +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" +#include "linebuffer.h" + +#define min(x, y) ((x) < (y) ? (x) : (y)) + +char *find_field (); +int different (); +void check_file (); +void error (); +void usage (); +void writeline (); + +/* Number of fields to skip on each line when doing comparisons. */ +int skip_fields; + +/* Number of chars to skip after skipping any fields. */ +int skip_chars; + +/* Number of chars to compare; if 0, compare the whole lines. */ +int check_chars; + +enum countmode +{ + count_occurrences, /* -c Print count before output lines. */ + count_none /* Default. Do not print counts. */ +}; + +/* Whether and how to precede the output lines with a count of the number of + times they occurred in the input. */ +enum countmode countmode; + +enum output_mode +{ + output_repeated, /* -d Only lines that are repeated. */ + output_unique, /* -u Only lines that are not repeated. */ + output_all /* Default. Print first copy of each line. */ +}; + +/* Which lines to output. */ +enum output_mode mode; + +/* The name this program was run with. 
*/ +char *program_name; + +struct option longopts[] = +{ + {"count", 0, NULL, 'c'}, + {"repeated", 0, NULL, 'd'}, + {"unique", 0, NULL, 'u'}, + {"skip-fields", 1, NULL, 'f'}, + {"skip-chars", 1, NULL, 's'}, + {"check-chars", 1, NULL, 'w'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char *argv[]; +{ + int optc; + char *infile = "-", *outfile = "-"; + + program_name = argv[0]; + skip_chars = 0; + skip_fields = 0; + check_chars = 0; + mode = output_all; + countmode = count_none; + + while ((optc = getopt_long (argc, argv, "0123456789cdf:s:uw:", longopts, + (int *) 0)) != EOF) + { + switch (optc) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + skip_fields = skip_fields * 10 + optc - '0'; + break; + + case 'c': + countmode = count_occurrences; + break; + + case 'd': + mode = output_repeated; + break; + + case 'f': /* Like '-#'. */ + skip_fields = atoi (optarg); + break; + + case 's': /* Like '+#'. */ + skip_chars = atoi (optarg); + break; + + case 'u': + mode = output_unique; + break; + + case 'w': + check_chars = atoi (optarg); + break; + + default: + usage (); + } + } + + while (optind < argc && argv[optind][0] == '+') + skip_chars = atoi (argv[optind++]); + + if (optind < argc) + infile = argv[optind++]; + + if (optind < argc) + outfile = argv[optind++]; + + if (optind < argc) + usage (); /* Extra arguments. */ + + check_file (infile, outfile); + + exit (0); +} + +/* Process input file INFILE with output to OUTFILE. + If either is "-", use the standard I/O stream for it instead. 
*/ + +void +check_file (infile, outfile) + char *infile, *outfile; +{ + FILE *istream; + FILE *ostream; + struct linebuffer lb1, lb2; + struct linebuffer *thisline, *prevline, *exch; + char *prevfield, *thisfield; + int prevlen, thislen; + int match_count = 0; + + if (!strcmp (infile, "-")) + istream = stdin; + else + istream = fopen (infile, "r"); + if (istream == NULL) + error (1, errno, "%s", infile); + + if (!strcmp (outfile, "-")) + ostream = stdout; + else + ostream = fopen (outfile, "w"); + if (ostream == NULL) + error (1, errno, "%s", outfile); + + thisline = &lb1; + prevline = &lb2; + + initbuffer (thisline); + initbuffer (prevline); + + if (readline (prevline, istream) == 0) + goto closefiles; + prevfield = find_field (prevline); + prevlen = prevline->length - (prevfield - prevline->buffer); + + while (!feof (istream)) + { + if (readline (thisline, istream) == 0) + break; + thisfield = find_field (thisline); + thislen = thisline->length - (thisfield - thisline->buffer); + if (!different (thisfield, prevfield, thislen, prevlen)) + match_count++; + else + { + writeline (prevline, ostream, match_count); + match_count = 0; + + exch = prevline; + prevline = thisline; + thisline = exch; + prevfield = thisfield; + prevlen = thislen; + } + } + + writeline (prevline, ostream, match_count); + + closefiles: + if (ferror (istream) || fclose (istream) == EOF) + error (1, errno, "error reading %s", infile); + + if (ferror (ostream) || fclose (ostream) == EOF) + error (1, errno, "error writing %s", outfile); + + free (lb1.buffer); + free (lb2.buffer); +} + +/* Given a linebuffer LINE, + return a pointer to the beginning of the line's field to be compared. 
*/ + +char * +find_field (line) + struct linebuffer *line; +{ + register int count; + register char *lp = line->buffer; + register int size = line->length; + register int i = 0; + + for (count = 0; count < skip_fields && i < size; count++) + { + while (i < size && isblank (lp[i])) + i++; + while (i < size && !isblank (lp[i])) + i++; + } + + for (count = 0; count < skip_chars && i < size; count++) + i++; + + return lp + i; +} + +/* Return zero if two strings OLD and NEW match, nonzero if not. + OLD and NEW point not to the beginnings of the lines + but rather to the beginnings of the fields to compare. + OLDLEN and NEWLEN are their lengths. */ + +int +different (old, new, oldlen, newlen) + char *old; + char *new; + int oldlen; + int newlen; +{ + register int order; + + if (check_chars) + { + if (oldlen > check_chars) + oldlen = check_chars; + if (newlen > check_chars) + newlen = check_chars; + } + order = memcmp (old, new, min (oldlen, newlen)); + if (order == 0) + return oldlen - newlen; + return order; +} + +/* Output the line in linebuffer LINE to stream STREAM + provided that the switches say it should be output. + If requested, print the number of times it occurred, as well; + LINECOUNT + 1 is the number of times that the line occurred. 
*/ + +void +writeline (line, stream, linecount) + struct linebuffer *line; + FILE *stream; + int linecount; +{ + if ((mode == output_unique && linecount != 0) + || (mode == output_repeated && linecount == 0)) + return; + + if (countmode == count_occurrences) + fprintf (stream, "%7d\t", linecount + 1); + + fwrite (line->buffer, sizeof (char), line->length, stream); + putc ('\n', stream); +} + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s [-cdu] [-f skip-fields] [-s skip-chars] [-w check-chars]\n\ + [-#skip-fields] [+#skip-chars] [--count] [--repeated] [--unique]\n\ + [--skip-fields=skip-fields] [--skip-chars=skip-chars]\n\ + [--check-chars=check-chars] [infile] [outfile]\n", + program_name); + exit (1); +} diff --git a/src/wc.c b/src/wc.c new file mode 100644 index 000000000..72d6ea665 --- /dev/null +++ b/src/wc.c @@ -0,0 +1,231 @@ +/* wc - print the number of bytes, words, and lines in files + Copyright (C) 1985, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Paul Rubin, phr@ocf.berkeley.edu + and David MacKenzie, djm@gnu.ai.mit.edu. */ + +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" + +/* Size of atomic reads. 
*/ +#define BUFFER_SIZE (16 * 1024) + +void error (); +void wc (); +void wc_file (); +void write_counts (); + +/* Cumulative number of lines, words, and chars in all files so far. */ +unsigned long total_lines, total_words, total_chars; + +/* Which counts to print. */ +int print_lines, print_words, print_chars; + +/* Nonzero if we have ever read the standard input. */ +int have_read_stdin; + +/* The name this program was run with. */ +char *program_name; + +/* The error code to return to the system. */ +int exit_status; + +struct option longopts[] = +{ + {"bytes", 0, NULL, 'c'}, + {"chars", 0, NULL, 'c'}, + {"lines", 0, NULL, 'l'}, + {"words", 0, NULL, 'w'}, + {NULL, 0, NULL, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + int optc; + int nfiles; + + program_name = argv[0]; + exit_status = 0; + print_lines = print_words = print_chars = 0; + total_lines = total_words = total_chars = 0; + + while ((optc = getopt_long (argc, argv, "clw", longopts, (int *) 0)) != EOF) + switch (optc) + { + case 'c': + print_chars = 1; + break; + + case 'l': + print_lines = 1; + break; + + case 'w': + print_words = 1; + break; + + default: + fprintf (stderr, "\ +Usage: %s [-clw] [--bytes] [--chars] [--lines] [--words] [file...]\n", argv[0]); + exit (1); + } + + if (print_lines + print_words + print_chars == 0) + print_lines = print_words = print_chars = 1; + + nfiles = argc - optind; + + if (nfiles == 0) + { + have_read_stdin = 1; + wc (0, ""); + } + else + { + for (; optind < argc; ++optind) + wc_file (argv[optind]); + + if (nfiles > 1) + write_counts (total_lines, total_words, total_chars, "total"); + } + + if (have_read_stdin && close (0)) + error (1, errno, "-"); + + exit (exit_status); +} + +void +wc_file (file) + char *file; +{ + if (!strcmp (file, "-")) + { + have_read_stdin = 1; + wc (0, file); + } + else + { + int fd = open (file, O_RDONLY); + if (fd == -1) + { + error (0, errno, "%s", file); + exit_status = 1; + return; + } + wc (fd, file); + if (close (fd)) 
+ { + error (0, errno, "%s", file); + exit_status = 1; + } + } +} + +void +wc (fd, file) + int fd; + char *file; +{ + char buf[BUFFER_SIZE]; + register int bytes_read; + register int in_word = 0; + register unsigned long lines, words, chars; + struct stat stats; + + lines = words = chars = 0; + + if (print_chars && !print_words && !print_lines + && fstat (fd, &stats) == 0 && S_ISREG (stats.st_mode)) + { + chars = stats.st_size; + } + else + { + while ((bytes_read = read (fd, buf, BUFFER_SIZE)) > 0) + { + register char *p = buf; + + chars += bytes_read; + do + { + switch (*p++) + { + case '\n': + lines++; + /* Fall through. */ + case '\r': + case '\f': + case '\t': + case '\v': + case ' ': + if (in_word) + { + in_word = 0; + words++; + } + break; + default: + in_word = 1; + break; + } + } + while (--bytes_read); + } + if (bytes_read < 0) + { + error (0, errno, "%s", file); + exit_status = 1; + } + if (in_word) + words++; + } + + write_counts (lines, words, chars, file); + total_lines += lines; + total_words += words; + total_chars += chars; +} + +void +write_counts (lc, wc, cc, file) + unsigned long lc, wc, cc; + char *file; +{ + if (print_lines) + printf ("%7lu", lc); + if (print_words) + { + if (print_lines) + putchar (' '); + printf ("%7lu", wc); + } + if (print_chars) + { + if (print_lines || print_words) + putchar (' '); + printf ("%7lu", cc); + } + if (*file) + printf (" %s", file); + putchar ('\n'); +} |